1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
19#include "ARMSubtarget.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/Module.h"
53#include "llvm/MC/MCAsmInfo.h"
54#include "llvm/MC/MCInstrDesc.h"
60#include "llvm/Support/Debug.h"
65#include <algorithm>
66#include <cassert>
67#include <cstdint>
68#include <iterator>
69#include <new>
70#include <utility>
71#include <vector>
72
73using namespace llvm;
74
75#define DEBUG_TYPE "arm-instrinfo"
76
77#define GET_INSTRINFO_CTOR_DTOR
78#include "ARMGenInstrInfo.inc"
79
80static cl::opt<bool>
81EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
82 cl::desc("Enable ARM 2-addr to 3-addr conv"));
83
84/// ARM_MLxEntry - Record information about MLA / MLS instructions.
85struct ARM_MLxEntry {
86 uint16_t MLxOpc; // MLA / MLS opcode
87 uint16_t MulOpc; // Expanded multiplication opcode
88 uint16_t AddSubOpc; // Expanded add / sub opcode
89 bool NegAcc; // True if the acc is negated before the add / sub.
90 bool HasLane; // True if instruction has an extra "lane" operand.
91};
92
93static const ARM_MLxEntry ARM_MLxTable[] = {
94 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
95 // fp scalar ops
96 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
97 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
98 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
99 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
100 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
101 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
102 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
103 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
104
105 // fp SIMD ops
106 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
107 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
108 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
109 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
110 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
111 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
112 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
113 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
114};
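// For example, the first row says that VMLAS (Sd = Sd + Sn * Sm) can be
// expanded into a VMULS followed by a VADDS; the NegAcc rows (VNML*) negate
// the accumulator before the final add / sub.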
115
116ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
117 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
118 Subtarget(STI) {
119 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
120 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
121 llvm_unreachable("Duplicated entries?");
122 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
123 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
124 }
125}
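// MLxEntryMap maps a fused MLA / MLS opcode back to its row in ARM_MLxTable,
// and MLxHazardOpcodes records the expanded opcodes; both are used by the
// FP MLx hazard logic elsewhere in this file.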
126
127// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
128// currently defaults to no prepass hazard recognizer.
129ScheduleHazardRecognizer *
130ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
131 const ScheduleDAG *DAG) const {
132 if (usePreRAHazardRecognizer()) {
133 const InstrItineraryData *II =
134 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
135 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
136 }
137 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
138}
139
140// Called during:
141// - pre-RA scheduling
142// - post-RA scheduling when FeatureUseMISched is set
143ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
144 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
145 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
146
147 // We would like to restrict this hazard recognizer to only
148 // post-RA scheduling; we can tell that we're post-RA because we don't
149 // track VRegLiveness.
150 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
151 // banks banked on bit 2. Assume that TCMs are in use.
152 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
153 MHR->AddHazardRecognizer(
154 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
155
156 // Not inserting ARMHazardRecognizerFPMLx because that would change
157 // legacy behavior
158
159 auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
160 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
161 return MHR;
162}
163
164// Called during post-RA scheduling when FeatureUseMISched is not set
165ScheduleHazardRecognizer *ARMBaseInstrInfo::
166CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
167 const ScheduleDAG *DAG) const {
168 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
169
170 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
171 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
172
173 auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
174 if (BHR)
175 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
176 return MHR;
177}
178
181 LiveIntervals *LIS) const {
182 // FIXME: Thumb2 support.
183
184 if (!EnableARM3Addr)
185 return nullptr;
186
187 MachineFunction &MF = *MI.getParent()->getParent();
188 uint64_t TSFlags = MI.getDesc().TSFlags;
189 bool isPre = false;
190 switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
191 default: return nullptr;
192 case ARMII::IndexModePre:
193 isPre = true;
194 break;
195 case ARMII::IndexModePost:
196 break;
197 }
198
199 // Try splitting an indexed load/store to an un-indexed one plus an add/sub
200 // operation.
201 unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
202 if (MemOpc == 0)
203 return nullptr;
204
205 MachineInstr *UpdateMI = nullptr;
206 MachineInstr *MemMI = nullptr;
207 unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
208 const MCInstrDesc &MCID = MI.getDesc();
209 unsigned NumOps = MCID.getNumOperands();
210 bool isLoad = !MI.mayStore();
211 const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
212 const MachineOperand &Base = MI.getOperand(2);
213 const MachineOperand &Offset = MI.getOperand(NumOps - 3);
214 Register WBReg = WB.getReg();
215 Register BaseReg = Base.getReg();
216 Register OffReg = Offset.getReg();
217 unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
218 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
219 switch (AddrMode) {
220 default: llvm_unreachable("Unknown indexed op!");
221 case ARMII::AddrMode2: {
222 bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
223 unsigned Amt = ARM_AM::getAM2Offset(OffImm);
224 if (OffReg == 0) {
225 if (ARM_AM::getSOImmVal(Amt) == -1)
226 // Can't encode it in a so_imm operand. This transformation will
227 // add more than 1 instruction. Abandon!
228 return nullptr;
229 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
230 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
231 .addReg(BaseReg)
232 .addImm(Amt)
233 .add(predOps(Pred))
234 .add(condCodeOp());
235 } else if (Amt != 0) {
236 ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
237 unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
238 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
239 get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
240 .addReg(BaseReg)
241 .addReg(OffReg)
242 .addReg(0)
243 .addImm(SOOpc)
244 .add(predOps(Pred))
245 .add(condCodeOp());
246 } else
247 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
248 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
249 .addReg(BaseReg)
250 .addReg(OffReg)
251 .add(predOps(Pred))
252 .add(condCodeOp());
253 break;
254 }
255 case ARMII::AddrMode3 : {
256 bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
257 unsigned Amt = ARM_AM::getAM3Offset(OffImm);
258 if (OffReg == 0)
259 // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
260 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
261 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
262 .addReg(BaseReg)
263 .addImm(Amt)
264 .add(predOps(Pred))
265 .add(condCodeOp());
266 else
267 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
268 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
269 .addReg(BaseReg)
270 .addReg(OffReg)
271 .add(predOps(Pred))
272 .add(condCodeOp());
273 break;
274 }
275 }
276
277 std::vector<MachineInstr*> NewMIs;
278 if (isPre) {
279 if (isLoad)
280 MemMI =
281 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
282 .addReg(WBReg)
283 .addImm(0)
284 .addImm(Pred);
285 else
286 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
287 .addReg(MI.getOperand(1).getReg())
288 .addReg(WBReg)
289 .addReg(0)
290 .addImm(0)
291 .addImm(Pred);
292 NewMIs.push_back(MemMI);
293 NewMIs.push_back(UpdateMI);
294 } else {
295 if (isLoad)
296 MemMI =
297 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
298 .addReg(BaseReg)
299 .addImm(0)
300 .addImm(Pred);
301 else
302 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
303 .addReg(MI.getOperand(1).getReg())
304 .addReg(BaseReg)
305 .addReg(0)
306 .addImm(0)
307 .addImm(Pred);
308 if (WB.isDead())
309 UpdateMI->getOperand(0).setIsDead();
310 NewMIs.push_back(UpdateMI);
311 NewMIs.push_back(MemMI);
312 }
313
314 // Transfer LiveVariables states, kill / dead info.
315 if (LV) {
316 for (const MachineOperand &MO : MI.operands()) {
317 if (MO.isReg() && MO.getReg().isVirtual()) {
318 Register Reg = MO.getReg();
319
320 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
321 if (MO.isDef()) {
322 MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
323 if (MO.isDead())
324 LV->addVirtualRegisterDead(Reg, *NewMI);
325 }
326 if (MO.isUse() && MO.isKill()) {
327 for (unsigned j = 0; j < 2; ++j) {
328 // Look at the two new MI's in reverse order.
329 MachineInstr *NewMI = NewMIs[j];
330 if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr))
331 continue;
332 LV->addVirtualRegisterKilled(Reg, *NewMI);
333 if (VI.removeKill(MI))
334 VI.Kills.push_back(NewMI);
335 break;
336 }
337 }
338 }
339 }
340 }
341
342 MachineBasicBlock &MBB = *MI.getParent();
343 MBB.insert(MI, NewMIs[1]);
344 MBB.insert(MI, NewMIs[0]);
345 return NewMIs[0];
346}
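// As a rough example, a pre-indexed load such as "LDR_PRE rd, rn!, [rn, #4]"
// is split here into "ADDri rn, rn, #4" (UpdateMI) followed by an un-indexed
// "LDR rd, [rn, #0]" (MemMI); for the post-indexed form the memory access
// comes first and the add / sub second.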
347
348// Branch analysis.
349// Cond vector output format:
350// 0 elements indicates an unconditional branch
351// 2 elements indicates a conditional branch; the elements are
352// the condition to check and the CPSR.
353// 3 elements indicates a hardware loop end; the elements
354// are the opcode, the operand value to test, and a dummy
355// operand used to pad out to 3 operands.
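// For example, a conditional branch "Bcc %bb.1, ARMCC::EQ, $cpsr" produces
// Cond = { ARMCC::EQ, $cpsr }, while a t2LoopEnd terminator produces
// Cond = { t2LoopEnd opcode, loop counter register, 0 }.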
356bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
357 MachineBasicBlock *&TBB,
358 MachineBasicBlock *&FBB,
359 SmallVectorImpl<MachineOperand> &Cond,
360 bool AllowModify) const {
361 TBB = nullptr;
362 FBB = nullptr;
363
364 MachineBasicBlock::instr_iterator I = MBB.instr_end();
365 if (I == MBB.instr_begin())
366 return false; // Empty blocks are easy.
367 --I;
368
369 // Walk backwards from the end of the basic block until the branch is
370 // analyzed or we give up.
371 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
372 // Flag to be raised on unanalyzeable instructions. This is useful in cases
373 // where we want to clean up on the end of the basic block before we bail
374 // out.
375 bool CantAnalyze = false;
376
377 // Skip over DEBUG values, predicated nonterminators and speculation
378 // barrier terminators.
379 while (I->isDebugInstr() || !I->isTerminator() ||
380 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
381 I->getOpcode() == ARM::t2DoLoopStartTP){
382 if (I == MBB.instr_begin())
383 return false;
384 --I;
385 }
386
387 if (isIndirectBranchOpcode(I->getOpcode()) ||
388 isJumpTableBranchOpcode(I->getOpcode())) {
389 // Indirect branches and jump tables can't be analyzed, but we still want
390 // to clean up any instructions at the tail of the basic block.
391 CantAnalyze = true;
392 } else if (isUncondBranchOpcode(I->getOpcode())) {
393 TBB = I->getOperand(0).getMBB();
394 } else if (isCondBranchOpcode(I->getOpcode())) {
395 // Bail out if we encounter multiple conditional branches.
396 if (!Cond.empty())
397 return true;
398
399 assert(!FBB && "FBB should have been null.");
400 FBB = TBB;
401 TBB = I->getOperand(0).getMBB();
402 Cond.push_back(I->getOperand(1));
403 Cond.push_back(I->getOperand(2));
404 } else if (I->isReturn()) {
405 // Returns can't be analyzed, but we should run cleanup.
406 CantAnalyze = true;
407 } else if (I->getOpcode() == ARM::t2LoopEnd &&
408 MBB.getParent()
409 ->getSubtarget<ARMSubtarget>()
410 .enableMachinePipeliner()) {
411 if (!Cond.empty())
412 return true;
413 FBB = TBB;
414 TBB = I->getOperand(1).getMBB();
415 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
416 Cond.push_back(I->getOperand(0));
417 Cond.push_back(MachineOperand::CreateImm(0));
418 } else {
419 // We encountered some other unrecognized terminator. Bail out immediately.
420 return true;
421 }
422
423 // Cleanup code - to be run for unpredicated unconditional branches and
424 // returns.
425 if (!isPredicated(*I) &&
426 (isUncondBranchOpcode(I->getOpcode()) ||
427 isIndirectBranchOpcode(I->getOpcode()) ||
428 isJumpTableBranchOpcode(I->getOpcode()) ||
429 I->isReturn())) {
430 // Forget any previous condition branch information - it no longer applies.
431 Cond.clear();
432 FBB = nullptr;
433
434 // If we can modify the function, delete everything below this
435 // unconditional branch.
436 if (AllowModify) {
437 MachineBasicBlock::iterator DI = std::next(I);
438 while (DI != MBB.instr_end()) {
439 MachineInstr &InstToDelete = *DI;
440 ++DI;
441 // Speculation barriers must not be deleted.
442 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
443 continue;
444 InstToDelete.eraseFromParent();
445 }
446 }
447 }
448
449 if (CantAnalyze) {
450 // We may not be able to analyze the block, but we could still have
451 // an unconditional branch as the last instruction in the block, which
452 // just branches to layout successor. If this is the case, then just
453 // remove it if we're allowed to make modifications.
454 if (AllowModify && !isPredicated(MBB.back()) &&
455 isUncondBranchOpcode(MBB.back().getOpcode()) &&
456 TBB && MBB.isLayoutSuccessor(TBB))
457 removeBranch(MBB);
458 return true;
459 }
460
461 if (I == MBB.instr_begin())
462 return false;
463
464 --I;
465 }
466
467 // We made it past the terminators without bailing out - we must have
468 // analyzed this branch successfully.
469 return false;
470}
471
472unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
473 int *BytesRemoved) const {
474 assert(!BytesRemoved && "code size not handled");
475
476 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
475
477 if (I == MBB.end())
478 return 0;
479
480 if (!isUncondBranchOpcode(I->getOpcode()) &&
481 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
482 return 0;
483
484 // Remove the branch.
485 I->eraseFromParent();
486
487 I = MBB.end();
488
489 if (I == MBB.begin()) return 1;
490 --I;
491 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
492 return 1;
493
494 // Remove the branch.
495 I->eraseFromParent();
496 return 2;
497}
498
499unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
500 MachineBasicBlock *TBB,
501 MachineBasicBlock *FBB,
502 ArrayRef<MachineOperand> Cond,
503 const DebugLoc &DL,
504 int *BytesAdded) const {
505 assert(!BytesAdded && "code size not handled");
506 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
507 int BOpc = !AFI->isThumbFunction()
508 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
509 int BccOpc = !AFI->isThumbFunction()
510 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
511 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
512
513 // Shouldn't be a fall through.
514 assert(TBB && "insertBranch must not be told to insert a fallthrough");
515 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
516 "ARM branch conditions have two or three components!");
517
518 // For conditional branches, we use addOperand to preserve CPSR flags.
519
520 if (!FBB) {
521 if (Cond.empty()) { // Unconditional branch?
522 if (isThumb)
523 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
524 else
525 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
526 } else if (Cond.size() == 2) {
527 BuildMI(&MBB, DL, get(BccOpc))
528 .addMBB(TBB)
529 .addImm(Cond[0].getImm())
530 .add(Cond[1]);
531 } else
532 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
533 return 1;
534 }
535
536 // Two-way conditional branch.
537 if (Cond.size() == 2)
538 BuildMI(&MBB, DL, get(BccOpc))
539 .addMBB(TBB)
540 .addImm(Cond[0].getImm())
541 .add(Cond[1]);
542 else if (Cond.size() == 3)
543 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
544 if (isThumb)
545 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
546 else
547 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
548 return 2;
549}
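// The return value is the number of branch instructions inserted: 1 for a
// single unconditional or conditional branch, 2 for the two-way case above.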
550
551bool ARMBaseInstrInfo::
552reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
553 if (Cond.size() == 2) {
554 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
555 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
556 return false;
557 }
558 return true;
559}
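// For example, { ARMCC::EQ, $cpsr } becomes { ARMCC::NE, $cpsr }. Returning
// true means the condition could not be reversed, which is the case for the
// three-operand hardware-loop form.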
560
561bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
562 if (MI.isBundle()) {
563 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
564 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
565 while (++I != E && I->isInsideBundle()) {
566 int PIdx = I->findFirstPredOperandIdx();
567 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
568 return true;
569 }
570 return false;
571 }
572
573 int PIdx = MI.findFirstPredOperandIdx();
574 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
575}
576
577std::string ARMBaseInstrInfo::createMIROperandComment(
578 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
579 const TargetRegisterInfo *TRI) const {
580
581 // First, let's see if there is a generic comment for this operand
582 std::string GenericComment =
583 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
584 if (!GenericComment.empty())
585 return GenericComment;
586
587 // If not, check if we have an immediate operand.
588 if (!Op.isImm())
589 return std::string();
590
591 // And print its corresponding condition code if the immediate is a
592 // predicate.
593 int FirstPredOp = MI.findFirstPredOperandIdx();
594 if (FirstPredOp != (int) OpIdx)
595 return std::string();
596
597 std::string CC = "CC::";
598 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
599 return CC;
600}
601
602bool ARMBaseInstrInfo::PredicateInstruction(
603 MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
604 unsigned Opc = MI.getOpcode();
605 if (isUncondBranchOpcode(Opc)) {
606 MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
607 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
608 .addImm(Pred[0].getImm())
609 .addReg(Pred[1].getReg());
610 return true;
611 }
612
613 int PIdx = MI.findFirstPredOperandIdx();
614 if (PIdx != -1) {
615 MachineOperand &PMO = MI.getOperand(PIdx);
616 PMO.setImm(Pred[0].getImm());
617 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
618
619 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
620 // IT block. This affects how they are printed.
621 const MCInstrDesc &MCID = MI.getDesc();
622 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
623 assert(MCID.operands()[1].isOptionalDef() &&
624 "CPSR def isn't expected operand");
625 assert((MI.getOperand(1).isDead() ||
626 MI.getOperand(1).getReg() != ARM::CPSR) &&
627 "if conversion tried to stop defining used CPSR");
628 MI.getOperand(1).setReg(ARM::NoRegister);
629 }
630
631 return true;
632 }
633 return false;
634}
635
636bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
637 ArrayRef<MachineOperand> Pred2) const {
638 if (Pred1.size() > 2 || Pred2.size() > 2)
639 return false;
640
641 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
642 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
643 if (CC1 == CC2)
644 return true;
645
646 switch (CC1) {
647 default:
648 return false;
649 case ARMCC::AL:
650 return true;
651 case ARMCC::HS:
652 return CC2 == ARMCC::HI;
653 case ARMCC::LS:
654 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
655 case ARMCC::GE:
656 return CC2 == ARMCC::GT;
657 case ARMCC::LE:
658 return CC2 == ARMCC::LT;
659 }
660}
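// For example, SubsumesPredicate({HS, $cpsr}, {HI, $cpsr}) is true: any value
// that is unsigned-higher (HI) is also unsigned-higher-or-same (HS), so an
// HS-predicated instruction covers every case where the HI one would execute.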
661
662bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
663 std::vector<MachineOperand> &Pred,
664 bool SkipDead) const {
665 bool Found = false;
666 for (const MachineOperand &MO : MI.operands()) {
667 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
668 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
669 if (ClobbersCPSR || IsCPSR) {
670
671 // Filter out T1 instructions that have a dead CPSR,
672 // allowing IT blocks to be generated containing T1 instructions
673 const MCInstrDesc &MCID = MI.getDesc();
674 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
675 SkipDead)
676 continue;
677
678 Pred.push_back(MO);
679 Found = true;
680 }
681 }
682
683 return Found;
684}
685
686static bool isCPSRDefined(const MachineInstr &MI) {
687 for (const auto &MO : MI.operands())
688 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
689 return true;
690 return false;
691}
692
693static bool isEligibleForITBlock(const MachineInstr *MI) {
694 switch (MI->getOpcode()) {
695 default: return true;
696 case ARM::tADC: // ADC (register) T1
697 case ARM::tADDi3: // ADD (immediate) T1
698 case ARM::tADDi8: // ADD (immediate) T2
699 case ARM::tADDrr: // ADD (register) T1
700 case ARM::tAND: // AND (register) T1
701 case ARM::tASRri: // ASR (immediate) T1
702 case ARM::tASRrr: // ASR (register) T1
703 case ARM::tBIC: // BIC (register) T1
704 case ARM::tEOR: // EOR (register) T1
705 case ARM::tLSLri: // LSL (immediate) T1
706 case ARM::tLSLrr: // LSL (register) T1
707 case ARM::tLSRri: // LSR (immediate) T1
708 case ARM::tLSRrr: // LSR (register) T1
709 case ARM::tMUL: // MUL T1
710 case ARM::tMVN: // MVN (register) T1
711 case ARM::tORR: // ORR (register) T1
712 case ARM::tROR: // ROR (register) T1
713 case ARM::tRSB: // RSB (immediate) T1
714 case ARM::tSBC: // SBC (register) T1
715 case ARM::tSUBi3: // SUB (immediate) T1
716 case ARM::tSUBi8: // SUB (immediate) T2
717 case ARM::tSUBrr: // SUB (register) T1
718 return !isCPSRDefined(*MI);
719 }
720}
721
722/// isPredicable - Return true if the specified instruction can be predicated.
723/// By default, this returns true for every instruction with a
724/// PredicateOperand.
725bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
726 if (!MI.isPredicable())
727 return false;
728
729 if (MI.isBundle())
730 return false;
731
732 if (!isEligibleForITBlock(&MI))
733 return false;
734
735 const MachineFunction *MF = MI.getParent()->getParent();
736 const ARMFunctionInfo *AFI =
737 MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
738
739 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
740 // In their ARM encoding, they can't be encoded in a conditional form.
741 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
742 return false;
743
744 // Make indirect control flow changes unpredicable when SLS mitigation is
745 // enabled.
746 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
747 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
748 return false;
749 if (ST.hardenSlsBlr() && isIndirectCall(MI))
750 return false;
751
752 if (AFI->isThumb2Function()) {
753 if (getSubtarget().restrictIT())
754 return isV8EligibleForIT(&MI);
755 }
756
757 return true;
758}
759
760namespace llvm {
761
762template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
763 for (const MachineOperand &MO : MI->operands()) {
764 if (!MO.isReg() || MO.isUndef() || MO.isUse())
765 continue;
766 if (MO.getReg() != ARM::CPSR)
767 continue;
768 if (!MO.isDead())
769 return false;
770 }
771 // all definitions of CPSR are dead
772 return true;
773}
774
775} // end namespace llvm
776
777/// GetInstSize - Return the size of the specified MachineInstr.
778///
779unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
780 const MachineBasicBlock &MBB = *MI.getParent();
781 const MachineFunction *MF = MBB.getParent();
782 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
783
784 const MCInstrDesc &MCID = MI.getDesc();
785
786 switch (MI.getOpcode()) {
787 default:
788 // Return the size specified in .td file. If there's none, return 0, as we
789 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
790 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
791 // contrast to AArch64 instructions which have a default size of 4 bytes for
792 // example.
793 return MCID.getSize();
794 case TargetOpcode::BUNDLE:
795 return getInstBundleLength(MI);
796 case ARM::CONSTPOOL_ENTRY:
797 case ARM::JUMPTABLE_INSTS:
798 case ARM::JUMPTABLE_ADDRS:
799 case ARM::JUMPTABLE_TBB:
800 case ARM::JUMPTABLE_TBH:
801 // If this machine instr is a constant pool entry, its size is recorded as
802 // operand #2.
803 return MI.getOperand(2).getImm();
804 case ARM::SPACE:
805 return MI.getOperand(1).getImm();
806 case ARM::INLINEASM:
807 case ARM::INLINEASM_BR: {
808 // If this machine instr is an inline asm, measure it.
809 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
810 if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
811 Size = alignTo(Size, 4);
812 return Size;
813 }
814 }
815}
816
817unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
818 unsigned Size = 0;
819 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
820 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
821 while (++I != E && I->isInsideBundle()) {
822 assert(!I->isBundle() && "No nested bundle!");
823 Size += getInstSizeInBytes(*I);
824 }
825 return Size;
826}
827
828void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
829 MachineBasicBlock::iterator I,
830 unsigned DestReg, bool KillSrc,
831 const ARMSubtarget &Subtarget) const {
832 unsigned Opc = Subtarget.isThumb()
833 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
834 : ARM::MRS;
835
836 MachineInstrBuilder MIB =
837 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
838
839 // There is only 1 A/R class MRS instruction, and it always refers to
840 // APSR. However, there are lots of other possibilities on M-class cores.
841 if (Subtarget.isMClass())
842 MIB.addImm(0x800);
843
844 MIB.add(predOps(ARMCC::AL))
845 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
846}
847
848void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
849 MachineBasicBlock::iterator I,
850 unsigned SrcReg, bool KillSrc,
851 const ARMSubtarget &Subtarget) const {
852 unsigned Opc = Subtarget.isThumb()
853 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
854 : ARM::MSR;
855
856 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
857
858 if (Subtarget.isMClass())
859 MIB.addImm(0x800);
860 else
861 MIB.addImm(8);
862
863 MIB.addReg(SrcReg, getKillRegState(KillSrc))
864 .add(predOps(ARMCC::AL))
865 .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
866}
867
868void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
869 MIB.addImm(ARMVCC::None);
870 MIB.addReg(0);
871 MIB.addReg(0); // tp_reg
872}
873
874void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
875 Register DestReg) {
876 addUnpredicatedMveVpredNOp(MIB);
877 MIB.addReg(DestReg, RegState::Undef);
878}
879
880void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
881 MIB.addImm(Cond);
882 MIB.addReg(ARM::VPR, RegState::Implicit);
883 MIB.addReg(0); // tp_reg
884}
885
886void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
887 unsigned Cond, unsigned Inactive) {
888 addPredicatedMveVpredNOp(MIB, Cond);
889 MIB.addReg(Inactive);
890}
891
892void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
893 MachineBasicBlock::iterator I,
894 const DebugLoc &DL, MCRegister DestReg,
895 MCRegister SrcReg, bool KillSrc) const {
896 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
897 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
898
899 if (GPRDest && GPRSrc) {
900 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
901 .addReg(SrcReg, getKillRegState(KillSrc))
902 .add(predOps(ARMCC::AL))
903 .add(condCodeOp());
904 return;
905 }
906
907 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
908 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
909
910 unsigned Opc = 0;
911 if (SPRDest && SPRSrc)
912 Opc = ARM::VMOVS;
913 else if (GPRDest && SPRSrc)
914 Opc = ARM::VMOVRS;
915 else if (SPRDest && GPRSrc)
916 Opc = ARM::VMOVSR;
917 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
918 Opc = ARM::VMOVD;
919 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
920 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
921
922 if (Opc) {
923 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
924 MIB.addReg(SrcReg, getKillRegState(KillSrc));
925 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
926 MIB.addReg(SrcReg, getKillRegState(KillSrc));
927 if (Opc == ARM::MVE_VORR)
928 addUnpredicatedMveVpredROp(MIB, DestReg);
929 else if (Opc != ARM::MQPRCopy)
930 MIB.add(predOps(ARMCC::AL));
931 return;
932 }
933
934 // Handle register classes that require multiple instructions.
935 unsigned BeginIdx = 0;
936 unsigned SubRegs = 0;
937 int Spacing = 1;
938
939 // Use VORRq when possible.
940 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
941 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
942 BeginIdx = ARM::qsub_0;
943 SubRegs = 2;
944 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
945 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
946 BeginIdx = ARM::qsub_0;
947 SubRegs = 4;
948 // Fall back to VMOVD.
949 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
950 Opc = ARM::VMOVD;
951 BeginIdx = ARM::dsub_0;
952 SubRegs = 2;
953 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
954 Opc = ARM::VMOVD;
955 BeginIdx = ARM::dsub_0;
956 SubRegs = 3;
957 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
958 Opc = ARM::VMOVD;
959 BeginIdx = ARM::dsub_0;
960 SubRegs = 4;
961 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
962 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
963 BeginIdx = ARM::gsub_0;
964 SubRegs = 2;
965 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
966 Opc = ARM::VMOVD;
967 BeginIdx = ARM::dsub_0;
968 SubRegs = 2;
969 Spacing = 2;
970 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
971 Opc = ARM::VMOVD;
972 BeginIdx = ARM::dsub_0;
973 SubRegs = 3;
974 Spacing = 2;
975 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
976 Opc = ARM::VMOVD;
977 BeginIdx = ARM::dsub_0;
978 SubRegs = 4;
979 Spacing = 2;
980 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
981 !Subtarget.hasFP64()) {
982 Opc = ARM::VMOVS;
983 BeginIdx = ARM::ssub_0;
984 SubRegs = 2;
985 } else if (SrcReg == ARM::CPSR) {
986 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
987 return;
988 } else if (DestReg == ARM::CPSR) {
989 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
990 return;
991 } else if (DestReg == ARM::VPR) {
992 assert(ARM::GPRRegClass.contains(SrcReg));
993 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
994 .addReg(SrcReg, getKillRegState(KillSrc))
995 .add(predOps(ARMCC::AL));
996 return;
997 } else if (SrcReg == ARM::VPR) {
998 assert(ARM::GPRRegClass.contains(DestReg));
999 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
1000 .addReg(SrcReg, getKillRegState(KillSrc))
1001 .add(predOps(ARMCC::AL));
1002 return;
1003 } else if (DestReg == ARM::FPSCR_NZCV) {
1004 assert(ARM::GPRRegClass.contains(SrcReg));
1005 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1006 .addReg(SrcReg, getKillRegState(KillSrc))
1007 .add(predOps(ARMCC::AL));
1008 return;
1009 } else if (SrcReg == ARM::FPSCR_NZCV) {
1010 assert(ARM::GPRRegClass.contains(DestReg));
1011 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1012 .addReg(SrcReg, getKillRegState(KillSrc))
1013 .add(predOps(ARMCC::AL));
1014 return;
1015 }
1016
1017 assert(Opc && "Impossible reg-to-reg copy");
1018
1019 const TargetRegisterInfo *TRI = &getRegisterInfo();
1020 MachineInstrBuilder Mov;
1021
1022 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
1023 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1024 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1025 Spacing = -Spacing;
1026 }
1027#ifndef NDEBUG
1028 SmallSet<unsigned, 4> DstRegs;
1029#endif
1030 for (unsigned i = 0; i != SubRegs; ++i) {
1031 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1032 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1033 assert(Dst && Src && "Bad sub-register");
1034#ifndef NDEBUG
1035 assert(!DstRegs.count(Src) && "destructive vector copy");
1036 DstRegs.insert(Dst);
1037#endif
1038 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1039 // VORR (NEON or MVE) takes two source operands.
1040 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
1041 Mov.addReg(Src);
1042 }
1043 // MVE VORR takes predicate operands in place of an ordinary condition.
1044 if (Opc == ARM::MVE_VORR)
1045 addUnpredicatedMveVpredROp(Mov, Dst);
1046 else
1047 Mov = Mov.add(predOps(ARMCC::AL));
1048 // MOVr can set CC.
1049 if (Opc == ARM::MOVr)
1050 Mov = Mov.add(condCodeOp());
1051 }
1052 // Add implicit super-register defs and kills to the last instruction.
1053 Mov->addRegisterDefined(DestReg, TRI);
1054 if (KillSrc)
1055 Mov->addRegisterKilled(SrcReg, TRI);
1056}
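// For example, copying one GPRPair into another with this path emits two MOVr
// (or tMOVr) instructions, one per gsub_0 / gsub_1 half, with the implicit
// super-register def and kill attached to the final move.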
1057
1058std::optional<DestSourcePair>
1059ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1060 // VMOVRRD is also a copy instruction but it requires
1061 // special handling. It is a more complex form of copy, so we do not
1062 // consider it here. For recognition of such instructions, the
1063 // isExtractSubregLike MI interface function could be used.
1065 // VORRq is considered as a move only if two inputs are
1066 // the same register.
1067 if (!MI.isMoveReg() ||
1068 (MI.getOpcode() == ARM::VORRq &&
1069 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1070 return std::nullopt;
1071 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1072}
1073
1074std::optional<ParamLoadedValue>
1075ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
1076 Register Reg) const {
1077 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
1078 Register DstReg = DstSrcPair->Destination->getReg();
1079
1080 // TODO: We don't handle cases where the forwarding reg is narrower/wider
1081 // than the copy registers. Consider for example:
1082 //
1083 // s16 = VMOVS s0
1084 // s17 = VMOVS s1
1085 // call @callee(d0)
1086 //
1087 // We'd like to describe the call site value of d0 as d8, but this requires
1088 // gathering and merging the descriptions for the two VMOVS instructions.
1089 //
1090 // We also don't handle the reverse situation, where the forwarding reg is
1091 // narrower than the copy destination:
1092 //
1093 // d8 = VMOVD d0
1094 // call @callee(s1)
1095 //
1096 // We need to produce a fragment description (the call site value of s1 is
1097 // /not/ just d8).
1098 if (DstReg != Reg)
1099 return std::nullopt;
1100 }
1101 return TargetInstrInfo::describeLoadedValue(MI, Reg);
1102}
1103
1104const MachineInstrBuilder &
1105ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
1106 unsigned SubIdx, unsigned State,
1107 const TargetRegisterInfo *TRI) const {
1108 if (!SubIdx)
1109 return MIB.addReg(Reg, State);
1110
1111 if (Register::isPhysicalRegister(Reg))
1112 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1113 return MIB.addReg(Reg, State, SubIdx);
1114}
1115
1116void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
1117 MachineBasicBlock::iterator I,
1118 Register SrcReg, bool isKill, int FI,
1119 const TargetRegisterClass *RC,
1120 const TargetRegisterInfo *TRI,
1121 Register VReg) const {
1122 MachineFunction &MF = *MBB.getParent();
1123 MachineFrameInfo &MFI = MF.getFrameInfo();
1124 Align Alignment = MFI.getObjectAlign(FI);
1125
1126 MachineMemOperand *MMO = MF.getMachineMemOperand(
1127 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
1128 MFI.getObjectSize(FI), Alignment);
1129
1130 switch (TRI->getSpillSize(*RC)) {
1131 case 2:
1132 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1133 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
1134 .addReg(SrcReg, getKillRegState(isKill))
1135 .addFrameIndex(FI)
1136 .addImm(0)
1137 .addMemOperand(MMO)
1139 } else
1140 llvm_unreachable("Unknown reg class!");
1141 break;
1142 case 4:
1143 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1144 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1145 .addReg(SrcReg, getKillRegState(isKill))
1146 .addFrameIndex(FI)
1147 .addImm(0)
1148 .addMemOperand(MMO)
1150 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1151 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1152 .addReg(SrcReg, getKillRegState(isKill))
1153 .addFrameIndex(FI)
1154 .addImm(0)
1155 .addMemOperand(MMO)
1157 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1158 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
1159 .addReg(SrcReg, getKillRegState(isKill))
1160 .addFrameIndex(FI)
1161 .addImm(0)
1162 .addMemOperand(MMO)
1164 } else
1165 llvm_unreachable("Unknown reg class!");
1166 break;
1167 case 8:
1168 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1169 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1170 .addReg(SrcReg, getKillRegState(isKill))
1171 .addFrameIndex(FI)
1172 .addImm(0)
1173 .addMemOperand(MMO)
1175 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1176 if (Subtarget.hasV5TEOps()) {
1177 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1178 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1179 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1180 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1182 } else {
1183 // Fallback to STM instruction, which has existed since the dawn of
1184 // time.
1185 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1186 .addFrameIndex(FI)
1187 .addMemOperand(MMO)
1189 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1190 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1191 }
1192 } else
1193 llvm_unreachable("Unknown reg class!");
1194 break;
1195 case 16:
1196 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1197 // Use aligned spills if the stack can be realigned.
1198 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1199 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1200 .addFrameIndex(FI)
1201 .addImm(16)
1202 .addReg(SrcReg, getKillRegState(isKill))
1203 .addMemOperand(MMO)
1205 } else {
1206 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1207 .addReg(SrcReg, getKillRegState(isKill))
1208 .addFrameIndex(FI)
1209 .addMemOperand(MMO)
1211 }
1212 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1213 Subtarget.hasMVEIntegerOps()) {
1214 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1215 MIB.addReg(SrcReg, getKillRegState(isKill))
1216 .addFrameIndex(FI)
1217 .addImm(0)
1218 .addMemOperand(MMO);
1220 } else
1221 llvm_unreachable("Unknown reg class!");
1222 break;
1223 case 24:
1224 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1225 // Use aligned spills if the stack can be realigned.
1226 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1227 Subtarget.hasNEON()) {
1228 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1229 .addFrameIndex(FI)
1230 .addImm(16)
1231 .addReg(SrcReg, getKillRegState(isKill))
1232 .addMemOperand(MMO)
1234 } else {
1236 get(ARM::VSTMDIA))
1237 .addFrameIndex(FI)
1239 .addMemOperand(MMO);
1240 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1241 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1242 AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1243 }
1244 } else
1245 llvm_unreachable("Unknown reg class!");
1246 break;
1247 case 32:
1248 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1249 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1250 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1251 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1252 Subtarget.hasNEON()) {
1253 // FIXME: It's possible to only store part of the QQ register if the
1254 // spilled def has a sub-register index.
1255 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1256 .addFrameIndex(FI)
1257 .addImm(16)
1258 .addReg(SrcReg, getKillRegState(isKill))
1259 .addMemOperand(MMO)
1261 } else if (Subtarget.hasMVEIntegerOps()) {
1262 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1263 .addReg(SrcReg, getKillRegState(isKill))
1264 .addFrameIndex(FI)
1265 .addMemOperand(MMO);
1266 } else {
1268 get(ARM::VSTMDIA))
1269 .addFrameIndex(FI)
1271 .addMemOperand(MMO);
1272 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1273 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1274 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1275 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1276 }
1277 } else
1278 llvm_unreachable("Unknown reg class!");
1279 break;
1280 case 64:
1281 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1282 Subtarget.hasMVEIntegerOps()) {
1283 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1284 .addReg(SrcReg, getKillRegState(isKill))
1285 .addFrameIndex(FI)
1286 .addMemOperand(MMO);
1287 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1288 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1289 .addFrameIndex(FI)
1291 .addMemOperand(MMO);
1292 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1293 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1294 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1295 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1296 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1297 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1298 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1299 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1300 } else
1301 llvm_unreachable("Unknown reg class!");
1302 break;
1303 default:
1304 llvm_unreachable("Unknown reg class!");
1305 }
1306}
1307
1309 int &FrameIndex) const {
1310 switch (MI.getOpcode()) {
1311 default: break;
1312 case ARM::STRrs:
1313 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1314 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1315 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1316 MI.getOperand(3).getImm() == 0) {
1317 FrameIndex = MI.getOperand(1).getIndex();
1318 return MI.getOperand(0).getReg();
1319 }
1320 break;
1321 case ARM::STRi12:
1322 case ARM::t2STRi12:
1323 case ARM::tSTRspi:
1324 case ARM::VSTRD:
1325 case ARM::VSTRS:
1326 case ARM::VSTR_P0_off:
1327 case ARM::MVE_VSTRWU32:
1328 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1329 MI.getOperand(2).getImm() == 0) {
1330 FrameIndex = MI.getOperand(1).getIndex();
1331 return MI.getOperand(0).getReg();
1332 }
1333 break;
1334 case ARM::VST1q64:
1335 case ARM::VST1d64TPseudo:
1336 case ARM::VST1d64QPseudo:
1337 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1338 FrameIndex = MI.getOperand(0).getIndex();
1339 return MI.getOperand(2).getReg();
1340 }
1341 break;
1342 case ARM::VSTMQIA:
1343 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1344 FrameIndex = MI.getOperand(1).getIndex();
1345 return MI.getOperand(0).getReg();
1346 }
1347 break;
1348 case ARM::MQQPRStore:
1349 case ARM::MQQQQPRStore:
1350 if (MI.getOperand(1).isFI()) {
1351 FrameIndex = MI.getOperand(1).getIndex();
1352 return MI.getOperand(0).getReg();
1353 }
1354 break;
1355 }
1356
1357 return 0;
1358}
1359
1360bool ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1361 int &FrameIndex) const {
1362 SmallVector<const MachineMemOperand *, 1> Accesses;
1363 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1364 Accesses.size() == 1) {
1365 FrameIndex =
1366 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1367 ->getFrameIndex();
1368 return true;
1369 }
1370 return false;
1371}
1372
1373void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
1374 MachineBasicBlock::iterator I,
1375 Register DestReg, int FI,
1376 const TargetRegisterClass *RC,
1377 const TargetRegisterInfo *TRI,
1378 Register VReg) const {
1379 DebugLoc DL;
1380 if (I != MBB.end()) DL = I->getDebugLoc();
1381 MachineFunction &MF = *MBB.getParent();
1382 MachineFrameInfo &MFI = MF.getFrameInfo();
1383 const Align Alignment = MFI.getObjectAlign(FI);
1384 MachineMemOperand *MMO = MF.getMachineMemOperand(
1385 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1386 MFI.getObjectSize(FI), Alignment);
1387
1388 switch (TRI->getSpillSize(*RC)) {
1389 case 2:
1390 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1391 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1392 .addFrameIndex(FI)
1393 .addImm(0)
1394 .addMemOperand(MMO)
1396 } else
1397 llvm_unreachable("Unknown reg class!");
1398 break;
1399 case 4:
1400 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1401 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1402 .addFrameIndex(FI)
1403 .addImm(0)
1404 .addMemOperand(MMO)
1406 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1407 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1408 .addFrameIndex(FI)
1409 .addImm(0)
1410 .addMemOperand(MMO)
1412 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1413 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1414 .addFrameIndex(FI)
1415 .addImm(0)
1416 .addMemOperand(MMO)
1418 } else
1419 llvm_unreachable("Unknown reg class!");
1420 break;
1421 case 8:
1422 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1423 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1424 .addFrameIndex(FI)
1425 .addImm(0)
1426 .addMemOperand(MMO)
1428 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1430
1431 if (Subtarget.hasV5TEOps()) {
1432 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1433 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1434 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1435 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1437 } else {
1438 // Fallback to LDM instruction, which has existed since the dawn of
1439 // time.
1440 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1441 .addFrameIndex(FI)
1442 .addMemOperand(MMO)
1444 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1445 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1446 }
1447
1448 if (DestReg.isPhysical())
1449 MIB.addReg(DestReg, RegState::ImplicitDefine);
1450 } else
1451 llvm_unreachable("Unknown reg class!");
1452 break;
1453 case 16:
1454 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1455 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1456 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1457 .addFrameIndex(FI)
1458 .addImm(16)
1459 .addMemOperand(MMO)
1461 } else {
1462 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1463 .addFrameIndex(FI)
1464 .addMemOperand(MMO)
1466 }
1467 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1468 Subtarget.hasMVEIntegerOps()) {
1469 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1470 MIB.addFrameIndex(FI)
1471 .addImm(0)
1472 .addMemOperand(MMO);
1474 } else
1475 llvm_unreachable("Unknown reg class!");
1476 break;
1477 case 24:
1478 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1479 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1480 Subtarget.hasNEON()) {
1481 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1482 .addFrameIndex(FI)
1483 .addImm(16)
1484 .addMemOperand(MMO)
1486 } else {
1487 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1488 .addFrameIndex(FI)
1489 .addMemOperand(MMO)
1491 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1492 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1493 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1494 if (DestReg.isPhysical())
1495 MIB.addReg(DestReg, RegState::ImplicitDefine);
1496 }
1497 } else
1498 llvm_unreachable("Unknown reg class!");
1499 break;
1500 case 32:
1501 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1502 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1503 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1504 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1505 Subtarget.hasNEON()) {
1506 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1507 .addFrameIndex(FI)
1508 .addImm(16)
1509 .addMemOperand(MMO)
1511 } else if (Subtarget.hasMVEIntegerOps()) {
1512 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1513 .addFrameIndex(FI)
1514 .addMemOperand(MMO);
1515 } else {
1516 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1517 .addFrameIndex(FI)
1519 .addMemOperand(MMO);
1520 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1521 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1522 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1523 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1524 if (DestReg.isPhysical())
1525 MIB.addReg(DestReg, RegState::ImplicitDefine);
1526 }
1527 } else
1528 llvm_unreachable("Unknown reg class!");
1529 break;
1530 case 64:
1531 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1532 Subtarget.hasMVEIntegerOps()) {
1533 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1534 .addFrameIndex(FI)
1535 .addMemOperand(MMO);
1536 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1537 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1538 .addFrameIndex(FI)
1540 .addMemOperand(MMO);
1541 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1542 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1543 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1544 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1545 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1546 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1547 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1548 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1549 if (DestReg.isPhysical())
1550 MIB.addReg(DestReg, RegState::ImplicitDefine);
1551 } else
1552 llvm_unreachable("Unknown reg class!");
1553 break;
1554 default:
1555 llvm_unreachable("Unknown regclass!");
1556 }
1557}
1558
1560 int &FrameIndex) const {
1561 switch (MI.getOpcode()) {
1562 default: break;
1563 case ARM::LDRrs:
1564 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1565 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1566 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1567 MI.getOperand(3).getImm() == 0) {
1568 FrameIndex = MI.getOperand(1).getIndex();
1569 return MI.getOperand(0).getReg();
1570 }
1571 break;
1572 case ARM::LDRi12:
1573 case ARM::t2LDRi12:
1574 case ARM::tLDRspi:
1575 case ARM::VLDRD:
1576 case ARM::VLDRS:
1577 case ARM::VLDR_P0_off:
1578 case ARM::MVE_VLDRWU32:
1579 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1580 MI.getOperand(2).getImm() == 0) {
1581 FrameIndex = MI.getOperand(1).getIndex();
1582 return MI.getOperand(0).getReg();
1583 }
1584 break;
1585 case ARM::VLD1q64:
1586 case ARM::VLD1d8TPseudo:
1587 case ARM::VLD1d16TPseudo:
1588 case ARM::VLD1d32TPseudo:
1589 case ARM::VLD1d64TPseudo:
1590 case ARM::VLD1d8QPseudo:
1591 case ARM::VLD1d16QPseudo:
1592 case ARM::VLD1d32QPseudo:
1593 case ARM::VLD1d64QPseudo:
1594 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1595 FrameIndex = MI.getOperand(1).getIndex();
1596 return MI.getOperand(0).getReg();
1597 }
1598 break;
1599 case ARM::VLDMQIA:
1600 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1601 FrameIndex = MI.getOperand(1).getIndex();
1602 return MI.getOperand(0).getReg();
1603 }
1604 break;
1605 case ARM::MQQPRLoad:
1606 case ARM::MQQQQPRLoad:
1607 if (MI.getOperand(1).isFI()) {
1608 FrameIndex = MI.getOperand(1).getIndex();
1609 return MI.getOperand(0).getReg();
1610 }
1611 break;
1612 }
1613
1614 return 0;
1615}
1616
1617bool ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1618 int &FrameIndex) const {
1619 SmallVector<const MachineMemOperand *, 1> Accesses;
1620 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1621 Accesses.size() == 1) {
1622 FrameIndex =
1623 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1624 ->getFrameIndex();
1625 return true;
1626 }
1627 return false;
1628}
1629
1630/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
1631/// depending on whether the result is used.
1632void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1633 bool isThumb1 = Subtarget.isThumb1Only();
1634 bool isThumb2 = Subtarget.isThumb2();
1635 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1636
1637 DebugLoc dl = MI->getDebugLoc();
1638 MachineBasicBlock *BB = MI->getParent();
1639
1640 MachineInstrBuilder LDM, STM;
1641 if (isThumb1 || !MI->getOperand(1).isDead()) {
1642 MachineOperand LDWb(MI->getOperand(1));
1643 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1644 : isThumb1 ? ARM::tLDMIA_UPD
1645 : ARM::LDMIA_UPD))
1646 .add(LDWb);
1647 } else {
1648 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1649 }
1650
1651 if (isThumb1 || !MI->getOperand(0).isDead()) {
1652 MachineOperand STWb(MI->getOperand(0));
1653 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1654 : isThumb1 ? ARM::tSTMIA_UPD
1655 : ARM::STMIA_UPD))
1656 .add(STWb);
1657 } else {
1658 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1659 }
1660
1661 MachineOperand LDBase(MI->getOperand(3));
1662 LDM.add(LDBase).add(predOps(ARMCC::AL));
1663
1664 MachineOperand STBase(MI->getOperand(2));
1665 STM.add(STBase).add(predOps(ARMCC::AL));
1666
1667 // Sort the scratch registers into ascending order.
1668 const TargetRegisterInfo &TRI = getRegisterInfo();
1669 SmallVector<unsigned, 6> ScratchRegs;
1670 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1671 ScratchRegs.push_back(MO.getReg());
1672 llvm::sort(ScratchRegs,
1673 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1674 return TRI.getEncodingValue(Reg1) <
1675 TRI.getEncodingValue(Reg2);
1676 });
1677
1678 for (const auto &Reg : ScratchRegs) {
1679 LDM.addReg(Reg, RegState::Define);
1680 STM.addReg(Reg, RegState::Kill);
1681 }
1682
1683 BB->erase(MI);
1684}
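// Roughly, a "MEMCPY dst_wb, src_wb, dst, src, n, scratch..." pseudo becomes
// an LDMIA_UPD from the source base into the scratch registers followed by an
// STMIA_UPD of those registers to the destination base, with plain LDMIA /
// STMIA used when the corresponding writeback result is dead.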
1685
1686bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1687 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1688 expandLoadStackGuard(MI);
1689 MI.getParent()->erase(MI);
1690 return true;
1691 }
1692
1693 if (MI.getOpcode() == ARM::MEMCPY) {
1694 expandMEMCPY(MI);
1695 return true;
1696 }
1697
1698 // This hook gets to expand COPY instructions before they become
1699 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1700 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1701 // changed into a VORR that can go down the NEON pipeline.
1702 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1703 return false;
1704
1705 // Look for a copy between even S-registers. That is where we keep floats
1706 // when using NEON v2f32 instructions for f32 arithmetic.
1707 Register DstRegS = MI.getOperand(0).getReg();
1708 Register SrcRegS = MI.getOperand(1).getReg();
1709 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1710 return false;
1711
1712 const TargetRegisterInfo *TRI = &getRegisterInfo();
1713 unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1714 &ARM::DPRRegClass);
1715 unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1716 &ARM::DPRRegClass);
1717 if (!DstRegD || !SrcRegD)
1718 return false;
1719
1720 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1721 // legal if the COPY already defines the full DstRegD, and it isn't a
1722 // sub-register insertion.
1723 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1724 return false;
1725
1726 // A dead copy shouldn't show up here, but reject it just in case.
1727 if (MI.getOperand(0).isDead())
1728 return false;
1729
1730 // All clear, widen the COPY.
1731 LLVM_DEBUG(dbgs() << "widening: " << MI);
1732 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1733
1734 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1735 // or some other super-register.
1736 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1737 if (ImpDefIdx != -1)
1738 MI.removeOperand(ImpDefIdx);
1739
1740 // Change the opcode and operands.
1741 MI.setDesc(get(ARM::VMOVD));
1742 MI.getOperand(0).setReg(DstRegD);
1743 MI.getOperand(1).setReg(SrcRegD);
1744 MIB.add(predOps(ARMCC::AL));
1745
1746 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1747 // register scavenger and machine verifier, so we need to indicate that we
1748 // are reading an undefined value from SrcRegD, but a proper value from
1749 // SrcRegS.
1750 MI.getOperand(1).setIsUndef();
1751 MIB.addReg(SrcRegS, RegState::Implicit);
1752
1753 // SrcRegD may actually contain an unrelated value in the ssub_1
1754 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1755 if (MI.getOperand(1).isKill()) {
1756 MI.getOperand(1).setIsKill(false);
1757 MI.addRegisterKilled(SrcRegS, TRI, true);
1758 }
1759
1760 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1761 return true;
1762}
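// For example, "$s0 = COPY killed $s2" can be rewritten here as
// "$d0 = VMOVD undef $d1, implicit killed $s2", letting the copy go down the
// NEON pipeline while still tracking liveness of the original S-register.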
1763
1764/// Create a copy of a const pool value. Update CPI to the new index and return
1765/// the label UID.
1766static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1767 MachineConstantPool *MCP = MF.getConstantPool();
1768 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1769
1770 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1771 assert(MCPE.isMachineConstantPoolEntry() &&
1772 "Expecting a machine constantpool entry!");
1773 ARMConstantPoolValue *ACPV =
1774 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1775
1776 unsigned PCLabelId = AFI->createPICLabelUId();
1777 ARMConstantPoolValue *NewCPV = nullptr;
1778
1779 // FIXME: The below assumes PIC relocation model and that the function
1780 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1781 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1782 // instructions, so that's probably OK, but is PIC always correct when
1783 // we get here?
1784 if (ACPV->isGlobalValue())
1785 NewCPV = ARMConstantPoolConstant::Create(
1786 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1787 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1788 else if (ACPV->isExtSymbol())
1789 NewCPV = ARMConstantPoolSymbol::
1790 Create(MF.getFunction().getContext(),
1791 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1792 else if (ACPV->isBlockAddress())
1793 NewCPV = ARMConstantPoolConstant::
1794 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1795 ARMCP::CPBlockAddress, 4);
1796 else if (ACPV->isLSDA())
1797 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1798 ARMCP::CPLSDA, 4);
1799 else if (ACPV->isMachineBasicBlock())
1800 NewCPV = ARMConstantPoolMBB::
1801 Create(MF.getFunction().getContext(),
1802 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1803 else
1804 llvm_unreachable("Unexpected ARM constantpool value type!!");
1805 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1806 return PCLabelId;
1807}
1808
1809void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1810 MachineBasicBlock::iterator I,
1811 Register DestReg, unsigned SubIdx,
1812 const MachineInstr &Orig,
1813 const TargetRegisterInfo &TRI) const {
1814 unsigned Opcode = Orig.getOpcode();
1815 switch (Opcode) {
1816 default: {
1817 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1818 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1819 MBB.insert(I, MI);
1820 break;
1821 }
1822 case ARM::tLDRpci_pic:
1823 case ARM::t2LDRpci_pic: {
1824 MachineFunction &MF = *MBB.getParent();
1825 unsigned CPI = Orig.getOperand(1).getIndex();
1826 unsigned PCLabelId = duplicateCPV(MF, CPI);
1827 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1828 .addConstantPoolIndex(CPI)
1829 .addImm(PCLabelId)
1830 .cloneMemRefs(Orig);
1831 break;
1832 }
1833 }
1834}
1835
1836MachineInstr &
1837ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1838 MachineBasicBlock::iterator InsertBefore,
1839 const MachineInstr &Orig) const {
1840 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1841 MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1842 for (;;) {
1843 switch (I->getOpcode()) {
1844 case ARM::tLDRpci_pic:
1845 case ARM::t2LDRpci_pic: {
1846 MachineFunction &MF = *MBB.getParent();
1847 unsigned CPI = I->getOperand(1).getIndex();
1848 unsigned PCLabelId = duplicateCPV(MF, CPI);
1849 I->getOperand(1).setIndex(CPI);
1850 I->getOperand(2).setImm(PCLabelId);
1851 break;
1852 }
1853 }
1854 if (!I->isBundledWithSucc())
1855 break;
1856 ++I;
1857 }
1858 return Cloned;
1859}
1860
1861bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1862 const MachineInstr &MI1,
1863 const MachineRegisterInfo *MRI) const {
1864 unsigned Opcode = MI0.getOpcode();
1865 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1866 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1867 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1868 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1869 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1870 Opcode == ARM::t2MOV_ga_pcrel) {
1871 if (MI1.getOpcode() != Opcode)
1872 return false;
1873 if (MI0.getNumOperands() != MI1.getNumOperands())
1874 return false;
1875
1876 const MachineOperand &MO0 = MI0.getOperand(1);
1877 const MachineOperand &MO1 = MI1.getOperand(1);
1878 if (MO0.getOffset() != MO1.getOffset())
1879 return false;
1880
1881 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1882 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1883 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1884 Opcode == ARM::t2MOV_ga_pcrel)
1885 // Ignore the PC labels.
1886 return MO0.getGlobal() == MO1.getGlobal();
1887
1888 const MachineFunction *MF = MI0.getParent()->getParent();
1889 const MachineConstantPool *MCP = MF->getConstantPool();
1890 int CPI0 = MO0.getIndex();
1891 int CPI1 = MO1.getIndex();
1892 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1893 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1894 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1895 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1896 if (isARMCP0 && isARMCP1) {
1897 ARMConstantPoolValue *ACPV0 =
1898 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1899 ARMConstantPoolValue *ACPV1 =
1900 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1901 return ACPV0->hasSameValue(ACPV1);
1902 } else if (!isARMCP0 && !isARMCP1) {
1903 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1904 }
1905 return false;
1906 } else if (Opcode == ARM::PICLDR) {
1907 if (MI1.getOpcode() != Opcode)
1908 return false;
1909 if (MI0.getNumOperands() != MI1.getNumOperands())
1910 return false;
1911
1912 Register Addr0 = MI0.getOperand(1).getReg();
1913 Register Addr1 = MI1.getOperand(1).getReg();
1914 if (Addr0 != Addr1) {
1915 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1916 return false;
1917
1918 // This assumes SSA form.
1919 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1920 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1921 // Check if the loaded values, e.g. a constant pool entry or a global
1922 // address, are the same.
1923 if (!produceSameValue(*Def0, *Def1, MRI))
1924 return false;
1925 }
1926
1927 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1928 // %12 = PICLDR %11, 0, 14, %noreg
1929 const MachineOperand &MO0 = MI0.getOperand(i);
1930 const MachineOperand &MO1 = MI1.getOperand(i);
1931 if (!MO0.isIdenticalTo(MO1))
1932 return false;
1933 }
1934 return true;
1935 }
1936
1937 return MI0.isIdenticalTo(MI1, MachineRegisterInfo::IgnoreVRegDefs);
1938}
1939
1940/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1941/// determine if two loads are loading from the same base address. It should
1942/// only return true if the base pointers are the same and the only differences
1943/// between the two addresses is the offset. It also returns the offsets by
1944/// reference.
1945///
1946/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1947/// is permanently disabled.
1948bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1949 int64_t &Offset1,
1950 int64_t &Offset2) const {
1951 // Don't worry about Thumb: just ARM and Thumb2.
1952 if (Subtarget.isThumb1Only()) return false;
1953
1954 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1955 return false;
1956
1957 auto IsLoadOpcode = [&](unsigned Opcode) {
1958 switch (Opcode) {
1959 default:
1960 return false;
1961 case ARM::LDRi12:
1962 case ARM::LDRBi12:
1963 case ARM::LDRD:
1964 case ARM::LDRH:
1965 case ARM::LDRSB:
1966 case ARM::LDRSH:
1967 case ARM::VLDRD:
1968 case ARM::VLDRS:
1969 case ARM::t2LDRi8:
1970 case ARM::t2LDRBi8:
1971 case ARM::t2LDRDi8:
1972 case ARM::t2LDRSHi8:
1973 case ARM::t2LDRi12:
1974 case ARM::t2LDRBi12:
1975 case ARM::t2LDRSHi12:
1976 return true;
1977 }
1978 };
1979
1980 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1981 !IsLoadOpcode(Load2->getMachineOpcode()))
1982 return false;
1983
1984 // Check if base addresses and chain operands match.
1985 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1986 Load1->getOperand(4) != Load2->getOperand(4))
1987 return false;
1988
1989 // Index should be Reg0.
1990 if (Load1->getOperand(3) != Load2->getOperand(3))
1991 return false;
1992
1993 // Determine the offsets.
1994 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1995 isa<ConstantSDNode>(Load2->getOperand(1))) {
1996 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1997 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1998 return true;
1999 }
2000
2001 return false;
2002}
2003
2004/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
2005/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
2006/// be scheduled together. On some targets, if two loads are loading from
2007/// addresses in the same cache line, it's better if they are scheduled
2008/// together. This function takes two integers that represent the load offsets
2009/// from the common base address. It returns true if it decides it's desirable
2010/// to schedule the two loads together. "NumLoads" is the number of loads that
2011/// have already been scheduled after Load1.
2012///
2013/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2014/// is permanently disabled.
2015bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2016 int64_t Offset1, int64_t Offset2,
2017 unsigned NumLoads) const {
2018 // Don't worry about Thumb: just ARM and Thumb2.
2019 if (Subtarget.isThumb1Only()) return false;
2020
2021 assert(Offset2 > Offset1);
2022
2023 if ((Offset2 - Offset1) / 8 > 64)
2024 return false;
2025
2026 // Check if the machine opcodes are different. If they are different
2027 // then we consider the loads not to share the same base address,
2028 // EXCEPT for Thumb2 byte loads where one is t2LDRBi8 and the other t2LDRBi12.
2029 // In that case they are considered to be the same because they are
2030 // different encoding forms of the same basic instruction.
2031 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2032 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2033 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2034 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2035 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2036 return false; // FIXME: overly conservative?
2037
2038 // Four loads in a row should be sufficient.
2039 if (NumLoads >= 3)
2040 return false;
2041
2042 return true;
2043}
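// A worked example of the heuristic above: with Offset1 == 8 and Offset2 == 24
// from areLoadsFromSameBasePtr, (24 - 8) / 8 == 2 is well under the 64 limit,
// so as long as the opcodes match (or are the t2LDRBi8/t2LDRBi12 pair) and
// fewer than three loads are already scheduled after Load1, the two loads are
// clustered together.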
2044
2045bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2046 const MachineBasicBlock *MBB,
2047 const MachineFunction &MF) const {
2048 // Debug info is never a scheduling boundary. It's necessary to be explicit
2049 // due to the special treatment of IT instructions below, otherwise a
2050 // dbg_value followed by an IT will result in the IT instruction being
2051 // considered a scheduling hazard, which is wrong. It should be the actual
2052 // instruction preceding the dbg_value instruction(s), just like it is
2053 // when debug info is not present.
2054 if (MI.isDebugInstr())
2055 return false;
2056
2057 // Terminators and labels can't be scheduled around.
2058 if (MI.isTerminator() || MI.isPosition())
2059 return true;
2060
2061 // INLINEASM_BR can jump to another block
2062 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2063 return true;
2064
2065 if (isSEHInstruction(MI))
2066 return true;
2067
2068 // Treat the start of the IT block as a scheduling boundary, but schedule
2069 // t2IT along with all instructions following it.
2070 // FIXME: This is a big hammer. But the alternative is to add all potential
2071 // true and anti dependencies to IT block instructions as implicit operands
2072 // to the t2IT instruction. The added compile time and complexity does not
2073 // seem worth it.
2074 MachineBasicBlock::const_iterator I = MI;
2075 // Make sure to skip any debug instructions
2076 while (++I != MBB->end() && I->isDebugInstr())
2077 ;
2078 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2079 return true;
2080
2081 // Don't attempt to schedule around any instruction that defines
2082 // a stack-oriented pointer, as it's unlikely to be profitable. This
2083 // saves compile time, because it doesn't require every single
2084 // stack slot reference to depend on the instruction that does the
2085 // modification.
2086 // Calls don't actually change the stack pointer, even if they have imp-defs.
2087 // No ARM calling conventions change the stack pointer. (X86 calling
2088 // conventions sometimes do).
2089 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
2090 return true;
2091
2092 return false;
2093}
2094
2095bool ARMBaseInstrInfo::
2096isProfitableToIfCvt(MachineBasicBlock &MBB,
2097 unsigned NumCycles, unsigned ExtraPredCycles,
2098 BranchProbability Probability) const {
2099 if (!NumCycles)
2100 return false;
2101
2102 // If we are optimizing for size, see if the branch in the predecessor can be
2103 // lowered to cbn?z by the constant island lowering pass, and return false if
2104 // so. This results in a shorter instruction sequence.
2105 if (MBB.getParent()->getFunction().hasOptSize()) {
2106 MachineBasicBlock *Pred = *MBB.pred_begin();
2107 if (!Pred->empty()) {
2108 MachineInstr *LastMI = &*Pred->rbegin();
2109 if (LastMI->getOpcode() == ARM::t2Bcc) {
2110 const TargetRegisterInfo *TRI = &getRegisterInfo();
2111 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2112 if (CmpMI)
2113 return false;
2114 }
2115 }
2116 }
2117 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2118 MBB, 0, 0, Probability);
2119}
2120
2121bool ARMBaseInstrInfo::
2122isProfitableToIfCvt(MachineBasicBlock &TBB,
2123 unsigned TCycles, unsigned TExtra,
2124 MachineBasicBlock &FBB,
2125 unsigned FCycles, unsigned FExtra,
2126 BranchProbability Probability) const {
2127 if (!TCycles)
2128 return false;
2129
2130 // In Thumb code we often end up trading one branch for an IT block, and
2131 // if we are cloning, the duplicated instructions can increase code size.
2132 // Prevent blocks with multiple predecessors from being if-converted to
2133 // avoid this cloning.
2134 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2135 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2136 return false;
2137 }
2138
2139 // Attempt to estimate the relative costs of predication versus branching.
2140 // Here we scale up each component of UnpredCost to avoid precision issues when
2141 // scaling TCycles/FCycles by Probability.
2142 const unsigned ScalingUpFactor = 1024;
2143
2144 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2145 unsigned UnpredCost;
2146 if (!Subtarget.hasBranchPredictor()) {
2147 // When we don't have a branch predictor it's always cheaper to not take a
2148 // branch than take it, so we have to take that into account.
2149 unsigned NotTakenBranchCost = 1;
2150 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2151 unsigned TUnpredCycles, FUnpredCycles;
2152 if (!FCycles) {
2153 // Triangle: TBB is the fallthrough
2154 TUnpredCycles = TCycles + NotTakenBranchCost;
2155 FUnpredCycles = TakenBranchCost;
2156 } else {
2157 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2158 TUnpredCycles = TCycles + TakenBranchCost;
2159 FUnpredCycles = FCycles + NotTakenBranchCost;
2160 // The branch at the end of FBB will disappear when it's predicated, so
2161 // discount it from PredCost.
2162 PredCost -= 1 * ScalingUpFactor;
2163 }
2164 // The total cost is the cost of each path scaled by its probability.
2165 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2166 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2167 UnpredCost = TUnpredCost + FUnpredCost;
2168 // When predicating, assume that the first IT can be folded away but later
2169 // ones cost one cycle each.
2170 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2171 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2172 }
2173 } else {
2174 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2175 unsigned FUnpredCost =
2176 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2177 UnpredCost = TUnpredCost + FUnpredCost;
2178 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2179 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2180 }
2181
2182 return PredCost <= UnpredCost;
2183}
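// A worked example of the cost model above, with illustrative numbers: for a
// triangle with TCycles = 2, FCycles = 0, no extra cycles, a 50% probability
// and no branch predictor (assuming a misprediction penalty of 4),
// PredCost = 2 * 1024 = 2048 while UnpredCost = 0.5 * (2 + 1) * 1024 +
// 0.5 * 4 * 1024 = 3584, so predication is considered profitable.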
2184
2185unsigned
2186ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2187 unsigned NumInsts) const {
2188 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2189 // ARM has a condition code field in every predicable instruction; using it
2190 // doesn't change code size.
2191 if (!Subtarget.isThumb2())
2192 return 0;
2193
2194 // It's possible that the IT block size is restricted to a single instruction.
2195 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2196 return divideCeil(NumInsts, MaxInsts) * 2;
2197}
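// For example, predicating 5 Thumb2 instructions needs divideCeil(5, 4) = 2 IT
// instructions, i.e. 4 extra bytes; with restrictIT (one instruction per IT
// block) it needs 5 ITs, i.e. 10 extra bytes.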
2198
2199unsigned
2200ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2201 // If this branch is likely to be folded into the comparison to form a
2202 // CB(N)Z, then removing it won't reduce code size at all, because that will
2203 // just replace the CB(N)Z with a CMP.
2204 if (MI.getOpcode() == ARM::t2Bcc &&
2205 findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2206 return 0;
2207
2208 unsigned Size = getInstSizeInBytes(MI);
2209
2210 // For Thumb2, all branches are 32-bit instructions during the if conversion
2211 // pass, but may be replaced with 16-bit instructions during size reduction.
2212 // Since the branches considered by if conversion tend to be forward branches
2213 // over small basic blocks, they are very likely to be in range for the
2214 // narrow instructions, so we assume the final code size will be half what it
2215 // currently is.
2216 if (Subtarget.isThumb2())
2217 Size /= 2;
2218
2219 return Size;
2220}
2221
2222bool
2223ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2224 MachineBasicBlock &FMBB) const {
2225 // Reduce false anti-dependencies to let the target's out-of-order execution
2226 // engine do its thing.
2227 return Subtarget.isProfitableToUnpredicate();
2228}
2229
2230/// getInstrPredicate - If instruction is predicated, returns its predicate
2231/// condition, otherwise returns AL. It also returns the condition code
2232/// register by reference.
2233ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2234 Register &PredReg) {
2235 int PIdx = MI.findFirstPredOperandIdx();
2236 if (PIdx == -1) {
2237 PredReg = 0;
2238 return ARMCC::AL;
2239 }
2240
2241 PredReg = MI.getOperand(PIdx+1).getReg();
2242 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2243}
2244
2245unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2246 if (Opc == ARM::B)
2247 return ARM::Bcc;
2248 if (Opc == ARM::tB)
2249 return ARM::tBcc;
2250 if (Opc == ARM::t2B)
2251 return ARM::t2Bcc;
2252
2253 llvm_unreachable("Unknown unconditional branch opcode!");
2254}
2255
2256MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2257 bool NewMI,
2258 unsigned OpIdx1,
2259 unsigned OpIdx2) const {
2260 switch (MI.getOpcode()) {
2261 case ARM::MOVCCr:
2262 case ARM::t2MOVCCr: {
2263 // MOVCC can be commuted by inverting the condition.
2264 Register PredReg;
2265 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2266 // MOVCC AL can't be inverted. Shouldn't happen.
2267 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2268 return nullptr;
2269 MachineInstr *CommutedMI =
2270 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2271 if (!CommutedMI)
2272 return nullptr;
2273 // After swapping the MOVCC operands, also invert the condition.
2274 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2275 .setImm(ARMCC::getOppositeCondition(CC));
2276 return CommutedMI;
2277 }
2278 }
2279 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2280}
2281
2282/// Identify instructions that can be folded into a MOVCC instruction, and
2283/// return the defining instruction.
2285ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2286 const TargetInstrInfo *TII) const {
2287 if (!Reg.isVirtual())
2288 return nullptr;
2289 if (!MRI.hasOneNonDBGUse(Reg))
2290 return nullptr;
2291 MachineInstr *MI = MRI.getVRegDef(Reg);
2292 if (!MI)
2293 return nullptr;
2294 // Check if MI can be predicated and folded into the MOVCC.
2295 if (!isPredicable(*MI))
2296 return nullptr;
2297 // Check if MI has any non-dead defs or physreg uses. This also detects
2298 // predicated instructions which will be reading CPSR.
2299 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2300 // Reject frame index operands, PEI can't handle the predicated pseudos.
2301 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2302 return nullptr;
2303 if (!MO.isReg())
2304 continue;
2305 // MI can't have any tied operands, that would conflict with predication.
2306 if (MO.isTied())
2307 return nullptr;
2308 if (MO.getReg().isPhysical())
2309 return nullptr;
2310 if (MO.isDef() && !MO.isDead())
2311 return nullptr;
2312 }
2313 bool DontMoveAcrossStores = true;
2314 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2315 return nullptr;
2316 return MI;
2317}
2318
2319bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2320 SmallVectorImpl<MachineOperand> &Cond,
2321 unsigned &TrueOp, unsigned &FalseOp,
2322 bool &Optimizable) const {
2323 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2324 "Unknown select instruction");
2325 // MOVCC operands:
2326 // 0: Def.
2327 // 1: True use.
2328 // 2: False use.
2329 // 3: Condition code.
2330 // 4: CPSR use.
2331 TrueOp = 1;
2332 FalseOp = 2;
2333 Cond.push_back(MI.getOperand(3));
2334 Cond.push_back(MI.getOperand(4));
2335 // We can always fold a def.
2336 Optimizable = true;
2337 return false;
2338}
2339
2340MachineInstr *
2341ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2342 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2343 bool PreferFalse) const {
2344 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2345 "Unknown select instruction");
2346 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2347 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2348 bool Invert = !DefMI;
2349 if (!DefMI)
2350 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2351 if (!DefMI)
2352 return nullptr;
2353
2354 // Find new register class to use.
2355 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2356 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2357 Register DestReg = MI.getOperand(0).getReg();
2358 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2359 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2360 if (!MRI.constrainRegClass(DestReg, FalseClass))
2361 return nullptr;
2362 if (!MRI.constrainRegClass(DestReg, TrueClass))
2363 return nullptr;
2364
2365 // Create a new predicated version of DefMI.
2366 // Rfalse is the first use.
2367 MachineInstrBuilder NewMI =
2368 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2369
2370 // Copy all the DefMI operands, excluding its (null) predicate.
2371 const MCInstrDesc &DefDesc = DefMI->getDesc();
2372 for (unsigned i = 1, e = DefDesc.getNumOperands();
2373 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2374 NewMI.add(DefMI->getOperand(i));
2375
2376 unsigned CondCode = MI.getOperand(3).getImm();
2377 if (Invert)
2378 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2379 else
2380 NewMI.addImm(CondCode);
2381 NewMI.add(MI.getOperand(4));
2382
2383 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2384 if (NewMI->hasOptionalDef())
2385 NewMI.add(condCodeOp());
2386
2387 // The output register value when the predicate is false is an implicit
2388 // register operand tied to the first def.
2389 // The tie makes the register allocator ensure the FalseReg is allocated the
2390 // same register as operand 0.
2391 FalseReg.setImplicit();
2392 NewMI.add(FalseReg);
2393 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2394
2395 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2396 SeenMIs.insert(NewMI);
2397 SeenMIs.erase(DefMI);
2398
2399 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2400 // DefMI would be invalid when transferred inside the loop. Checking for a
2401 // loop is expensive, but at least remove kill flags if they are in different
2402 // BBs.
2403 if (DefMI->getParent() != MI.getParent())
2404 NewMI->clearKillInfo();
2405
2406 // The caller will erase MI, but not DefMI.
2407 DefMI->eraseFromParent();
2408 return NewMI;
2409}
2410
2411/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2412/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2413/// def operand.
2414///
2415/// This will go away once we can teach tblgen how to set the optional CPSR def
2416/// operand itself.
2417struct AddSubFlagsOpcodePair {
2418 uint16_t PseudoOpc;
2419 uint16_t MachineOpc;
2420};
2421
2422static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2423 {ARM::ADDSri, ARM::ADDri},
2424 {ARM::ADDSrr, ARM::ADDrr},
2425 {ARM::ADDSrsi, ARM::ADDrsi},
2426 {ARM::ADDSrsr, ARM::ADDrsr},
2427
2428 {ARM::SUBSri, ARM::SUBri},
2429 {ARM::SUBSrr, ARM::SUBrr},
2430 {ARM::SUBSrsi, ARM::SUBrsi},
2431 {ARM::SUBSrsr, ARM::SUBrsr},
2432
2433 {ARM::RSBSri, ARM::RSBri},
2434 {ARM::RSBSrsi, ARM::RSBrsi},
2435 {ARM::RSBSrsr, ARM::RSBrsr},
2436
2437 {ARM::tADDSi3, ARM::tADDi3},
2438 {ARM::tADDSi8, ARM::tADDi8},
2439 {ARM::tADDSrr, ARM::tADDrr},
2440 {ARM::tADCS, ARM::tADC},
2441
2442 {ARM::tSUBSi3, ARM::tSUBi3},
2443 {ARM::tSUBSi8, ARM::tSUBi8},
2444 {ARM::tSUBSrr, ARM::tSUBrr},
2445 {ARM::tSBCS, ARM::tSBC},
2446 {ARM::tRSBS, ARM::tRSB},
2447 {ARM::tLSLSri, ARM::tLSLri},
2448
2449 {ARM::t2ADDSri, ARM::t2ADDri},
2450 {ARM::t2ADDSrr, ARM::t2ADDrr},
2451 {ARM::t2ADDSrs, ARM::t2ADDrs},
2452
2453 {ARM::t2SUBSri, ARM::t2SUBri},
2454 {ARM::t2SUBSrr, ARM::t2SUBrr},
2455 {ARM::t2SUBSrs, ARM::t2SUBrs},
2456
2457 {ARM::t2RSBSri, ARM::t2RSBri},
2458 {ARM::t2RSBSrs, ARM::t2RSBrs},
2459};
2460
2461unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2462 for (const auto &Entry : AddSubFlagsOpcodeMap)
2463 if (OldOpc == Entry.PseudoOpc)
2464 return Entry.MachineOpc;
2465 return 0;
2466}
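// Usage sketch: convertAddSubFlagsOpcode(ARM::t2ADDSri) yields ARM::t2ADDri,
// and 0 is returned for any opcode that has no entry in the table above.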
2467
2468void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2469 MachineBasicBlock::iterator &MBBI,
2470 const DebugLoc &dl, Register DestReg,
2471 Register BaseReg, int NumBytes,
2472 ARMCC::CondCodes Pred, Register PredReg,
2473 const ARMBaseInstrInfo &TII,
2474 unsigned MIFlags) {
2475 if (NumBytes == 0 && DestReg != BaseReg) {
2476 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2477 .addReg(BaseReg, RegState::Kill)
2478 .add(predOps(Pred, PredReg))
2479 .add(condCodeOp())
2480 .setMIFlags(MIFlags);
2481 return;
2482 }
2483
2484 bool isSub = NumBytes < 0;
2485 if (isSub) NumBytes = -NumBytes;
2486
2487 while (NumBytes) {
2488 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2489 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2490 assert(ThisVal && "Didn't extract field correctly");
2491
2492 // We will handle these bits from offset, clear them.
2493 NumBytes &= ~ThisVal;
2494
2495 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2496
2497 // Build the new ADD / SUB.
2498 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2499 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2500 .addReg(BaseReg, RegState::Kill)
2501 .addImm(ThisVal)
2502 .add(predOps(Pred, PredReg))
2503 .add(condCodeOp())
2504 .setMIFlags(MIFlags);
2505 BaseReg = DestReg;
2506 }
2507}
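// A worked example of the decomposition above: NumBytes = 0x12345 is not a
// valid rotated 8-bit immediate, so the loop emits three ADDri instructions
// adding 0x45, 0x2300 and 0x10000 in turn, each of which is encodable, with
// DestReg serving as the running base after the first instruction.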
2508
2509bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2510 MachineFunction &MF, MachineInstr *MI,
2511 unsigned NumBytes) {
2512 // This optimisation potentially adds lots of load and store
2513 // micro-operations, so it is really only a benefit for code size.
2514 if (!Subtarget.hasMinSize())
2515 return false;
2516
2517 // If only one register is pushed/popped, LLVM can use an LDR/STR
2518 // instead. We can't modify those so make sure we're dealing with an
2519 // instruction we understand.
2520 bool IsPop = isPopOpcode(MI->getOpcode());
2521 bool IsPush = isPushOpcode(MI->getOpcode());
2522 if (!IsPush && !IsPop)
2523 return false;
2524
2525 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2526 MI->getOpcode() == ARM::VLDMDIA_UPD;
2527 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2528 MI->getOpcode() == ARM::tPOP ||
2529 MI->getOpcode() == ARM::tPOP_RET;
2530
2531 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2532 MI->getOperand(1).getReg() == ARM::SP)) &&
2533 "trying to fold sp update into non-sp-updating push/pop");
2534
2535 // The VFP push & pop act on D-registers, so we can only fold in an
2536 // adjustment that is a multiple of 8 bytes. Similarly, rN registers are
2537 // 4 bytes each. Don't try if this is violated.
2538 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2539 return false;
2540
2541 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2542 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2543 int RegListIdx = IsT1PushPop ? 2 : 4;
2544
2545 // Calculate the space we'll need in terms of registers.
2546 unsigned RegsNeeded;
2547 const TargetRegisterClass *RegClass;
2548 if (IsVFPPushPop) {
2549 RegsNeeded = NumBytes / 8;
2550 RegClass = &ARM::DPRRegClass;
2551 } else {
2552 RegsNeeded = NumBytes / 4;
2553 RegClass = &ARM::GPRRegClass;
2554 }
2555
2556 // We're going to have to strip all list operands off before
2557 // re-adding them since the order matters, so save the existing ones
2558 // for later.
2559 SmallVector<MachineOperand, 4> RegList;
2560
2561 // We're also going to need the first register transferred by this
2562 // instruction, which won't necessarily be the first register in the list.
2563 unsigned FirstRegEnc = -1;
2564
2565 const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2566 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2567 MachineOperand &MO = MI->getOperand(i);
2568 RegList.push_back(MO);
2569
2570 if (MO.isReg() && !MO.isImplicit() &&
2571 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2572 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2573 }
2574
2575 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2576
2577 // Now try to find enough space in the reglist to allocate NumBytes.
2578 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2579 --CurRegEnc) {
2580 unsigned CurReg = RegClass->getRegister(CurRegEnc);
2581 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2582 continue;
2583 if (!IsPop) {
2584 // Pushing any register is completely harmless; mark the register involved
2585 // as undef since we don't care about its value and must not restore it
2586 // during stack unwinding.
2587 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2588 false, false, true));
2589 --RegsNeeded;
2590 continue;
2591 }
2592
2593 // However, we can only pop an extra register if it's not live. For
2594 // registers live within the function we might clobber a return value
2595 // register; the other way a register can be live here is if it's
2596 // callee-saved.
2597 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2598 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2599 MachineBasicBlock::LQR_Dead) {
2600 // VFP pops don't allow holes in the register list, so any skip is fatal
2601 // for our transformation. GPR pops do, so we should just keep looking.
2602 if (IsVFPPushPop)
2603 return false;
2604 else
2605 continue;
2606 }
2607
2608 // Mark the unimportant registers as <def,dead> in the POP.
2609 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2610 true));
2611 --RegsNeeded;
2612 }
2613
2614 if (RegsNeeded > 0)
2615 return false;
2616
2617 // Finally we know we can profitably perform the optimisation so go
2618 // ahead: strip all existing registers off and add them back again
2619 // in the right order.
2620 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2621 MI->removeOperand(i);
2622
2623 // Add the complete list back in.
2624 MachineInstrBuilder MIB(MF, &*MI);
2625 for (const MachineOperand &MO : llvm::reverse(RegList))
2626 MIB.add(MO);
2627
2628 return true;
2629}
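// Sketch of the intent above (illustrative registers): under minsize, a
// "push {r4, r5, r7, lr}" followed by "sub sp, #8" can have the SP update
// folded in by pushing two extra registers below the first one in the list,
// e.g. "push {r2, r3, r4, r5, r7, lr}" with r2/r3 marked undef; a pop can
// likewise absorb the update by popping extra dead registers.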
2630
2631bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2632 Register FrameReg, int &Offset,
2633 const ARMBaseInstrInfo &TII) {
2634 unsigned Opcode = MI.getOpcode();
2635 const MCInstrDesc &Desc = MI.getDesc();
2636 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2637 bool isSub = false;
2638
2639 // Memory operands in inline assembly always use AddrMode2.
2640 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2642
2641 AddrMode = ARMII::AddrMode2;
2642
2643 if (Opcode == ARM::ADDri) {
2644 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2645 if (Offset == 0) {
2646 // Turn it into a move.
2647 MI.setDesc(TII.get(ARM::MOVr));
2648 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2649 MI.removeOperand(FrameRegIdx+1);
2650 Offset = 0;
2651 return true;
2652 } else if (Offset < 0) {
2653 Offset = -Offset;
2654 isSub = true;
2655 MI.setDesc(TII.get(ARM::SUBri));
2656 }
2657
2658 // Common case: small offset, fits into instruction.
2659 if (ARM_AM::getSOImmVal(Offset) != -1) {
2660 // Replace the FrameIndex with sp / fp
2661 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2662 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2663 Offset = 0;
2664 return true;
2665 }
2666
2667 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2668 // as possible.
2669 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2670 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2671
2672 // We will handle these bits from offset, clear them.
2673 Offset &= ~ThisImmVal;
2674
2675 // Get the properly encoded SOImmVal field.
2676 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2677 "Bit extraction didn't work?");
2678 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2679 } else {
2680 unsigned ImmIdx = 0;
2681 int InstrOffs = 0;
2682 unsigned NumBits = 0;
2683 unsigned Scale = 1;
2684 switch (AddrMode) {
2685 case ARMII::AddrMode_i12:
2686 ImmIdx = FrameRegIdx + 1;
2687 InstrOffs = MI.getOperand(ImmIdx).getImm();
2688 NumBits = 12;
2689 break;
2690 case ARMII::AddrMode2:
2691 ImmIdx = FrameRegIdx+2;
2692 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2693 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2694 InstrOffs *= -1;
2695 NumBits = 12;
2696 break;
2697 case ARMII::AddrMode3:
2698 ImmIdx = FrameRegIdx+2;
2699 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2700 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2701 InstrOffs *= -1;
2702 NumBits = 8;
2703 break;
2704 case ARMII::AddrMode4:
2705 case ARMII::AddrMode6:
2706 // Can't fold any offset even if it's zero.
2707 return false;
2708 case ARMII::AddrMode5:
2709 ImmIdx = FrameRegIdx+1;
2710 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2711 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2712 InstrOffs *= -1;
2713 NumBits = 8;
2714 Scale = 4;
2715 break;
2716 case ARMII::AddrMode5FP16:
2717 ImmIdx = FrameRegIdx+1;
2718 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2719 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2720 InstrOffs *= -1;
2721 NumBits = 8;
2722 Scale = 2;
2723 break;
2724 case ARMII::AddrModeT2_i7:
2725 case ARMII::AddrModeT2_i7s2:
2726 case ARMII::AddrModeT2_i7s4:
2727 ImmIdx = FrameRegIdx+1;
2728 InstrOffs = MI.getOperand(ImmIdx).getImm();
2729 NumBits = 7;
2730 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2731 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2732 break;
2733 default:
2734 llvm_unreachable("Unsupported addressing mode!");
2735 }
2736
2737 Offset += InstrOffs * Scale;
2738 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2739 if (Offset < 0) {
2740 Offset = -Offset;
2741 isSub = true;
2742 }
2743
2744 // Attempt to fold address comp. if opcode has offset bits
2745 if (NumBits > 0) {
2746 // Common case: small offset, fits into instruction.
2747 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2748 int ImmedOffset = Offset / Scale;
2749 unsigned Mask = (1 << NumBits) - 1;
2750 if ((unsigned)Offset <= Mask * Scale) {
2751 // Replace the FrameIndex with sp
2752 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2753 // FIXME: When addrmode2 goes away, this will simplify (like the
2754 // T2 version), as the LDR.i12 versions don't need the encoding
2755 // tricks for the offset value.
2756 if (isSub) {
2757 if (AddrMode == ARMII::AddrMode_i12)
2758 ImmedOffset = -ImmedOffset;
2759 else
2760 ImmedOffset |= 1 << NumBits;
2761 }
2762 ImmOp.ChangeToImmediate(ImmedOffset);
2763 Offset = 0;
2764 return true;
2765 }
2766
2767 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2768 ImmedOffset = ImmedOffset & Mask;
2769 if (isSub) {
2770 if (AddrMode == ARMII::AddrMode_i12)
2771 ImmedOffset = -ImmedOffset;
2772 else
2773 ImmedOffset |= 1 << NumBits;
2774 }
2775 ImmOp.ChangeToImmediate(ImmedOffset);
2776 Offset &= ~(Mask*Scale);
2777 }
2778 }
2779
2780 Offset = (isSub) ? -Offset : Offset;
2781 return Offset == 0;
2782}
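// For example, an AddrMode_i12 load with a combined frame offset of 4100
// cannot encode the full offset (NumBits == 12, so the limit is 4095): the
// instruction keeps 4100 & 4095 == 4 as its immediate and the function
// returns false with Offset == 4096, which the caller must materialize
// separately.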
2783
2784/// analyzeCompare - For a comparison instruction, return the source registers
2785/// in SrcReg and SrcReg2 if having two register operands, and the value it
2786/// compares against in CmpValue. Return true if the comparison instruction
2787/// can be analyzed.
2788bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2789 Register &SrcReg2, int64_t &CmpMask,
2790 int64_t &CmpValue) const {
2791 switch (MI.getOpcode()) {
2792 default: break;
2793 case ARM::CMPri:
2794 case ARM::t2CMPri:
2795 case ARM::tCMPi8:
2796 SrcReg = MI.getOperand(0).getReg();
2797 SrcReg2 = 0;
2798 CmpMask = ~0;
2799 CmpValue = MI.getOperand(1).getImm();
2800 return true;
2801 case ARM::CMPrr:
2802 case ARM::t2CMPrr:
2803 case ARM::tCMPr:
2804 SrcReg = MI.getOperand(0).getReg();
2805 SrcReg2 = MI.getOperand(1).getReg();
2806 CmpMask = ~0;
2807 CmpValue = 0;
2808 return true;
2809 case ARM::TSTri:
2810 case ARM::t2TSTri:
2811 SrcReg = MI.getOperand(0).getReg();
2812 SrcReg2 = 0;
2813 CmpMask = MI.getOperand(1).getImm();
2814 CmpValue = 0;
2815 return true;
2816 }
2817
2818 return false;
2819}
2820
2821/// isSuitableForMask - Identify a suitable 'and' instruction that
2822/// operates on the given source register and applies the same mask
2823/// as a 'tst' instruction. Provide a limited look-through for copies.
2824/// When successful, MI will hold the found instruction.
2825static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2826 int CmpMask, bool CommonUse) {
2827 switch (MI->getOpcode()) {
2828 case ARM::ANDri:
2829 case ARM::t2ANDri:
2830 if (CmpMask != MI->getOperand(2).getImm())
2831 return false;
2832 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2833 return true;
2834 break;
2835 }
2836
2837 return false;
2838}
2839
2840/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2841/// the condition code if we modify the instructions such that flags are
2842/// set by ADD(a,b,X).
2843inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2844 switch (CC) {
2845 default: return ARMCC::AL;
2846 case ARMCC::HS: return ARMCC::LO;
2847 case ARMCC::LO: return ARMCC::HS;
2848 case ARMCC::VS: return ARMCC::VS;
2849 case ARMCC::VC: return ARMCC::VC;
2850 }
2851}
2852
2853/// isRedundantFlagInstr - check whether the first instruction, whose only
2854/// purpose is to update flags, can be made redundant.
2855/// CMPrr can be made redundant by SUBrr if the operands are the same.
2856/// CMPri can be made redundant by SUBri if the operands are the same.
2857/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2858/// This function can be extended later on.
2859inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2860 Register SrcReg, Register SrcReg2,
2861 int64_t ImmValue,
2862 const MachineInstr *OI,
2863 bool &IsThumb1) {
2864 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2865 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2866 ((OI->getOperand(1).getReg() == SrcReg &&
2867 OI->getOperand(2).getReg() == SrcReg2) ||
2868 (OI->getOperand(1).getReg() == SrcReg2 &&
2869 OI->getOperand(2).getReg() == SrcReg))) {
2870 IsThumb1 = false;
2871 return true;
2872 }
2873
2874 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2875 ((OI->getOperand(2).getReg() == SrcReg &&
2876 OI->getOperand(3).getReg() == SrcReg2) ||
2877 (OI->getOperand(2).getReg() == SrcReg2 &&
2878 OI->getOperand(3).getReg() == SrcReg))) {
2879 IsThumb1 = true;
2880 return true;
2881 }
2882
2883 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2884 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2885 OI->getOperand(1).getReg() == SrcReg &&
2886 OI->getOperand(2).getImm() == ImmValue) {
2887 IsThumb1 = false;
2888 return true;
2889 }
2890
2891 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2892 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2893 OI->getOperand(2).getReg() == SrcReg &&
2894 OI->getOperand(3).getImm() == ImmValue) {
2895 IsThumb1 = true;
2896 return true;
2897 }
2898
2899 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2900 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2901 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2902 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2903 OI->getOperand(0).getReg() == SrcReg &&
2904 OI->getOperand(1).getReg() == SrcReg2) {
2905 IsThumb1 = false;
2906 return true;
2907 }
2908
2909 if (CmpI->getOpcode() == ARM::tCMPr &&
2910 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2911 OI->getOpcode() == ARM::tADDrr) &&
2912 OI->getOperand(0).getReg() == SrcReg &&
2913 OI->getOperand(2).getReg() == SrcReg2) {
2914 IsThumb1 = true;
2915 return true;
2916 }
2917
2918 return false;
2919}
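// For instance, CMPrr(r1, r2) is redundant after SUBrr(r0, r1, r2), since a
// flag-setting SUBS r0, r1, r2 computes the same flags; the swapped-operand
// and ADD-based cases additionally require the users' condition codes to be
// rewritten, which optimizeCompareInstr below takes care of.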
2920
2921static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2922 switch (MI->getOpcode()) {
2923 default: return false;
2924 case ARM::tLSLri:
2925 case ARM::tLSRri:
2926 case ARM::tLSLrr:
2927 case ARM::tLSRrr:
2928 case ARM::tSUBrr:
2929 case ARM::tADDrr:
2930 case ARM::tADDi3:
2931 case ARM::tADDi8:
2932 case ARM::tSUBi3:
2933 case ARM::tSUBi8:
2934 case ARM::tMUL:
2935 case ARM::tADC:
2936 case ARM::tSBC:
2937 case ARM::tRSB:
2938 case ARM::tAND:
2939 case ARM::tORR:
2940 case ARM::tEOR:
2941 case ARM::tBIC:
2942 case ARM::tMVN:
2943 case ARM::tASRri:
2944 case ARM::tASRrr:
2945 case ARM::tROR:
2946 IsThumb1 = true;
2947 [[fallthrough]];
2948 case ARM::RSBrr:
2949 case ARM::RSBri:
2950 case ARM::RSCrr:
2951 case ARM::RSCri:
2952 case ARM::ADDrr:
2953 case ARM::ADDri:
2954 case ARM::ADCrr:
2955 case ARM::ADCri:
2956 case ARM::SUBrr:
2957 case ARM::SUBri:
2958 case ARM::SBCrr:
2959 case ARM::SBCri:
2960 case ARM::t2RSBri:
2961 case ARM::t2ADDrr:
2962 case ARM::t2ADDri:
2963 case ARM::t2ADCrr:
2964 case ARM::t2ADCri:
2965 case ARM::t2SUBrr:
2966 case ARM::t2SUBri:
2967 case ARM::t2SBCrr:
2968 case ARM::t2SBCri:
2969 case ARM::ANDrr:
2970 case ARM::ANDri:
2971 case ARM::ANDrsr:
2972 case ARM::ANDrsi:
2973 case ARM::t2ANDrr:
2974 case ARM::t2ANDri:
2975 case ARM::t2ANDrs:
2976 case ARM::ORRrr:
2977 case ARM::ORRri:
2978 case ARM::ORRrsr:
2979 case ARM::ORRrsi:
2980 case ARM::t2ORRrr:
2981 case ARM::t2ORRri:
2982 case ARM::t2ORRrs:
2983 case ARM::EORrr:
2984 case ARM::EORri:
2985 case ARM::EORrsr:
2986 case ARM::EORrsi:
2987 case ARM::t2EORrr:
2988 case ARM::t2EORri:
2989 case ARM::t2EORrs:
2990 case ARM::BICri:
2991 case ARM::BICrr:
2992 case ARM::BICrsi:
2993 case ARM::BICrsr:
2994 case ARM::t2BICri:
2995 case ARM::t2BICrr:
2996 case ARM::t2BICrs:
2997 case ARM::t2LSRri:
2998 case ARM::t2LSRrr:
2999 case ARM::t2LSLri:
3000 case ARM::t2LSLrr:
3001 case ARM::MOVsr:
3002 case ARM::MOVsi:
3003 return true;
3004 }
3005}
3006
3007/// optimizeCompareInstr - Convert the instruction supplying the argument to the
3008/// comparison into one that sets the zero bit in the flags register;
3009/// Remove a redundant Compare instruction if an earlier instruction can set the
3010/// flags in the same way as Compare.
3011/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
3012/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
3013/// condition code of instructions which use the flags.
3014bool ARMBaseInstrInfo::optimizeCompareInstr(
3015 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
3016 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
3017 // Get the unique definition of SrcReg.
3018 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3019 if (!MI) return false;
3020
3021 // Masked compares sometimes use the same register as the corresponding 'and'.
3022 if (CmpMask != ~0) {
3023 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
3024 MI = nullptr;
3025 for (MachineRegisterInfo::use_instr_iterator
3026 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3027 UI != UE; ++UI) {
3028 if (UI->getParent() != CmpInstr.getParent())
3029 continue;
3030 MachineInstr *PotentialAND = &*UI;
3031 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
3032 isPredicated(*PotentialAND))
3033 continue;
3034 MI = PotentialAND;
3035 break;
3036 }
3037 if (!MI) return false;
3038 }
3039 }
3040
3041 // Get ready to iterate backward from CmpInstr.
3042 MachineBasicBlock::iterator I = CmpInstr, E = MI,
3043 B = CmpInstr.getParent()->begin();
3044
3045 // Early exit if CmpInstr is at the beginning of the BB.
3046 if (I == B) return false;
3047
3048 // There are two possible candidates which can be changed to set CPSR:
3049 // One is MI, the other is a SUB or ADD instruction.
3050 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3051 // ADDr[ri](r1, r2, X).
3052 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3053 MachineInstr *SubAdd = nullptr;
3054 if (SrcReg2 != 0)
3055 // MI is not a candidate for CMPrr.
3056 MI = nullptr;
3057 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3058 // Conservatively refuse to convert an instruction which isn't in the same
3059 // BB as the comparison.
3060 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3061 // Thus we cannot return here.
3062 if (CmpInstr.getOpcode() == ARM::CMPri ||
3063 CmpInstr.getOpcode() == ARM::t2CMPri ||
3064 CmpInstr.getOpcode() == ARM::tCMPi8)
3065 MI = nullptr;
3066 else
3067 return false;
3068 }
3069
3070 bool IsThumb1 = false;
3071 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3072 return false;
3073
3074 // We also want to do this peephole for cases like this: if (a*b == 0),
3075 // and optimise away the CMP instruction from the generated code sequence:
3076 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3077 // resulting from the select instruction, but these MOVS instructions for
3078 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3079 // However, if we only have MOVS instructions in between the CMP and the
3080 // other instruction (the MULS in this example), then the CPSR is dead so we
3081 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3082 // reordering and then continue the analysis hoping we can eliminate the
3083 // CMP. This peephole works on the vregs, so is still in SSA form. As a
3084 // consequence, the movs won't redefine/kill the MUL operands which would
3085 // make this reordering illegal.
3086 const TargetRegisterInfo *TRI = &getRegisterInfo();
3087 if (MI && IsThumb1) {
3088 --I;
3089 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3090 bool CanReorder = true;
3091 for (; I != E; --I) {
3092 if (I->getOpcode() != ARM::tMOVi8) {
3093 CanReorder = false;
3094 break;
3095 }
3096 }
3097 if (CanReorder) {
3098 MI = MI->removeFromParent();
3099 E = CmpInstr;
3100 CmpInstr.getParent()->insert(E, MI);
3101 }
3102 }
3103 I = CmpInstr;
3104 E = MI;
3105 }
3106
3107 // Check that CPSR isn't set between the comparison instruction and the one we
3108 // want to change. At the same time, search for SubAdd.
3109 bool SubAddIsThumb1 = false;
3110 do {
3111 const MachineInstr &Instr = *--I;
3112
3113 // Check whether CmpInstr can be made redundant by the current instruction.
3114 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3115 SubAddIsThumb1)) {
3116 SubAdd = &*I;
3117 break;
3118 }
3119
3120 // Allow E (which was initially MI) to be SubAdd but do not search before E.
3121 if (I == E)
3122 break;
3123
3124 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3125 Instr.readsRegister(ARM::CPSR, TRI))
3126 // This instruction modifies or uses CPSR after the one we want to
3127 // change. We can't do this transformation.
3128 return false;
3129
3130 if (I == B) {
3131 // In some cases, we scan the use-list of an instruction for an AND;
3132 // that AND is in the same BB, but may not be scheduled before the
3133 // corresponding TST. In that case, bail out.
3134 //
3135 // FIXME: We could try to reschedule the AND.
3136 return false;
3137 }
3138 } while (true);
3139
3140 // Return false if no candidates exist.
3141 if (!MI && !SubAdd)
3142 return false;
3143
3144 // If we found a SubAdd, use it as it will be closer to the CMP
3145 if (SubAdd) {
3146 MI = SubAdd;
3147 IsThumb1 = SubAddIsThumb1;
3148 }
3149
3150 // We can't use a predicated instruction - it doesn't always write the flags.
3151 if (isPredicated(*MI))
3152 return false;
3153
3154 // Scan forward for the use of CPSR
3155 // When checking against MI: if it's a conditional code that requires
3156 // checking of the V bit or C bit, then this is not safe to do.
3157 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3158 // If we are done with the basic block, we need to check whether CPSR is
3159 // live-out.
3160 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3161 OperandsToUpdate;
3162 bool isSafe = false;
3163 I = CmpInstr;
3164 E = CmpInstr.getParent()->end();
3165 while (!isSafe && ++I != E) {
3166 const MachineInstr &Instr = *I;
3167 for (unsigned IO = 0, EO = Instr.getNumOperands();
3168 !isSafe && IO != EO; ++IO) {
3169 const MachineOperand &MO = Instr.getOperand(IO);
3170 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3171 isSafe = true;
3172 break;
3173 }
3174 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3175 continue;
3176 if (MO.isDef()) {
3177 isSafe = true;
3178 break;
3179 }
3180 // Condition code is after the operand before CPSR except for VSELs.
3181 ARMCC::CondCodes CC;
3182 bool IsInstrVSel = true;
3183 switch (Instr.getOpcode()) {
3184 default:
3185 IsInstrVSel = false;
3186 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3187 break;
3188 case ARM::VSELEQD:
3189 case ARM::VSELEQS:
3190 case ARM::VSELEQH:
3191 CC = ARMCC::EQ;
3192 break;
3193 case ARM::VSELGTD:
3194 case ARM::VSELGTS:
3195 case ARM::VSELGTH:
3196 CC = ARMCC::GT;
3197 break;
3198 case ARM::VSELGED:
3199 case ARM::VSELGES:
3200 case ARM::VSELGEH:
3201 CC = ARMCC::GE;
3202 break;
3203 case ARM::VSELVSD:
3204 case ARM::VSELVSS:
3205 case ARM::VSELVSH:
3206 CC = ARMCC::VS;
3207 break;
3208 }
3209
3210 if (SubAdd) {
3211 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3212 // on CMP needs to be updated to be based on SUB.
3213 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3214 // needs to be modified.
3215 // Push the condition code operands to OperandsToUpdate.
3216 // If it is safe to remove CmpInstr, the condition code of these
3217 // operands will be modified.
3218 unsigned Opc = SubAdd->getOpcode();
3219 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3220 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3221 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3222 Opc == ARM::tSUBi8;
3223 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3224 if (!IsSub ||
3225 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3226 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3227 // VSel doesn't support condition code update.
3228 if (IsInstrVSel)
3229 return false;
3230 // Ensure we can swap the condition.
3232 if (NewCC == ARMCC::AL)
3233 return false;
3234 OperandsToUpdate.push_back(
3235 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3236 }
3237 } else {
3238 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3239 switch (CC) {
3240 case ARMCC::EQ: // Z
3241 case ARMCC::NE: // Z
3242 case ARMCC::MI: // N
3243 case ARMCC::PL: // N
3244 case ARMCC::AL: // none
3245 // CPSR can be used multiple times, we should continue.
3246 break;
3247 case ARMCC::HS: // C
3248 case ARMCC::LO: // C
3249 case ARMCC::VS: // V
3250 case ARMCC::VC: // V
3251 case ARMCC::HI: // C Z
3252 case ARMCC::LS: // C Z
3253 case ARMCC::GE: // N V
3254 case ARMCC::LT: // N V
3255 case ARMCC::GT: // Z N V
3256 case ARMCC::LE: // Z N V
3257 // The instruction uses the V bit or C bit which is not safe.
3258 return false;
3259 }
3260 }
3261 }
3262 }
3263
3264 // If CPSR is not killed nor re-defined, we should check whether it is
3265 // live-out. If it is live-out, do not optimize.
3266 if (!isSafe) {
3267 MachineBasicBlock *MBB = CmpInstr.getParent();
3268 for (MachineBasicBlock *Succ : MBB->successors())
3269 if (Succ->isLiveIn(ARM::CPSR))
3270 return false;
3271 }
3272
3273 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3274 // set CPSR so this is represented as an explicit output)
3275 if (!IsThumb1) {
3276 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3277 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3278 MI->getOperand(CPSRRegNum).setIsDef(true);
3279 }
3280 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3281 CmpInstr.eraseFromParent();
3282
3283 // Modify the condition code of operands in OperandsToUpdate.
3284 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3285 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3286 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3287 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3288
3289 MI->clearRegisterDeads(ARM::CPSR);
3290
3291 return true;
3292}
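// Net effect, sketched on a simple case: for "%2 = SUBrr %0, %1; ...;
// CMPrr %0, %1; Bcc" the optional cc_out operand of the SUB is switched to
// CPSR (making it a flag-setting SUBS) and the CMP is erased; when the
// operands are swapped, or an ADD is used instead, the condition codes
// collected in OperandsToUpdate are rewritten as well.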
3293
3294bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3295 // Do not sink MI if it might be used to optimize a redundant compare.
3296 // We heuristically only look at the instruction immediately following MI to
3297 // avoid potentially searching the entire basic block.
3298 if (isPredicated(MI))
3299 return true;
3300 MachineBasicBlock::const_iterator Next = &MI;
3301 ++Next;
3302 Register SrcReg, SrcReg2;
3303 int64_t CmpMask, CmpValue;
3304 bool IsThumb1;
3305 if (Next != MI.getParent()->end() &&
3306 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3307 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3308 return false;
3309 return true;
3310}
3311
3312bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3313 Register Reg,
3314 MachineRegisterInfo *MRI) const {
3315 // Fold large immediates into add, sub, or, xor.
3316 unsigned DefOpc = DefMI.getOpcode();
3317 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3318 DefOpc != ARM::tMOVi32imm)
3319 return false;
3320 if (!DefMI.getOperand(1).isImm())
3321 // Could be t2MOVi32imm @xx
3322 return false;
3323
3324 if (!MRI->hasOneNonDBGUse(Reg))
3325 return false;
3326
3327 const MCInstrDesc &DefMCID = DefMI.getDesc();
3328 if (DefMCID.hasOptionalDef()) {
3329 unsigned NumOps = DefMCID.getNumOperands();
3330 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3331 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3332 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3333 // to delete DefMI.
3334 return false;
3335 }
3336
3337 const MCInstrDesc &UseMCID = UseMI.getDesc();
3338 if (UseMCID.hasOptionalDef()) {
3339 unsigned NumOps = UseMCID.getNumOperands();
3340 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3341 // If the instruction sets the flag, do not attempt this optimization
3342 // since it may change the semantics of the code.
3343 return false;
3344 }
3345
3346 unsigned UseOpc = UseMI.getOpcode();
3347 unsigned NewUseOpc = 0;
3348 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3349 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3350 bool Commute = false;
3351 switch (UseOpc) {
3352 default: return false;
3353 case ARM::SUBrr:
3354 case ARM::ADDrr:
3355 case ARM::ORRrr:
3356 case ARM::EORrr:
3357 case ARM::t2SUBrr:
3358 case ARM::t2ADDrr:
3359 case ARM::t2ORRrr:
3360 case ARM::t2EORrr: {
3361 Commute = UseMI.getOperand(2).getReg() != Reg;
3362 switch (UseOpc) {
3363 default: break;
3364 case ARM::ADDrr:
3365 case ARM::SUBrr:
3366 if (UseOpc == ARM::SUBrr && Commute)
3367 return false;
3368
3369 // ADD/SUB are special because they're essentially the same operation, so
3370 // we can handle a larger range of immediates.
3371 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3372 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3373 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3374 ImmVal = -ImmVal;
3375 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3376 } else
3377 return false;
3378 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3379 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3380 break;
3381 case ARM::ORRrr:
3382 case ARM::EORrr:
3383 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3384 return false;
3385 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3386 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3387 switch (UseOpc) {
3388 default: break;
3389 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3390 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3391 }
3392 break;
3393 case ARM::t2ADDrr:
3394 case ARM::t2SUBrr: {
3395 if (UseOpc == ARM::t2SUBrr && Commute)
3396 return false;
3397
3398 // ADD/SUB are special because they're essentially the same operation, so
3399 // we can handle a larger range of immediates.
3400 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3401 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3402 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3403 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3404 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3405 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3406 ImmVal = -ImmVal;
3407 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3408 } else
3409 return false;
3410 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3411 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3412 break;
3413 }
3414 case ARM::t2ORRrr:
3415 case ARM::t2EORrr:
3416 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3417 return false;
3418 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3419 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3420 switch (UseOpc) {
3421 default: break;
3422 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3423 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3424 }
3425 break;
3426 }
3427 }
3428 }
3429
3430 unsigned OpIdx = Commute ? 2 : 1;
3431 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3432 bool isKill = UseMI.getOperand(OpIdx).isKill();
3433 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3434 Register NewReg = MRI->createVirtualRegister(TRC);
3435 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3436 NewReg)
3437 .addReg(Reg1, getKillRegState(isKill))
3438 .addImm(SOImmValV1)
3439 .add(predOps(ARMCC::AL))
3440 .add(condCodeOp());
3441 UseMI.setDesc(get(NewUseOpc));
3442 UseMI.getOperand(1).setReg(NewReg);
3443 UseMI.getOperand(1).setIsKill();
3444 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3445 DefMI.eraseFromParent();
3446 // FIXME: t2ADDrr should be split, as different rules apply when writing to SP.
3447 // Just as t2ADDri, which was split into [t2ADDri, t2ADDspImm].
3448 // Then the below code will not be needed, as the input/output register
3449 // classes will be rgpr or gprSP.
3450 // For now, we fix the UseMI operand explicitly here:
3451 switch(NewUseOpc){
3452 case ARM::t2ADDspImm:
3453 case ARM::t2SUBspImm:
3454 case ARM::t2ADDri:
3455 case ARM::t2SUBri:
3456 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3457 }
3458 return true;
3459}
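// For example, "%1 = MOVi32imm 0xAB00CD; %2 = ADDrr %0, %1" can be rewritten
// as "%3 = ADDri %0, 0xCD; %2 = ADDri %3, 0xAB0000", since 0xAB00CD is a
// two-part SO immediate; the original MOVi32imm is then erased.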
3460
3461static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3462 const MachineInstr &MI) {
3463 switch (MI.getOpcode()) {
3464 default: {
3465 const MCInstrDesc &Desc = MI.getDesc();
3466 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3467 assert(UOps >= 0 && "bad # UOps");
3468 return UOps;
3469 }
3470
3471 case ARM::LDRrs:
3472 case ARM::LDRBrs:
3473 case ARM::STRrs:
3474 case ARM::STRBrs: {
3475 unsigned ShOpVal = MI.getOperand(3).getImm();
3476 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3477 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3478 if (!isSub &&
3479 (ShImm == 0 ||
3480 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3481 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3482 return 1;
3483 return 2;
3484 }
3485
3486 case ARM::LDRH:
3487 case ARM::STRH: {
3488 if (!MI.getOperand(2).getReg())
3489 return 1;
3490
3491 unsigned ShOpVal = MI.getOperand(3).getImm();
3492 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3493 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3494 if (!isSub &&
3495 (ShImm == 0 ||
3496 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3497 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3498 return 1;
3499 return 2;
3500 }
3501
3502 case ARM::LDRSB:
3503 case ARM::LDRSH:
3504 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3505
3506 case ARM::LDRSB_POST:
3507 case ARM::LDRSH_POST: {
3508 Register Rt = MI.getOperand(0).getReg();
3509 Register Rm = MI.getOperand(3).getReg();
3510 return (Rt == Rm) ? 4 : 3;
3511 }
3512
3513 case ARM::LDR_PRE_REG:
3514 case ARM::LDRB_PRE_REG: {
3515 Register Rt = MI.getOperand(0).getReg();
3516 Register Rm = MI.getOperand(3).getReg();
3517 if (Rt == Rm)
3518 return 3;
3519 unsigned ShOpVal = MI.getOperand(4).getImm();
3520 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3521 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3522 if (!isSub &&
3523 (ShImm == 0 ||
3524 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3525 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3526 return 2;
3527 return 3;
3528 }
3529
3530 case ARM::STR_PRE_REG:
3531 case ARM::STRB_PRE_REG: {
3532 unsigned ShOpVal = MI.getOperand(4).getImm();
3533 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3534 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3535 if (!isSub &&
3536 (ShImm == 0 ||
3537 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3538 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3539 return 2;
3540 return 3;
3541 }
3542
3543 case ARM::LDRH_PRE:
3544 case ARM::STRH_PRE: {
3545 Register Rt = MI.getOperand(0).getReg();
3546 Register Rm = MI.getOperand(3).getReg();
3547 if (!Rm)
3548 return 2;
3549 if (Rt == Rm)
3550 return 3;
3551 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3552 }
3553
3554 case ARM::LDR_POST_REG:
3555 case ARM::LDRB_POST_REG:
3556 case ARM::LDRH_POST: {
3557 Register Rt = MI.getOperand(0).getReg();
3558 Register Rm = MI.getOperand(3).getReg();
3559 return (Rt == Rm) ? 3 : 2;
3560 }
3561
3562 case ARM::LDR_PRE_IMM:
3563 case ARM::LDRB_PRE_IMM:
3564 case ARM::LDR_POST_IMM:
3565 case ARM::LDRB_POST_IMM:
3566 case ARM::STRB_POST_IMM:
3567 case ARM::STRB_POST_REG:
3568 case ARM::STRB_PRE_IMM:
3569 case ARM::STRH_POST:
3570 case ARM::STR_POST_IMM:
3571 case ARM::STR_POST_REG:
3572 case ARM::STR_PRE_IMM:
3573 return 2;
3574
3575 case ARM::LDRSB_PRE:
3576 case ARM::LDRSH_PRE: {
3577 Register Rm = MI.getOperand(3).getReg();
3578 if (Rm == 0)
3579 return 3;
3580 Register Rt = MI.getOperand(0).getReg();
3581 if (Rt == Rm)
3582 return 4;
3583 unsigned ShOpVal = MI.getOperand(4).getImm();
3584 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3585 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3586 if (!isSub &&
3587 (ShImm == 0 ||
3588 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3589 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3590 return 3;
3591 return 4;
3592 }
3593
3594 case ARM::LDRD: {
3595 Register Rt = MI.getOperand(0).getReg();
3596 Register Rn = MI.getOperand(2).getReg();
3597 Register Rm = MI.getOperand(3).getReg();
3598 if (Rm)
3599 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3600 : 3;
3601 return (Rt == Rn) ? 3 : 2;
3602 }
3603
3604 case ARM::STRD: {
3605 Register Rm = MI.getOperand(3).getReg();
3606 if (Rm)
3607 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3608 : 3;
3609 return 2;
3610 }
3611
3612 case ARM::LDRD_POST:
3613 case ARM::t2LDRD_POST:
3614 return 3;
3615
3616 case ARM::STRD_POST:
3617 case ARM::t2STRD_POST:
3618 return 4;
3619
3620 case ARM::LDRD_PRE: {
3621 Register Rt = MI.getOperand(0).getReg();
3622 Register Rn = MI.getOperand(3).getReg();
3623 Register Rm = MI.getOperand(4).getReg();
3624 if (Rm)
3625 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3626 : 4;
3627 return (Rt == Rn) ? 4 : 3;
3628 }
3629
3630 case ARM::t2LDRD_PRE: {
3631 Register Rt = MI.getOperand(0).getReg();
3632 Register Rn = MI.getOperand(3).getReg();
3633 return (Rt == Rn) ? 4 : 3;
3634 }
3635
3636 case ARM::STRD_PRE: {
3637 Register Rm = MI.getOperand(4).getReg();
3638 if (Rm)
3639 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3640 : 4;
3641 return 3;
3642 }
3643
3644 case ARM::t2STRD_PRE:
3645 return 3;
3646
3647 case ARM::t2LDR_POST:
3648 case ARM::t2LDRB_POST:
3649 case ARM::t2LDRB_PRE:
3650 case ARM::t2LDRSBi12:
3651 case ARM::t2LDRSBi8:
3652 case ARM::t2LDRSBpci:
3653 case ARM::t2LDRSBs:
3654 case ARM::t2LDRH_POST:
3655 case ARM::t2LDRH_PRE:
3656 case ARM::t2LDRSBT:
3657 case ARM::t2LDRSB_POST:
3658 case ARM::t2LDRSB_PRE:
3659 case ARM::t2LDRSH_POST:
3660 case ARM::t2LDRSH_PRE:
3661 case ARM::t2LDRSHi12:
3662 case ARM::t2LDRSHi8:
3663 case ARM::t2LDRSHpci:
3664 case ARM::t2LDRSHs:
3665 return 2;
3666
3667 case ARM::t2LDRDi8: {
3668 Register Rt = MI.getOperand(0).getReg();
3669 Register Rn = MI.getOperand(2).getReg();
3670 return (Rt == Rn) ? 3 : 2;
3671 }
3672
3673 case ARM::t2STRB_POST:
3674 case ARM::t2STRB_PRE:
3675 case ARM::t2STRBs:
3676 case ARM::t2STRDi8:
3677 case ARM::t2STRH_POST:
3678 case ARM::t2STRH_PRE:
3679 case ARM::t2STRHs:
3680 case ARM::t2STR_POST:
3681 case ARM::t2STR_PRE:
3682 case ARM::t2STRs:
3683 return 2;
3684 }
3685}
3686
3687// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3688 // can't be easily determined, return 0 (missing MachineMemOperand).
3689//
3690// FIXME: The current MachineInstr design does not support relying on machine
3691// mem operands to determine the width of a memory access. Instead, we expect
3692// the target to provide this information based on the instruction opcode and
3693// operands. However, using MachineMemOperand is the best solution now for
3694// two reasons:
3695//
3696// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3697// operands. This is much more dangerous than using the MachineMemOperand
3698// sizes because CodeGen passes can insert/remove optional machine operands. In
3699// fact, it's totally incorrect for preRA passes and appears to be wrong for
3700// postRA passes as well.
3701//
3702// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3703// machine model that calls this should handle the unknown (zero size) case.
3704//
3705// Long term, we should require a target hook that verifies MachineMemOperand
3706// sizes during MC lowering. That target hook should be local to MC lowering
3707// because we can't ensure that it is aware of other MI forms. Doing this will
3708// ensure that MachineMemOperands are correctly propagated through all passes.
3709 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3710 unsigned Size = 0;
3711 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3712 E = MI.memoperands_end();
3713 I != E; ++I) {
3714 Size += (*I)->getSize().getValue();
3715 }
3716 // FIXME: The scheduler currently can't handle values larger than 16. But
3717 // the values can actually go up to 32 for floating-point load/store
3718 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3719 // operations isn't right; we could end up with "extra" memory operands for
3720 // various reasons, like tail merge merging two memory operations.
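 // E.g. an LDMIA carrying four 4-byte memoperands gives Size == 16 and
 // reports 16 / 4 = 4 addresses.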
3721 return std::min(Size / 4, 16U);
3722}
3723
3724static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3725 unsigned NumRegs) {
3726 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3727 switch (Opc) {
3728 default:
3729 break;
3730 case ARM::VLDMDIA_UPD:
3731 case ARM::VLDMDDB_UPD:
3732 case ARM::VLDMSIA_UPD:
3733 case ARM::VLDMSDB_UPD:
3734 case ARM::VSTMDIA_UPD:
3735 case ARM::VSTMDDB_UPD:
3736 case ARM::VSTMSIA_UPD:
3737 case ARM::VSTMSDB_UPD:
3738 case ARM::LDMIA_UPD:
3739 case ARM::LDMDA_UPD:
3740 case ARM::LDMDB_UPD:
3741 case ARM::LDMIB_UPD:
3742 case ARM::STMIA_UPD:
3743 case ARM::STMDA_UPD:
3744 case ARM::STMDB_UPD:
3745 case ARM::STMIB_UPD:
3746 case ARM::tLDMIA_UPD:
3747 case ARM::tSTMIA_UPD:
3748 case ARM::t2LDMIA_UPD:
3749 case ARM::t2LDMDB_UPD:
3750 case ARM::t2STMIA_UPD:
3751 case ARM::t2STMDB_UPD:
3752 ++UOps; // One for base register writeback.
3753 break;
3754 case ARM::LDMIA_RET:
3755 case ARM::tPOP_RET:
3756 case ARM::t2LDMIA_RET:
3757 UOps += 2; // One for base reg wb, one for write to pc.
3758 break;
3759 }
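 // E.g. a tPOP_RET restoring {r4, pc} counts 1 (address) + 2 (registers) +
 // 2 (writeback and PC write) = 5 micro-ops.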
3760 return UOps;
3761}
3762
3763 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3764 const MachineInstr &MI) const {
3765 if (!ItinData || ItinData->isEmpty())
3766 return 1;
3767
3768 const MCInstrDesc &Desc = MI.getDesc();
3769 unsigned Class = Desc.getSchedClass();
3770 int ItinUOps = ItinData->getNumMicroOps(Class);
3771 if (ItinUOps >= 0) {
3772 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3773 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3774
3775 return ItinUOps;
3776 }
3777
3778 unsigned Opc = MI.getOpcode();
3779 switch (Opc) {
3780 default:
3781 llvm_unreachable("Unexpected multi-uops instruction!");
3782 case ARM::VLDMQIA:
3783 case ARM::VSTMQIA:
3784 return 2;
3785
3786 // The number of uOps for load / store multiple is determined by the number
3787 // of registers.
3788 //
3789 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3790 // same cycle. The scheduling for the first load / store must be done
3791 // separately by assuming the address is not 64-bit aligned.
3792 //
3793 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3794 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3795 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
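 // E.g. a VLDMDIA of 5 D-registers on Cortex-A9: (5 / 2) + (5 % 2) + 1 = 4 uops.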
3796 case ARM::VLDMDIA:
3797 case ARM::VLDMDIA_UPD:
3798 case ARM::VLDMDDB_UPD:
3799 case ARM::VLDMSIA:
3800 case ARM::VLDMSIA_UPD:
3801 case ARM::VLDMSDB_UPD:
3802 case ARM::VSTMDIA:
3803 case ARM::VSTMDIA_UPD:
3804 case ARM::VSTMDDB_UPD:
3805 case ARM::VSTMSIA:
3806 case ARM::VSTMSIA_UPD:
3807 case ARM::VSTMSDB_UPD: {
3808 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3809 return (NumRegs / 2) + (NumRegs % 2) + 1;
3810 }
3811
3812 case ARM::LDMIA_RET:
3813 case ARM::LDMIA:
3814 case ARM::LDMDA:
3815 case ARM::LDMDB:
3816 case ARM::LDMIB:
3817 case ARM::LDMIA_UPD:
3818 case ARM::LDMDA_UPD:
3819 case ARM::LDMDB_UPD:
3820 case ARM::LDMIB_UPD:
3821 case ARM::STMIA:
3822 case ARM::STMDA:
3823 case ARM::STMDB:
3824 case ARM::STMIB:
3825 case ARM::STMIA_UPD:
3826 case ARM::STMDA_UPD:
3827 case ARM::STMDB_UPD:
3828 case ARM::STMIB_UPD:
3829 case ARM::tLDMIA:
3830 case ARM::tLDMIA_UPD:
3831 case ARM::tSTMIA_UPD:
3832 case ARM::tPOP_RET:
3833 case ARM::tPOP:
3834 case ARM::tPUSH:
3835 case ARM::t2LDMIA_RET:
3836 case ARM::t2LDMIA:
3837 case ARM::t2LDMDB:
3838 case ARM::t2LDMIA_UPD:
3839 case ARM::t2LDMDB_UPD:
3840 case ARM::t2STMIA:
3841 case ARM::t2STMDB:
3842 case ARM::t2STMIA_UPD:
3843 case ARM::t2STMDB_UPD: {
3844 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3845 switch (Subtarget.getLdStMultipleTiming()) {
3846 case ARMSubtarget::SingleIssuePlusExtras:
3847 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3848 case ARMSubtarget::SingleIssue:
3849 // Assume the worst.
3850 return NumRegs;
3851 case ARMSubtarget::DoubleIssue: {
3852 if (NumRegs < 4)
3853 return 2;
3854 // 4 registers would be issued: 2, 2.
3855 // 5 registers would be issued: 2, 2, 1.
3856 unsigned UOps = (NumRegs / 2);
3857 if (NumRegs % 2)
3858 ++UOps;
3859 return UOps;
3860 }
3861 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3862 unsigned UOps = (NumRegs / 2);
3863 // If there is an odd number of registers or if it's not 64-bit aligned,
3864 // then it takes an extra AGU (Address Generation Unit) cycle.
3865 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3866 (*MI.memoperands_begin())->getAlign() < Align(8))
3867 ++UOps;
3868 return UOps;
3869 }
3870 }
3871 }
3872 }
3873 llvm_unreachable("Didn't find the number of microops");
3874}
3875
3876 std::optional<unsigned>
3877 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3878 const MCInstrDesc &DefMCID, unsigned DefClass,
3879 unsigned DefIdx, unsigned DefAlign) const {
3880 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3881 if (RegNo <= 0)
3882 // Def is the address writeback.
3883 return ItinData->getOperandCycle(DefClass, DefIdx);
3884
3885 unsigned DefCycle;
3886 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3887 // (regno / 2) + (regno % 2) + 1
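 // E.g. RegNo == 5: 5 / 2 + 1 = 3, plus 1 for the odd register, so the def
 // is ready at cycle 4.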
3888 DefCycle = RegNo / 2 + 1;
3889 if (RegNo % 2)
3890 ++DefCycle;
3891 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3892 DefCycle = RegNo;
3893 bool isSLoad = false;
3894
3895 switch (DefMCID.getOpcode()) {
3896 default: break;
3897 case ARM::VLDMSIA:
3898 case ARM::VLDMSIA_UPD:
3899 case ARM::VLDMSDB_UPD:
3900 isSLoad = true;
3901 break;
3902 }
3903
3904 // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3905 // then it takes an extra cycle.
3906 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3907 ++DefCycle;
3908 } else {
3909 // Assume the worst.
3910 DefCycle = RegNo + 2;
3911 }
3912
3913 return DefCycle;
3914}
3915
3916 std::optional<unsigned>
3917 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3918 const MCInstrDesc &DefMCID, unsigned DefClass,
3919 unsigned DefIdx, unsigned DefAlign) const {
3920 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3921 if (RegNo <= 0)
3922 // Def is the address writeback.
3923 return ItinData->getOperandCycle(DefClass, DefIdx);
3924
3925 unsigned DefCycle;
3926 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3927 // 4 registers would be issued: 1, 2, 1.
3928 // 5 registers would be issued: 1, 2, 2.
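 // E.g. RegNo == 5: issue cycle max(5 / 2, 1) = 2, so the result is
 // available at 2 + 2 = 4.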
3929 DefCycle = RegNo / 2;
3930 if (DefCycle < 1)
3931 DefCycle = 1;
3932 // Result latency is issue cycle + 2: E2.
3933 DefCycle += 2;
3934 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3935 DefCycle = (RegNo / 2);
3936 // If there is an odd number of registers or if it's not 64-bit aligned,
3937 // then it takes an extra AGU (Address Generation Unit) cycle.
3938 if ((RegNo % 2) || DefAlign < 8)
3939 ++DefCycle;
3940 // Result latency is AGU cycles + 2.
3941 DefCycle += 2;
3942 } else {
3943 // Assume the worst.
3944 DefCycle = RegNo + 2;
3945 }
3946
3947 return DefCycle;
3948}
3949
3950 std::optional<unsigned>
3951 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3952 const MCInstrDesc &UseMCID, unsigned UseClass,
3953 unsigned UseIdx, unsigned UseAlign) const {
3954 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3955 if (RegNo <= 0)
3956 return ItinData->getOperandCycle(UseClass, UseIdx);
3957
3958 unsigned UseCycle;
3959 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3960 // (regno / 2) + (regno % 2) + 1
3961 UseCycle = RegNo / 2 + 1;
3962 if (RegNo % 2)
3963 ++UseCycle;
3964 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3965 UseCycle = RegNo;
3966 bool isSStore = false;
3967
3968 switch (UseMCID.getOpcode()) {
3969 default: break;
3970 case ARM::VSTMSIA:
3971 case ARM::VSTMSIA_UPD:
3972 case ARM::VSTMSDB_UPD:
3973 isSStore = true;
3974 break;
3975 }
3976
3977 // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3978 // then it takes an extra cycle.
3979 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3980 ++UseCycle;
3981 } else {
3982 // Assume the worst.
3983 UseCycle = RegNo + 2;
3984 }
3985
3986 return UseCycle;
3987}
3988
3989 std::optional<unsigned>
3990 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3991 const MCInstrDesc &UseMCID, unsigned UseClass,
3992 unsigned UseIdx, unsigned UseAlign) const {
3993 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3994 if (RegNo <= 0)
3995 return ItinData->getOperandCycle(UseClass, UseIdx);
3996
3997 unsigned UseCycle;
3998 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3999 UseCycle = RegNo / 2;
4000 if (UseCycle < 2)
4001 UseCycle = 2;
4002 // Read in E3.
4003 UseCycle += 2;
4004 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
4005 UseCycle = (RegNo / 2);
4006 // If there is an odd number of registers or if it's not 64-bit aligned,
4007 // then it takes an extra AGU (Address Generation Unit) cycle.
4008 if ((RegNo % 2) || UseAlign < 8)
4009 ++UseCycle;
4010 } else {
4011 // Assume the worst.
4012 UseCycle = 1;
4013 }
4014 return UseCycle;
4015}
4016
4017 std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4018 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
4019 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
4020 unsigned UseIdx, unsigned UseAlign) const {
4021 unsigned DefClass = DefMCID.getSchedClass();
4022 unsigned UseClass = UseMCID.getSchedClass();
4023
4024 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
4025 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4026
4027 // This may be a def / use of a variable_ops instruction; the operand
4028 // latency might be determinable dynamically. Let the target try to
4029 // figure it out.
4030 std::optional<unsigned> DefCycle;
4031 bool LdmBypass = false;
4032 switch (DefMCID.getOpcode()) {
4033 default:
4034 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4035 break;
4036
4037 case ARM::VLDMDIA:
4038 case ARM::VLDMDIA_UPD:
4039 case ARM::VLDMDDB_UPD:
4040 case ARM::VLDMSIA:
4041 case ARM::VLDMSIA_UPD:
4042 case ARM::VLDMSDB_UPD:
4043 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4044 break;
4045
4046 case ARM::LDMIA_RET:
4047 case ARM::LDMIA:
4048 case ARM::LDMDA:
4049 case ARM::LDMDB:
4050 case ARM::LDMIB:
4051 case ARM::LDMIA_UPD:
4052 case ARM::LDMDA_UPD:
4053 case ARM::LDMDB_UPD:
4054 case ARM::LDMIB_UPD:
4055 case ARM::tLDMIA:
4056 case ARM::tLDMIA_UPD:
4057 case ARM::tPUSH:
4058 case ARM::t2LDMIA_RET:
4059 case ARM::t2LDMIA:
4060 case ARM::t2LDMDB:
4061 case ARM::t2LDMIA_UPD:
4062 case ARM::t2LDMDB_UPD:
4063 LdmBypass = true;
4064 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4065 break;
4066 }
4067
4068 if (!DefCycle)
4069 // We can't seem to determine the result latency of the def, assume it's 2.
4070 DefCycle = 2;
4071
4072 std::optional<unsigned> UseCycle;
4073 switch (UseMCID.getOpcode()) {
4074 default:
4075 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4076 break;
4077
4078 case ARM::VSTMDIA:
4079 case ARM::VSTMDIA_UPD:
4080 case ARM::VSTMDDB_UPD:
4081 case ARM::VSTMSIA:
4082 case ARM::VSTMSIA_UPD:
4083 case ARM::VSTMSDB_UPD:
4084 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4085 break;
4086
4087 case ARM::STMIA:
4088 case ARM::STMDA:
4089 case ARM::STMDB:
4090 case ARM::STMIB:
4091 case ARM::STMIA_UPD:
4092 case ARM::STMDA_UPD:
4093 case ARM::STMDB_UPD:
4094 case ARM::STMIB_UPD:
4095 case ARM::tSTMIA_UPD:
4096 case ARM::tPOP_RET:
4097 case ARM::tPOP:
4098 case ARM::t2STMIA:
4099 case ARM::t2STMDB:
4100 case ARM::t2STMIA_UPD:
4101 case ARM::t2STMDB_UPD:
4102 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4103 break;
4104 }
4105
4106 if (!UseCycle)
4107 // Assume it's read in the first stage.
4108 UseCycle = 1;
4109
4110 if (UseCycle > *DefCycle + 1)
4111 return std::nullopt;
4112
4113 UseCycle = *DefCycle - *UseCycle + 1;
4114 if (UseCycle > 0u) {
4115 if (LdmBypass) {
4116 // It's a variable_ops instruction so we can't use DefIdx here. Just use
4117 // first def operand.
4118 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4119 UseClass, UseIdx))
4120 UseCycle = *UseCycle - 1;
4121 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4122 UseClass, UseIdx)) {
4123 UseCycle = *UseCycle - 1;
4124 }
4125 }
4126
4127 return UseCycle;
4128}
4129
4130 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4131 const MachineInstr *MI, unsigned Reg,
4132 unsigned &DefIdx, unsigned &Dist) {
4133 Dist = 0;
4134
4135 MachineBasicBlock::const_iterator I = MI; ++I;
4136 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4137 assert(II->isInsideBundle() && "Empty bundle?");
4138
4139 int Idx = -1;
4140 while (II->isInsideBundle()) {
4141 Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
4142 if (Idx != -1)
4143 break;
4144 --II;
4145 ++Dist;
4146 }
4147
4148 assert(Idx != -1 && "Cannot find bundled definition!");
4149 DefIdx = Idx;
4150 return &*II;
4151}
4152
4153 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4154 const MachineInstr &MI, unsigned Reg,
4155 unsigned &UseIdx, unsigned &Dist) {
4156 Dist = 0;
4157
4158 MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4159 assert(II->isInsideBundle() && "Empty bundle?");
4160 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4161
4162 // FIXME: This doesn't properly handle multiple uses.
4163 int Idx = -1;
4164 while (II != E && II->isInsideBundle()) {
4165 Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
4166 if (Idx != -1)
4167 break;
4168 if (II->getOpcode() != ARM::t2IT)
4169 ++Dist;
4170 ++II;
4171 }
4172
4173 if (Idx == -1) {
4174 Dist = 0;
4175 return nullptr;
4176 }
4177
4178 UseIdx = Idx;
4179 return &*II;
4180}
4181
4182/// Return the number of cycles to add to (or subtract from) the static
4183/// itinerary based on the def opcode and alignment. The caller will ensure that
4184/// adjusted latency is at least one cycle.
4185 static int adjustDefLatency(const ARMSubtarget &Subtarget,
4186 const MachineInstr &DefMI,
4187 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4188 int Adjust = 0;
4189 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4190 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4191 // variants are one cycle cheaper.
4192 switch (DefMCID.getOpcode()) {
4193 default: break;
4194 case ARM::LDRrs:
4195 case ARM::LDRBrs: {
4196 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4197 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4198 if (ShImm == 0 ||
4199 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4200 --Adjust;
4201 break;
4202 }
4203 case ARM::t2LDRs:
4204 case ARM::t2LDRBs:
4205 case ARM::t2LDRHs:
4206 case ARM::t2LDRSHs: {
4207 // Thumb2 mode: lsl only.
4208 unsigned ShAmt = DefMI.getOperand(3).getImm();
4209 if (ShAmt == 0 || ShAmt == 2)
4210 --Adjust;
4211 break;
4212 }
4213 }
4214 } else if (Subtarget.isSwift()) {
4215 // FIXME: Properly handle all of the latency adjustments for address
4216 // writeback.
4217 switch (DefMCID.getOpcode()) {
4218 default: break;
4219 case ARM::LDRrs:
4220 case ARM::LDRBrs: {
4221 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4222 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4223 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4224 if (!isSub &&
4225 (ShImm == 0 ||
4226 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4227 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4228 Adjust -= 2;
4229 else if (!isSub &&
4230 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4231 --Adjust;
4232 break;
4233 }
4234 case ARM::t2LDRs:
4235 case ARM::t2LDRBs:
4236 case ARM::t2LDRHs:
4237 case ARM::t2LDRSHs: {
4238 // Thumb2 mode: lsl only.
4239 unsigned ShAmt = DefMI.getOperand(3).getImm();
4240 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4241 Adjust -= 2;
4242 break;
4243 }
4244 }
4245 }
4246
4247 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4248 switch (DefMCID.getOpcode()) {
4249 default: break;
4250 case ARM::VLD1q8:
4251 case ARM::VLD1q16:
4252 case ARM::VLD1q32:
4253 case ARM::VLD1q64:
4254 case ARM::VLD1q8wb_fixed:
4255 case ARM::VLD1q16wb_fixed:
4256 case ARM::VLD1q32wb_fixed:
4257 case ARM::VLD1q64wb_fixed:
4258 case ARM::VLD1q8wb_register:
4259 case ARM::VLD1q16wb_register:
4260 case ARM::VLD1q32wb_register:
4261 case ARM::VLD1q64wb_register:
4262 case ARM::VLD2d8:
4263 case ARM::VLD2d16:
4264 case ARM::VLD2d32:
4265 case ARM::VLD2q8:
4266 case ARM::VLD2q16:
4267 case ARM::VLD2q32:
4268 case ARM::VLD2d8wb_fixed:
4269 case ARM::VLD2d16wb_fixed:
4270 case ARM::VLD2d32wb_fixed:
4271 case ARM::VLD2q8wb_fixed:
4272 case ARM::VLD2q16wb_fixed:
4273 case ARM::VLD2q32wb_fixed:
4274 case ARM::VLD2d8wb_register:
4275 case ARM::VLD2d16wb_register:
4276 case ARM::VLD2d32wb_register:
4277 case ARM::VLD2q8wb_register:
4278 case ARM::VLD2q16wb_register:
4279 case ARM::VLD2q32wb_register:
4280 case ARM::VLD3d8:
4281 case ARM::VLD3d16:
4282 case ARM::VLD3d32:
4283 case ARM::VLD1d64T:
4284 case ARM::VLD3d8_UPD:
4285 case ARM::VLD3d16_UPD:
4286 case ARM::VLD3d32_UPD:
4287 case ARM::VLD1d64Twb_fixed:
4288 case ARM::VLD1d64Twb_register:
4289 case ARM::VLD3q8_UPD:
4290 case ARM::VLD3q16_UPD:
4291 case ARM::VLD3q32_UPD:
4292 case ARM::VLD4d8:
4293 case ARM::VLD4d16:
4294 case ARM::VLD4d32:
4295 case ARM::VLD1d64Q:
4296 case ARM::VLD4d8_UPD:
4297 case ARM::VLD4d16_UPD:
4298 case ARM::VLD4d32_UPD:
4299 case ARM::VLD1d64Qwb_fixed:
4300 case ARM::VLD1d64Qwb_register:
4301 case ARM::VLD4q8_UPD:
4302 case ARM::VLD4q16_UPD:
4303 case ARM::VLD4q32_UPD:
4304 case ARM::VLD1DUPq8:
4305 case ARM::VLD1DUPq16:
4306 case ARM::VLD1DUPq32:
4307 case ARM::VLD1DUPq8wb_fixed:
4308 case ARM::VLD1DUPq16wb_fixed:
4309 case ARM::VLD1DUPq32wb_fixed:
4310 case ARM::VLD1DUPq8wb_register:
4311 case ARM::VLD1DUPq16wb_register:
4312 case ARM::VLD1DUPq32wb_register:
4313 case ARM::VLD2DUPd8:
4314 case ARM::VLD2DUPd16:
4315 case ARM::VLD2DUPd32:
4316 case ARM::VLD2DUPd8wb_fixed:
4317 case ARM::VLD2DUPd16wb_fixed:
4318 case ARM::VLD2DUPd32wb_fixed:
4319 case ARM::VLD2DUPd8wb_register:
4320 case ARM::VLD2DUPd16wb_register:
4321 case ARM::VLD2DUPd32wb_register:
4322 case ARM::VLD4DUPd8:
4323 case ARM::VLD4DUPd16:
4324 case ARM::VLD4DUPd32:
4325 case ARM::VLD4DUPd8_UPD:
4326 case ARM::VLD4DUPd16_UPD:
4327 case ARM::VLD4DUPd32_UPD:
4328 case ARM::VLD1LNd8:
4329 case ARM::VLD1LNd16:
4330 case ARM::VLD1LNd32:
4331 case ARM::VLD1LNd8_UPD:
4332 case ARM::VLD1LNd16_UPD:
4333 case ARM::VLD1LNd32_UPD:
4334 case ARM::VLD2LNd8:
4335 case ARM::VLD2LNd16:
4336 case ARM::VLD2LNd32:
4337 case ARM::VLD2LNq16:
4338 case ARM::VLD2LNq32:
4339 case ARM::VLD2LNd8_UPD:
4340 case ARM::VLD2LNd16_UPD:
4341 case ARM::VLD2LNd32_UPD:
4342 case ARM::VLD2LNq16_UPD:
4343 case ARM::VLD2LNq32_UPD:
4344 case ARM::VLD4LNd8:
4345 case ARM::VLD4LNd16:
4346 case ARM::VLD4LNd32:
4347 case ARM::VLD4LNq16:
4348 case ARM::VLD4LNq32:
4349 case ARM::VLD4LNd8_UPD:
4350 case ARM::VLD4LNd16_UPD:
4351 case ARM::VLD4LNd32_UPD:
4352 case ARM::VLD4LNq16_UPD:
4353 case ARM::VLD4LNq32_UPD:
4354 // If the address is not 64-bit aligned, the latencies of these
4355 // instructions increase by one.
4356 ++Adjust;
4357 break;
4358 }
4359 }
4360 return Adjust;
4361}
4362
4363 std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4364 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4365 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4366 // No operand latency. The caller may fall back to getInstrLatency.
4367 if (!ItinData || ItinData->isEmpty())
4368 return std::nullopt;
4369
4370 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4371 Register Reg = DefMO.getReg();
4372
4373 const MachineInstr *ResolvedDefMI = &DefMI;
4374 unsigned DefAdj = 0;
4375 if (DefMI.isBundle())
4376 ResolvedDefMI =
4377 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4378 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4379 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4380 return 1;
4381 }
4382
4383 const MachineInstr *ResolvedUseMI = &UseMI;
4384 unsigned UseAdj = 0;
4385 if (UseMI.isBundle()) {
4386 ResolvedUseMI =
4387 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4388 if (!ResolvedUseMI)
4389 return std::nullopt;
4390 }
4391
4392 return getOperandLatencyImpl(
4393 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4394 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4395}
4396
4397 std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4398 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4399 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4400 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4401 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4402 if (Reg == ARM::CPSR) {
4403 if (DefMI.getOpcode() == ARM::FMSTAT) {
4404 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4405 return Subtarget.isLikeA9() ? 1 : 20;
4406 }
4407
4408 // CPSR set and branch can be paired in the same cycle.
4409 if (UseMI.isBranch())
4410 return 0;
4411
4412 // Otherwise it takes the instruction latency (generally one).
4413 unsigned Latency = getInstrLatency(ItinData, DefMI);
4414
4415 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4416 // its uses. Instructions which are otherwise scheduled between them may
4417 // incur a code size penalty (not able to use the CPSR setting 16-bit
4418 // instructions).
4419 if (Latency > 0 && Subtarget.isThumb2()) {
4420 const MachineFunction *MF = DefMI.getParent()->getParent();
4421 // FIXME: Use Function::hasOptSize().
4422 if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4423 --Latency;
4424 }
4425 return Latency;
4426 }
4427
4428 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4429 return std::nullopt;
4430
4431 unsigned DefAlign = DefMI.hasOneMemOperand()
4432 ? (*DefMI.memoperands_begin())->getAlign().value()
4433 : 0;
4434 unsigned UseAlign = UseMI.hasOneMemOperand()
4435 ? (*UseMI.memoperands_begin())->getAlign().value()
4436 : 0;
4437
4438 // Get the itinerary's latency if possible, and handle variable_ops.
4439 std::optional<unsigned> Latency = getOperandLatency(
4440 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4441 // Unable to find operand latency. The caller may resort to getInstrLatency.
4442 if (!Latency)
4443 return std::nullopt;
4444
4445 // Adjust for IT block position.
4446 int Adj = DefAdj + UseAdj;
4447
4448 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4449 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4450 if (Adj >= 0 || (int)*Latency > -Adj) {
4451 return *Latency + Adj;
4452 }
4453 // Return the itinerary latency, which may be zero but not less than zero.
4454 return Latency;
4455}
4456
4457 std::optional<unsigned>
4458 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4459 SDNode *DefNode, unsigned DefIdx,
4460 SDNode *UseNode, unsigned UseIdx) const {
4461 if (!DefNode->isMachineOpcode())
4462 return 1;
4463
4464 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4465
4466 if (isZeroCost(DefMCID.Opcode))
4467 return 0;
4468
4469 if (!ItinData || ItinData->isEmpty())
4470 return DefMCID.mayLoad() ? 3 : 1;
4471
4472 if (!UseNode->isMachineOpcode()) {
4473 std::optional<unsigned> Latency =
4474 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4475 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4476 int Threshold = 1 + Adj;
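 // Defs whose operand cycle is unknown or within the threshold are treated
 // as latency 1; otherwise the pre-ISel adjustment is subtracted.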
4477 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4478 }
4479
4480 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4481 auto *DefMN = cast<MachineSDNode>(DefNode);
4482 unsigned DefAlign = !DefMN->memoperands_empty()
4483 ? (*DefMN->memoperands_begin())->getAlign().value()
4484 : 0;
4485 auto *UseMN = cast<MachineSDNode>(UseNode);
4486 unsigned UseAlign = !UseMN->memoperands_empty()
4487 ? (*UseMN->memoperands_begin())->getAlign().value()
4488 : 0;
4489 std::optional<unsigned> Latency = getOperandLatency(
4490 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4491 if (!Latency)
4492 return std::nullopt;
4493
4494 if (Latency > 1U &&
4495 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4496 Subtarget.isCortexA7())) {
4497 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4498 // variants are one cycle cheaper.
4499 switch (DefMCID.getOpcode()) {
4500 default: break;
4501 case ARM::LDRrs:
4502 case ARM::LDRBrs: {
4503 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4504 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4505 if (ShImm == 0 ||
4506 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4507 Latency = *Latency - 1;
4508 break;
4509 }
4510 case ARM::t2LDRs:
4511 case ARM::t2LDRBs:
4512 case ARM::t2LDRHs:
4513 case ARM::t2LDRSHs: {
4514 // Thumb2 mode: lsl only.
4515 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4516 if (ShAmt == 0 || ShAmt == 2)
4517 Latency = *Latency - 1;
4518 break;
4519 }
4520 }
4521 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4522 // FIXME: Properly handle all of the latency adjustments for address
4523 // writeback.
4524 switch (DefMCID.getOpcode()) {
4525 default: break;
4526 case ARM::LDRrs:
4527 case ARM::LDRBrs: {
4528 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4529 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4530 if (ShImm == 0 ||
4531 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4532 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4533 Latency = *Latency - 2;
4534 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4535 Latency = *Latency - 1;
4536 break;
4537 }
4538 case ARM::t2LDRs:
4539 case ARM::t2LDRBs:
4540 case ARM::t2LDRHs:
4541 case ARM::t2LDRSHs:
4542 // Thumb2 mode: lsl 0-3 only.
4543 Latency = *Latency - 2;
4544 break;
4545 }
4546 }
4547
4548 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4549 switch (DefMCID.getOpcode()) {
4550 default: break;
4551 case ARM::VLD1q8:
4552 case ARM::VLD1q16:
4553 case ARM::VLD1q32:
4554 case ARM::VLD1q64:
4555 case ARM::VLD1q8wb_register:
4556 case ARM::VLD1q16wb_register:
4557 case ARM::VLD1q32wb_register:
4558 case ARM::VLD1q64wb_register:
4559 case ARM::VLD1q8wb_fixed:
4560 case ARM::VLD1q16wb_fixed:
4561 case ARM::VLD1q32wb_fixed:
4562 case ARM::VLD1q64wb_fixed:
4563 case ARM::VLD2d8:
4564 case ARM::VLD2d16:
4565 case ARM::VLD2d32:
4566 case ARM::VLD2q8Pseudo:
4567 case ARM::VLD2q16Pseudo:
4568 case ARM::VLD2q32Pseudo:
4569 case ARM::VLD2d8wb_fixed:
4570 case ARM::VLD2d16wb_fixed:
4571 case ARM::VLD2d32wb_fixed:
4572 case ARM::VLD2q8PseudoWB_fixed:
4573 case ARM::VLD2q16PseudoWB_fixed:
4574 case ARM::VLD2q32PseudoWB_fixed:
4575 case ARM::VLD2d8wb_register:
4576 case ARM::VLD2d16wb_register:
4577 case ARM::VLD2d32wb_register:
4578 case ARM::VLD2q8PseudoWB_register:
4579 case ARM::VLD2q16PseudoWB_register:
4580 case ARM::VLD2q32PseudoWB_register:
4581 case ARM::VLD3d8Pseudo:
4582 case ARM::VLD3d16Pseudo:
4583 case ARM::VLD3d32Pseudo:
4584 case ARM::VLD1d8TPseudo:
4585 case ARM::VLD1d16TPseudo:
4586 case ARM::VLD1d32TPseudo:
4587 case ARM::VLD1d64TPseudo:
4588 case ARM::VLD1d64TPseudoWB_fixed:
4589 case ARM::VLD1d64TPseudoWB_register:
4590 case ARM::VLD3d8Pseudo_UPD:
4591 case ARM::VLD3d16Pseudo_UPD:
4592 case ARM::VLD3d32Pseudo_UPD:
4593 case ARM::VLD3q8Pseudo_UPD:
4594 case ARM::VLD3q16Pseudo_UPD:
4595 case ARM::VLD3q32Pseudo_UPD:
4596 case ARM::VLD3q8oddPseudo:
4597 case ARM::VLD3q16oddPseudo:
4598 case ARM::VLD3q32oddPseudo:
4599 case ARM::VLD3q8oddPseudo_UPD:
4600 case ARM::VLD3q16oddPseudo_UPD:
4601 case ARM::VLD3q32oddPseudo_UPD:
4602 case ARM::VLD4d8Pseudo:
4603 case ARM::VLD4d16Pseudo:
4604 case ARM::VLD4d32Pseudo:
4605 case ARM::VLD1d8QPseudo:
4606 case ARM::VLD1d16QPseudo:
4607 case ARM::VLD1d32QPseudo:
4608 case ARM::VLD1d64QPseudo:
4609 case ARM::VLD1d64QPseudoWB_fixed:
4610 case ARM::VLD1d64QPseudoWB_register:
4611 case ARM::VLD1q8HighQPseudo:
4612 case ARM::VLD1q8LowQPseudo_UPD:
4613 case ARM::VLD1q8HighTPseudo:
4614 case ARM::VLD1q8LowTPseudo_UPD:
4615 case ARM::VLD1q16HighQPseudo:
4616 case ARM::VLD1q16LowQPseudo_UPD:
4617 case ARM::VLD1q16HighTPseudo:
4618 case ARM::VLD1q16LowTPseudo_UPD:
4619 case ARM::VLD1q32HighQPseudo:
4620 case ARM::VLD1q32LowQPseudo_UPD:
4621 case ARM::VLD1q32HighTPseudo:
4622 case ARM::VLD1q32LowTPseudo_UPD:
4623 case ARM::VLD1q64HighQPseudo:
4624 case ARM::VLD1q64LowQPseudo_UPD:
4625 case ARM::VLD1q64HighTPseudo:
4626 case ARM::VLD1q64LowTPseudo_UPD:
4627 case ARM::VLD4d8Pseudo_UPD:
4628 case ARM::VLD4d16Pseudo_UPD:
4629 case ARM::VLD4d32Pseudo_UPD:
4630 case ARM::VLD4q8Pseudo_UPD:
4631 case ARM::VLD4q16Pseudo_UPD:
4632 case ARM::VLD4q32Pseudo_UPD:
4633 case ARM::VLD4q8oddPseudo:
4634 case ARM::VLD4q16oddPseudo:
4635 case ARM::VLD4q32oddPseudo:
4636 case ARM::VLD4q8oddPseudo_UPD:
4637 case ARM::VLD4q16oddPseudo_UPD:
4638 case ARM::VLD4q32oddPseudo_UPD:
4639 case ARM::VLD1DUPq8:
4640 case ARM::VLD1DUPq16:
4641 case ARM::VLD1DUPq32:
4642 case ARM::VLD1DUPq8wb_fixed:
4643 case ARM::VLD1DUPq16wb_fixed:
4644 case ARM::VLD1DUPq32wb_fixed:
4645 case ARM::VLD1DUPq8wb_register:
4646 case ARM::VLD1DUPq16wb_register:
4647 case ARM::VLD1DUPq32wb_register:
4648 case ARM::VLD2DUPd8:
4649 case ARM::VLD2DUPd16:
4650 case ARM::VLD2DUPd32:
4651 case ARM::VLD2DUPd8wb_fixed:
4652 case ARM::VLD2DUPd16wb_fixed:
4653 case ARM::VLD2DUPd32wb_fixed:
4654 case ARM::VLD2DUPd8wb_register:
4655 case ARM::VLD2DUPd16wb_register:
4656 case ARM::VLD2DUPd32wb_register:
4657 case ARM::VLD2DUPq8EvenPseudo:
4658 case ARM::VLD2DUPq8OddPseudo:
4659 case ARM::VLD2DUPq16EvenPseudo:
4660 case ARM::VLD2DUPq16OddPseudo:
4661 case ARM::VLD2DUPq32EvenPseudo:
4662 case ARM::VLD2DUPq32OddPseudo:
4663 case ARM::VLD3DUPq8EvenPseudo:
4664 case ARM::VLD3DUPq8OddPseudo:
4665 case ARM::VLD3DUPq16EvenPseudo:
4666 case ARM::VLD3DUPq16OddPseudo:
4667 case ARM::VLD3DUPq32EvenPseudo:
4668 case ARM::VLD3DUPq32OddPseudo:
4669 case ARM::VLD4DUPd8Pseudo:
4670 case ARM::VLD4DUPd16Pseudo:
4671 case ARM::VLD4DUPd32Pseudo:
4672 case ARM::VLD4DUPd8Pseudo_UPD:
4673 case ARM::VLD4DUPd16Pseudo_UPD:
4674 case ARM::VLD4DUPd32Pseudo_UPD:
4675 case ARM::VLD4DUPq8EvenPseudo:
4676 case ARM::VLD4DUPq8OddPseudo:
4677 case ARM::VLD4DUPq16EvenPseudo:
4678 case ARM::VLD4DUPq16OddPseudo:
4679 case ARM::VLD4DUPq32EvenPseudo:
4680 case ARM::VLD4DUPq32OddPseudo:
4681 case ARM::VLD1LNq8Pseudo:
4682 case ARM::VLD1LNq16Pseudo:
4683 case ARM::VLD1LNq32Pseudo:
4684 case ARM::VLD1LNq8Pseudo_UPD:
4685 case ARM::VLD1LNq16Pseudo_UPD:
4686 case ARM::VLD1LNq32Pseudo_UPD:
4687 case ARM::VLD2LNd8Pseudo:
4688 case ARM::VLD2LNd16Pseudo:
4689 case ARM::VLD2LNd32Pseudo:
4690 case ARM::VLD2LNq16Pseudo:
4691 case ARM::VLD2LNq32Pseudo:
4692 case ARM::VLD2LNd8Pseudo_UPD:
4693 case ARM::VLD2LNd16Pseudo_UPD:
4694 case ARM::VLD2LNd32Pseudo_UPD:
4695 case ARM::VLD2LNq16Pseudo_UPD:
4696 case ARM::VLD2LNq32Pseudo_UPD:
4697 case ARM::VLD4LNd8Pseudo:
4698 case ARM::VLD4LNd16Pseudo:
4699 case ARM::VLD4LNd32Pseudo:
4700 case ARM::VLD4LNq16Pseudo:
4701 case ARM::VLD4LNq32Pseudo:
4702 case ARM::VLD4LNd8Pseudo_UPD:
4703 case ARM::VLD4LNd16Pseudo_UPD:
4704 case ARM::VLD4LNd32Pseudo_UPD:
4705 case ARM::VLD4LNq16Pseudo_UPD:
4706 case ARM::VLD4LNq32Pseudo_UPD:
4707 // If the address is not 64-bit aligned, the latencies of these
4708 // instructions increase by one.
4709 Latency = *Latency + 1;
4710 break;
4711 }
4712
4713 return Latency;
4714}
4715
4716 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4717 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4718 MI.isImplicitDef())
4719 return 0;
4720
4721 if (MI.isBundle())
4722 return 0;
4723
4724 const MCInstrDesc &MCID = MI.getDesc();
4725
4726 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4727 !Subtarget.cheapPredicableCPSRDef())) {
4728 // When predicated, CPSR is an additional source operand for CPSR updating
4729 // instructions; this apparently increases their latencies.
4730 return 1;
4731 }
4732 return 0;
4733}
4734
4735 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4736 const MachineInstr &MI,
4737 unsigned *PredCost) const {
4738 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4739 MI.isImplicitDef())
4740 return 1;
4741
4742 // An instruction scheduler typically runs on unbundled instructions; however,
4743 // other passes may query the latency of a bundled instruction.
4744 if (MI.isBundle()) {
4745 unsigned Latency = 0;
4746 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4747 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4748 while (++I != E && I->isInsideBundle()) {
4749 if (I->getOpcode() != ARM::t2IT)
4750 Latency += getInstrLatency(ItinData, *I, PredCost);
4751 }
4752 return Latency;
4753 }
4754
4755 const MCInstrDesc &MCID = MI.getDesc();
4756 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4757 !Subtarget.cheapPredicableCPSRDef()))) {
4758 // When predicated, CPSR is an additional source operand for CPSR updating
4759 // instructions; this apparently increases their latencies.
4760 *PredCost = 1;
4761 }
4762 // Be sure to call getStageLatency for an empty itinerary in case it has a
4763 // valid MinLatency property.
4764 if (!ItinData)
4765 return MI.mayLoad() ? 3 : 1;
4766
4767 unsigned Class = MCID.getSchedClass();
4768
4769 // For instructions with variable uops, use uops as latency.
4770 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4771 return getNumMicroOps(ItinData, MI);
4772
4773 // For the common case, fall back on the itinerary's latency.
4774 unsigned Latency = ItinData->getStageLatency(Class);
4775
4776 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4777 unsigned DefAlign =
4778 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4779 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4780 if (Adj >= 0 || (int)Latency > -Adj) {
4781 return Latency + Adj;
4782 }
4783 return Latency;
4784}
4785
4786 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4787 SDNode *Node) const {
4788 if (!Node->isMachineOpcode())
4789 return 1;
4790
4791 if (!ItinData || ItinData->isEmpty())
4792 return 1;
4793
4794 unsigned Opcode = Node->getMachineOpcode();
4795 switch (Opcode) {
4796 default:
4797 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4798 case ARM::VLDMQIA:
4799 case ARM::VSTMQIA:
4800 return 2;
4801 }
4802}
4803
4804 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4805 const MachineRegisterInfo *MRI,
4806 const MachineInstr &DefMI,
4807 unsigned DefIdx,
4808 const MachineInstr &UseMI,
4809 unsigned UseIdx) const {
4810 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4811 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4812 if (Subtarget.nonpipelinedVFP() &&
4813 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4814 return true;
4815
4816 // Hoist VFP / NEON instructions with 4 or higher latency.
4817 unsigned Latency =
4818 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4819 if (Latency <= 3)
4820 return false;
4821 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4822 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4823}
4824
4825 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4826 const MachineInstr &DefMI,
4827 unsigned DefIdx) const {
4828 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4829 if (!ItinData || ItinData->isEmpty())
4830 return false;
4831
4832 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4833 if (DDomain == ARMII::DomainGeneral) {
4834 unsigned DefClass = DefMI.getDesc().getSchedClass();
4835 std::optional<unsigned> DefCycle =
4836 ItinData->getOperandCycle(DefClass, DefIdx);
4837 return DefCycle && DefCycle <= 2U;
4838 }
4839 return false;
4840}
4841
4842 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4843 StringRef &ErrInfo) const {
4844 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4845 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4846 return false;
4847 }
4848 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4849 // Make sure we don't generate a lo-lo mov that isn't supported.
4850 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4851 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4852 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4853 return false;
4854 }
4855 }
4856 if (MI.getOpcode() == ARM::tPUSH ||
4857 MI.getOpcode() == ARM::tPOP ||
4858 MI.getOpcode() == ARM::tPOP_RET) {
4859 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4860 if (MO.isImplicit() || !MO.isReg())
4861 continue;
4862 Register Reg = MO.getReg();
4863 if (Reg < ARM::R0 || Reg > ARM::R7) {
4864 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4865 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4866 ErrInfo = "Unsupported register in Thumb1 push/pop";
4867 return false;
4868 }
4869 }
4870 }
4871 }
4872 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
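 // Valid MVE_VMOV_q_rr forms use lane index pairs (2,0) or (3,1); the check
 // below rejects anything else.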
4873 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4874 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4875 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4876 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4877 return false;
4878 }
4879 }
4880
4881 // Check the addressing mode by taking the first Imm operand and checking it is
4882 // legal for that addressing mode.
4883 ARMII::AddrMode AddrMode =
4884 (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4885 switch (AddrMode) {
4886 default:
4887 break;
4888 case ARMII::AddrModeT2_i7:
4889 case ARMII::AddrModeT2_i7s2:
4890 case ARMII::AddrModeT2_i7s4:
4891 case ARMII::AddrModeT2_i8:
4892 case ARMII::AddrModeT2_i8pos:
4893 case ARMII::AddrModeT2_i8neg:
4894 case ARMII::AddrModeT2_i8s4:
4895 case ARMII::AddrModeT2_i12: {
4896 uint32_t Imm = 0;
4897 for (auto Op : MI.operands()) {
4898 if (Op.isImm()) {
4899 Imm = Op.getImm();
4900 break;
4901 }
4902 }
4903 if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4904 ErrInfo = "Incorrect AddrMode Imm for instruction";
4905 return false;
4906 }
4907 break;
4908 }
4909 }
4910 return true;
4911}
4912
4913 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4914 unsigned LoadImmOpc,
4915 unsigned LoadOpc) const {
4916 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4917 "ROPI/RWPI not currently supported with stack guard");
4918
4919 MachineBasicBlock &MBB = *MI->getParent();
4920 DebugLoc DL = MI->getDebugLoc();
4921 Register Reg = MI->getOperand(0).getReg();
4922 MachineInstrBuilder MIB;
4923 unsigned int Offset = 0;
4924
4925 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4926 assert(!Subtarget.isReadTPSoft() &&
4927 "TLS stack protector requires hardware TLS register");
4928
4929 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4930 .addImm(15)
4931 .addImm(0)
4932 .addImm(13)
4933 .addImm(0)
4934 .addImm(3)
4935 .add(predOps(ARMCC::AL));
4936
4937 Module &M = *MBB.getParent()->getFunction().getParent();
4938 Offset = M.getStackProtectorGuardOffset();
4939 if (Offset & ~0xfffU) {
4940 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4941 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4942 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
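 // E.g. Offset == 0x12345: emit ADD Reg, Reg, #0x12000 here and leave 0x345
 // for the LDR immediate below.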
4943 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4944 BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4945 .addReg(Reg, RegState::Kill)
4946 .addImm(Offset & ~0xfffU)
4947 .add(predOps(ARMCC::AL))
4948 .addReg(0);
4949 Offset &= 0xfffU;
4950 }
4951 } else {
4952 const GlobalValue *GV =
4953 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4954 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4955
4956 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4957 if (Subtarget.isTargetMachO()) {
4958 TargetFlags |= ARMII::MO_NONLAZY;
4959 } else if (Subtarget.isTargetCOFF()) {
4960 if (GV->hasDLLImportStorageClass())
4961 TargetFlags |= ARMII::MO_DLLIMPORT;
4962 else if (IsIndirect)
4963 TargetFlags |= ARMII::MO_COFFSTUB;
4964 } else if (IsIndirect) {
4965 TargetFlags |= ARMII::MO_GOT;
4966 }
4967
4968 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4969 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4970 auto APSREncoding =
4971 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4972 BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4973 .addImm(APSREncoding)
4974 .add(predOps(ARMCC::AL));
4975 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4976 .addGlobalAddress(GV, 0, TargetFlags);
4977 BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4978 .addImm(APSREncoding)
4979 .addReg(CPSRSaveReg, RegState::Kill)
4980 .add(predOps(ARMCC::AL));
4981 } else {
4982 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4983 .addGlobalAddress(GV, 0, TargetFlags);
4984 }
4985
4986 if (IsIndirect) {
4987 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4988 MIB.addReg(Reg, RegState::Kill).addImm(0);
4989 auto Flags = MachineMemOperand::MOLoad |
4990 MachineMemOperand::MODereferenceable |
4991 MachineMemOperand::MOInvariant;
4992 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4993 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4994 MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4995 }
4996 }
4997
4998 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4999 MIB.addReg(Reg, RegState::Kill)
5000 .addImm(Offset)
5001 .cloneMemRefs(*MI)
5002 .add(predOps(ARMCC::AL));
5003}
5004
5005 bool
5006 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
5007 unsigned &AddSubOpc,
5008 bool &NegAcc, bool &HasLane) const {
5009 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
5010 if (I == MLxEntryMap.end())
5011 return false;
5012
5013 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
5014 MulOpc = Entry.MulOpc;
5015 AddSubOpc = Entry.AddSubOpc;
5016 NegAcc = Entry.NegAcc;
5017 HasLane = Entry.HasLane;
5018 return true;
5019}
5020
5021//===----------------------------------------------------------------------===//
5022// Execution domains.
5023//===----------------------------------------------------------------------===//
5024//
5025// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
5026// and some can go down both. The vmov instructions go down the VFP pipeline,
5027// but they can be changed to vorr equivalents that are executed by the NEON
5028// pipeline.
5029//
5030// We use the following execution domain numbering:
5031//
5032 enum ARMExeDomain {
5033 ExeGeneric = 0,
5034 ExeVFP = 1,
5035 ExeNEON = 2
5036 };
5037
5038//
5039// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
5040//
5041 std::pair<uint16_t, uint16_t>
5042 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
5043 // If we don't have access to NEON instructions then we won't be able
5044 // to swizzle anything to the NEON domain. Check to make sure.
5045 if (Subtarget.hasNEON()) {
5046 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
5047 // if they are not predicated.
5048 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
5049 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5050
5051 // CortexA9 is particularly picky about mixing the two and wants these
5052 // converted.
5053 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
5054 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
5055 MI.getOpcode() == ARM::VMOVS))
5056 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5057 }
5058 // No other instructions can be swizzled, so just determine their domain.
5059 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
5060
5061 if (Domain & ARMII::DomainNEON)
5062 return std::make_pair(ExeNEON, 0);
5063
5064 // Certain instructions can go either way on Cortex-A8.
5065 // Treat them as NEON instructions.
5066 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
5067 return std::make_pair(ExeNEON, 0);
5068
5069 if (Domain & ARMII::DomainVFP)
5070 return std::make_pair(ExeVFP, 0);
5071
5072 return std::make_pair(ExeGeneric, 0);
5073}
5074
5075 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
5076 unsigned SReg, unsigned &Lane) {
5077 unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
5078 Lane = 0;
5079
5080 if (DReg != ARM::NoRegister)
5081 return DReg;
5082
5083 Lane = 1;
5084 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
5085
5086 assert(DReg && "S-register with no D super-register?");
5087 return DReg;
5088}
5089
5090/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
5091/// set ImplicitSReg to a register number that must be marked as implicit-use or
5092/// zero if no register needs to be defined as implicit-use.
5093///
5094/// If the function cannot determine if an SPR should be marked implicit use or
5095/// not, it returns false.
5096///
5097/// This function handles cases where an instruction is being modified from taking
5098/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
5099/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
5100/// lane of the DPR).
5101///
5102/// If the other SPR is defined, an implicit-use of it should be added. Else,
5103/// (including the case where the DPR itself is defined), it should not.
5104///
5105 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
5106 MachineInstr &MI, unsigned DReg,
5107 unsigned Lane, unsigned &ImplicitSReg) {
5108 // If the DPR is defined or used already, the other SPR lane will be chained
5109 // correctly, so there is nothing to be done.
5110 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
5111 ImplicitSReg = 0;
5112 return true;
5113 }
5114
5115 // Otherwise we need to go searching to see if the SPR is set explicitly.
5116 ImplicitSReg = TRI->getSubReg(DReg,
5117 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
5118 MachineBasicBlock::LivenessQueryResult LQR =
5119 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5120
5121 if (LQR == MachineBasicBlock::LQR_Live)
5122 return true;
5123 else if (LQR == MachineBasicBlock::LQR_Unknown)
5124 return false;
5125
5126 // If the register is known not to be live, there is no need to add an
5127 // implicit-use.
5128 ImplicitSReg = 0;
5129 return true;
5130}
5131
5132 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
5133 unsigned Domain) const {
5134 unsigned DstReg, SrcReg, DReg;
5135 unsigned Lane;
5136 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
5137 const TargetRegisterInfo *TRI = &getRegisterInfo();
5138 switch (MI.getOpcode()) {
5139 default:
5140 llvm_unreachable("cannot handle opcode!");
5141 break;
5142 case ARM::VMOVD:
5143 if (Domain != ExeNEON)
5144 break;
5145
5146 // Zap the predicate operands.
5147 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
5148
5149 // Make sure we've got NEON instructions.
5150 assert(Subtarget.hasNEON() && "VORRd requires NEON");
5151
5152 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5153 DstReg = MI.getOperand(0).getReg();
5154 SrcReg = MI.getOperand(1).getReg();
5155
5156 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5157 MI.removeOperand(i - 1);
5158
5159 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5160 MI.setDesc(get(ARM::VORRd));
5161 MIB.addReg(DstReg, RegState::Define)
5162 .addReg(SrcReg)
5163 .addReg(SrcReg)
5164 .add(predOps(ARMCC::AL));
5165 break;
5166 case ARM::VMOVRS:
5167 if (Domain != ExeNEON)
5168 break;
5169 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5170
5171 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5172 DstReg = MI.getOperand(0).getReg();
5173 SrcReg = MI.getOperand(1).getReg();
5174
5175 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5176 MI.removeOperand(i - 1);
5177
5178 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5179
5180 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5181 // Note that DSrc has been widened and the other lane may be undef, which
5182 // contaminates the entire register.
5183 MI.setDesc(get(ARM::VGETLNi32));
5184 MIB.addReg(DstReg, RegState::Define)
5185 .addReg(DReg, RegState::Undef)
5186 .addImm(Lane)
5187 .add(predOps(ARMCC::AL));
5188
5189 // The old source should be an implicit use, otherwise we might think it
5190 // was dead before here.
5191 MIB.addReg(SrcReg, RegState::Implicit);
5192 break;
5193 case ARM::VMOVSR: {
5194 if (Domain != ExeNEON)
5195 break;
5196 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5197
5198 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5199 DstReg = MI.getOperand(0).getReg();
5200 SrcReg = MI.getOperand(1).getReg();
5201
5202 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5203
5204 unsigned ImplicitSReg;
5205 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5206 break;
5207
5208 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5209 MI.removeOperand(i - 1);
5210
5211 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5212 // Again DDst may be undefined at the beginning of this instruction.
5213 MI.setDesc(get(ARM::VSETLNi32));
5214 MIB.addReg(DReg, RegState::Define)
5215 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5216 .addReg(SrcReg)
5217 .addImm(Lane)
5218 .add(predOps(ARMCC::AL));
5219
5220 // The narrower destination must be marked as set to keep previous chains
5221 // in place.
5222 MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
5223 if (ImplicitSReg != 0)
5224 MIB.addReg(ImplicitSReg, RegState::Implicit);
5225 break;
5226 }
5227 case ARM::VMOVS: {
5228 if (Domain != ExeNEON)
5229 break;
5230
5231 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5232 DstReg = MI.getOperand(0).getReg();
5233 SrcReg = MI.getOperand(1).getReg();
5234
5235 unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
5236 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5237 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5238
5239 unsigned ImplicitSReg;
5240 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5241 break;
5242
5243 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5244 MI.removeOperand(i - 1);
5245
5246 if (DSrc == DDst) {
5247 // Destination can be:
5248 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5249 MI.setDesc(get(ARM::VDUPLN32d));
5250 MIB.addReg(DDst, RegState::Define)
5251 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5252 .addImm(SrcLane)
5253 .add(predOps(ARMCC::AL));
5254
5255 // Neither the source nor the destination is naturally represented any
5256 // more, so add them in manually.
5257 MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
5258 MIB.addReg(SrcReg, RegState::Implicit);
5259 if (ImplicitSReg != 0)
5260 MIB.addReg(ImplicitSReg, RegState::Implicit);
5261 break;
5262 }
5263
5264 // In general there's no single instruction that can perform an S <-> S
5265 // move in NEON space, but a pair of VEXT instructions *can* do the
5266 // job. It turns out that the VEXTs needed will only use DSrc once, with
5267 // the position based purely on the combination of lane-0 and lane-1
5268 // involved. For example
5269 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5270 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5271 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5272 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5273 //
5274 // Pattern of the MachineInstrs is:
5275 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5276 MachineInstrBuilder NewMIB;
5277 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
5278 DDst);
5279
5280 // On the first instruction, both DSrc and DDst may be undef if present.
5281 // Specifically when the original instruction didn't have them as an
5282 // <imp-use>.
5283 unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5284 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5285 NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5286
5287 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5288 CurUndef = !MI.readsRegister(CurReg, TRI);
5289 NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
5290 .addImm(1)
5291 .add(predOps(ARMCC::AL));
5292
5293 if (SrcLane == DstLane)
5294 NewMIB.addReg(SrcReg, RegState::Implicit);
5295
5296 MI.setDesc(get(ARM::VEXTd32));
5297 MIB.addReg(DDst, RegState::Define);
5298
5299 // On the second instruction, DDst has definitely been defined above, so
5300 // it is not undef. DSrc, if present, can be undef as above.
5301 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5302 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5303 MIB.addReg(CurReg, getUndefRegState(CurUndef));
5304
5305 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5306 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5307 MIB.addReg(CurReg, getUndefRegState(CurUndef))
5308 .addImm(1)
5309        .add(predOps(ARMCC::AL));
5310
5311 if (SrcLane != DstLane)
5312 MIB.addReg(SrcReg, RegState::Implicit);
5313
5314 // As before, the original destination is no longer represented, add it
5315 // implicitly.
5317 if (ImplicitSReg != 0)
5318 MIB.addReg(ImplicitSReg, RegState::Implicit);
5319 break;
5320 }
5321 }
5322}
5323
5324//===----------------------------------------------------------------------===//
5325// Partial register updates
5326//===----------------------------------------------------------------------===//
5327//
5328// Swift renames NEON registers with 64-bit granularity. That means any
5329// instruction writing an S-reg implicitly reads the containing D-reg. The
5330// problem is mostly avoided by translating f32 operations to v2f32 operations
5331// on D-registers, but f32 loads are still a problem.
5332//
5333// These instructions can load an f32 into a NEON register:
5334//
5335// VLDRS - Only writes S, partial D update.
5336// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5337// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5338//
5339// FCONSTD can be used as a dependency-breaking instruction.
5340unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
5341 const MachineInstr &MI, unsigned OpNum,
5342 const TargetRegisterInfo *TRI) const {
5343 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5344 if (!PartialUpdateClearance)
5345 return 0;
5346
5347 assert(TRI && "Need TRI instance");
5348
5349 const MachineOperand &MO = MI.getOperand(OpNum);
5350 if (MO.readsReg())
5351 return 0;
5352 Register Reg = MO.getReg();
5353 int UseOp = -1;
5354
5355 switch (MI.getOpcode()) {
5356 // Normal instructions writing only an S-register.
5357 case ARM::VLDRS:
5358 case ARM::FCONSTS:
5359 case ARM::VMOVSR:
5360 case ARM::VMOVv8i8:
5361 case ARM::VMOVv4i16:
5362 case ARM::VMOVv2i32:
5363 case ARM::VMOVv2f32:
5364 case ARM::VMOVv1i64:
5365 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
5366 break;
5367
5368 // Explicitly reads the dependency.
5369 case ARM::VLD1LNd32:
5370 UseOp = 3;
5371 break;
5372 default:
5373 return 0;
5374 }
5375
5376 // If this instruction actually reads a value from Reg, there is no unwanted
5377 // dependency.
5378 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5379 return 0;
5380
5381 // We must be able to clobber the whole D-reg.
5382 if (Reg.isVirtual()) {
5383 // Virtual register must be a def undef foo:ssub_0 operand.
5384 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5385 return 0;
5386 } else if (ARM::SPRRegClass.contains(Reg)) {
5387 // Physical register: MI must define the full D-reg.
5388 unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
5389 &ARM::DPRRegClass);
5390 if (!DReg || !MI.definesRegister(DReg, TRI))
5391 return 0;
5392 }
5393
5394 // MI has an unwanted D-register dependency.
5395 // Avoid defs in the previous N instructions.
5396 return PartialUpdateClearance;
5397}
5398
5399// Break a partial register dependency after getPartialRegUpdateClearance
5400// returned non-zero.
5401void ARMBaseInstrInfo::breakPartialRegDependency(
5402 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5403 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5404 assert(TRI && "Need TRI instance");
5405
5406 const MachineOperand &MO = MI.getOperand(OpNum);
5407 Register Reg = MO.getReg();
5408 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5409 unsigned DReg = Reg;
5410
5411 // If MI defines an S-reg, find the corresponding D super-register.
5412 if (ARM::SPRRegClass.contains(Reg)) {
5413 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5414 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5415 }
5416
5417 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5418 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5419
5420 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5421 // the full D-register by loading the same value to both lanes. The
5422 // instruction is micro-coded with 2 uops, so don't do this until we can
5423 // properly schedule micro-coded instructions. The dispatcher stalls cause
5424 // too big regressions.
5425
5426 // Insert the dependency-breaking FCONSTD before MI.
5427 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5428 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5429 .addImm(96)
5430      .add(predOps(ARMCC::AL));
5431 MI.addRegisterKilled(DReg, TRI, true);
5432}
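// Illustrative example: on a partial-update core such as Swift, an f32 load
// that only writes an S-register carries a false dependency on the previous
// value of its containing D-register. Conceptually,
//   vldr s2, [r0]          ; writes s2 only -> partial update of d1
// becomes, once the FCONSTD above has been inserted,
//   vmov.f64 d1, #0.5      ; FCONSTD: full d1 definition breaks the dependency
//   vldr s2, [r0]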
5433
5435 return Subtarget.hasFeature(ARM::HasV6KOps);
5436}
5437
5439 if (MI->getNumOperands() < 4)
5440 return true;
5441 unsigned ShOpVal = MI->getOperand(3).getImm();
5442 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5443 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5444 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5445 ((ShImm == 1 || ShImm == 2) &&
5446 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5447 return true;
5448
5449 return false;
5450}
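// Illustrative example: on Swift,
//   add r0, r1, r2, lsl #2
// uses one of the fast immediate shifts (lsl #1, lsl #2, lsr #1), whereas
//   add r0, r1, r2, lsl #3
// does not and takes the slower shifted-operand path.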
5451
5453 const MachineInstr &MI, unsigned DefIdx,
5454 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5455 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5456 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5457
5458 switch (MI.getOpcode()) {
5459 case ARM::VMOVDRR:
5460 // dX = VMOVDRR rY, rZ
5461 // is the same as:
5462 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5463 // Populate the InputRegs accordingly.
5464 // rY
5465 const MachineOperand *MOReg = &MI.getOperand(1);
5466 if (!MOReg->isUndef())
5467 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5468 MOReg->getSubReg(), ARM::ssub_0));
5469 // rZ
5470 MOReg = &MI.getOperand(2);
5471 if (!MOReg->isUndef())
5472 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5473 MOReg->getSubReg(), ARM::ssub_1));
5474 return true;
5475 }
5476 llvm_unreachable("Target dependent opcode missing");
5477}
5478
5480 const MachineInstr &MI, unsigned DefIdx,
5481 RegSubRegPairAndIdx &InputReg) const {
5482 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5483 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5484
5485 switch (MI.getOpcode()) {
5486 case ARM::VMOVRRD:
5487 // rX, rY = VMOVRRD dZ
5488 // is the same as:
5489 // rX = EXTRACT_SUBREG dZ, ssub_0
5490 // rY = EXTRACT_SUBREG dZ, ssub_1
5491 const MachineOperand &MOReg = MI.getOperand(2);
5492 if (MOReg.isUndef())
5493 return false;
5494 InputReg.Reg = MOReg.getReg();
5495 InputReg.SubReg = MOReg.getSubReg();
5496 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5497 return true;
5498 }
5499 llvm_unreachable("Target dependent opcode missing");
5500}
5501
5503 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5504 RegSubRegPairAndIdx &InsertedReg) const {
5505 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5506 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5507
5508 switch (MI.getOpcode()) {
5509 case ARM::VSETLNi32:
5510 case ARM::MVE_VMOV_to_lane_32:
5511 // dX = VSETLNi32 dY, rZ, imm
5512 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5513 const MachineOperand &MOBaseReg = MI.getOperand(1);
5514 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5515 if (MOInsertedReg.isUndef())
5516 return false;
5517 const MachineOperand &MOIndex = MI.getOperand(3);
5518 BaseReg.Reg = MOBaseReg.getReg();
5519 BaseReg.SubReg = MOBaseReg.getSubReg();
5520
5521 InsertedReg.Reg = MOInsertedReg.getReg();
5522 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5523 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5524 return true;
5525 }
5526 llvm_unreachable("Target dependent opcode missing");
5527}
5528
5529std::pair<unsigned, unsigned>
5531 const unsigned Mask = ARMII::MO_OPTION_MASK;
5532 return std::make_pair(TF & Mask, TF & ~Mask);
5533}
5534
5537 using namespace ARMII;
5538
5539 static const std::pair<unsigned, const char *> TargetFlags[] = {
5540 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5541 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5542 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5543 };
5544 return ArrayRef(TargetFlags);
5545}
5546
5549 using namespace ARMII;
5550
5551 static const std::pair<unsigned, const char *> TargetFlags[] = {
5552 {MO_COFFSTUB, "arm-coffstub"},
5553 {MO_GOT, "arm-got"},
5554 {MO_SBREL, "arm-sbrel"},
5555 {MO_DLLIMPORT, "arm-dllimport"},
5556 {MO_SECREL, "arm-secrel"},
5557 {MO_NONLAZY, "arm-nonlazy"}};
5558 return ArrayRef(TargetFlags);
5559}
5560
5561std::optional<RegImmPair>
5563 int Sign = 1;
5564 unsigned Opcode = MI.getOpcode();
5565 int64_t Offset = 0;
5566
5567 // TODO: Handle cases where Reg is a super- or sub-register of the
5568 // destination register.
5569 const MachineOperand &Op0 = MI.getOperand(0);
5570 if (!Op0.isReg() || Reg != Op0.getReg())
5571 return std::nullopt;
5572
5573 // We describe SUBri or ADDri instructions.
5574 if (Opcode == ARM::SUBri)
5575 Sign = -1;
5576 else if (Opcode != ARM::ADDri)
5577 return std::nullopt;
5578
5579 // TODO: Third operand can be global address (usually some string). Since
5580 // strings can be relocated we cannot calculate their offsets for
5581 // now.
5582 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5583 return std::nullopt;
5584
5585 Offset = MI.getOperand(2).getImm() * Sign;
5586 return RegImmPair{MI.getOperand(1).getReg(), Offset};
5587}
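// Illustrative example (MIR operand syntax approximate): for
//   $r0 = ADDri $r1, 16, 14, $noreg, $noreg
// a query for $r0 yields RegImmPair{$r1, +16}; the equivalent SUBri form
// yields an offset of -16 instead.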
5588
5589bool llvm::registerDefinedBetween(Register Reg,
5590 MachineBasicBlock::iterator From,
5591 MachineBasicBlock::iterator To,
5592 const TargetRegisterInfo *TRI) {
5593 for (auto I = From; I != To; ++I)
5594 if (I->modifiesRegister(Reg, TRI))
5595 return true;
5596 return false;
5597}
5598
5599MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
5600 const TargetRegisterInfo *TRI) {
5601 // Search backwards to the instruction that defines CPSR. This may or may
5602 // not be a CMP; we check that after this loop. If we find another
5603 // instruction that reads CPSR, we return nullptr.
5604 MachineBasicBlock::iterator CmpMI = Br;
5605 while (CmpMI != Br->getParent()->begin()) {
5606 --CmpMI;
5607 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5608 break;
5609 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5610 break;
5611 }
5612
5613 // Check that this inst is a CMP r[0-7], #0 and that the register
5614 // is not redefined between the cmp and the br.
5615 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5616 return nullptr;
5617 Register Reg = CmpMI->getOperand(0).getReg();
5618 Register PredReg;
5619 ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
5620 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5621 return nullptr;
5622 if (!isARMLowRegister(Reg))
5623 return nullptr;
5624 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5625 return nullptr;
5626
5627 return &*CmpMI;
5628}
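// Illustrative example: for the Thumb sequence
//   cmp r3, #0
//   beq .LBB0_2
// with r3 a low register that is not redefined between the two instructions,
// this returns the CMP so the caller can fold the pair into
//   cbz r3, .LBB0_2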
5629
5630unsigned llvm::ConstantMaterializationCost(unsigned Val,
5631 const ARMSubtarget *Subtarget,
5632 bool ForCodesize) {
5633 if (Subtarget->isThumb()) {
5634 if (Val <= 255) // MOV
5635 return ForCodesize ? 2 : 1;
5636 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5637 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5638 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5639 return ForCodesize ? 4 : 1;
5640 if (Val <= 510) // MOV + ADDi8
5641 return ForCodesize ? 4 : 2;
5642 if (~Val <= 255) // MOV + MVN
5643 return ForCodesize ? 4 : 2;
5644 if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
5645 return ForCodesize ? 4 : 2;
5646 } else {
5647 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5648 return ForCodesize ? 4 : 1;
5649 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5650 return ForCodesize ? 4 : 1;
5651 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5652 return ForCodesize ? 4 : 1;
5653 if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
5654 return ForCodesize ? 8 : 2;
5655 if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
5656 return ForCodesize ? 8 : 2;
5657 }
5658 if (Subtarget->useMovt()) // MOVW + MOVT
5659 return ForCodesize ? 8 : 2;
5660 return ForCodesize ? 8 : 3; // Literal pool load
5661}
5662}
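// Worked examples (illustrative), assuming a Thumb2 (v6T2) target with
// MOVW/MOVT available and ForCodesize = true:
//   Val = 0x2A       -> 2 bytes (single 16-bit MOV)
//   Val = 0x1234     -> 4 bytes (single MOVW)
//   Val = 0x12345678 -> 8 bytes (MOVW + MOVT)
// With ForCodesize = false the same values cost 1, 1 and 2 instructions.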
5663bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5664 const ARMSubtarget *Subtarget,
5665 bool ForCodesize) {
5666 // Check with ForCodesize
5667 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5668 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5669 if (Cost1 < Cost2)
5670 return true;
5671 if (Cost1 > Cost2)
5672 return false;
5673
5674 // If they are equal, try with !ForCodesize
5675 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5676 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5677}
5678
5679/// Constants defining how certain sequences should be outlined.
5680/// This encompasses how an outlined function should be called, and what kind of
5681/// frame should be emitted for that outlined function.
5682///
5683/// \p MachineOutlinerTailCall implies that the function is being created from
5684/// a sequence of instructions ending in a return.
5685///
5686/// That is,
5687///
5688/// I1 OUTLINED_FUNCTION:
5689/// I2 --> B OUTLINED_FUNCTION I1
5690/// BX LR I2
5691/// BX LR
5692///
5693/// +-------------------------+--------+-----+
5694/// | | Thumb2 | ARM |
5695/// +-------------------------+--------+-----+
5696/// | Call overhead in Bytes | 4 | 4 |
5697/// | Frame overhead in Bytes | 0 | 0 |
5698/// | Stack fixup required | No | No |
5699/// +-------------------------+--------+-----+
5700///
5701/// \p MachineOutlinerThunk implies that the function is being created from
5702/// a sequence of instructions ending in a call. The outlined function is
5703/// called with a BL instruction, and the outlined function tail-calls the
5704/// original call destination.
5705///
5706/// That is,
5707///
5708/// I1 OUTLINED_FUNCTION:
5709/// I2 --> BL OUTLINED_FUNCTION I1
5710/// BL f I2
5711/// B f
5712///
5713/// +-------------------------+--------+-----+
5714/// | | Thumb2 | ARM |
5715/// +-------------------------+--------+-----+
5716/// | Call overhead in Bytes | 4 | 4 |
5717/// | Frame overhead in Bytes | 0 | 0 |
5718/// | Stack fixup required | No | No |
5719/// +-------------------------+--------+-----+
5720///
5721/// \p MachineOutlinerNoLRSave implies that the function should be called using
5722/// a BL instruction, but doesn't require LR to be saved and restored. This
5723/// happens when LR is known to be dead.
5724///
5725/// That is,
5726///
5727/// I1 OUTLINED_FUNCTION:
5728/// I2 --> BL OUTLINED_FUNCTION I1
5729/// I3 I2
5730/// I3
5731/// BX LR
5732///
5733/// +-------------------------+--------+-----+
5734/// | | Thumb2 | ARM |
5735/// +-------------------------+--------+-----+
5736/// | Call overhead in Bytes | 4 | 4 |
5737/// | Frame overhead in Bytes | 2 | 4 |
5738/// | Stack fixup required | No | No |
5739/// +-------------------------+--------+-----+
5740///
5741/// \p MachineOutlinerRegSave implies that the function should be called with a
5742/// save and restore of LR to an available register. This allows us to avoid
5743/// stack fixups. Note that this outlining variant is compatible with the
5744/// NoLRSave case.
5745///
5746/// That is,
5747///
5748/// I1 Save LR OUTLINED_FUNCTION:
5749/// I2 --> BL OUTLINED_FUNCTION I1
5750/// I3 Restore LR I2
5751/// I3
5752/// BX LR
5753///
5754/// +-------------------------+--------+-----+
5755/// | | Thumb2 | ARM |
5756/// +-------------------------+--------+-----+
5757/// | Call overhead in Bytes | 8 | 12 |
5758/// | Frame overhead in Bytes | 2 | 4 |
5759/// | Stack fixup required | No | No |
5760/// +-------------------------+--------+-----+
5761///
5762/// \p MachineOutlinerDefault implies that the function should be called with
5763/// a save and restore of LR to the stack.
5764///
5765/// That is,
5766///
5767/// I1 Save LR OUTLINED_FUNCTION:
5768/// I2 --> BL OUTLINED_FUNCTION I1
5769/// I3 Restore LR I2
5770/// I3
5771/// BX LR
5772///
5773/// +-------------------------+--------+-----+
5774/// | | Thumb2 | ARM |
5775/// +-------------------------+--------+-----+
5776/// | Call overhead in Bytes | 8 | 12 |
5777/// | Frame overhead in Bytes | 2 | 4 |
5778/// | Stack fixup required | Yes | Yes |
5779/// +-------------------------+--------+-----+
5780
5781enum MachineOutlinerClass {
5782  MachineOutlinerTailCall,
5783  MachineOutlinerThunk,
5784  MachineOutlinerNoLRSave,
5785  MachineOutlinerRegSave,
5786  MachineOutlinerDefault
5787};
5788
5789enum MachineOutlinerMBBFlags {
5790  LRUnavailableSomewhere = 0x2,
5791  HasCalls = 0x4,
5792  UnsafeRegsDead = 0x8
5793};
5794
5795struct OutlinerCosts {
5796  int CallTailCall;
5797  int FrameTailCall;
5798  int CallThunk;
5799  int FrameThunk;
5800  int CallNoLRSave;
5801  int FrameNoLRSave;
5802  int CallRegSave;
5803  int FrameRegSave;
5804  int CallDefault;
5805  int FrameDefault;
5806  int SaveRestoreLROnStack;
5807
5808  OutlinerCosts(const ARMSubtarget &target)
5809 : CallTailCall(target.isThumb() ? 4 : 4),
5810 FrameTailCall(target.isThumb() ? 0 : 0),
5811 CallThunk(target.isThumb() ? 4 : 4),
5812 FrameThunk(target.isThumb() ? 0 : 0),
5813 CallNoLRSave(target.isThumb() ? 4 : 4),
5814 FrameNoLRSave(target.isThumb() ? 2 : 4),
5815 CallRegSave(target.isThumb() ? 8 : 12),
5816 FrameRegSave(target.isThumb() ? 2 : 4),
5817 CallDefault(target.isThumb() ? 8 : 12),
5818 FrameDefault(target.isThumb() ? 2 : 4),
5819 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5820};
5821
5822Register
5823ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5824 MachineFunction *MF = C.getMF();
5825 const TargetRegisterInfo &TRI = getRegisterInfo();
5826 const ARMBaseRegisterInfo *ARI =
5827 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5828
5829 BitVector regsReserved = ARI->getReservedRegs(*MF);
5830 // Check if there is an available register across the sequence that we can
5831 // use.
5832 for (Register Reg : ARM::rGPRRegClass) {
5833 if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
5834 Reg != ARM::LR && // LR is not reserved, but don't use it.
5835 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5836 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5837 C.isAvailableInsideSeq(Reg, TRI))
5838 return Reg;
5839 }
5840 return Register();
5841}
5842
5843// Compute liveness of LR at the point after the interval [I, E), which
5844// denotes a *backward* iteration through instructions. Used only for return
5845// basic blocks, which do not end with a tail call.
5849 // At the end of the function, LR is dead.
5850 bool Live = false;
5851 for (; I != E; ++I) {
5852 const MachineInstr &MI = *I;
5853
5854 // Check defs of LR.
5855 if (MI.modifiesRegister(ARM::LR, &TRI))
5856 Live = false;
5857
5858 // Check uses of LR.
5859 unsigned Opcode = MI.getOpcode();
5860 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5861 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5862 Opcode == ARM::tBXNS_RET) {
5863 // These instructions use LR, but it's not an (explicit or implicit)
5864 // operand.
5865 Live = true;
5866 continue;
5867 }
5868 if (MI.readsRegister(ARM::LR, &TRI))
5869 Live = true;
5870 }
5871 return !Live;
5872}
5873
5874std::optional<outliner::OutlinedFunction>
5875ARMBaseInstrInfo::getOutliningCandidateInfo(
5876 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5877 unsigned SequenceSize = 0;
5878 for (auto &MI : RepeatedSequenceLocs[0])
5879 SequenceSize += getInstSizeInBytes(MI);
5880
5881 // Properties about candidate MBBs that hold for all of them.
5882 unsigned FlagsSetInAll = 0xF;
5883
5884 // Compute liveness information for each candidate, and set FlagsSetInAll.
5885 const TargetRegisterInfo &TRI = getRegisterInfo();
5886 for (outliner::Candidate &C : RepeatedSequenceLocs)
5887 FlagsSetInAll &= C.Flags;
5888
5889 // According to the ARM Procedure Call Standard, the following are
5890 // undefined on entry/exit from a function call:
5891 //
5892 // * Register R12(IP),
5893 // * Condition codes (and thus the CPSR register)
5894 //
5895 // Since we control the instructions which are part of the outlined regions
5896 // we don't need to be fully compliant with the AAPCS, but we have to
5897 // guarantee that if a veneer is inserted at link time the code is still
5898 // correct. Because of this, we can't outline any sequence of instructions
5899 // where one of these registers is live into/across it. Thus, we need to
5900 // delete those candidates.
5901 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5902 // If the unsafe registers in this block are all dead, then we don't need
5903 // to compute liveness here.
5904 if (C.Flags & UnsafeRegsDead)
5905 return false;
5906 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5907 };
5908
5909 // Are there any candidates where those registers are live?
5910 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5911 // Erase every candidate that violates the restrictions above. (It could be
5912 // true that we have viable candidates, so it's not worth bailing out in
5913 // the case that, say, 1 out of 20 candidates violates the restrictions.)
5914 llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
5915
5916 // If the sequence doesn't have enough candidates left, then we're done.
5917 if (RepeatedSequenceLocs.size() < 2)
5918 return std::nullopt;
5919 }
5920
5921 // We expect the majority of the outlining candidates to be in consensus with
5922 // regard to return address sign and authentication, and branch target
5923 // enforcement; in other words, partitioning according to all four
5924 // possible combinations of PAC-RET and BTI is going to yield one big subset
5925 // and three small (likely empty) subsets. That allows us to cull incompatible
5926 // candidates separately for PAC-RET and BTI.
5927
5928 // Partition the candidates into two sets: one with BTI enabled and one with
5929 // BTI disabled. Remove the candidates from the smaller set. If both sets are
5930 // the same size, prefer the non-BTI ones for outlining, since they have less
5931 // overhead.
5932 auto NoBTI =
5933 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5934 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5935 return AFI.branchTargetEnforcement();
5936 });
5937 if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
5938 std::distance(NoBTI, RepeatedSequenceLocs.end()))
5939 RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
5940 else
5941 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
5942
5943 if (RepeatedSequenceLocs.size() < 2)
5944 return std::nullopt;
5945
5946 // Likewise, partition the candidates according to PAC-RET enablement.
5947 auto NoPAC =
5948 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5949 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5950 // If the function happens to not spill the LR, do not disqualify it
5951 // from the outlining.
5952 return AFI.shouldSignReturnAddress(true);
5953 });
5954 if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
5955 std::distance(NoPAC, RepeatedSequenceLocs.end()))
5956 RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
5957 else
5958 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
5959
5960 if (RepeatedSequenceLocs.size() < 2)
5961 return std::nullopt;
5962
5963 // At this point, we have only "safe" candidates to outline. Figure out
5964 // frame + call instruction information.
5965
5966 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5967
5968 // Helper lambda which sets call information for every candidate.
5969 auto SetCandidateCallInfo =
5970 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5971 for (outliner::Candidate &C : RepeatedSequenceLocs)
5972 C.setCallInfo(CallID, NumBytesForCall);
5973 };
5974
5975 OutlinerCosts Costs(Subtarget);
5976
5977 const auto &SomeMFI =
5978 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
5979 // Adjust costs to account for the BTI instructions.
5980 if (SomeMFI.branchTargetEnforcement()) {
5981 Costs.FrameDefault += 4;
5982 Costs.FrameNoLRSave += 4;
5983 Costs.FrameRegSave += 4;
5984 Costs.FrameTailCall += 4;
5985 Costs.FrameThunk += 4;
5986 }
5987
5988 // Adjust costs to account for sign and authentication instructions.
5989 if (SomeMFI.shouldSignReturnAddress(true)) {
5990 Costs.CallDefault += 8; // +PAC instr, +AUT instr
5991 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
5992 }
5993
5994 unsigned FrameID = MachineOutlinerDefault;
5995 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
5996
5997 // If the last instruction in any candidate is a terminator, then we should
5998 // tail call all of the candidates.
5999 if (RepeatedSequenceLocs[0].back().isTerminator()) {
6000 FrameID = MachineOutlinerTailCall;
6001 NumBytesToCreateFrame = Costs.FrameTailCall;
6002 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
6003 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
6004 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
6005 LastInstrOpcode == ARM::tBLXr ||
6006 LastInstrOpcode == ARM::tBLXr_noip ||
6007 LastInstrOpcode == ARM::tBLXi) {
6008 FrameID = MachineOutlinerThunk;
6009 NumBytesToCreateFrame = Costs.FrameThunk;
6010 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
6011 } else {
6012 // We need to decide how to emit calls + frames. We can always emit the same
6013 // frame if we don't need to save to the stack. If we have to save to the
6014 // stack, then we need a different frame.
6015 unsigned NumBytesNoStackCalls = 0;
6016 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
6017
6018 for (outliner::Candidate &C : RepeatedSequenceLocs) {
6019 // LR liveness is overestimated in return blocks, unless they end with a
6020 // tail call.
6021 const auto Last = C.getMBB()->rbegin();
6022 const bool LRIsAvailable =
6023 C.getMBB()->isReturnBlock() && !Last->isCall()
6026 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
6027 if (LRIsAvailable) {
6028 FrameID = MachineOutlinerNoLRSave;
6029 NumBytesNoStackCalls += Costs.CallNoLRSave;
6030 C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
6031 CandidatesWithoutStackFixups.push_back(C);
6032 }
6033
6034 // Is an unused register available? If so, we won't modify the stack, so
6035 // we can outline with the same frame type as those that don't save LR.
6036 else if (findRegisterToSaveLRTo(C)) {
6037 FrameID = MachineOutlinerRegSave;
6038 NumBytesNoStackCalls += Costs.CallRegSave;
6039 C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
6040 CandidatesWithoutStackFixups.push_back(C);
6041 }
6042
6043 // Is SP used in the sequence at all? If not, we don't have to modify
6044 // the stack, so we are guaranteed to get the same frame.
6045 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
6046 NumBytesNoStackCalls += Costs.CallDefault;
6047 C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6048 CandidatesWithoutStackFixups.push_back(C);
6049 }
6050
6051 // If we outline this, we need to modify the stack. Pretend we don't
6052 // outline this by saving all of its bytes.
6053 else
6054 NumBytesNoStackCalls += SequenceSize;
6055 }
6056
6057 // If there are no places where we have to save LR, then note that we don't
6058 // have to update the stack. Otherwise, give every candidate the default
6059 // call type.
6060 if (NumBytesNoStackCalls <=
6061 RepeatedSequenceLocs.size() * Costs.CallDefault) {
6062 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
6063 FrameID = MachineOutlinerNoLRSave;
6064 if (RepeatedSequenceLocs.size() < 2)
6065 return std::nullopt;
6066 } else
6067 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6068 }
6069
6070 // Does every candidate's MBB contain a call? If so, then we might have a
6071 // call in the range.
6072 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6073 // Check if the range contains a call. These require a save + restore of
6074 // the link register.
6075 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
6076 if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
6077 [](const MachineInstr &MI) { return MI.isCall(); }))
6078 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6079
6080 // Handle the last instruction separately. If it is a tail call, then the
6081 // last instruction is a call and we don't want to save + restore in this
6082 // case. However, it could be possible that the last instruction is a
6083 // call without it being valid to tail call this sequence. We should
6084 // consider this as well.
6085 else if (FrameID != MachineOutlinerThunk &&
6086 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
6087 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6088 }
6089
6090 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
6091 NumBytesToCreateFrame, FrameID);
6092}
6093
6094bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
6095 int64_t Fixup,
6096 bool Updt) const {
6097 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
6098 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
6099 if (SPIdx < 0)
6100 // No SP operand
6101 return true;
6102 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
6103 // If SP is not the base register we can't do much
6104 return false;
6105
6106 // Stack might be involved but the addressing mode doesn't handle any offset.
6107 // Note: AddrModeT1_[1|2|4] don't operate on SP
6108 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
6109 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
6110 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
6111 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
6112 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
6113 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
6114 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
6115 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
6116 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
6118 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
6119 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
6120 return false;
6121
6122 unsigned NumOps = MI->getDesc().getNumOperands();
6123 unsigned ImmIdx = NumOps - 3;
6124
6125 const MachineOperand &Offset = MI->getOperand(ImmIdx);
6126 assert(Offset.isImm() && "Is not an immediate");
6127 int64_t OffVal = Offset.getImm();
6128
6129 if (OffVal < 0)
6130 // Don't override data if they are below SP.
6131 return false;
6132
6133 unsigned NumBits = 0;
6134 unsigned Scale = 1;
6135
6136 switch (AddrMode) {
6137 case ARMII::AddrMode3:
6138 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
6139 return false;
6140 OffVal = ARM_AM::getAM3Offset(OffVal);
6141 NumBits = 8;
6142 break;
6143 case ARMII::AddrMode5:
6144 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
6145 return false;
6146 OffVal = ARM_AM::getAM5Offset(OffVal);
6147 NumBits = 8;
6148 Scale = 4;
6149 break;
6150 case ARMII::AddrMode5FP16:
6151 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
6152 return false;
6153 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
6154 NumBits = 8;
6155 Scale = 2;
6156 break;
6158 NumBits = 8;
6159 break;
6161 // FIXME: Values are already scaled in this addressing mode.
6162 assert((Fixup & 3) == 0 && "Can't encode this offset!");
6163 NumBits = 10;
6164 break;
6166 NumBits = 8;
6167 Scale = 4;
6168 break;
6171 NumBits = 12;
6172 break;
6173 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6174 NumBits = 8;
6175 Scale = 4;
6176 break;
6177 default:
6178 llvm_unreachable("Unsupported addressing mode!");
6179 }
6180 // Make sure the offset is encodable for instructions that scale the
6181 // immediate.
6182 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6183 "Can't encode this offset!");
6184 OffVal += Fixup / Scale;
6185
6186 unsigned Mask = (1 << NumBits) - 1;
6187
6188 if (OffVal <= Mask) {
6189 if (Updt)
6190 MI->getOperand(ImmIdx).setImm(OffVal);
6191 return true;
6192 }
6193
6194 return false;
6195}
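// Worked example (illustrative): with Fixup = 8 (the stack adjustment used
// when LR is spilled around an outlined call), an SP-relative Thumb2 load
//   ldr.w r0, [sp, #8]       ; t2LDRi12, SP is the base register
// is accepted and, when Updt is true, rewritten to
//   ldr.w r0, [sp, #16]
// as long as the adjusted offset still fits the addressing mode's immediate.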
6196
6197void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
6198 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6199 outliner::Candidate &C = Candidates.front();
6200 // branch-target-enforcement is guaranteed to be consistent between all
6201 // candidates, so we only need to look at one.
6202 const Function &CFn = C.getMF()->getFunction();
6203 if (CFn.hasFnAttribute("branch-target-enforcement"))
6204 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
6205
6206 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6207}
6208
6209bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
6210 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6211 const Function &F = MF.getFunction();
6212
6213 // Can F be deduplicated by the linker? If it can, don't outline from it.
6214 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6215 return false;
6216
6217 // Don't outline from functions with section markings; the program could
6218 // expect that all the code is in the named section.
6219 // FIXME: Allow outlining from multiple functions with the same section
6220 // marking.
6221 if (F.hasSection())
6222 return false;
6223
6224 // FIXME: Thumb1 outlining is not handled
6225 if (Subtarget.isThumb1Only())
6226 return false;
6227
6228 // It's safe to outline from MF.
6229 return true;
6230}
6231
6233 unsigned &Flags) const {
6234 // Check if LR is available through all of the MBB. If it's not, then set
6235 // a flag.
6236 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6237 "Suitable Machine Function for outlining must track liveness");
6238
6240
6242 LRU.accumulate(MI);
6243
6244 // Check if each of the unsafe registers are available...
6245 bool R12AvailableInBlock = LRU.available(ARM::R12);
6246 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6247
6248 // If all of these are dead (and not live out), we know we don't have to check
6249 // them later.
6250 if (R12AvailableInBlock && CPSRAvailableInBlock)
6251 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6252
6253 // Now, add the live outs to the set.
6254 LRU.addLiveOuts(MBB);
6255
6256 // If any of these registers is available in the MBB, but also a live out of
6257 // the block, then we know outlining is unsafe.
6258 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6259 return false;
6260 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6261 return false;
6262
6263 // Check if there's a call inside this MachineBasicBlock. If there is, then
6264 // set a flag.
6265 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6266 Flags |= MachineOutlinerMBBFlags::HasCalls;
6267
6268 // LR liveness is overestimated in return blocks.
6269
6270 bool LRIsAvailable =
6271 MBB.isReturnBlock() && !MBB.back().isCall()
6273 : LRU.available(ARM::LR);
6274 if (!LRIsAvailable)
6275 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6276
6277 return true;
6278}
6279
6280outliner::InstrType
6281ARMBaseInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
6282 unsigned Flags) const {
6283 MachineInstr &MI = *MIT;
6284 const TargetRegisterInfo *TRI = &getRegisterInfo();
6285
6286 // PIC instructions contain labels; outlining them would break offset
6287 // computation.
6288 unsigned Opc = MI.getOpcode();
6289 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6290 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6291 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6292 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6293 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6294 Opc == ARM::t2MOV_ga_pcrel)
6296
6297 // Be conservative with ARMv8.1 MVE instructions.
6298 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6299 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6300 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6301 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6302 Opc == ARM::t2LoopEndDec)
6304
6305 const MCInstrDesc &MCID = MI.getDesc();
6306 uint64_t MIFlags = MCID.TSFlags;
6307 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
6309
6310 // Is this a terminator for a basic block?
6311 if (MI.isTerminator())
6312 // TargetInstrInfo::getOutliningType has already filtered out anything
6313 // that would break this, so we can allow it here.
6315
6316 // Don't outline if link register or program counter value are used.
6317 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
6319
6320 if (MI.isCall()) {
6321 // Get the function associated with the call. Look at each operand and find
6322 // the one that represents the callee and get its name.
6323 const Function *Callee = nullptr;
6324 for (const MachineOperand &MOP : MI.operands()) {
6325 if (MOP.isGlobal()) {
6326 Callee = dyn_cast<Function>(MOP.getGlobal());
6327 break;
6328 }
6329 }
6330
6331 // Don't outline calls to "mcount"-like functions; in particular, Linux
6332 // kernel function tracing relies on them.
6333 if (Callee &&
6334 (Callee->getName() == "\01__gnu_mcount_nc" ||
6335 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6337
6338 // If we don't know anything about the callee, assume it depends on the
6339 // stack layout of the caller. In that case, it's only legal to outline
6340 // as a tail-call. Explicitly list the call instructions we know about so
6341 // we don't get unexpected results with call pseudo-instructions.
6342 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6343 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6344 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6345 Opc == ARM::tBLXi)
6346 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6347
6348 if (!Callee)
6349 return UnknownCallOutlineType;
6350
6351 // We have a function we have information about. Check if it's something we
6352 // can safely outline.
6353 MachineFunction *MF = MI.getParent()->getParent();
6354 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
6355
6356 // We don't know what's going on with the callee at all. Don't touch it.
6357 if (!CalleeMF)
6358 return UnknownCallOutlineType;
6359
6360 // Check if we know anything about the callee saves on the function. If we
6361 // don't, then don't touch it, since that implies that we haven't computed
6362 // anything about its stack frame yet.
6363 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6364 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6365 MFI.getNumObjects() > 0)
6366 return UnknownCallOutlineType;
6367
6368 // At this point, we can say that CalleeMF ought to not pass anything on the
6369 // stack. Therefore, we can outline it.
6371 }
6372
6373 // Since calls are handled, don't touch LR or PC
6374 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
6376
6377 // Does this use the stack?
6378 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6379 // True if there is no chance that any outlined candidate from this range
6380 // could require stack fixups. That is, both
6381 // * LR is available in the range (No save/restore around call)
6382 // * The range doesn't include calls (No save/restore in outlined frame)
6383 // are true.
6384 // These conditions also ensure correctness of the return address
6385 // authentication - we insert sign and authentication instructions only if
6386 // we save/restore LR on stack, but then this condition ensures that the
6387 // outlined range does not modify the SP, therefore the SP value used for
6388 // signing is the same as the one used for authentication.
6389 // FIXME: This is very restrictive; the flags check the whole block,
6390 // not just the bit we will try to outline.
6391 bool MightNeedStackFixUp =
6392 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
6393 MachineOutlinerMBBFlags::HasCalls));
6394
6395 if (!MightNeedStackFixUp)
6397
6398 // Any modification of SP will break our code to save/restore LR.
6399 // FIXME: We could handle some instructions which add a constant offset to
6400 // SP, with a bit more work.
6401 if (MI.modifiesRegister(ARM::SP, TRI))
6403
6404 // At this point, we have a stack instruction that we might need to fix
6405 // up. We'll handle it if it's a load or store.
6406 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6407 false))
6409
6410 // We can't fix it up, so don't outline it.
6412 }
6413
6414 // Be conservative with IT blocks.
6415 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6416 MI.modifiesRegister(ARM::ITSTATE, TRI))
6418
6419 // Don't outline CFI instructions.
6420 if (MI.isCFIInstruction())
6422
6424}
6425
6426void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6427 for (MachineInstr &MI : MBB) {
6428 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6429 }
6430}
6431
6432void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6433 MachineBasicBlock::iterator It, bool CFI,
6434 bool Auth) const {
6435 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6436 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6437 assert(Align >= 8 && Align <= 256);
6438 if (Auth) {
6439 assert(Subtarget.isThumb2());
6440 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6441 // sequence.
6442 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6443 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6444 .addReg(ARM::R12, RegState::Kill)
6445 .addReg(ARM::LR, RegState::Kill)
6446 .addReg(ARM::SP)
6447 .addImm(-Align)
6448        .add(predOps(ARMCC::AL))
6449 .setMIFlags(MIFlags);
6450 } else {
6451 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6452 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6453 .addReg(ARM::LR, RegState::Kill)
6454 .addReg(ARM::SP)
6455 .addImm(-Align)
6456        .add(predOps(ARMCC::AL))
6457 .setMIFlags(MIFlags);
6458 }
6459
6460 if (!CFI)
6461 return;
6462
6463 MachineFunction &MF = *MBB.getParent();
6464
6465 // Add a CFI, saying CFA is offset by Align bytes from SP.
6466 int64_t StackPosEntry =
6468 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6469 .addCFIIndex(StackPosEntry)
6471
6472 // Add a CFI saying that the LR that we want to find is now higher than
6473 // before.
6474 int LROffset = Auth ? Align - 4 : Align;
6475 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6476 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6477 int64_t LRPosEntry = MF.addFrameInst(
6478 MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset));
6479 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6480 .addCFIIndex(LRPosEntry)
6482 if (Auth) {
6484 // Add a CFI for the location of the return address PAC.
6484 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6485 int64_t RACPosEntry = MF.addFrameInst(
6486 MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align));
6487 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6488 .addCFIIndex(RACPosEntry)
6490 }
6491}
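// Illustrative output for the common non-authenticated Thumb2 case with an
// 8-byte stack alignment (the CFA offsets track Align above):
//   str lr, [sp, #-8]!        ; t2STR_PRE
//   .cfi_def_cfa_offset 8
//   .cfi_offset lr, -8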
6492
6493void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
6494 MachineBasicBlock::iterator It,
6495 Register Reg) const {
6496 MachineFunction &MF = *MBB.getParent();
6497 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6498 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6499 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
6500
6501 int64_t LRPosEntry = MF.addFrameInst(
6502 MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
6503 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6504 .addCFIIndex(LRPosEntry)
6506}
6507
6508void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6509 MachineBasicBlock::iterator It,
6510 bool CFI, bool Auth) const {
6511 int Align = Subtarget.getStackAlignment().value();
6512 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6513 if (Auth) {
6514 assert(Subtarget.isThumb2());
6515 // Restore return address PAC and LR.
6516 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6517 .addReg(ARM::R12, RegState::Define)
6518 .addReg(ARM::LR, RegState::Define)
6519 .addReg(ARM::SP, RegState::Define)
6520 .addReg(ARM::SP)
6521 .addImm(Align)
6522        .add(predOps(ARMCC::AL))
6523 .setMIFlags(MIFlags);
6524 // LR authentication is after the CFI instructions, below.
6525 } else {
6526 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6527 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6528 .addReg(ARM::SP, RegState::Define)
6529 .addReg(ARM::SP);
6530 if (!Subtarget.isThumb())
6531 MIB.addReg(0);
6532 MIB.addImm(Subtarget.getStackAlignment().value())
6533        .add(predOps(ARMCC::AL))
6534 .setMIFlags(MIFlags);
6535 }
6536
6537 if (CFI) {
6538 // Now stack has moved back up...
6539 MachineFunction &MF = *MBB.getParent();
6540 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6541 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6542 int64_t StackPosEntry =
6544 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6545 .addCFIIndex(StackPosEntry)
6547
6548 // ... and we have restored LR.
6549 int64_t LRPosEntry =
6550 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6551 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6552 .addCFIIndex(LRPosEntry)
6554
6555 if (Auth) {
6556 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6557 int64_t Entry =
6558 MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, DwarfRAC));
6559 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6560 .addCFIIndex(Entry)
6562 }
6563 }
6564
6565 if (Auth)
6566 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6567}
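// Illustrative output for the matching non-authenticated Thumb2 restore,
// again assuming an 8-byte stack alignment:
//   ldr lr, [sp], #8          ; t2LDR_POST
//   .cfi_def_cfa_offset 0
//   .cfi_restore lr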
6568
6569void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
6570 MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
6571 MachineFunction &MF = *MBB.getParent();
6572 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6573 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6574
6575 int64_t LRPosEntry =
6576 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6577 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6578 .addCFIIndex(LRPosEntry)
6580}
6581
6582void ARMBaseInstrInfo::buildOutlinedFrame(
6583 MachineBasicBlock &MBB, MachineFunction &MF,
6584 const outliner::OutlinedFunction &OF) const {
6585 // For thunk outlining, rewrite the last instruction from a call to a
6586 // tail-call.
6587 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6588 MachineInstr *Call = &*--MBB.instr_end();
6589 bool isThumb = Subtarget.isThumb();
6590 unsigned FuncOp = isThumb ? 2 : 0;
6591 unsigned Opc = Call->getOperand(FuncOp).isReg()
6592 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6593 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6594 : ARM::tTAILJMPdND
6595 : ARM::TAILJMPd;
6596 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6597 .add(Call->getOperand(FuncOp));
6598 if (isThumb && !Call->getOperand(FuncOp).isReg())
6599 MIB.add(predOps(ARMCC::AL));
6600 Call->eraseFromParent();
6601 }
6602
6603 // Is there a call in the outlined range?
6604 auto IsNonTailCall = [](MachineInstr &MI) {
6605 return MI.isCall() && !MI.isReturn();
6606 };
6607 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6610
6613 Et = std::prev(MBB.end());
6614
6615 // We have to save and restore LR, we need to add it to the liveins if it
6616 // is not already part of the set. This is sufficient since outlined
6617 // functions only have one block.
6618 if (!MBB.isLiveIn(ARM::LR))
6619 MBB.addLiveIn(ARM::LR);
6620
6621 // Insert a save before the outlined region
6622 bool Auth = OF.Candidates.front()
6623 .getMF()
6624 ->getInfo<ARMFunctionInfo>()
6625 ->shouldSignReturnAddress(true);
6626 saveLROnStack(MBB, It, true, Auth);
6627
6628 // Fix up the instructions in the range, since we're going to modify the
6629 // stack.
6630 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6631 "Can only fix up stack references once");
6632 fixupPostOutline(MBB);
6633
6634 // Insert a restore before the terminator for the function. Restore LR.
6635 restoreLRFromStack(MBB, Et, true, Auth);
6636 }
6637
6638 // If this is a tail call outlined function, then there's already a return.
6641 return;
6642
6643 // Here we have to insert the return ourselves. Get the correct opcode from
6644 // current feature set.
6645 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
6646      .add(predOps(ARMCC::AL));
6647
6648 // Did we have to modify the stack by saving the link register?
6650 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6651 return;
6652
6653 // We modified the stack.
6654 // Walk over the basic block and fix up all the stack accesses.
6655 fixupPostOutline(MBB);
6656}
6657
6658MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
6659 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6660 MachineFunction &MF, outliner::Candidate &C) const {
6661 MachineInstrBuilder MIB;
6662 MachineBasicBlock::iterator CallPt;
6663 unsigned Opc;
6664 bool isThumb = Subtarget.isThumb();
6665
6666 // Are we tail calling?
6667 if (C.CallConstructionID == MachineOutlinerTailCall) {
6668 // If yes, then we can just branch to the label.
6669 Opc = isThumb
6670 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6671 : ARM::TAILJMPd;
6672 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6673 .addGlobalAddress(M.getNamedValue(MF.getName()));
6674 if (isThumb)
6675 MIB.add(predOps(ARMCC::AL));
6676 It = MBB.insert(It, MIB);
6677 return It;
6678 }
6679
6680 // Create the call instruction.
6681 Opc = isThumb ? ARM::tBL : ARM::BL;
6682 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6683 if (isThumb)
6684 CallMIB.add(predOps(ARMCC::AL));
6685 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6686
6687 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6688 C.CallConstructionID == MachineOutlinerThunk) {
6689 // No, so just insert the call.
6690 It = MBB.insert(It, CallMIB);
6691 return It;
6692 }
6693
6694 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6695 // Can we save to a register?
6696 if (C.CallConstructionID == MachineOutlinerRegSave) {
6697 Register Reg = findRegisterToSaveLRTo(C);
6698 assert(Reg != 0 && "No callee-saved register available?");
6699
6700 // Save and restore LR from that register.
6701 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6702 if (!AFI.isLRSpilled())
6703 emitCFIForLRSaveToReg(MBB, It, Reg);
6704 CallPt = MBB.insert(It, CallMIB);
6705 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6706 if (!AFI.isLRSpilled())
6707 emitCFIForLRRestoreFromReg(MBB, It);
6708 It--;
6709 return CallPt;
6710 }
6711 // We have the default case. Save and restore from SP.
6712 if (!MBB.isLiveIn(ARM::LR))
6713 MBB.addLiveIn(ARM::LR);
6714 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6715 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6716 CallPt = MBB.insert(It, CallMIB);
6717 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6718 It--;
6719 return CallPt;
6720}
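// Illustrative expansion for the MachineOutlinerRegSave case on Thumb2; the
// register (r5 here) is whatever findRegisterToSaveLRTo picked, and the
// callee name is made up for the example:
//   mov r5, lr
//   bl  OUTLINED_FUNCTION_0
//   mov lr, r5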
6721
6722bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
6723 MachineFunction &MF) const {
6724 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6725}
6726
6727bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
6728 const MachineInstr &MI) const {
6729 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6730 // the tail predication conversion. This means that the element count
6731 // register has to be live for longer, but that has to be better than
6732 // spill/restore and VPT predication.
6733 return (isVCTP(&MI) && !isPredicated(MI)) ||
6734 TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
6735}
6736
6738 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6739 : ARM::BLX;
6740}
6741
6743 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6744 : ARM::tBLXr;
6745}
6746
6748 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6749 : ARM::BLX_pred;
6750}
6751
6752namespace {
6753class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6754 MachineInstr *EndLoop, *LoopCount;
6755 MachineFunction *MF;
6756 const TargetInstrInfo *TII;
6757
6758 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6759 // [LAST_IS_USE] : last reference to register in schedule is a use
6760 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6761 static int constexpr MAX_STAGES = 30;
6762 static int constexpr LAST_IS_USE = MAX_STAGES;
6763 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6764 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6765 typedef std::map<unsigned, IterNeed> IterNeeds;
6766
6767 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6768 const IterNeeds &CIN);
6769 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6770
6771 // What EndLoop and LoopCount mean for each loop type:
6772 // t2Bcc:
6773 // EndLoop = branch at end of original BB that will become a kernel
6774 // LoopCount = CC setter live into branch
6775 // t2LoopEnd:
6776 // EndLoop = branch at end of original BB
6777 // LoopCount = t2LoopDec
6778public:
6779 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6780 : EndLoop(EndLoop), LoopCount(LoopCount),
6781 MF(EndLoop->getParent()->getParent()),
6782 TII(MF->getSubtarget().getInstrInfo()) {}
6783
6784 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6785 // Only ignore the terminator.
6786 return MI == EndLoop || MI == LoopCount;
6787 }
6788
6789 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
6790 if (tooMuchRegisterPressure(SSD, SMS))
6791 return false;
6792
6793 return true;
6794 }
6795
6796 std::optional<bool> createTripCountGreaterCondition(
6797 int TC, MachineBasicBlock &MBB,
6799
6800 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6801 Cond.push_back(EndLoop->getOperand(1));
6802 Cond.push_back(EndLoop->getOperand(2));
6803 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6805 }
6806 return {};
6807 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6808 // General case just lets the unrolled t2LoopDec do the subtraction and
6809 // therefore just needs to check if zero has been reached.
6810 MachineInstr *LoopDec = nullptr;
6811 for (auto &I : MBB.instrs())
6812 if (I.getOpcode() == ARM::t2LoopDec)
6813 LoopDec = &I;
6814 assert(LoopDec && "Unable to find copied LoopDec");
6815 // Check if we're done with the loop.
6816 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6817 .addReg(LoopDec->getOperand(0).getReg())
6818 .addImm(0)
6820 .addReg(ARM::NoRegister);
6822 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6823 return {};
6824 } else
6825 llvm_unreachable("Unknown EndLoop");
6826 }
6827
6828 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6829
6830 void adjustTripCount(int TripCountAdjust) override {}
6831
6832 void disposed() override {}
6833};
6834
6835void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6836 const IterNeeds &CIN) {
6837 // Increase pressure by the amounts in CrossIterationNeeds
6838 for (const auto &N : CIN) {
6839 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6840 for (int I = 0; I < Cnt; ++I)
6843 }
6844 // Decrease pressure by the amounts in CrossIterationNeeds
6845 for (const auto &N : CIN) {
6846 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6847 for (int I = 0; I < Cnt; ++I)
6850 }
6851}
6852
6853bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6854 SMSchedule &SMS) {
6855 IterNeeds CrossIterationNeeds;
6856
6857 // Determine which values will be loop-carried after the schedule is
6858 // applied
6859
6860 for (auto &SU : SSD.SUnits) {
6861 const MachineInstr *MI = SU.getInstr();
6862 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6863 for (auto &S : SU.Succs)
6864 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6865 Register Reg = S.getReg();
6866 if (Reg.isVirtual())
6867 CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
6868 .first->second.set(0);
6869 } else if (S.isAssignedRegDep()) {
6870 int OStg = SMS.stageScheduled(S.getSUnit());
6871 if (OStg >= 0 && OStg != Stg) {
6872 Register Reg = S.getReg();
6873 if (Reg.isVirtual())
6874 CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
6875 .first->second |= ((1 << (OStg - Stg)) - 1);
6876 }
6877 }
6878 }
6879
6880 // Determine more-or-less what the proposed schedule (reversed) is going to
6881 // be; it might not be quite the same because the within-cycle ordering
6882 // created by SMSchedule depends upon changes to help with address offsets and
6883 // the like.
6884 std::vector<SUnit *> ProposedSchedule;
6885 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6886 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6887 ++Stage) {
6888 std::deque<SUnit *> Instrs =
6889 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6890 std::sort(Instrs.begin(), Instrs.end(),
6891 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6892 for (SUnit *SU : Instrs)
6893 ProposedSchedule.push_back(SU);
6894 }
6895
6896 // Learn whether the last use/def of each cross-iteration register is a use or
6897 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6898 // and we do not have to add the pressure.
6899 for (auto *SU : ProposedSchedule)
6900 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6901 ++OperI) {
6902 auto MO = *OperI;
6903 if (!MO.isReg() || !MO.getReg())
6904 continue;
6905 Register Reg = MO.getReg();
6906 auto CIter = CrossIterationNeeds.find(Reg.id());
6907 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6908 CIter->second[SEEN_AS_LIVE])
6909 continue;
6910 if (MO.isDef() && !MO.isDead())
6911 CIter->second.set(SEEN_AS_LIVE);
6912 else if (MO.isUse())
6913 CIter->second.set(LAST_IS_USE);
6914 }
6915 for (auto &CI : CrossIterationNeeds)
6916 CI.second.reset(LAST_IS_USE);
6917
6918 RegionPressure RecRegPressure;
6919 RegPressureTracker RPTracker(RecRegPressure);
6920 RegisterClassInfo RegClassInfo;
6921 RegClassInfo.runOnMachineFunction(*MF);
6922 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6923 EndLoop->getParent()->end(), false, false);
6925
6926 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6927
6928 for (auto *SU : ProposedSchedule) {
6929 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6930 RPTracker.setPos(std::next(CurInstI));
6931 RPTracker.recede();
6932
6933 // Track what cross-iteration registers would be seen as live
6934 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6935 auto MO = *OperI;
6936 if (!MO.isReg() || !MO.getReg())
6937 continue;
6938 Register Reg = MO.getReg();
6939 if (MO.isDef() && !MO.isDead()) {
6940 auto CIter = CrossIterationNeeds.find(Reg.id());
6941 if (CIter != CrossIterationNeeds.end()) {
6942 CIter->second.reset(0);
6943 CIter->second.reset(SEEN_AS_LIVE);
6944 }
6945 }
6946 }
6947 for (auto &S : SU->Preds) {
6948 auto Stg = SMS.stageScheduled(SU);
6949 if (S.isAssignedRegDep()) {
6950 Register Reg = S.getReg();
6951 auto CIter = CrossIterationNeeds.find(Reg.id());
6952 if (CIter != CrossIterationNeeds.end()) {
6953 auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
6954 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6955 if (Stg - Stg2 < MAX_STAGES)
6956 CIter->second.set(Stg - Stg2);
6957 CIter->second.set(SEEN_AS_LIVE);
6958 }
6959 }
6960 }
6961
6962 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6963 }
6964
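// The recorded maximum pressure now includes the cross-iteration values;
// reject the schedule if any pressure set exceeds its limit.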
6965 auto &P = RPTracker.getPressure().MaxSetPressure;
6966 for (unsigned I = 0, E = P.size(); I < E; ++I)
6967 if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
6968 return true;
6969 }
6970 return false;
6971}
6972
6973} // namespace
6974
6975 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6976 ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
6977 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
6978 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6979 if (Preheader == LoopBB)
6980 Preheader = *std::next(LoopBB->pred_begin());
6981
6982 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6983 // If the branch is a Bcc, then the CPSR should be set somewhere within the
6984 // block. We need to determine the reaching definition of CPSR so that
6985 // it can be marked as non-pipelineable, allowing the pipeliner to force
6986 // it into stage 0 or give up if it cannot or will not do so.
6987 MachineInstr *CCSetter = nullptr;
6988 for (auto &L : LoopBB->instrs()) {
6989 if (L.isCall())
6990 return nullptr;
6991 if (isCPSRDefined(L))
6992 CCSetter = &L;
6993 }
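// CCSetter is now the last CPSR definition in the block, i.e. the definition
// that reaches the conditional branch.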
6994 if (CCSetter)
6995 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
6996 else
6997 return nullptr; // Unable to find the CC setter, so unable to guarantee
6998 // that pipeline will work
6999 }
7000
7001 // Recognize:
7002 // preheader:
7003 // %1 = t2DoLoopStart %0
7004 // loop:
7005 // %2 = phi %1, <not loop>, %..., %loop
7006 // %3 = t2LoopDec %2, <imm>
7007 // t2LoopEnd %3, %loop
7008
7009 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
7010 for (auto &L : LoopBB->instrs())
7011 if (L.isCall())
7012 return nullptr;
7013 else if (isVCTP(&L))
7014 return nullptr;
7015 Register LoopDecResult = I->getOperand(0).getReg();
7016 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
7017 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
7018 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
7019 return nullptr;
7020 MachineInstr *LoopStart = nullptr;
7021 for (auto &J : Preheader->instrs())
7022 if (J.getOpcode() == ARM::t2DoLoopStart)
7023 LoopStart = &J;
7024 if (!LoopStart)
7025 return nullptr;
7026 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
7027 }
7028 return nullptr;
7029}