1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
19#include "ARMSubtarget.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/DebugLoc.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/GlobalValue.h"
51#include "llvm/IR/Module.h"
52#include "llvm/MC/MCAsmInfo.h"
53#include "llvm/MC/MCInstrDesc.h"
59#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <new>
68#include <utility>
69#include <vector>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "arm-instrinfo"
74
75#define GET_INSTRINFO_CTOR_DTOR
76#include "ARMGenInstrInfo.inc"
77
78static cl::opt<bool>
79EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
80 cl::desc("Enable ARM 2-addr to 3-addr conv"));
81
82/// ARM_MLxEntry - Record information about MLA / MLS instructions.
83struct ARM_MLxEntry {
84 uint16_t MLxOpc; // MLA / MLS opcode
85 uint16_t MulOpc; // Expanded multiplication opcode
86 uint16_t AddSubOpc; // Expanded add / sub opcode
87 bool NegAcc; // True if the acc is negated before the add / sub.
88 bool HasLane; // True if instruction has an extra "lane" operand.
89};
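// For example, the first scalar entry below records that VMLAS (a
// floating-point multiply-accumulate) can be expanded into VMULS followed by
// VADDS; NegAcc and HasLane describe how the expansion must treat the
// accumulator and any extra lane operand.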
90
91static const ARM_MLxEntry ARM_MLxTable[] = {
92 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
93 // fp scalar ops
94 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
95 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
96 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
97 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
98 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
99 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
100 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
101 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
102
103 // fp SIMD ops
104 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
105 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
106 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
107 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
108 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
109 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
110 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
111 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
112};
113
114ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
115 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
116 Subtarget(STI) {
117 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
118 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
119 llvm_unreachable("Duplicated entries?");
120 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
121 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
122 }
123}
124
125// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
126// currently defaults to no prepass hazard recognizer.
127ScheduleHazardRecognizer *
128ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
129 const ScheduleDAG *DAG) const {
130 if (usePreRAHazardRecognizer()) {
131 const InstrItineraryData *II =
132 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
133 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
134 }
135 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
136}
137
138// Called during:
139// - pre-RA scheduling
140// - post-RA scheduling when FeatureUseMISched is set
141ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
142 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
143 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
144
145 // We would like to restrict this hazard recognizer to only
146 // post-RA scheduling; we can tell that we're post-RA because we don't
147 // track VRegLiveness.
148 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
149 // banks banked on bit 2. Assume that TCMs are in use.
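// The 0x4 mask passed below corresponds to address bit 2: two data accesses
// whose addresses have the same value of bit 2 target the same DTCM bank and
// may therefore conflict. This is a modelling assumption taken from the TRM,
// not a guarantee about a particular system configuration.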
150 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
151 MHR->AddHazardRecognizer(
152 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
153
154 // Not inserting ARMHazardRecognizerFPMLx because that would change
155 // legacy behavior
156
157 auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
158 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
159 return MHR;
160}
161
162// Called during post-RA scheduling when FeatureUseMISched is not set
163ScheduleHazardRecognizer *ARMBaseInstrInfo::
164CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
165 const ScheduleDAG *DAG) const {
166 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
167
168 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
169 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
170
171 auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
172 if (BHR)
173 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
174 return MHR;
175}
176
177MachineInstr *
178ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
179 LiveIntervals *LIS) const {
180 // FIXME: Thumb2 support.
181
182 if (!EnableARM3Addr)
183 return nullptr;
184
185 MachineFunction &MF = *MI.getParent()->getParent();
186 uint64_t TSFlags = MI.getDesc().TSFlags;
187 bool isPre = false;
188 switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
189 default: return nullptr;
190 case ARMII::IndexModePre:
191 isPre = true;
192 break;
193 case ARMII::IndexModePost:
194 break;
195 }
196
197 // Try splitting an indexed load/store to an un-indexed one plus an add/sub
198 // operation.
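// For example, a pre-indexed "LDR r0, [r1, #4]!" can be split into
// "ADD r1, r1, #4" followed by "LDR r0, [r1]", while a post-indexed
// "LDR r0, [r1], #4" becomes "LDR r0, [r1]" followed by "ADD r1, r1, #4".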
199 unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
200 if (MemOpc == 0)
201 return nullptr;
202
203 MachineInstr *UpdateMI = nullptr;
204 MachineInstr *MemMI = nullptr;
205 unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
206 const MCInstrDesc &MCID = MI.getDesc();
207 unsigned NumOps = MCID.getNumOperands();
208 bool isLoad = !MI.mayStore();
209 const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
210 const MachineOperand &Base = MI.getOperand(2);
211 const MachineOperand &Offset = MI.getOperand(NumOps - 3);
212 Register WBReg = WB.getReg();
213 Register BaseReg = Base.getReg();
214 Register OffReg = Offset.getReg();
215 unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
216 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
217 switch (AddrMode) {
218 default: llvm_unreachable("Unknown indexed op!");
219 case ARMII::AddrMode2: {
220 bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
221 unsigned Amt = ARM_AM::getAM2Offset(OffImm);
222 if (OffReg == 0) {
223 if (ARM_AM::getSOImmVal(Amt) == -1)
224 // Can't encode it in a so_imm operand. This transformation will
225 // add more than 1 instruction. Abandon!
226 return nullptr;
227 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
228 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
229 .addReg(BaseReg)
230 .addImm(Amt)
231 .add(predOps(Pred))
232 .add(condCodeOp());
233 } else if (Amt != 0) {
234 ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
235 unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
236 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
237 get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
238 .addReg(BaseReg)
239 .addReg(OffReg)
240 .addReg(0)
241 .addImm(SOOpc)
242 .add(predOps(Pred))
243 .add(condCodeOp());
244 } else
245 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
246 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
247 .addReg(BaseReg)
248 .addReg(OffReg)
249 .add(predOps(Pred))
250 .add(condCodeOp());
251 break;
252 }
253 case ARMII::AddrMode3 : {
254 bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
255 unsigned Amt = ARM_AM::getAM3Offset(OffImm);
256 if (OffReg == 0)
257 // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
258 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
259 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
260 .addReg(BaseReg)
261 .addImm(Amt)
262 .add(predOps(Pred))
263 .add(condCodeOp());
264 else
265 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
266 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
267 .addReg(BaseReg)
268 .addReg(OffReg)
269 .add(predOps(Pred))
270 .add(condCodeOp());
271 break;
272 }
273 }
274
275 std::vector<MachineInstr*> NewMIs;
276 if (isPre) {
277 if (isLoad)
278 MemMI =
279 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
280 .addReg(WBReg)
281 .addImm(0)
282 .addImm(Pred);
283 else
284 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
285 .addReg(MI.getOperand(1).getReg())
286 .addReg(WBReg)
287 .addReg(0)
288 .addImm(0)
289 .addImm(Pred);
290 NewMIs.push_back(MemMI);
291 NewMIs.push_back(UpdateMI);
292 } else {
293 if (isLoad)
294 MemMI =
295 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
296 .addReg(BaseReg)
297 .addImm(0)
298 .addImm(Pred);
299 else
300 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
301 .addReg(MI.getOperand(1).getReg())
302 .addReg(BaseReg)
303 .addReg(0)
304 .addImm(0)
305 .addImm(Pred);
306 if (WB.isDead())
307 UpdateMI->getOperand(0).setIsDead();
308 NewMIs.push_back(UpdateMI);
309 NewMIs.push_back(MemMI);
310 }
311
312 // Transfer LiveVariables states, kill / dead info.
313 if (LV) {
314 for (const MachineOperand &MO : MI.operands()) {
315 if (MO.isReg() && MO.getReg().isVirtual()) {
316 Register Reg = MO.getReg();
317
318 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
319 if (MO.isDef()) {
320 MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
321 if (MO.isDead())
322 LV->addVirtualRegisterDead(Reg, *NewMI);
323 }
324 if (MO.isUse() && MO.isKill()) {
325 for (unsigned j = 0; j < 2; ++j) {
326 // Look at the two new MI's in reverse order.
327 MachineInstr *NewMI = NewMIs[j];
328 if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr))
329 continue;
330 LV->addVirtualRegisterKilled(Reg, *NewMI);
331 if (VI.removeKill(MI))
332 VI.Kills.push_back(NewMI);
333 break;
334 }
335 }
336 }
337 }
338 }
339
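// MBB.insert(MI, X) places X immediately before MI, so inserting NewMIs[1]
// first and NewMIs[0] second leaves them in the order NewMIs[1], NewMIs[0]
// ahead of the original instruction; the original MI itself is removed by the
// caller of convertToThreeAddress.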
340 MachineBasicBlock &MBB = *MI.getParent();
341 MBB.insert(MI, NewMIs[1]);
342 MBB.insert(MI, NewMIs[0]);
343 return NewMIs[0];
344}
345
346// Branch analysis.
347// Cond vector output format:
348// 0 elements indicates an unconditional branch
349// 2 elements indicates a conditional branch; the elements are
350// the condition to check and the CPSR.
351// 3 elements indicates a hardware loop end; the elements
352// are the opcode, the operand value to test, and a dummy
353// operand used to pad out to 3 operands.
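// For a conditional branch, Cond therefore ends up as e.g.
// { imm ARMCC::EQ, reg CPSR }, while for a t2LoopEnd hardware-loop end it is
// { imm ARM::t2LoopEnd, <loop counter operand>, imm 0 }.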
354bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
355 MachineBasicBlock *&TBB,
356 MachineBasicBlock *&FBB,
357 SmallVectorImpl<MachineOperand> &Cond,
358 bool AllowModify) const {
359 TBB = nullptr;
360 FBB = nullptr;
361
362 MachineBasicBlock::instr_iterator I = MBB.instr_end();
363 if (I == MBB.instr_begin())
364 return false; // Empty blocks are easy.
365 --I;
366
367 // Walk backwards from the end of the basic block until the branch is
368 // analyzed or we give up.
369 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
370 // Flag to be raised on unanalyzable instructions. This is useful in cases
371 // where we want to clean up on the end of the basic block before we bail
372 // out.
373 bool CantAnalyze = false;
374
375 // Skip over DEBUG values, predicated nonterminators and speculation
376 // barrier terminators.
377 while (I->isDebugInstr() || !I->isTerminator() ||
378 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
379 I->getOpcode() == ARM::t2DoLoopStartTP){
380 if (I == MBB.instr_begin())
381 return false;
382 --I;
383 }
384
385 if (isIndirectBranchOpcode(I->getOpcode()) ||
386 isJumpTableBranchOpcode(I->getOpcode())) {
387 // Indirect branches and jump tables can't be analyzed, but we still want
388 // to clean up any instructions at the tail of the basic block.
389 CantAnalyze = true;
390 } else if (isUncondBranchOpcode(I->getOpcode())) {
391 TBB = I->getOperand(0).getMBB();
392 } else if (isCondBranchOpcode(I->getOpcode())) {
393 // Bail out if we encounter multiple conditional branches.
394 if (!Cond.empty())
395 return true;
396
397 assert(!FBB && "FBB should have been null.");
398 FBB = TBB;
399 TBB = I->getOperand(0).getMBB();
400 Cond.push_back(I->getOperand(1));
401 Cond.push_back(I->getOperand(2));
402 } else if (I->isReturn()) {
403 // Returns can't be analyzed, but we should run cleanup.
404 CantAnalyze = true;
405 } else if (I->getOpcode() == ARM::t2LoopEnd &&
406 MBB.getParent()
407 ->getSubtarget<ARMSubtarget>()
408 .enableMachinePipeliner()) {
409 if (!Cond.empty())
410 return true;
411 FBB = TBB;
412 TBB = I->getOperand(1).getMBB();
413 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
414 Cond.push_back(I->getOperand(0));
415 Cond.push_back(MachineOperand::CreateImm(0));
416 } else {
417 // We encountered an unrecognized terminator. Bail out immediately.
418 return true;
419 }
420
421 // Cleanup code - to be run for unpredicated unconditional branches and
422 // returns.
423 if (!isPredicated(*I) &&
424 (isUncondBranchOpcode(I->getOpcode()) ||
425 isIndirectBranchOpcode(I->getOpcode()) ||
426 isJumpTableBranchOpcode(I->getOpcode()) ||
427 I->isReturn())) {
428 // Forget any previous conditional branch information - it no longer applies.
429 Cond.clear();
430 FBB = nullptr;
431
432 // If we can modify the function, delete everything below this
433 // unconditional branch.
434 if (AllowModify) {
435 MachineBasicBlock::iterator DI = std::next(I);
436 while (DI != MBB.instr_end()) {
437 MachineInstr &InstToDelete = *DI;
438 ++DI;
439 // Speculation barriers must not be deleted.
440 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
441 continue;
442 InstToDelete.eraseFromParent();
443 }
444 }
445 }
446
447 if (CantAnalyze) {
448 // We may not be able to analyze the block, but we could still have
449 // an unconditional branch as the last instruction in the block, which
450 // just branches to the layout successor. If this is the case, then just
451 // remove it if we're allowed to make modifications.
452 if (AllowModify && !isPredicated(MBB.back()) &&
453 isUncondBranchOpcode(MBB.back().getOpcode()) &&
454 TBB && MBB.isLayoutSuccessor(TBB))
455 removeBranch(MBB);
456 return true;
457 }
458
459 if (I == MBB.instr_begin())
460 return false;
461
462 --I;
463 }
464
465 // We made it past the terminators without bailing out - we must have
466 // analyzed this branch successfully.
467 return false;
468}
469
470unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
471 int *BytesRemoved) const {
472 assert(!BytesRemoved && "code size not handled");
473
474 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
475 if (I == MBB.end())
476 return 0;
477
478 if (!isUncondBranchOpcode(I->getOpcode()) &&
479 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
480 return 0;
481
482 // Remove the branch.
483 I->eraseFromParent();
484
485 I = MBB.end();
486
487 if (I == MBB.begin()) return 1;
488 --I;
489 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
490 return 1;
491
492 // Remove the branch.
493 I->eraseFromParent();
494 return 2;
495}
496
497unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
498 MachineBasicBlock *TBB,
499 MachineBasicBlock *FBB,
500 ArrayRef<MachineOperand> Cond,
501 const DebugLoc &DL,
502 int *BytesAdded) const {
503 assert(!BytesAdded && "code size not handled");
504 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
505 int BOpc = !AFI->isThumbFunction()
506 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
507 int BccOpc = !AFI->isThumbFunction()
508 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
509 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
510
511 // Shouldn't be a fall through.
512 assert(TBB && "insertBranch must not be told to insert a fallthrough");
513 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
514 "ARM branch conditions have two or three components!");
515
516 // For conditional branches, we use addOperand to preserve CPSR flags.
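// With a two-component condition this emits "Bcc TBB" (plus an unconditional
// "B FBB" for a two-way branch); with a three-component condition the recorded
// hardware-loop opcode itself (e.g. t2LoopEnd) is re-emitted.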
517
518 if (!FBB) {
519 if (Cond.empty()) { // Unconditional branch?
520 if (isThumb)
521 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
522 else
523 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
524 } else if (Cond.size() == 2) {
525 BuildMI(&MBB, DL, get(BccOpc))
526 .addMBB(TBB)
527 .addImm(Cond[0].getImm())
528 .add(Cond[1]);
529 } else
530 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
531 return 1;
532 }
533
534 // Two-way conditional branch.
535 if (Cond.size() == 2)
536 BuildMI(&MBB, DL, get(BccOpc))
537 .addMBB(TBB)
538 .addImm(Cond[0].getImm())
539 .add(Cond[1]);
540 else if (Cond.size() == 3)
541 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
542 if (isThumb)
543 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
544 else
545 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
546 return 2;
547}
548
549bool ARMBaseInstrInfo::
550reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
551 if (Cond.size() == 2) {
552 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
553 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
554 return false;
555 }
556 return true;
557}
558
559bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
560 if (MI.isBundle()) {
561 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
562 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
563 while (++I != E && I->isInsideBundle()) {
564 int PIdx = I->findFirstPredOperandIdx();
565 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
566 return true;
567 }
568 return false;
569 }
570
571 int PIdx = MI.findFirstPredOperandIdx();
572 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
573}
574
575std::string ARMBaseInstrInfo::createMIROperandComment(
576 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
577 const TargetRegisterInfo *TRI) const {
578
579 // First, let's see if there is a generic comment for this operand
580 std::string GenericComment =
581 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
582 if (!GenericComment.empty())
583 return GenericComment;
584
585 // If not, check if we have an immediate operand.
586 if (!Op.isImm())
587 return std::string();
588
589 // And print its corresponding condition code if the immediate is a
590 // predicate.
591 int FirstPredOp = MI.findFirstPredOperandIdx();
592 if (FirstPredOp != (int) OpIdx)
593 return std::string();
594
595 std::string CC = "CC::";
596 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
597 return CC;
598}
599
600bool ARMBaseInstrInfo::PredicateInstruction(
601 MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
602 unsigned Opc = MI.getOpcode();
603 if (isUncondBranchOpcode(Opc)) {
604 MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
605 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
606 .addImm(Pred[0].getImm())
607 .addReg(Pred[1].getReg());
608 return true;
609 }
610
611 int PIdx = MI.findFirstPredOperandIdx();
612 if (PIdx != -1) {
613 MachineOperand &PMO = MI.getOperand(PIdx);
614 PMO.setImm(Pred[0].getImm());
615 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
616
617 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
618 // IT block. This affects how they are printed.
619 const MCInstrDesc &MCID = MI.getDesc();
620 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
621 assert(MCID.operands()[1].isOptionalDef() &&
622 "CPSR def isn't expected operand");
623 assert((MI.getOperand(1).isDead() ||
624 MI.getOperand(1).getReg() != ARM::CPSR) &&
625 "if conversion tried to stop defining used CPSR");
626 MI.getOperand(1).setReg(ARM::NoRegister);
627 }
628
629 return true;
630 }
631 return false;
632}
633
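// Pred1 subsumes Pred2 when every state of the condition flags that satisfies
// Pred2 also satisfies Pred1 (e.g. HS subsumes HI, and LS subsumes LO and EQ).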
634bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
635 ArrayRef<MachineOperand> Pred2) const {
636 if (Pred1.size() > 2 || Pred2.size() > 2)
637 return false;
638
639 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
640 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
641 if (CC1 == CC2)
642 return true;
643
644 switch (CC1) {
645 default:
646 return false;
647 case ARMCC::AL:
648 return true;
649 case ARMCC::HS:
650 return CC2 == ARMCC::HI;
651 case ARMCC::LS:
652 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
653 case ARMCC::GE:
654 return CC2 == ARMCC::GT;
655 case ARMCC::LE:
656 return CC2 == ARMCC::LT;
657 }
658}
659
660bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
661 std::vector<MachineOperand> &Pred,
662 bool SkipDead) const {
663 bool Found = false;
664 for (const MachineOperand &MO : MI.operands()) {
665 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
666 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
667 if (ClobbersCPSR || IsCPSR) {
668
669 // Filter out T1 instructions that have a dead CPSR,
670 // allowing IT blocks to be generated containing T1 instructions
671 const MCInstrDesc &MCID = MI.getDesc();
672 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
673 SkipDead)
674 continue;
675
676 Pred.push_back(MO);
677 Found = true;
678 }
679 }
680
681 return Found;
682}
683
684static bool isCPSRDefined(const MachineInstr &MI) {
685 for (const auto &MO : MI.operands())
686 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
687 return true;
688 return false;
689}
690
691static bool isEligibleForITBlock(const MachineInstr *MI) {
692 switch (MI->getOpcode()) {
693 default: return true;
694 case ARM::tADC: // ADC (register) T1
695 case ARM::tADDi3: // ADD (immediate) T1
696 case ARM::tADDi8: // ADD (immediate) T2
697 case ARM::tADDrr: // ADD (register) T1
698 case ARM::tAND: // AND (register) T1
699 case ARM::tASRri: // ASR (immediate) T1
700 case ARM::tASRrr: // ASR (register) T1
701 case ARM::tBIC: // BIC (register) T1
702 case ARM::tEOR: // EOR (register) T1
703 case ARM::tLSLri: // LSL (immediate) T1
704 case ARM::tLSLrr: // LSL (register) T1
705 case ARM::tLSRri: // LSR (immediate) T1
706 case ARM::tLSRrr: // LSR (register) T1
707 case ARM::tMUL: // MUL T1
708 case ARM::tMVN: // MVN (register) T1
709 case ARM::tORR: // ORR (register) T1
710 case ARM::tROR: // ROR (register) T1
711 case ARM::tRSB: // RSB (immediate) T1
712 case ARM::tSBC: // SBC (register) T1
713 case ARM::tSUBi3: // SUB (immediate) T1
714 case ARM::tSUBi8: // SUB (immediate) T2
715 case ARM::tSUBrr: // SUB (register) T1
716 return !isCPSRDefined(*MI);
717 }
718}
719
720/// isPredicable - Return true if the specified instruction can be predicated.
721/// By default, this returns true for every instruction with a
722/// PredicateOperand.
723bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
724 if (!MI.isPredicable())
725 return false;
726
727 if (MI.isBundle())
728 return false;
729
730 if (!isEligibleForITBlock(&MI))
731 return false;
732
733 const MachineFunction *MF = MI.getParent()->getParent();
734 const ARMFunctionInfo *AFI =
735 MF->getInfo<ARMFunctionInfo>();
736
737 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
738 // In their ARM encoding, they can't be encoded in a conditional form.
739 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
740 return false;
741
742 // Make indirect control flow changes unpredicable when SLS mitigation is
743 // enabled.
744 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
745 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
746 return false;
747 if (ST.hardenSlsBlr() && isIndirectCall(MI))
748 return false;
749
750 if (AFI->isThumb2Function()) {
751 if (getSubtarget().restrictIT())
752 return isV8EligibleForIT(&MI);
753 }
754
755 return true;
756}
757
758namespace llvm {
759
760template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
761 for (const MachineOperand &MO : MI->operands()) {
762 if (!MO.isReg() || MO.isUndef() || MO.isUse())
763 continue;
764 if (MO.getReg() != ARM::CPSR)
765 continue;
766 if (!MO.isDead())
767 return false;
768 }
769 // all definitions of CPSR are dead
770 return true;
771}
772
773} // end namespace llvm
774
775/// GetInstSize - Return the size of the specified MachineInstr.
776///
777unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
778 const MachineBasicBlock &MBB = *MI.getParent();
779 const MachineFunction *MF = MBB.getParent();
780 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
781
782 const MCInstrDesc &MCID = MI.getDesc();
783
784 switch (MI.getOpcode()) {
785 default:
786 // Return the size specified in .td file. If there's none, return 0, as we
787 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
788 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
789 // contrast to AArch64 instructions which have a default size of 4 bytes for
790 // example.
791 return MCID.getSize();
792 case TargetOpcode::BUNDLE:
793 return getInstBundleLength(MI);
794 case ARM::CONSTPOOL_ENTRY:
795 case ARM::JUMPTABLE_INSTS:
796 case ARM::JUMPTABLE_ADDRS:
797 case ARM::JUMPTABLE_TBB:
798 case ARM::JUMPTABLE_TBH:
799 // If this machine instr is a constant pool entry, its size is recorded as
800 // operand #2.
801 return MI.getOperand(2).getImm();
802 case ARM::SPACE:
803 return MI.getOperand(1).getImm();
804 case ARM::INLINEASM:
805 case ARM::INLINEASM_BR: {
806 // If this machine instr is an inline asm, measure it.
807 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
808 if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
809 Size = alignTo(Size, 4);
810 return Size;
811 }
812 }
813}
814
815unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
816 unsigned Size = 0;
817 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
818 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
819 while (++I != E && I->isInsideBundle()) {
820 assert(!I->isBundle() && "No nested bundle!");
821 Size += getInstSizeInBytes(*I);
822 }
823 return Size;
824}
825
826void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
827 MachineBasicBlock::iterator I,
828 unsigned DestReg, bool KillSrc,
829 const ARMSubtarget &Subtarget) const {
830 unsigned Opc = Subtarget.isThumb()
831 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
832 : ARM::MRS;
833
834 MachineInstrBuilder MIB =
835 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
836
837 // There is only 1 A/R class MRS instruction, and it always refers to
838 // APSR. However, there are lots of other possibilities on M-class cores.
839 if (Subtarget.isMClass())
840 MIB.addImm(0x800);
841
842 MIB.add(predOps(ARMCC::AL))
843 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
844}
845
846void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
847 MachineBasicBlock::iterator I,
848 unsigned SrcReg, bool KillSrc,
849 const ARMSubtarget &Subtarget) const {
850 unsigned Opc = Subtarget.isThumb()
851 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
852 : ARM::MSR;
853
854 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
855
856 if (Subtarget.isMClass())
857 MIB.addImm(0x800);
858 else
859 MIB.addImm(8);
860
861 MIB.addReg(SrcReg, getKillRegState(KillSrc))
862 .add(predOps(ARMCC::AL))
863 .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
864}
865
866void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
867 MIB.addImm(ARMVCC::None);
868 MIB.addReg(0);
869 MIB.addReg(0); // tp_reg
870}
871
872void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
873 Register DestReg) {
874 addUnpredicatedMveVpredNOp(MIB);
875 MIB.addReg(DestReg, RegState::Undef);
876}
877
878void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
879 MIB.addImm(Cond);
880 MIB.addReg(ARM::VPR, RegState::Implicit);
881 MIB.addReg(0); // tp_reg
882}
883
884void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
885 unsigned Cond, unsigned Inactive) {
886 addPredicatedMveVpredNOp(MIB, Cond);
887 MIB.addReg(Inactive);
888}
889
890void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
891 MachineBasicBlock::iterator I,
892 const DebugLoc &DL, MCRegister DestReg,
893 MCRegister SrcReg, bool KillSrc,
894 bool RenamableDest,
895 bool RenamableSrc) const {
896 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
897 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
898
899 if (GPRDest && GPRSrc) {
900 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
901 .addReg(SrcReg, getKillRegState(KillSrc))
902 .add(predOps(ARMCC::AL))
903 .add(condCodeOp());
904 return;
905 }
906
907 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
908 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
909
910 unsigned Opc = 0;
911 if (SPRDest && SPRSrc)
912 Opc = ARM::VMOVS;
913 else if (GPRDest && SPRSrc)
914 Opc = ARM::VMOVRS;
915 else if (SPRDest && GPRSrc)
916 Opc = ARM::VMOVSR;
917 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
918 Opc = ARM::VMOVD;
919 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
920 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
921
922 if (Opc) {
923 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
924 MIB.addReg(SrcReg, getKillRegState(KillSrc));
925 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
926 MIB.addReg(SrcReg, getKillRegState(KillSrc));
927 if (Opc == ARM::MVE_VORR)
928 addUnpredicatedMveVpredROp(MIB, DestReg);
929 else if (Opc != ARM::MQPRCopy)
930 MIB.add(predOps(ARMCC::AL));
931 return;
932 }
933
934 // Handle register classes that require multiple instructions.
935 unsigned BeginIdx = 0;
936 unsigned SubRegs = 0;
937 int Spacing = 1;
938
939 // Use VORRq when possible.
940 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
941 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
942 BeginIdx = ARM::qsub_0;
943 SubRegs = 2;
944 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
945 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
946 BeginIdx = ARM::qsub_0;
947 SubRegs = 4;
948 // Fall back to VMOVD.
949 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
950 Opc = ARM::VMOVD;
951 BeginIdx = ARM::dsub_0;
952 SubRegs = 2;
953 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
954 Opc = ARM::VMOVD;
955 BeginIdx = ARM::dsub_0;
956 SubRegs = 3;
957 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
958 Opc = ARM::VMOVD;
959 BeginIdx = ARM::dsub_0;
960 SubRegs = 4;
961 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
962 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
963 BeginIdx = ARM::gsub_0;
964 SubRegs = 2;
965 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
966 Opc = ARM::VMOVD;
967 BeginIdx = ARM::dsub_0;
968 SubRegs = 2;
969 Spacing = 2;
970 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
971 Opc = ARM::VMOVD;
972 BeginIdx = ARM::dsub_0;
973 SubRegs = 3;
974 Spacing = 2;
975 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
976 Opc = ARM::VMOVD;
977 BeginIdx = ARM::dsub_0;
978 SubRegs = 4;
979 Spacing = 2;
980 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
981 !Subtarget.hasFP64()) {
982 Opc = ARM::VMOVS;
983 BeginIdx = ARM::ssub_0;
984 SubRegs = 2;
985 } else if (SrcReg == ARM::CPSR) {
986 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
987 return;
988 } else if (DestReg == ARM::CPSR) {
989 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
990 return;
991 } else if (DestReg == ARM::VPR) {
992 assert(ARM::GPRRegClass.contains(SrcReg));
993 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
994 .addReg(SrcReg, getKillRegState(KillSrc))
995 .add(predOps(ARMCC::AL));
996 return;
997 } else if (SrcReg == ARM::VPR) {
998 assert(ARM::GPRRegClass.contains(DestReg));
999 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
1000 .addReg(SrcReg, getKillRegState(KillSrc))
1001 .add(predOps(ARMCC::AL));
1002 return;
1003 } else if (DestReg == ARM::FPSCR_NZCV) {
1004 assert(ARM::GPRRegClass.contains(SrcReg));
1005 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1006 .addReg(SrcReg, getKillRegState(KillSrc))
1007 .add(predOps(ARMCC::AL));
1008 return;
1009 } else if (SrcReg == ARM::FPSCR_NZCV) {
1010 assert(ARM::GPRRegClass.contains(DestReg));
1011 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1012 .addReg(SrcReg, getKillRegState(KillSrc))
1013 .add(predOps(ARMCC::AL));
1014 return;
1015 }
1016
1017 assert(Opc && "Impossible reg-to-reg copy");
1018
1019 const TargetRegisterInfo *TRI = &getRegisterInfo();
1020 MachineInstrBuilder Mov;
1021
1022 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
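// e.g. copying d0_d1_d2 into d1_d2_d3: doing d1<-d0 first would clobber d1,
// which is still needed as a source, so the sub-copies are emitted as
// d3<-d2, d2<-d1, d1<-d0 instead.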
1023 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1024 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1025 Spacing = -Spacing;
1026 }
1027#ifndef NDEBUG
1028 SmallSet<unsigned, 4> DstRegs;
1029#endif
1030 for (unsigned i = 0; i != SubRegs; ++i) {
1031 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1032 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1033 assert(Dst && Src && "Bad sub-register");
1034#ifndef NDEBUG
1035 assert(!DstRegs.count(Src) && "destructive vector copy");
1036 DstRegs.insert(Dst);
1037#endif
1038 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1039 // VORR (NEON or MVE) takes two source operands.
1040 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
1041 Mov.addReg(Src);
1042 }
1043 // MVE VORR takes predicate operands in place of an ordinary condition.
1044 if (Opc == ARM::MVE_VORR)
1045 addUnpredicatedMveVpredROp(Mov, Dst);
1046 else
1047 Mov = Mov.add(predOps(ARMCC::AL));
1048 // MOVr can set CC.
1049 if (Opc == ARM::MOVr)
1050 Mov = Mov.add(condCodeOp());
1051 }
1052 // Add implicit super-register defs and kills to the last instruction.
1053 Mov->addRegisterDefined(DestReg, TRI);
1054 if (KillSrc)
1055 Mov->addRegisterKilled(SrcReg, TRI);
1056}
1057
1058std::optional<DestSourcePair>
1059ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1060 // VMOVRRD is also a copy instruction, but it requires
1061 // special handling: it is a more complex form of copy,
1062 // so we do not consider it here. To recognize such
1063 // instructions, the isExtractSubregLike MI interface
1064 // function could be used.
1065 // VORRq is considered as a move only if two inputs are
1066 // the same register.
1067 if (!MI.isMoveReg() ||
1068 (MI.getOpcode() == ARM::VORRq &&
1069 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1070 return std::nullopt;
1071 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1072}
1073
1074std::optional<ParamLoadedValue>
1076 Register Reg) const {
1077 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
1078 Register DstReg = DstSrcPair->Destination->getReg();
1079
1080 // TODO: We don't handle cases where the forwarding reg is narrower/wider
1081 // than the copy registers. Consider for example:
1082 //
1083 // s16 = VMOVS s0
1084 // s17 = VMOVS s1
1085 // call @callee(d0)
1086 //
1087 // We'd like to describe the call site value of d0 as d8, but this requires
1088 // gathering and merging the descriptions for the two VMOVS instructions.
1089 //
1090 // We also don't handle the reverse situation, where the forwarding reg is
1091 // narrower than the copy destination:
1092 //
1093 // d8 = VMOVD d0
1094 // call @callee(s1)
1095 //
1096 // We need to produce a fragment description (the call site value of s1 is
1097 // /not/ just d8).
1098 if (DstReg != Reg)
1099 return std::nullopt;
1100 }
1101 return TargetInstrInfo::describeLoadedValue(MI, Reg);
1102}
1103
1104const MachineInstrBuilder &
1105ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
1106 unsigned SubIdx, unsigned State,
1107 const TargetRegisterInfo *TRI) const {
1108 if (!SubIdx)
1109 return MIB.addReg(Reg, State);
1110
1111 if (Register::isPhysicalRegister(Reg))
1112 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1113 return MIB.addReg(Reg, State, SubIdx);
1114}
1115
1116void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
1117 MachineBasicBlock::iterator I,
1118 Register SrcReg, bool isKill, int FI,
1119 const TargetRegisterClass *RC,
1120 const TargetRegisterInfo *TRI,
1121 Register VReg) const {
1122 MachineFunction &MF = *MBB.getParent();
1123 MachineFrameInfo &MFI = MF.getFrameInfo();
1124 Align Alignment = MFI.getObjectAlign(FI);
1125
1126 MachineMemOperand *MMO = MF.getMachineMemOperand(
1127 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
1128 MFI.getObjectSize(FI), Alignment);
1129
1130 switch (TRI->getSpillSize(*RC)) {
1131 case 2:
1132 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1133 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
1134 .addReg(SrcReg, getKillRegState(isKill))
1135 .addFrameIndex(FI)
1136 .addImm(0)
1137 .addMemOperand(MMO)
1139 } else
1140 llvm_unreachable("Unknown reg class!");
1141 break;
1142 case 4:
1143 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1144 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1145 .addReg(SrcReg, getKillRegState(isKill))
1146 .addFrameIndex(FI)
1147 .addImm(0)
1148 .addMemOperand(MMO)
1150 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1151 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1152 .addReg(SrcReg, getKillRegState(isKill))
1153 .addFrameIndex(FI)
1154 .addImm(0)
1155 .addMemOperand(MMO)
1157 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1158 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
1159 .addReg(SrcReg, getKillRegState(isKill))
1160 .addFrameIndex(FI)
1161 .addImm(0)
1162 .addMemOperand(MMO)
1164 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1165 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
1166 .addReg(SrcReg, getKillRegState(isKill))
1167 .addFrameIndex(FI)
1168 .addImm(0)
1169 .addMemOperand(MMO)
1171 } else
1172 llvm_unreachable("Unknown reg class!");
1173 break;
1174 case 8:
1175 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1176 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1177 .addReg(SrcReg, getKillRegState(isKill))
1178 .addFrameIndex(FI)
1179 .addImm(0)
1180 .addMemOperand(MMO)
1182 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1183 if (Subtarget.hasV5TEOps()) {
1184 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1185 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1186 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1187 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1189 } else {
1190 // Fallback to STM instruction, which has existed since the dawn of
1191 // time.
1192 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1193 .addFrameIndex(FI)
1194 .addMemOperand(MMO)
1196 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1197 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1198 }
1199 } else
1200 llvm_unreachable("Unknown reg class!");
1201 break;
1202 case 16:
1203 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1204 // Use aligned spills if the stack can be realigned.
1205 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1206 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1207 .addFrameIndex(FI)
1208 .addImm(16)
1209 .addReg(SrcReg, getKillRegState(isKill))
1210 .addMemOperand(MMO)
1212 } else {
1213 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1214 .addReg(SrcReg, getKillRegState(isKill))
1215 .addFrameIndex(FI)
1216 .addMemOperand(MMO)
1218 }
1219 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1220 Subtarget.hasMVEIntegerOps()) {
1221 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1222 MIB.addReg(SrcReg, getKillRegState(isKill))
1223 .addFrameIndex(FI)
1224 .addImm(0)
1225 .addMemOperand(MMO);
1227 } else
1228 llvm_unreachable("Unknown reg class!");
1229 break;
1230 case 24:
1231 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1232 // Use aligned spills if the stack can be realigned.
1233 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1234 Subtarget.hasNEON()) {
1235 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1236 .addFrameIndex(FI)
1237 .addImm(16)
1238 .addReg(SrcReg, getKillRegState(isKill))
1239 .addMemOperand(MMO)
1241 } else {
1243 get(ARM::VSTMDIA))
1244 .addFrameIndex(FI)
1246 .addMemOperand(MMO);
1247 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1248 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1249 AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1250 }
1251 } else
1252 llvm_unreachable("Unknown reg class!");
1253 break;
1254 case 32:
1255 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1256 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1257 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1258 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1259 Subtarget.hasNEON()) {
1260 // FIXME: It's possible to only store part of the QQ register if the
1261 // spilled def has a sub-register index.
1262 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1263 .addFrameIndex(FI)
1264 .addImm(16)
1265 .addReg(SrcReg, getKillRegState(isKill))
1266 .addMemOperand(MMO)
1268 } else if (Subtarget.hasMVEIntegerOps()) {
1269 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1270 .addReg(SrcReg, getKillRegState(isKill))
1271 .addFrameIndex(FI)
1272 .addMemOperand(MMO);
1273 } else {
1275 get(ARM::VSTMDIA))
1276 .addFrameIndex(FI)
1278 .addMemOperand(MMO);
1279 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1280 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1281 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1282 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1283 }
1284 } else
1285 llvm_unreachable("Unknown reg class!");
1286 break;
1287 case 64:
1288 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1289 Subtarget.hasMVEIntegerOps()) {
1290 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1291 .addReg(SrcReg, getKillRegState(isKill))
1292 .addFrameIndex(FI)
1293 .addMemOperand(MMO);
1294 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1295 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1296 .addFrameIndex(FI)
1298 .addMemOperand(MMO);
1299 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1300 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1301 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1302 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1303 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1304 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1305 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1306 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1307 } else
1308 llvm_unreachable("Unknown reg class!");
1309 break;
1310 default:
1311 llvm_unreachable("Unknown reg class!");
1312 }
1313}
1314
1316 int &FrameIndex) const {
1317 switch (MI.getOpcode()) {
1318 default: break;
1319 case ARM::STRrs:
1320 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1321 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1322 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1323 MI.getOperand(3).getImm() == 0) {
1324 FrameIndex = MI.getOperand(1).getIndex();
1325 return MI.getOperand(0).getReg();
1326 }
1327 break;
1328 case ARM::STRi12:
1329 case ARM::t2STRi12:
1330 case ARM::tSTRspi:
1331 case ARM::VSTRD:
1332 case ARM::VSTRS:
1333 case ARM::VSTRH:
1334 case ARM::VSTR_P0_off:
1335 case ARM::VSTR_FPSCR_NZCVQC_off:
1336 case ARM::MVE_VSTRWU32:
1337 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1338 MI.getOperand(2).getImm() == 0) {
1339 FrameIndex = MI.getOperand(1).getIndex();
1340 return MI.getOperand(0).getReg();
1341 }
1342 break;
1343 case ARM::VST1q64:
1344 case ARM::VST1d64TPseudo:
1345 case ARM::VST1d64QPseudo:
1346 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1347 FrameIndex = MI.getOperand(0).getIndex();
1348 return MI.getOperand(2).getReg();
1349 }
1350 break;
1351 case ARM::VSTMQIA:
1352 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1353 FrameIndex = MI.getOperand(1).getIndex();
1354 return MI.getOperand(0).getReg();
1355 }
1356 break;
1357 case ARM::MQQPRStore:
1358 case ARM::MQQQQPRStore:
1359 if (MI.getOperand(1).isFI()) {
1360 FrameIndex = MI.getOperand(1).getIndex();
1361 return MI.getOperand(0).getReg();
1362 }
1363 break;
1364 }
1365
1366 return 0;
1367}
1368
1370 int &FrameIndex) const {
1372 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1373 Accesses.size() == 1) {
1374 FrameIndex =
1375 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1376 ->getFrameIndex();
1377 return true;
1378 }
1379 return false;
1380}
1381
1384 Register DestReg, int FI,
1385 const TargetRegisterClass *RC,
1386 const TargetRegisterInfo *TRI,
1387 Register VReg) const {
1388 DebugLoc DL;
1389 if (I != MBB.end()) DL = I->getDebugLoc();
1390 MachineFunction &MF = *MBB.getParent();
1391 MachineFrameInfo &MFI = MF.getFrameInfo();
1392 const Align Alignment = MFI.getObjectAlign(FI);
1393 MachineMemOperand *MMO = MF.getMachineMemOperand(
1394 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1395 MFI.getObjectSize(FI), Alignment);
1396
1397 switch (TRI->getSpillSize(*RC)) {
1398 case 2:
1399 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1400 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1401 .addFrameIndex(FI)
1402 .addImm(0)
1403 .addMemOperand(MMO)
1405 } else
1406 llvm_unreachable("Unknown reg class!");
1407 break;
1408 case 4:
1409 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1410 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1411 .addFrameIndex(FI)
1412 .addImm(0)
1413 .addMemOperand(MMO)
1415 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1416 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1417 .addFrameIndex(FI)
1418 .addImm(0)
1419 .addMemOperand(MMO)
1421 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1422 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1423 .addFrameIndex(FI)
1424 .addImm(0)
1425 .addMemOperand(MMO)
1427 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1428 BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
1429 .addFrameIndex(FI)
1430 .addImm(0)
1431 .addMemOperand(MMO)
1433 } else
1434 llvm_unreachable("Unknown reg class!");
1435 break;
1436 case 8:
1437 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1438 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1439 .addFrameIndex(FI)
1440 .addImm(0)
1441 .addMemOperand(MMO)
1443 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1444 MachineInstrBuilder MIB;
1445
1446 if (Subtarget.hasV5TEOps()) {
1447 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1448 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1449 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1450 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1452 } else {
1453 // Fallback to LDM instruction, which has existed since the dawn of
1454 // time.
1455 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1456 .addFrameIndex(FI)
1457 .addMemOperand(MMO)
1459 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1460 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1461 }
1462
1463 if (DestReg.isPhysical())
1464 MIB.addReg(DestReg, RegState::ImplicitDefine);
1465 } else
1466 llvm_unreachable("Unknown reg class!");
1467 break;
1468 case 16:
1469 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1470 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1471 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1472 .addFrameIndex(FI)
1473 .addImm(16)
1474 .addMemOperand(MMO)
1476 } else {
1477 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1478 .addFrameIndex(FI)
1479 .addMemOperand(MMO)
1481 }
1482 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1483 Subtarget.hasMVEIntegerOps()) {
1484 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1485 MIB.addFrameIndex(FI)
1486 .addImm(0)
1487 .addMemOperand(MMO);
1489 } else
1490 llvm_unreachable("Unknown reg class!");
1491 break;
1492 case 24:
1493 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1494 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1495 Subtarget.hasNEON()) {
1496 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1497 .addFrameIndex(FI)
1498 .addImm(16)
1499 .addMemOperand(MMO)
1501 } else {
1502 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1503 .addFrameIndex(FI)
1504 .addMemOperand(MMO)
1506 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1507 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1508 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1509 if (DestReg.isPhysical())
1510 MIB.addReg(DestReg, RegState::ImplicitDefine);
1511 }
1512 } else
1513 llvm_unreachable("Unknown reg class!");
1514 break;
1515 case 32:
1516 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1517 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1518 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1519 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1520 Subtarget.hasNEON()) {
1521 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1522 .addFrameIndex(FI)
1523 .addImm(16)
1524 .addMemOperand(MMO)
1526 } else if (Subtarget.hasMVEIntegerOps()) {
1527 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1528 .addFrameIndex(FI)
1529 .addMemOperand(MMO);
1530 } else {
1531 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1532 .addFrameIndex(FI)
1534 .addMemOperand(MMO);
1535 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1536 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1537 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1538 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1539 if (DestReg.isPhysical())
1540 MIB.addReg(DestReg, RegState::ImplicitDefine);
1541 }
1542 } else
1543 llvm_unreachable("Unknown reg class!");
1544 break;
1545 case 64:
1546 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1547 Subtarget.hasMVEIntegerOps()) {
1548 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1549 .addFrameIndex(FI)
1550 .addMemOperand(MMO);
1551 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1552 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1553 .addFrameIndex(FI)
1555 .addMemOperand(MMO);
1556 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1557 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1558 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1559 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1560 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1561 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1562 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1563 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1564 if (DestReg.isPhysical())
1565 MIB.addReg(DestReg, RegState::ImplicitDefine);
1566 } else
1567 llvm_unreachable("Unknown reg class!");
1568 break;
1569 default:
1570 llvm_unreachable("Unknown regclass!");
1571 }
1572}
1573
1575 int &FrameIndex) const {
1576 switch (MI.getOpcode()) {
1577 default: break;
1578 case ARM::LDRrs:
1579 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1580 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1581 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1582 MI.getOperand(3).getImm() == 0) {
1583 FrameIndex = MI.getOperand(1).getIndex();
1584 return MI.getOperand(0).getReg();
1585 }
1586 break;
1587 case ARM::LDRi12:
1588 case ARM::t2LDRi12:
1589 case ARM::tLDRspi:
1590 case ARM::VLDRD:
1591 case ARM::VLDRS:
1592 case ARM::VLDRH:
1593 case ARM::VLDR_P0_off:
1594 case ARM::VLDR_FPSCR_NZCVQC_off:
1595 case ARM::MVE_VLDRWU32:
1596 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1597 MI.getOperand(2).getImm() == 0) {
1598 FrameIndex = MI.getOperand(1).getIndex();
1599 return MI.getOperand(0).getReg();
1600 }
1601 break;
1602 case ARM::VLD1q64:
1603 case ARM::VLD1d8TPseudo:
1604 case ARM::VLD1d16TPseudo:
1605 case ARM::VLD1d32TPseudo:
1606 case ARM::VLD1d64TPseudo:
1607 case ARM::VLD1d8QPseudo:
1608 case ARM::VLD1d16QPseudo:
1609 case ARM::VLD1d32QPseudo:
1610 case ARM::VLD1d64QPseudo:
1611 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1612 FrameIndex = MI.getOperand(1).getIndex();
1613 return MI.getOperand(0).getReg();
1614 }
1615 break;
1616 case ARM::VLDMQIA:
1617 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1618 FrameIndex = MI.getOperand(1).getIndex();
1619 return MI.getOperand(0).getReg();
1620 }
1621 break;
1622 case ARM::MQQPRLoad:
1623 case ARM::MQQQQPRLoad:
1624 if (MI.getOperand(1).isFI()) {
1625 FrameIndex = MI.getOperand(1).getIndex();
1626 return MI.getOperand(0).getReg();
1627 }
1628 break;
1629 }
1630
1631 return 0;
1632}
1633
1635 int &FrameIndex) const {
1637 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1638 Accesses.size() == 1) {
1639 FrameIndex =
1640 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1641 ->getFrameIndex();
1642 return true;
1643 }
1644 return false;
1645}
1646
1647/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1648/// depending on whether the result is used.
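/// In other words, the pseudo is lowered to a load-multiple from the source
/// into the scratch registers followed by a store-multiple to the destination,
/// using the _UPD (writeback) forms whenever the updated pointer is still live
/// (and always on Thumb1, whose LDM/STM only exist in writeback form).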
1649void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1650 bool isThumb1 = Subtarget.isThumb1Only();
1651 bool isThumb2 = Subtarget.isThumb2();
1652 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1653
1654 DebugLoc dl = MI->getDebugLoc();
1655 MachineBasicBlock *BB = MI->getParent();
1656
1657 MachineInstrBuilder LDM, STM;
1658 if (isThumb1 || !MI->getOperand(1).isDead()) {
1659 MachineOperand LDWb(MI->getOperand(1));
1660 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1661 : isThumb1 ? ARM::tLDMIA_UPD
1662 : ARM::LDMIA_UPD))
1663 .add(LDWb);
1664 } else {
1665 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1666 }
1667
1668 if (isThumb1 || !MI->getOperand(0).isDead()) {
1669 MachineOperand STWb(MI->getOperand(0));
1670 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1671 : isThumb1 ? ARM::tSTMIA_UPD
1672 : ARM::STMIA_UPD))
1673 .add(STWb);
1674 } else {
1675 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1676 }
1677
1678 MachineOperand LDBase(MI->getOperand(3));
1679 LDM.add(LDBase).add(predOps(ARMCC::AL));
1680
1681 MachineOperand STBase(MI->getOperand(2));
1682 STM.add(STBase).add(predOps(ARMCC::AL));
1683
1684 // Sort the scratch registers into ascending order.
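// LDM/STM encode their register list as a bitmask and transfer the registers
// in ascending numerical order, so the operand order must match.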
1685 const TargetRegisterInfo &TRI = getRegisterInfo();
1686 SmallVector<unsigned, 6> ScratchRegs;
1687 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1688 ScratchRegs.push_back(MO.getReg());
1689 llvm::sort(ScratchRegs,
1690 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1691 return TRI.getEncodingValue(Reg1) <
1692 TRI.getEncodingValue(Reg2);
1693 });
1694
1695 for (const auto &Reg : ScratchRegs) {
1696 LDM.addReg(Reg, RegState::Define);
1697 STM.addReg(Reg, RegState::Kill);
1698 }
1699
1700 BB->erase(MI);
1701}
1702
1703bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1704 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1705 expandLoadStackGuard(MI);
1706 MI.getParent()->erase(MI);
1707 return true;
1708 }
1709
1710 if (MI.getOpcode() == ARM::MEMCPY) {
1711 expandMEMCPY(MI);
1712 return true;
1713 }
1714
1715 // This hook gets to expand COPY instructions before they become
1716 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1717 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1718 // changed into a VORR that can go down the NEON pipeline.
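// For example, a COPY of %s0 into %s2 can become "%d1 = VMOVD %d0" provided
// d1 is wholly defined by the copy; the code below checks those conditions.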
1719 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1720 return false;
1721
1722 // Look for a copy between even S-registers. That is where we keep floats
1723 // when using NEON v2f32 instructions for f32 arithmetic.
1724 Register DstRegS = MI.getOperand(0).getReg();
1725 Register SrcRegS = MI.getOperand(1).getReg();
1726 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1727 return false;
1728
1729 const TargetRegisterInfo *TRI = &getRegisterInfo();
1730 unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1731 &ARM::DPRRegClass);
1732 unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1733 &ARM::DPRRegClass);
1734 if (!DstRegD || !SrcRegD)
1735 return false;
1736
1737 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1738 // legal if the COPY already defines the full DstRegD, and it isn't a
1739 // sub-register insertion.
1740 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1741 return false;
1742
1743 // A dead copy shouldn't show up here, but reject it just in case.
1744 if (MI.getOperand(0).isDead())
1745 return false;
1746
1747 // All clear, widen the COPY.
1748 LLVM_DEBUG(dbgs() << "widening: " << MI);
1749 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1750
1751 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1752 // or some other super-register.
1753 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1754 if (ImpDefIdx != -1)
1755 MI.removeOperand(ImpDefIdx);
1756
1757 // Change the opcode and operands.
1758 MI.setDesc(get(ARM::VMOVD));
1759 MI.getOperand(0).setReg(DstRegD);
1760 MI.getOperand(1).setReg(SrcRegD);
1761 MIB.add(predOps(ARMCC::AL));
1762
1763 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1764 // register scavenger and machine verifier, so we need to indicate that we
1765 // are reading an undefined value from SrcRegD, but a proper value from
1766 // SrcRegS.
1767 MI.getOperand(1).setIsUndef();
1768 MIB.addReg(SrcRegS, RegState::Implicit);
1769
1770 // SrcRegD may actually contain an unrelated value in the ssub_1
1771 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1772 if (MI.getOperand(1).isKill()) {
1773 MI.getOperand(1).setIsKill(false);
1774 MI.addRegisterKilled(SrcRegS, TRI, true);
1775 }
1776
1777 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1778 return true;
1779}
1780
1781/// Create a copy of a const pool value. Update CPI to the new index and return
1782/// the label UID.
1783static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1784 MachineConstantPool *MCP = MF.getConstantPool();
1785 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1786
1787 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1788 assert(MCPE.isMachineConstantPoolEntry() &&
1789 "Expecting a machine constantpool entry!");
1790 ARMConstantPoolValue *ACPV =
1791 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1792
1793 unsigned PCLabelId = AFI->createPICLabelUId();
1794 ARMConstantPoolValue *NewCPV = nullptr;
1795
1796 // FIXME: The below assumes PIC relocation model and that the function
1797 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1798 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1799 // instructions, so that's probably OK, but is PIC always correct when
1800 // we get here?
1801 if (ACPV->isGlobalValue())
1802 NewCPV = ARMConstantPoolConstant::Create(
1803 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1804 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1805 else if (ACPV->isExtSymbol())
1806 NewCPV = ARMConstantPoolSymbol::
1807 Create(MF.getFunction().getContext(),
1808 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1809 else if (ACPV->isBlockAddress())
1810 NewCPV = ARMConstantPoolConstant::
1811 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1812 ARMCP::CPBlockAddress, 4);
1813 else if (ACPV->isLSDA())
1814 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1815 ARMCP::CPLSDA, 4);
1816 else if (ACPV->isMachineBasicBlock())
1817 NewCPV = ARMConstantPoolMBB::
1818 Create(MF.getFunction().getContext(),
1819 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1820 else
1821 llvm_unreachable("Unexpected ARM constantpool value type!!");
1822 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1823 return PCLabelId;
1824}
1825
1826void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1827 MachineBasicBlock::iterator I,
1828 Register DestReg, unsigned SubIdx,
1829 const MachineInstr &Orig,
1830 const TargetRegisterInfo &TRI) const {
1831 unsigned Opcode = Orig.getOpcode();
1832 switch (Opcode) {
1833 default: {
1834 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1835 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1836 MBB.insert(I, MI);
1837 break;
1838 }
1839 case ARM::tLDRpci_pic:
1840 case ARM::t2LDRpci_pic: {
1841 MachineFunction &MF = *MBB.getParent();
1842 unsigned CPI = Orig.getOperand(1).getIndex();
1843 unsigned PCLabelId = duplicateCPV(MF, CPI);
1844 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1845 .addConstantPoolIndex(CPI)
1846 .addImm(PCLabelId)
1847 .cloneMemRefs(Orig);
1848 break;
1849 }
1850 }
1851}
1852
1853MachineInstr &
1854ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1855 MachineBasicBlock::iterator InsertBefore,
1856 const MachineInstr &Orig) const {
1857 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1858 MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1859 for (;;) {
1860 switch (I->getOpcode()) {
1861 case ARM::tLDRpci_pic:
1862 case ARM::t2LDRpci_pic: {
1863 MachineFunction &MF = *MBB.getParent();
1864 unsigned CPI = I->getOperand(1).getIndex();
1865 unsigned PCLabelId = duplicateCPV(MF, CPI);
1866 I->getOperand(1).setIndex(CPI);
1867 I->getOperand(2).setImm(PCLabelId);
1868 break;
1869 }
1870 }
1871 if (!I->isBundledWithSucc())
1872 break;
1873 ++I;
1874 }
1875 return Cloned;
1876}
1877
1878bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1879 const MachineInstr &MI1,
1880 const MachineRegisterInfo *MRI) const {
1881 unsigned Opcode = MI0.getOpcode();
1882 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1883 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1884 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1885 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1886 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1887 Opcode == ARM::t2MOV_ga_pcrel) {
1888 if (MI1.getOpcode() != Opcode)
1889 return false;
1890 if (MI0.getNumOperands() != MI1.getNumOperands())
1891 return false;
1892
1893 const MachineOperand &MO0 = MI0.getOperand(1);
1894 const MachineOperand &MO1 = MI1.getOperand(1);
1895 if (MO0.getOffset() != MO1.getOffset())
1896 return false;
1897
1898 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1899 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1900 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1901 Opcode == ARM::t2MOV_ga_pcrel)
1902 // Ignore the PC labels.
1903 return MO0.getGlobal() == MO1.getGlobal();
1904
1905 const MachineFunction *MF = MI0.getParent()->getParent();
1906 const MachineConstantPool *MCP = MF->getConstantPool();
1907 int CPI0 = MO0.getIndex();
1908 int CPI1 = MO1.getIndex();
1909 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1910 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1911 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1912 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1913 if (isARMCP0 && isARMCP1) {
1914 ARMConstantPoolValue *ACPV0 =
1915 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1916 ARMConstantPoolValue *ACPV1 =
1917 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1918 return ACPV0->hasSameValue(ACPV1);
1919 } else if (!isARMCP0 && !isARMCP1) {
1920 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1921 }
1922 return false;
1923 } else if (Opcode == ARM::PICLDR) {
1924 if (MI1.getOpcode() != Opcode)
1925 return false;
1926 if (MI0.getNumOperands() != MI1.getNumOperands())
1927 return false;
1928
1929 Register Addr0 = MI0.getOperand(1).getReg();
1930 Register Addr1 = MI1.getOperand(1).getReg();
1931 if (Addr0 != Addr1) {
1932 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1933 return false;
1934
1935 // This assumes SSA form.
1936 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1937 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1938 // Check if the loaded values, e.g. a constant pool entry or a global
1939 // address, are the same.
1940 if (!produceSameValue(*Def0, *Def1, MRI))
1941 return false;
1942 }
1943
1944 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1945 // %12 = PICLDR %11, 0, 14, %noreg
1946 const MachineOperand &MO0 = MI0.getOperand(i);
1947 const MachineOperand &MO1 = MI1.getOperand(i);
1948 if (!MO0.isIdenticalTo(MO1))
1949 return false;
1950 }
1951 return true;
1952 }
1953
1954 return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1955}
1956
1957/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1958/// determine if two loads are loading from the same base address. It should
1959/// only return true if the base pointers are the same and the only difference
1960/// between the two addresses is the offset. It also returns the offsets by
1961/// reference.
1962///
1963/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1964/// is permanently disabled.
1965bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1966 int64_t &Offset1,
1967 int64_t &Offset2) const {
1968 // Don't worry about Thumb: just ARM and Thumb2.
1969 if (Subtarget.isThumb1Only()) return false;
1970
1971 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1972 return false;
1973
1974 auto IsLoadOpcode = [&](unsigned Opcode) {
1975 switch (Opcode) {
1976 default:
1977 return false;
1978 case ARM::LDRi12:
1979 case ARM::LDRBi12:
1980 case ARM::LDRD:
1981 case ARM::LDRH:
1982 case ARM::LDRSB:
1983 case ARM::LDRSH:
1984 case ARM::VLDRD:
1985 case ARM::VLDRS:
1986 case ARM::t2LDRi8:
1987 case ARM::t2LDRBi8:
1988 case ARM::t2LDRDi8:
1989 case ARM::t2LDRSHi8:
1990 case ARM::t2LDRi12:
1991 case ARM::t2LDRBi12:
1992 case ARM::t2LDRSHi12:
1993 return true;
1994 }
1995 };
1996
1997 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1998 !IsLoadOpcode(Load2->getMachineOpcode()))
1999 return false;
2000
2001 // Check if base addresses and chain operands match.
2002 if (Load1->getOperand(0) != Load2->getOperand(0) ||
2003 Load1->getOperand(4) != Load2->getOperand(4))
2004 return false;
2005
2006 // Index should be Reg0.
2007 if (Load1->getOperand(3) != Load2->getOperand(3))
2008 return false;
2009
2010 // Determine the offsets.
2011 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
2012 isa<ConstantSDNode>(Load2->getOperand(1))) {
2013 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
2014 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
2015 return true;
2016 }
2017
2018 return false;
2019}
2020
2021/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
2022/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
2023/// be scheduled together. On some targets, if two loads are loading from
2024/// addresses in the same cache line, it's better if they are scheduled
2025/// together. This function takes two integers that represent the load offsets
2026/// from the common base address. It returns true if it decides it's desirable
2027/// to schedule the two loads together. "NumLoads" is the number of loads that
2028/// have already been scheduled after Load1.
2029///
2030/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2031/// is permanently disabled.
2032bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2033 int64_t Offset1, int64_t Offset2,
2034 unsigned NumLoads) const {
2035 // Don't worry about Thumb: just ARM and Thumb2.
2036 if (Subtarget.isThumb1Only()) return false;
2037
2038 assert(Offset2 > Offset1);
2039
2040 if ((Offset2 - Offset1) / 8 > 64)
2041 return false;
2042
2043 // Check if the machine opcodes are different. If they are, we consider the
2044 // loads not to share a base address, EXCEPT in the case of Thumb2 byte
2045 // loads where one is t2LDRBi8 and the other is t2LDRBi12. In that case they
2046 // are considered the same because they are merely different encoding forms
2047 // of the same basic instruction.
2048 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2049 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2050 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2051 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2052 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2053 return false; // FIXME: overly conservative?
2054
2055 // Four loads in a row should be sufficient.
2056 if (NumLoads >= 3)
2057 return false;
2058
2059 return true;
2060}
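// Illustration (hypothetical values): two t2LDRi12 loads at offsets 0 and 16
// from the same base give (16 - 0) / 8 = 2 <= 64, and with fewer than three
// loads already clustered the hook returns true; a pair at offsets 0 and 1024
// fails the distance check ((1024 - 0) / 8 = 128 > 64) and is not clustered.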
2061
2062bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2063 const MachineBasicBlock *MBB,
2064 const MachineFunction &MF) const {
2065 // Debug info is never a scheduling boundary. It's necessary to be explicit
2066 // due to the special treatment of IT instructions below, otherwise a
2067 // dbg_value followed by an IT will result in the IT instruction being
2068 // considered a scheduling hazard, which is wrong. It should be the actual
2069 // instruction preceding the dbg_value instruction(s), just like it is
2070 // when debug info is not present.
2071 if (MI.isDebugInstr())
2072 return false;
2073
2074 // Terminators and labels can't be scheduled around.
2075 if (MI.isTerminator() || MI.isPosition())
2076 return true;
2077
2078 // INLINEASM_BR can jump to another block
2079 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2080 return true;
2081
2082 if (isSEHInstruction(MI))
2083 return true;
2084
2085 // Treat the start of the IT block as a scheduling boundary, but schedule
2086 // t2IT along with all instructions following it.
2087 // FIXME: This is a big hammer. But the alternative is to add all potential
2088 // true and anti dependencies to IT block instructions as implicit operands
2089 // to the t2IT instruction. The added compile time and complexity does not
2090 // seem worth it.
2091 MachineBasicBlock::const_iterator I = MI;
2092 // Make sure to skip any debug instructions
2093 while (++I != MBB->end() && I->isDebugInstr())
2094 ;
2095 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2096 return true;
2097
2098 // Don't attempt to schedule around any instruction that defines
2099 // a stack-oriented pointer, as it's unlikely to be profitable. This
2100 // saves compile time, because it doesn't require every single
2101 // stack slot reference to depend on the instruction that does the
2102 // modification.
2103 // Calls don't actually change the stack pointer, even if they have imp-defs.
2104 // No ARM calling conventions change the stack pointer. (X86 calling
2105 // conventions sometimes do).
2106 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
2107 return true;
2108
2109 return false;
2110}
2111
2112bool ARMBaseInstrInfo::
2113isProfitableToIfCvt(MachineBasicBlock &MBB,
2114 unsigned NumCycles, unsigned ExtraPredCycles,
2115 BranchProbability Probability) const {
2116 if (!NumCycles)
2117 return false;
2118
2119 // If we are optimizing for size, see if the branch in the predecessor can be
2120 // lowered to cbn?z by the constant island lowering pass, and return false if
2121 // so. This results in a shorter instruction sequence.
2122 if (MBB.getParent()->getFunction().hasOptSize()) {
2123 MachineBasicBlock *Pred = *MBB.pred_begin();
2124 if (!Pred->empty()) {
2125 MachineInstr *LastMI = &*Pred->rbegin();
2126 if (LastMI->getOpcode() == ARM::t2Bcc) {
2127 const TargetRegisterInfo *TRI = &getRegisterInfo();
2128 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2129 if (CmpMI)
2130 return false;
2131 }
2132 }
2133 }
2134 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2135 MBB, 0, 0, Probability);
2136}
2137
2138bool ARMBaseInstrInfo::
2139isProfitableToIfCvt(MachineBasicBlock &TBB,
2140 unsigned TCycles, unsigned TExtra,
2141 MachineBasicBlock &FBB,
2142 unsigned FCycles, unsigned FExtra,
2143 BranchProbability Probability) const {
2144 if (!TCycles)
2145 return false;
2146
2147 // In Thumb code we often end up trading one branch for an IT block, and
2148 // if we are cloning, the if-conversion can increase code size. Prevent
2149 // blocks with multiple predecessors from being if-converted to avoid this
2150 // cloning.
2151 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2152 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2153 return false;
2154 }
2155
2156 // Attempt to estimate the relative costs of predication versus branching.
2157 // Here we scale up each component of UnpredCost to avoid precision issues when
2158 // scaling TCycles/FCycles by Probability.
2159 const unsigned ScalingUpFactor = 1024;
2160
2161 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2162 unsigned UnpredCost;
2163 if (!Subtarget.hasBranchPredictor()) {
2164 // When we don't have a branch predictor it's always cheaper to not take a
2165 // branch than take it, so we have to take that into account.
2166 unsigned NotTakenBranchCost = 1;
2167 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2168 unsigned TUnpredCycles, FUnpredCycles;
2169 if (!FCycles) {
2170 // Triangle: TBB is the fallthrough
2171 TUnpredCycles = TCycles + NotTakenBranchCost;
2172 FUnpredCycles = TakenBranchCost;
2173 } else {
2174 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2175 TUnpredCycles = TCycles + TakenBranchCost;
2176 FUnpredCycles = FCycles + NotTakenBranchCost;
2177 // The branch at the end of FBB will disappear when it's predicated, so
2178 // discount it from PredCost.
2179 PredCost -= 1 * ScalingUpFactor;
2180 }
2181 // The total cost is the cost of each path scaled by its probability.
2182 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2183 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2184 UnpredCost = TUnpredCost + FUnpredCost;
2185 // When predicating, assume that the first IT can be folded away but later
2186 // ones cost one cycle each.
2187 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2188 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2189 }
2190 } else {
2191 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2192 unsigned FUnpredCost =
2193 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2194 UnpredCost = TUnpredCost + FUnpredCost;
2195 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2196 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2197 }
2198
2199 return PredCost <= UnpredCost;
2200}
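// Worked example (hypothetical numbers, branch predictor present): a diamond
// with TCycles = FCycles = 2 and a 50/50 probability gives
//   PredCost   = (2 + 2) * 1024 = 4096
//   UnpredCost = 0.5*2*1024 + 0.5*2*1024 + 1024 + penalty*1024/10
// so if-conversion is chosen once the scaled misprediction term brings
// UnpredCost up to 4096; the break-even point depends on the subtarget's
// getMispredictionPenalty().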
2201
2202unsigned
2203ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2204 unsigned NumInsts) const {
2205 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2206 // ARM has a condition code field in every predicable instruction, using it
2207 // doesn't change code size.
2208 if (!Subtarget.isThumb2())
2209 return 0;
2210
2211 // It's possible that the size of the IT is restricted to a single block.
2212 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2213 return divideCeil(NumInsts, MaxInsts) * 2;
2214}
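// Illustration: predicating 6 instructions costs divideCeil(6, 4) = 2 IT
// instructions (4 bytes) on a typical Thumb2 core, but divideCeil(6, 1) = 6
// ITs (12 bytes) when the subtarget restricts IT blocks to a single
// instruction; ARM mode reports 0 extra bytes since every instruction is
// predicable through its condition field.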
2215
2216unsigned
2217ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2218 // If this branch is likely to be folded into the comparison to form a
2219 // CB(N)Z, then removing it won't reduce code size at all, because that will
2220 // just replace the CB(N)Z with a CMP.
2221 if (MI.getOpcode() == ARM::t2Bcc &&
2222 findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2223 return 0;
2224
2225 unsigned Size = getInstSizeInBytes(MI);
2226
2227 // For Thumb2, all branches are 32-bit instructions during the if conversion
2228 // pass, but may be replaced with 16-bit instructions during size reduction.
2229 // Since the branches considered by if conversion tend to be forward branches
2230 // over small basic blocks, they are very likely to be in range for the
2231 // narrow instructions, so we assume the final code size will be half what it
2232 // currently is.
2233 if (Subtarget.isThumb2())
2234 Size /= 2;
2235
2236 return Size;
2237}
2238
2239bool
2240ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2241 MachineBasicBlock &FMBB) const {
2242 // Reduce false anti-dependencies to let the target's out-of-order execution
2243 // engine do its thing.
2244 return Subtarget.isProfitableToUnpredicate();
2245}
2246
2247/// getInstrPredicate - If instruction is predicated, returns its predicate
2248/// condition, otherwise returns AL. It also returns the condition code
2249
2250ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2251 Register &PredReg) {
2252 int PIdx = MI.findFirstPredOperandIdx();
2253 if (PIdx == -1) {
2254 PredReg = 0;
2255 return ARMCC::AL;
2256 }
2257
2258 PredReg = MI.getOperand(PIdx+1).getReg();
2259 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2260}
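// Illustrative use (hypothetical caller): for a predicated "moveq r0, r1" this
// returns ARMCC::EQ with PredReg set to CPSR, while an unpredicated
// instruction yields ARMCC::AL with PredReg left at 0, e.g.
//   Register PredReg;
//   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);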
2261
2262unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2263 if (Opc == ARM::B)
2264 return ARM::Bcc;
2265 if (Opc == ARM::tB)
2266 return ARM::tBcc;
2267 if (Opc == ARM::t2B)
2268 return ARM::t2Bcc;
2269
2270 llvm_unreachable("Unknown unconditional branch opcode!");
2271}
2272
2273MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2274 bool NewMI,
2275 unsigned OpIdx1,
2276 unsigned OpIdx2) const {
2277 switch (MI.getOpcode()) {
2278 case ARM::MOVCCr:
2279 case ARM::t2MOVCCr: {
2280 // MOVCC can be commuted by inverting the condition.
2281 Register PredReg;
2282 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2283 // MOVCC AL can't be inverted. Shouldn't happen.
2284 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2285 return nullptr;
2286 MachineInstr *CommutedMI =
2287 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2288 if (!CommutedMI)
2289 return nullptr;
2290 // After swapping the MOVCC operands, also invert the condition.
2291 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2292 .setImm(ARMCC::getOppositeCondition(CC));
2293 return CommutedMI;
2294 }
2295 }
2296 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2297}
2298
2299/// Identify instructions that can be folded into a MOVCC instruction, and
2300
2301MachineInstr *
2302ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2303 const TargetInstrInfo *TII) const {
2304 if (!Reg.isVirtual())
2305 return nullptr;
2306 if (!MRI.hasOneNonDBGUse(Reg))
2307 return nullptr;
2308 MachineInstr *MI = MRI.getVRegDef(Reg);
2309 if (!MI)
2310 return nullptr;
2311 // Check if MI can be predicated and folded into the MOVCC.
2312 if (!isPredicable(*MI))
2313 return nullptr;
2314 // Check if MI has any non-dead defs or physreg uses. This also detects
2315 // predicated instructions which will be reading CPSR.
2316 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2317 // Reject frame index operands, PEI can't handle the predicated pseudos.
2318 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2319 return nullptr;
2320 if (!MO.isReg())
2321 continue;
2322 // MI can't have any tied operands, that would conflict with predication.
2323 if (MO.isTied())
2324 return nullptr;
2325 if (MO.getReg().isPhysical())
2326 return nullptr;
2327 if (MO.isDef() && !MO.isDead())
2328 return nullptr;
2329 }
2330 bool DontMoveAcrossStores = true;
2331 if (!MI->isSafeToMove(DontMoveAcrossStores))
2332 return nullptr;
2333 return MI;
2334}
2335
2336bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2337 SmallVectorImpl<MachineOperand> &Cond,
2338 unsigned &TrueOp, unsigned &FalseOp,
2339 bool &Optimizable) const {
2340 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2341 "Unknown select instruction");
2342 // MOVCC operands:
2343 // 0: Def.
2344 // 1: True use.
2345 // 2: False use.
2346 // 3: Condition code.
2347 // 4: CPSR use.
2348 TrueOp = 1;
2349 FalseOp = 2;
2350 Cond.push_back(MI.getOperand(3));
2351 Cond.push_back(MI.getOperand(4));
2352 // We can always fold a def.
2353 Optimizable = true;
2354 return false;
2355}
2356
2357MachineInstr *
2358ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2359 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2360 bool PreferFalse) const {
2361 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2362 "Unknown select instruction");
2363 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2364 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2365 bool Invert = !DefMI;
2366 if (!DefMI)
2367 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2368 if (!DefMI)
2369 return nullptr;
2370
2371 // Find new register class to use.
2372 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2373 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2374 Register DestReg = MI.getOperand(0).getReg();
2375 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2376 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2377 if (!MRI.constrainRegClass(DestReg, FalseClass))
2378 return nullptr;
2379 if (!MRI.constrainRegClass(DestReg, TrueClass))
2380 return nullptr;
2381
2382 // Create a new predicated version of DefMI.
2383 // Rfalse is the first use.
2384 MachineInstrBuilder NewMI =
2385 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2386
2387 // Copy all the DefMI operands, excluding its (null) predicate.
2388 const MCInstrDesc &DefDesc = DefMI->getDesc();
2389 for (unsigned i = 1, e = DefDesc.getNumOperands();
2390 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2391 NewMI.add(DefMI->getOperand(i));
2392
2393 unsigned CondCode = MI.getOperand(3).getImm();
2394 if (Invert)
2395 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2396 else
2397 NewMI.addImm(CondCode);
2398 NewMI.add(MI.getOperand(4));
2399
2400 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2401 if (NewMI->hasOptionalDef())
2402 NewMI.add(condCodeOp());
2403
2404 // The output register value when the predicate is false is an implicit
2405 // register operand tied to the first def.
2406 // The tie makes the register allocator ensure the FalseReg is allocated the
2407 // same register as operand 0.
2408 FalseReg.setImplicit();
2409 NewMI.add(FalseReg);
2410 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2411
2412 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2413 SeenMIs.insert(NewMI);
2414 SeenMIs.erase(DefMI);
2415
2416 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2417 // DefMI would be invalid when transferred inside the loop. Checking for a
2418 // loop is expensive, but at least remove kill flags if they are in different
2419 // BBs.
2420 if (DefMI->getParent() != MI.getParent())
2421 NewMI->clearKillInfo();
2422
2423 // The caller will erase MI, but not DefMI.
2424 DefMI->eraseFromParent();
2425 return NewMI;
2426}
2427
2428/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2429/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2430/// def operand.
2431///
2432/// This will go away once we can teach tblgen how to set the optional CPSR def
2433/// operand itself.
2434struct AddSubFlagsOpcodePair {
2435 uint16_t PseudoOpc;
2436 uint16_t MachineOpc;
2437};
2438
2439static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2440 {ARM::ADDSri, ARM::ADDri},
2441 {ARM::ADDSrr, ARM::ADDrr},
2442 {ARM::ADDSrsi, ARM::ADDrsi},
2443 {ARM::ADDSrsr, ARM::ADDrsr},
2444
2445 {ARM::SUBSri, ARM::SUBri},
2446 {ARM::SUBSrr, ARM::SUBrr},
2447 {ARM::SUBSrsi, ARM::SUBrsi},
2448 {ARM::SUBSrsr, ARM::SUBrsr},
2449
2450 {ARM::RSBSri, ARM::RSBri},
2451 {ARM::RSBSrsi, ARM::RSBrsi},
2452 {ARM::RSBSrsr, ARM::RSBrsr},
2453
2454 {ARM::tADDSi3, ARM::tADDi3},
2455 {ARM::tADDSi8, ARM::tADDi8},
2456 {ARM::tADDSrr, ARM::tADDrr},
2457 {ARM::tADCS, ARM::tADC},
2458
2459 {ARM::tSUBSi3, ARM::tSUBi3},
2460 {ARM::tSUBSi8, ARM::tSUBi8},
2461 {ARM::tSUBSrr, ARM::tSUBrr},
2462 {ARM::tSBCS, ARM::tSBC},
2463 {ARM::tRSBS, ARM::tRSB},
2464 {ARM::tLSLSri, ARM::tLSLri},
2465
2466 {ARM::t2ADDSri, ARM::t2ADDri},
2467 {ARM::t2ADDSrr, ARM::t2ADDrr},
2468 {ARM::t2ADDSrs, ARM::t2ADDrs},
2469
2470 {ARM::t2SUBSri, ARM::t2SUBri},
2471 {ARM::t2SUBSrr, ARM::t2SUBrr},
2472 {ARM::t2SUBSrs, ARM::t2SUBrs},
2473
2474 {ARM::t2RSBSri, ARM::t2RSBri},
2475 {ARM::t2RSBSrs, ARM::t2RSBrs},
2476};
2477
2478unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2479 for (const auto &Entry : AddSubFlagsOpcodeMap)
2480 if (OldOpc == Entry.PseudoOpc)
2481 return Entry.MachineOpc;
2482 return 0;
2483}
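// Illustration: convertAddSubFlagsOpcode(ARM::t2SUBSri) maps to ARM::t2SUBri,
// while an opcode without a table entry returns 0, which callers treat as
// "not an add/sub flag-setting pseudo".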
2484
2485void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2486 MachineBasicBlock::iterator &MBBI,
2487 const DebugLoc &dl, Register DestReg,
2488 Register BaseReg, int NumBytes,
2489 ARMCC::CondCodes Pred, Register PredReg,
2490 const ARMBaseInstrInfo &TII,
2491 unsigned MIFlags) {
2492 if (NumBytes == 0 && DestReg != BaseReg) {
2493 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2494 .addReg(BaseReg, RegState::Kill)
2495 .add(predOps(Pred, PredReg))
2496 .add(condCodeOp())
2497 .setMIFlags(MIFlags);
2498 return;
2499 }
2500
2501 bool isSub = NumBytes < 0;
2502 if (isSub) NumBytes = -NumBytes;
2503
2504 while (NumBytes) {
2505 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2506 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2507 assert(ThisVal && "Didn't extract field correctly");
2508
2509 // We will handle these bits from offset, clear them.
2510 NumBytes &= ~ThisVal;
2511
2512 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2513
2514 // Build the new ADD / SUB.
2515 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2516 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2517 .addReg(BaseReg, RegState::Kill)
2518 .addImm(ThisVal)
2519 .add(predOps(Pred, PredReg))
2520 .add(condCodeOp())
2521 .setMIFlags(MIFlags);
2522 BaseReg = DestReg;
2523 }
2524}
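// Illustration (hypothetical value): NumBytes = 0x101 is not a single ARM
// so_imm, so the loop above emits two instructions, roughly
//   ADD dst, base, #0x100
//   ADD dst, dst,  #0x1
// (which 8-bit chunk is peeled off first depends on getSOImmValRotate).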
2525
2526bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2527 MachineFunction &MF, MachineInstr *MI,
2528 unsigned NumBytes) {
2529 // This optimisation potentially adds lots of load and store
2530 // micro-operations; it is only really a benefit for code size.
2531 if (!Subtarget.hasMinSize())
2532 return false;
2533
2534 // If only one register is pushed/popped, LLVM can use an LDR/STR
2535 // instead. We can't modify those so make sure we're dealing with an
2536 // instruction we understand.
2537 bool IsPop = isPopOpcode(MI->getOpcode());
2538 bool IsPush = isPushOpcode(MI->getOpcode());
2539 if (!IsPush && !IsPop)
2540 return false;
2541
2542 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2543 MI->getOpcode() == ARM::VLDMDIA_UPD;
2544 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2545 MI->getOpcode() == ARM::tPOP ||
2546 MI->getOpcode() == ARM::tPOP_RET;
2547
2548 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2549 MI->getOperand(1).getReg() == ARM::SP)) &&
2550 "trying to fold sp update into non-sp-updating push/pop");
2551
2552 // The VFP push & pop act on D-registers, so we can only correctly fold an
2553 // adjustment that is a multiple of 8 bytes. Similarly, core rN registers are
2554 // 4 bytes each. Don't try if this is violated.
2555 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2556 return false;
2557
2558 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2559 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2560 int RegListIdx = IsT1PushPop ? 2 : 4;
2561
2562 // Calculate the space we'll need in terms of registers.
2563 unsigned RegsNeeded;
2564 const TargetRegisterClass *RegClass;
2565 if (IsVFPPushPop) {
2566 RegsNeeded = NumBytes / 8;
2567 RegClass = &ARM::DPRRegClass;
2568 } else {
2569 RegsNeeded = NumBytes / 4;
2570 RegClass = &ARM::GPRRegClass;
2571 }
2572
2573 // We're going to have to strip all list operands off before
2574 // re-adding them since the order matters, so save the existing ones
2575 // for later.
2576 SmallVector<MachineOperand, 4> RegList;
2577
2578 // We're also going to need the first register transferred by this
2579 // instruction, which won't necessarily be the first register in the list.
2580 unsigned FirstRegEnc = -1;
2581
2582 const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2583 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2584 MachineOperand &MO = MI->getOperand(i);
2585 RegList.push_back(MO);
2586
2587 if (MO.isReg() && !MO.isImplicit() &&
2588 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2589 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2590 }
2591
2592 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2593
2594 // Now try to find enough space in the reglist to allocate NumBytes.
2595 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2596 --CurRegEnc) {
2597 unsigned CurReg = RegClass->getRegister(CurRegEnc);
2598 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2599 continue;
2600 if (!IsPop) {
2601 // Pushing any register is completely harmless; mark the register involved
2602 // as undef since we don't care about its value and must not restore it
2603 // during stack unwinding.
2604 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2605 false, false, true));
2606 --RegsNeeded;
2607 continue;
2608 }
2609
2610 // However, we can only pop an extra register if it's not live. For
2611 // registers live within the function we might clobber a return value
2612 // register; the other way a register can be live here is if it's
2613 // callee-saved.
2614 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2615 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2616 MachineBasicBlock::LQR_Dead) {
2617 // VFP pops don't allow holes in the register list, so any skip is fatal
2618 // for our transformation. GPR pops do, so we should just keep looking.
2619 if (IsVFPPushPop)
2620 return false;
2621 else
2622 continue;
2623 }
2624
2625 // Mark the unimportant registers as <def,dead> in the POP.
2626 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2627 true));
2628 --RegsNeeded;
2629 }
2630
2631 if (RegsNeeded > 0)
2632 return false;
2633
2634 // Finally we know we can profitably perform the optimisation so go
2635 // ahead: strip all existing registers off and add them back again
2636 // in the right order.
2637 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2638 MI->removeOperand(i);
2639
2640 // Add the complete list back in.
2641 MachineInstrBuilder MIB(MF, &*MI);
2642 for (const MachineOperand &MO : llvm::reverse(RegList))
2643 MIB.add(MO);
2644
2645 return true;
2646}
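// Illustration (hypothetical frame, -Oz): with 8 spare bytes of SP adjustment,
// a prologue "push {r4, lr}" can be rewritten as "push {r2, r3, r4, lr}" with
// r2/r3 pushed as undef scratch, and a matching "pop {r4, pc}" as
// "pop {r2, r3, r4, pc}" with r2/r3 defined dead, removing the separate SP
// add/sub instructions.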
2647
2648bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2649 Register FrameReg, int &Offset,
2650 const ARMBaseInstrInfo &TII) {
2651 unsigned Opcode = MI.getOpcode();
2652 const MCInstrDesc &Desc = MI.getDesc();
2653 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2654 bool isSub = false;
2655
2656 // Memory operands in inline assembly always use AddrMode2.
2657 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2658 AddrMode = ARMII::AddrMode2;
2659
2660 if (Opcode == ARM::ADDri) {
2661 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2662 if (Offset == 0) {
2663 // Turn it into a move.
2664 MI.setDesc(TII.get(ARM::MOVr));
2665 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2666 MI.removeOperand(FrameRegIdx+1);
2667 Offset = 0;
2668 return true;
2669 } else if (Offset < 0) {
2670 Offset = -Offset;
2671 isSub = true;
2672 MI.setDesc(TII.get(ARM::SUBri));
2673 }
2674
2675 // Common case: small offset, fits into instruction.
2676 if (ARM_AM::getSOImmVal(Offset) != -1) {
2677 // Replace the FrameIndex with sp / fp
2678 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2679 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2680 Offset = 0;
2681 return true;
2682 }
2683
2684 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2685 // as possible.
2686 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2687 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2688
2689 // We will handle these bits from offset, clear them.
2690 Offset &= ~ThisImmVal;
2691
2692 // Get the properly encoded SOImmVal field.
2693 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2694 "Bit extraction didn't work?");
2695 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2696 } else {
2697 unsigned ImmIdx = 0;
2698 int InstrOffs = 0;
2699 unsigned NumBits = 0;
2700 unsigned Scale = 1;
2701 switch (AddrMode) {
2702 case ARMII::AddrMode_i12:
2703 ImmIdx = FrameRegIdx + 1;
2704 InstrOffs = MI.getOperand(ImmIdx).getImm();
2705 NumBits = 12;
2706 break;
2707 case ARMII::AddrMode2:
2708 ImmIdx = FrameRegIdx+2;
2709 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2710 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2711 InstrOffs *= -1;
2712 NumBits = 12;
2713 break;
2714 case ARMII::AddrMode3:
2715 ImmIdx = FrameRegIdx+2;
2716 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2717 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2718 InstrOffs *= -1;
2719 NumBits = 8;
2720 break;
2721 case ARMII::AddrMode4:
2722 case ARMII::AddrMode6:
2723 // Can't fold any offset even if it's zero.
2724 return false;
2725 case ARMII::AddrMode5:
2726 ImmIdx = FrameRegIdx+1;
2727 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2728 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2729 InstrOffs *= -1;
2730 NumBits = 8;
2731 Scale = 4;
2732 break;
2733 case ARMII::AddrMode5FP16:
2734 ImmIdx = FrameRegIdx+1;
2735 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2736 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2737 InstrOffs *= -1;
2738 NumBits = 8;
2739 Scale = 2;
2740 break;
2741 case ARMII::AddrModeT2_i7s4:
2742 case ARMII::AddrModeT2_i7s2:
2743 case ARMII::AddrModeT2_i7:
2744 ImmIdx = FrameRegIdx+1;
2745 InstrOffs = MI.getOperand(ImmIdx).getImm();
2746 NumBits = 7;
2747 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2748 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2749 break;
2750 default:
2751 llvm_unreachable("Unsupported addressing mode!");
2752 }
2753
2754 Offset += InstrOffs * Scale;
2755 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2756 if (Offset < 0) {
2757 Offset = -Offset;
2758 isSub = true;
2759 }
2760
2761 // Attempt to fold the address computation if the opcode has offset bits.
2762 if (NumBits > 0) {
2763 // Common case: small offset, fits into instruction.
2764 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2765 int ImmedOffset = Offset / Scale;
2766 unsigned Mask = (1 << NumBits) - 1;
2767 if ((unsigned)Offset <= Mask * Scale) {
2768 // Replace the FrameIndex with sp
2769 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2770 // FIXME: When addrmode2 goes away, this will simplify (like the
2771 // T2 version), as the LDR.i12 versions don't need the encoding
2772 // tricks for the offset value.
2773 if (isSub) {
2774 if (AddrMode == ARMII::AddrMode_i12 || AddrMode == ARMII::AddrMode2)
2775 ImmedOffset = -ImmedOffset;
2776 else
2777 ImmedOffset |= 1 << NumBits;
2778 }
2779 ImmOp.ChangeToImmediate(ImmedOffset);
2780 Offset = 0;
2781 return true;
2782 }
2783
2784 // Otherwise, it didn't fit. Pull in what we can to simplify the immediate.
2785 ImmedOffset = ImmedOffset & Mask;
2786 if (isSub) {
2787 if (AddrMode == ARMII::AddrMode_i12 || AddrMode == ARMII::AddrMode2)
2788 ImmedOffset = -ImmedOffset;
2789 else
2790 ImmedOffset |= 1 << NumBits;
2791 }
2792 ImmOp.ChangeToImmediate(ImmedOffset);
2793 Offset &= ~(Mask*Scale);
2794 }
2795 }
2796
2797 Offset = (isSub) ? -Offset : Offset;
2798 return Offset == 0;
2799}
2800
2801/// analyzeCompare - For a comparison instruction, return the source registers
2802/// in SrcReg and SrcReg2 if it has two register operands, and the value it
2803/// compares against in CmpValue. Return true if the comparison instruction
2804/// can be analyzed.
2805bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2806 Register &SrcReg2, int64_t &CmpMask,
2807 int64_t &CmpValue) const {
2808 switch (MI.getOpcode()) {
2809 default: break;
2810 case ARM::CMPri:
2811 case ARM::t2CMPri:
2812 case ARM::tCMPi8:
2813 SrcReg = MI.getOperand(0).getReg();
2814 SrcReg2 = 0;
2815 CmpMask = ~0;
2816 CmpValue = MI.getOperand(1).getImm();
2817 return true;
2818 case ARM::CMPrr:
2819 case ARM::t2CMPrr:
2820 case ARM::tCMPr:
2821 SrcReg = MI.getOperand(0).getReg();
2822 SrcReg2 = MI.getOperand(1).getReg();
2823 CmpMask = ~0;
2824 CmpValue = 0;
2825 return true;
2826 case ARM::TSTri:
2827 case ARM::t2TSTri:
2828 SrcReg = MI.getOperand(0).getReg();
2829 SrcReg2 = 0;
2830 CmpMask = MI.getOperand(1).getImm();
2831 CmpValue = 0;
2832 return true;
2833 }
2834
2835 return false;
2836}
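// Illustration: "CMP r0, #42" yields SrcReg = r0, SrcReg2 = 0, CmpMask = ~0,
// CmpValue = 42; "TST r1, #0xff" yields SrcReg = r1, CmpMask = 0xff,
// CmpValue = 0, which the mask-handling path of optimizeCompareInstr below
// relies on.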
2837
2838/// isSuitableForMask - Identify a suitable 'and' instruction that
2839/// operates on the given source register and applies the same mask
2840/// as a 'tst' instruction. Provide a limited look-through for copies.
2841/// When successful, MI will hold the found instruction.
2842static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2843 int CmpMask, bool CommonUse) {
2844 switch (MI->getOpcode()) {
2845 case ARM::ANDri:
2846 case ARM::t2ANDri:
2847 if (CmpMask != MI->getOperand(2).getImm())
2848 return false;
2849 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2850 return true;
2851 break;
2852 }
2853
2854 return false;
2855}
2856
2857/// getCmpToAddCondition - assuming the flags are set by CMP(a,b), return
2858/// the condition code to use if we modify the instructions so that the flags
2859/// are instead set by ADD(a,b,X).
2860inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2861 switch (CC) {
2862 default: return ARMCC::AL;
2863 case ARMCC::HS: return ARMCC::LO;
2864 case ARMCC::LO: return ARMCC::HS;
2865 case ARMCC::VS: return ARMCC::VS;
2866 case ARMCC::VC: return ARMCC::VC;
2867 }
2868}
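// Illustration: when optimizeCompareInstr reuses the flags of an ADD in place
// of a CMP, users testing HS are rewritten to LO and vice versa, VS/VC are
// kept as-is, and any other condition makes this helper return AL, which the
// caller treats as "cannot rewrite".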
2869
2870/// isRedundantFlagInstr - check whether the first instruction, whose only
2871/// purpose is to update flags, can be made redundant.
2872/// CMPrr can be made redundant by SUBrr if the operands are the same.
2873/// CMPri can be made redundant by SUBri if the operands are the same.
2874/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2875/// This function can be extended later on.
2876inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2877 Register SrcReg, Register SrcReg2,
2878 int64_t ImmValue,
2879 const MachineInstr *OI,
2880 bool &IsThumb1) {
2881 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2882 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2883 ((OI->getOperand(1).getReg() == SrcReg &&
2884 OI->getOperand(2).getReg() == SrcReg2) ||
2885 (OI->getOperand(1).getReg() == SrcReg2 &&
2886 OI->getOperand(2).getReg() == SrcReg))) {
2887 IsThumb1 = false;
2888 return true;
2889 }
2890
2891 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2892 ((OI->getOperand(2).getReg() == SrcReg &&
2893 OI->getOperand(3).getReg() == SrcReg2) ||
2894 (OI->getOperand(2).getReg() == SrcReg2 &&
2895 OI->getOperand(3).getReg() == SrcReg))) {
2896 IsThumb1 = true;
2897 return true;
2898 }
2899
2900 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2901 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2902 OI->getOperand(1).getReg() == SrcReg &&
2903 OI->getOperand(2).getImm() == ImmValue) {
2904 IsThumb1 = false;
2905 return true;
2906 }
2907
2908 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2909 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2910 OI->getOperand(2).getReg() == SrcReg &&
2911 OI->getOperand(3).getImm() == ImmValue) {
2912 IsThumb1 = true;
2913 return true;
2914 }
2915
2916 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2917 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2918 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2919 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2920 OI->getOperand(0).getReg() == SrcReg &&
2921 OI->getOperand(1).getReg() == SrcReg2) {
2922 IsThumb1 = false;
2923 return true;
2924 }
2925
2926 if (CmpI->getOpcode() == ARM::tCMPr &&
2927 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2928 OI->getOpcode() == ARM::tADDrr) &&
2929 OI->getOperand(0).getReg() == SrcReg &&
2930 OI->getOperand(2).getReg() == SrcReg2) {
2931 IsThumb1 = true;
2932 return true;
2933 }
2934
2935 return false;
2936}
2937
2938static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2939 switch (MI->getOpcode()) {
2940 default: return false;
2941 case ARM::tLSLri:
2942 case ARM::tLSRri:
2943 case ARM::tLSLrr:
2944 case ARM::tLSRrr:
2945 case ARM::tSUBrr:
2946 case ARM::tADDrr:
2947 case ARM::tADDi3:
2948 case ARM::tADDi8:
2949 case ARM::tSUBi3:
2950 case ARM::tSUBi8:
2951 case ARM::tMUL:
2952 case ARM::tADC:
2953 case ARM::tSBC:
2954 case ARM::tRSB:
2955 case ARM::tAND:
2956 case ARM::tORR:
2957 case ARM::tEOR:
2958 case ARM::tBIC:
2959 case ARM::tMVN:
2960 case ARM::tASRri:
2961 case ARM::tASRrr:
2962 case ARM::tROR:
2963 IsThumb1 = true;
2964 [[fallthrough]];
2965 case ARM::RSBrr:
2966 case ARM::RSBri:
2967 case ARM::RSCrr:
2968 case ARM::RSCri:
2969 case ARM::ADDrr:
2970 case ARM::ADDri:
2971 case ARM::ADCrr:
2972 case ARM::ADCri:
2973 case ARM::SUBrr:
2974 case ARM::SUBri:
2975 case ARM::SBCrr:
2976 case ARM::SBCri:
2977 case ARM::t2RSBri:
2978 case ARM::t2ADDrr:
2979 case ARM::t2ADDri:
2980 case ARM::t2ADCrr:
2981 case ARM::t2ADCri:
2982 case ARM::t2SUBrr:
2983 case ARM::t2SUBri:
2984 case ARM::t2SBCrr:
2985 case ARM::t2SBCri:
2986 case ARM::ANDrr:
2987 case ARM::ANDri:
2988 case ARM::ANDrsr:
2989 case ARM::ANDrsi:
2990 case ARM::t2ANDrr:
2991 case ARM::t2ANDri:
2992 case ARM::t2ANDrs:
2993 case ARM::ORRrr:
2994 case ARM::ORRri:
2995 case ARM::ORRrsr:
2996 case ARM::ORRrsi:
2997 case ARM::t2ORRrr:
2998 case ARM::t2ORRri:
2999 case ARM::t2ORRrs:
3000 case ARM::EORrr:
3001 case ARM::EORri:
3002 case ARM::EORrsr:
3003 case ARM::EORrsi:
3004 case ARM::t2EORrr:
3005 case ARM::t2EORri:
3006 case ARM::t2EORrs:
3007 case ARM::BICri:
3008 case ARM::BICrr:
3009 case ARM::BICrsi:
3010 case ARM::BICrsr:
3011 case ARM::t2BICri:
3012 case ARM::t2BICrr:
3013 case ARM::t2BICrs:
3014 case ARM::t2LSRri:
3015 case ARM::t2LSRrr:
3016 case ARM::t2LSLri:
3017 case ARM::t2LSLrr:
3018 case ARM::MOVsr:
3019 case ARM::MOVsi:
3020 return true;
3021 }
3022}
3023
3024/// optimizeCompareInstr - Convert the instruction supplying the argument to the
3025/// comparison into one that sets the zero bit in the flags register;
3026/// remove a redundant compare instruction if an earlier instruction can set the
3027/// flags in the same way as the compare.
3028/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
3029/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
3030/// condition code of instructions which use the flags.
3031bool ARMBaseInstrInfo::optimizeCompareInstr(
3032 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
3033 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
3034 // Get the unique definition of SrcReg.
3035 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3036 if (!MI) return false;
3037
3038 // Masked compares sometimes use the same register as the corresponding 'and'.
3039 if (CmpMask != ~0) {
3040 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
3041 MI = nullptr;
3042 for (MachineRegisterInfo::use_instr_iterator
3043 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3044 UI != UE; ++UI) {
3045 if (UI->getParent() != CmpInstr.getParent())
3046 continue;
3047 MachineInstr *PotentialAND = &*UI;
3048 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
3049 isPredicated(*PotentialAND))
3050 continue;
3051 MI = PotentialAND;
3052 break;
3053 }
3054 if (!MI) return false;
3055 }
3056 }
3057
3058 // Get ready to iterate backward from CmpInstr.
3059 MachineBasicBlock::iterator I = CmpInstr, E = MI,
3060 B = CmpInstr.getParent()->begin();
3061
3062 // Early exit if CmpInstr is at the beginning of the BB.
3063 if (I == B) return false;
3064
3065 // There are two possible candidates which can be changed to set CPSR:
3066 // One is MI, the other is a SUB or ADD instruction.
3067 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3068 // ADDr[ri](r1, r2, X).
3069 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3070 MachineInstr *SubAdd = nullptr;
3071 if (SrcReg2 != 0)
3072 // MI is not a candidate for CMPrr.
3073 MI = nullptr;
3074 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3075 // Conservatively refuse to convert an instruction which isn't in the same
3076 // BB as the comparison.
3077 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3078 // Thus we cannot return here.
3079 if (CmpInstr.getOpcode() == ARM::CMPri ||
3080 CmpInstr.getOpcode() == ARM::t2CMPri ||
3081 CmpInstr.getOpcode() == ARM::tCMPi8)
3082 MI = nullptr;
3083 else
3084 return false;
3085 }
3086
3087 bool IsThumb1 = false;
3088 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3089 return false;
3090
3091 // We also want to do this peephole for cases like this: if (a*b == 0),
3092 // and optimise away the CMP instruction from the generated code sequence:
3093 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3094 // resulting from the select instruction, but these MOVS instructions for
3095 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3096 // However, if we only have MOVS instructions in between the CMP and the
3097 // other instruction (the MULS in this example), then the CPSR is dead so we
3098 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3099 // reordering and then continue the analysis hoping we can eliminate the
3100 // CMP. This peephole works on the vregs, so is still in SSA form. As a
3101 // consequence, the movs won't redefine/kill the MUL operands which would
3102 // make this reordering illegal.
3103 const TargetRegisterInfo *TRI = &getRegisterInfo();
3104 if (MI && IsThumb1) {
3105 --I;
3106 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3107 bool CanReorder = true;
3108 for (; I != E; --I) {
3109 if (I->getOpcode() != ARM::tMOVi8) {
3110 CanReorder = false;
3111 break;
3112 }
3113 }
3114 if (CanReorder) {
3115 MI = MI->removeFromParent();
3116 E = CmpInstr;
3117 CmpInstr.getParent()->insert(E, MI);
3118 }
3119 }
3120 I = CmpInstr;
3121 E = MI;
3122 }
3123
3124 // Check that CPSR isn't set between the comparison instruction and the one we
3125 // want to change. At the same time, search for SubAdd.
3126 bool SubAddIsThumb1 = false;
3127 do {
3128 const MachineInstr &Instr = *--I;
3129
3130 // Check whether CmpInstr can be made redundant by the current instruction.
3131 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3132 SubAddIsThumb1)) {
3133 SubAdd = &*I;
3134 break;
3135 }
3136
3137 // Allow E (which was initially MI) to be SubAdd but do not search before E.
3138 if (I == E)
3139 break;
3140
3141 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3142 Instr.readsRegister(ARM::CPSR, TRI))
3143 // This instruction modifies or uses CPSR after the one we want to
3144 // change. We can't do this transformation.
3145 return false;
3146
3147 if (I == B) {
3148 // In some cases, we scan the use-list of an instruction for an AND;
3149 // that AND is in the same BB, but may not be scheduled before the
3150 // corresponding TST. In that case, bail out.
3151 //
3152 // FIXME: We could try to reschedule the AND.
3153 return false;
3154 }
3155 } while (true);
3156
3157 // Return false if no candidates exist.
3158 if (!MI && !SubAdd)
3159 return false;
3160
3161 // If we found a SubAdd, use it as it will be closer to the CMP
3162 if (SubAdd) {
3163 MI = SubAdd;
3164 IsThumb1 = SubAddIsThumb1;
3165 }
3166
3167 // We can't use a predicated instruction - it doesn't always write the flags.
3168 if (isPredicated(*MI))
3169 return false;
3170
3171 // Scan forward for the use of CPSR
3172 // When checking against MI: if it's a conditional code that requires
3173 // checking of the V bit or C bit, then this is not safe to do.
3174 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3175 // If we are done with the basic block, we need to check whether CPSR is
3176 // live-out.
3177 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3178 OperandsToUpdate;
3179 bool isSafe = false;
3180 I = CmpInstr;
3181 E = CmpInstr.getParent()->end();
3182 while (!isSafe && ++I != E) {
3183 const MachineInstr &Instr = *I;
3184 for (unsigned IO = 0, EO = Instr.getNumOperands();
3185 !isSafe && IO != EO; ++IO) {
3186 const MachineOperand &MO = Instr.getOperand(IO);
3187 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3188 isSafe = true;
3189 break;
3190 }
3191 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3192 continue;
3193 if (MO.isDef()) {
3194 isSafe = true;
3195 break;
3196 }
3197 // The condition code operand immediately precedes the CPSR operand, except for VSELs.
3198 ARMCC::CondCodes CC;
3199 bool IsInstrVSel = true;
3200 switch (Instr.getOpcode()) {
3201 default:
3202 IsInstrVSel = false;
3203 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3204 break;
3205 case ARM::VSELEQD:
3206 case ARM::VSELEQS:
3207 case ARM::VSELEQH:
3208 CC = ARMCC::EQ;
3209 break;
3210 case ARM::VSELGTD:
3211 case ARM::VSELGTS:
3212 case ARM::VSELGTH:
3213 CC = ARMCC::GT;
3214 break;
3215 case ARM::VSELGED:
3216 case ARM::VSELGES:
3217 case ARM::VSELGEH:
3218 CC = ARMCC::GE;
3219 break;
3220 case ARM::VSELVSD:
3221 case ARM::VSELVSS:
3222 case ARM::VSELVSH:
3223 CC = ARMCC::VS;
3224 break;
3225 }
3226
3227 if (SubAdd) {
3228 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3229 // on CMP needs to be updated to be based on SUB.
3230 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3231 // needs to be modified.
3232 // Push the condition code operands to OperandsToUpdate.
3233 // If it is safe to remove CmpInstr, the condition code of these
3234 // operands will be modified.
3235 unsigned Opc = SubAdd->getOpcode();
3236 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3237 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3238 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3239 Opc == ARM::tSUBi8;
3240 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3241 if (!IsSub ||
3242 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3243 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3244 // VSel doesn't support condition code update.
3245 if (IsInstrVSel)
3246 return false;
3247 // Ensure we can swap the condition.
3248 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3249 if (NewCC == ARMCC::AL)
3250 return false;
3251 OperandsToUpdate.push_back(
3252 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3253 }
3254 } else {
3255 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3256 switch (CC) {
3257 case ARMCC::EQ: // Z
3258 case ARMCC::NE: // Z
3259 case ARMCC::MI: // N
3260 case ARMCC::PL: // N
3261 case ARMCC::AL: // none
3262 // CPSR can be used multiple times, we should continue.
3263 break;
3264 case ARMCC::HS: // C
3265 case ARMCC::LO: // C
3266 case ARMCC::VS: // V
3267 case ARMCC::VC: // V
3268 case ARMCC::HI: // C Z
3269 case ARMCC::LS: // C Z
3270 case ARMCC::GE: // N V
3271 case ARMCC::LT: // N V
3272 case ARMCC::GT: // Z N V
3273 case ARMCC::LE: // Z N V
3274 // The instruction uses the V bit or C bit which is not safe.
3275 return false;
3276 }
3277 }
3278 }
3279 }
3280
3281 // If CPSR is not killed nor re-defined, we should check whether it is
3282 // live-out. If it is live-out, do not optimize.
3283 if (!isSafe) {
3284 MachineBasicBlock *MBB = CmpInstr.getParent();
3285 for (MachineBasicBlock *Succ : MBB->successors())
3286 if (Succ->isLiveIn(ARM::CPSR))
3287 return false;
3288 }
3289
3290 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3291 // set CPSR so this is represented as an explicit output)
3292 if (!IsThumb1) {
3293 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3294 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3295 MI->getOperand(CPSRRegNum).setIsDef(true);
3296 }
3297 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3298 CmpInstr.eraseFromParent();
3299
3300 // Modify the condition code of operands in OperandsToUpdate.
3301 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3302 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3303 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3304 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3305
3306 MI->clearRegisterDeads(ARM::CPSR);
3307
3308 return true;
3309}
3310
3311bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3312 // Do not sink MI if it might be used to optimize a redundant compare.
3313 // We heuristically only look at the instruction immediately following MI to
3314 // avoid potentially searching the entire basic block.
3315 if (isPredicated(MI))
3316 return true;
3317 MachineBasicBlock::const_iterator Next = &MI;
3318 ++Next;
3319 Register SrcReg, SrcReg2;
3320 int64_t CmpMask, CmpValue;
3321 bool IsThumb1;
3322 if (Next != MI.getParent()->end() &&
3323 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3324 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3325 return false;
3326 return true;
3327}
3328
3329bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3330 Register Reg,
3331 MachineRegisterInfo *MRI) const {
3332 // Fold large immediates into add, sub, or, xor.
3333 unsigned DefOpc = DefMI.getOpcode();
3334 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3335 DefOpc != ARM::tMOVi32imm)
3336 return false;
3337 if (!DefMI.getOperand(1).isImm())
3338 // Could be t2MOVi32imm @xx
3339 return false;
3340
3341 if (!MRI->hasOneNonDBGUse(Reg))
3342 return false;
3343
3344 const MCInstrDesc &DefMCID = DefMI.getDesc();
3345 if (DefMCID.hasOptionalDef()) {
3346 unsigned NumOps = DefMCID.getNumOperands();
3347 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3348 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3349 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3350 // to delete DefMI.
3351 return false;
3352 }
3353
3354 const MCInstrDesc &UseMCID = UseMI.getDesc();
3355 if (UseMCID.hasOptionalDef()) {
3356 unsigned NumOps = UseMCID.getNumOperands();
3357 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3358 // If the instruction sets the flag, do not attempt this optimization
3359 // since it may change the semantics of the code.
3360 return false;
3361 }
3362
3363 unsigned UseOpc = UseMI.getOpcode();
3364 unsigned NewUseOpc = 0;
3365 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3366 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3367 bool Commute = false;
3368 switch (UseOpc) {
3369 default: return false;
3370 case ARM::SUBrr:
3371 case ARM::ADDrr:
3372 case ARM::ORRrr:
3373 case ARM::EORrr:
3374 case ARM::t2SUBrr:
3375 case ARM::t2ADDrr:
3376 case ARM::t2ORRrr:
3377 case ARM::t2EORrr: {
3378 Commute = UseMI.getOperand(2).getReg() != Reg;
3379 switch (UseOpc) {
3380 default: break;
3381 case ARM::ADDrr:
3382 case ARM::SUBrr:
3383 if (UseOpc == ARM::SUBrr && Commute)
3384 return false;
3385
3386 // ADD/SUB are special because they're essentially the same operation, so
3387 // we can handle a larger range of immediates.
3388 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3389 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3390 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3391 ImmVal = -ImmVal;
3392 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3393 } else
3394 return false;
3395 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3396 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3397 break;
3398 case ARM::ORRrr:
3399 case ARM::EORrr:
3400 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3401 return false;
3402 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3403 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3404 switch (UseOpc) {
3405 default: break;
3406 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3407 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3408 }
3409 break;
3410 case ARM::t2ADDrr:
3411 case ARM::t2SUBrr: {
3412 if (UseOpc == ARM::t2SUBrr && Commute)
3413 return false;
3414
3415 // ADD/SUB are special because they're essentially the same operation, so
3416 // we can handle a larger range of immediates.
3417 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3418 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3419 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3420 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3421 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3422 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3423 ImmVal = -ImmVal;
3424 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3425 } else
3426 return false;
3427 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3428 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3429 break;
3430 }
3431 case ARM::t2ORRrr:
3432 case ARM::t2EORrr:
3433 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3434 return false;
3435 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3436 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3437 switch (UseOpc) {
3438 default: break;
3439 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3440 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3441 }
3442 break;
3443 }
3444 }
3445 }
3446
3447 unsigned OpIdx = Commute ? 2 : 1;
3448 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3449 bool isKill = UseMI.getOperand(OpIdx).isKill();
3450 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3451 Register NewReg = MRI->createVirtualRegister(TRC);
3452 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3453 NewReg)
3454 .addReg(Reg1, getKillRegState(isKill))
3455 .addImm(SOImmValV1)
3456 .add(predOps(ARMCC::AL))
3457 .add(condCodeOp());
3458 UseMI.setDesc(get(NewUseOpc));
3459 UseMI.getOperand(1).setReg(NewReg);
3460 UseMI.getOperand(1).setIsKill();
3461 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3462 DefMI.eraseFromParent();
3463 // FIXME: t2ADDrr should be split, as different rules apply when writing to
3464 // SP, just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3465 // Then the code below will not be needed, as the input/output register
3466 // classes will be rgpr or gprSP.
3467 // For now, we fix the UseMI operand explicitly here:
3468 switch(NewUseOpc){
3469 case ARM::t2ADDspImm:
3470 case ARM::t2SUBspImm:
3471 case ARM::t2ADDri:
3472 case ARM::t2SUBri:
3473 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3474 }
3475 return true;
3476}
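// Illustration (hypothetical vregs): a "%c = t2MOVi32imm 0x11001" with a
// single use in "t2ADDrr %d, %b, %c" can be rewritten into two immediate adds,
// roughly "t2ADDri %t, %b, #part1" then "t2ADDri %d, %t, #part2" (the split is
// chosen by isT2SOImmTwoPartVal/getT2SOImmTwoPart*), after which the
// t2MOVi32imm is erased; a negated constant flips ADD to SUB instead.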
3477
3478static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3479 const MachineInstr &MI) {
3480 switch (MI.getOpcode()) {
3481 default: {
3482 const MCInstrDesc &Desc = MI.getDesc();
3483 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3484 assert(UOps >= 0 && "bad # UOps");
3485 return UOps;
3486 }
3487
3488 case ARM::LDRrs:
3489 case ARM::LDRBrs:
3490 case ARM::STRrs:
3491 case ARM::STRBrs: {
3492 unsigned ShOpVal = MI.getOperand(3).getImm();
3493 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3494 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3495 if (!isSub &&
3496 (ShImm == 0 ||
3497 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3498 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3499 return 1;
3500 return 2;
3501 }
3502
3503 case ARM::LDRH:
3504 case ARM::STRH: {
3505 if (!MI.getOperand(2).getReg())
3506 return 1;
3507
3508 unsigned ShOpVal = MI.getOperand(3).getImm();
3509 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3510 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3511 if (!isSub &&
3512 (ShImm == 0 ||
3513 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3514 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3515 return 1;
3516 return 2;
3517 }
3518
3519 case ARM::LDRSB:
3520 case ARM::LDRSH:
3521 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3522
3523 case ARM::LDRSB_POST:
3524 case ARM::LDRSH_POST: {
3525 Register Rt = MI.getOperand(0).getReg();
3526 Register Rm = MI.getOperand(3).getReg();
3527 return (Rt == Rm) ? 4 : 3;
3528 }
3529
3530 case ARM::LDR_PRE_REG:
3531 case ARM::LDRB_PRE_REG: {
3532 Register Rt = MI.getOperand(0).getReg();
3533 Register Rm = MI.getOperand(3).getReg();
3534 if (Rt == Rm)
3535 return 3;
3536 unsigned ShOpVal = MI.getOperand(4).getImm();
3537 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3538 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3539 if (!isSub &&
3540 (ShImm == 0 ||
3541 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3542 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3543 return 2;
3544 return 3;
3545 }
3546
3547 case ARM::STR_PRE_REG:
3548 case ARM::STRB_PRE_REG: {
3549 unsigned ShOpVal = MI.getOperand(4).getImm();
3550 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3551 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3552 if (!isSub &&
3553 (ShImm == 0 ||
3554 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3555 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3556 return 2;
3557 return 3;
3558 }
3559
3560 case ARM::LDRH_PRE:
3561 case ARM::STRH_PRE: {
3562 Register Rt = MI.getOperand(0).getReg();
3563 Register Rm = MI.getOperand(3).getReg();
3564 if (!Rm)
3565 return 2;
3566 if (Rt == Rm)
3567 return 3;
3568 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3569 }
3570
3571 case ARM::LDR_POST_REG:
3572 case ARM::LDRB_POST_REG:
3573 case ARM::LDRH_POST: {
3574 Register Rt = MI.getOperand(0).getReg();
3575 Register Rm = MI.getOperand(3).getReg();
3576 return (Rt == Rm) ? 3 : 2;
3577 }
3578
3579 case ARM::LDR_PRE_IMM:
3580 case ARM::LDRB_PRE_IMM:
3581 case ARM::LDR_POST_IMM:
3582 case ARM::LDRB_POST_IMM:
3583 case ARM::STRB_POST_IMM:
3584 case ARM::STRB_POST_REG:
3585 case ARM::STRB_PRE_IMM:
3586 case ARM::STRH_POST:
3587 case ARM::STR_POST_IMM:
3588 case ARM::STR_POST_REG:
3589 case ARM::STR_PRE_IMM:
3590 return 2;
3591
3592 case ARM::LDRSB_PRE:
3593 case ARM::LDRSH_PRE: {
3594 Register Rm = MI.getOperand(3).getReg();
3595 if (Rm == 0)
3596 return 3;
3597 Register Rt = MI.getOperand(0).getReg();
3598 if (Rt == Rm)
3599 return 4;
3600 unsigned ShOpVal = MI.getOperand(4).getImm();
3601 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3602 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3603 if (!isSub &&
3604 (ShImm == 0 ||
3605 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3606 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3607 return 3;
3608 return 4;
3609 }
3610
3611 case ARM::LDRD: {
3612 Register Rt = MI.getOperand(0).getReg();
3613 Register Rn = MI.getOperand(2).getReg();
3614 Register Rm = MI.getOperand(3).getReg();
3615 if (Rm)
3616 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3617 : 3;
3618 return (Rt == Rn) ? 3 : 2;
3619 }
3620
3621 case ARM::STRD: {
3622 Register Rm = MI.getOperand(3).getReg();
3623 if (Rm)
3624 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3625 : 3;
3626 return 2;
3627 }
3628
3629 case ARM::LDRD_POST:
3630 case ARM::t2LDRD_POST:
3631 return 3;
3632
3633 case ARM::STRD_POST:
3634 case ARM::t2STRD_POST:
3635 return 4;
3636
3637 case ARM::LDRD_PRE: {
3638 Register Rt = MI.getOperand(0).getReg();
3639 Register Rn = MI.getOperand(3).getReg();
3640 Register Rm = MI.getOperand(4).getReg();
3641 if (Rm)
3642 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3643 : 4;
3644 return (Rt == Rn) ? 4 : 3;
3645 }
3646
3647 case ARM::t2LDRD_PRE: {
3648 Register Rt = MI.getOperand(0).getReg();
3649 Register Rn = MI.getOperand(3).getReg();
3650 return (Rt == Rn) ? 4 : 3;
3651 }
3652
3653 case ARM::STRD_PRE: {
3654 Register Rm = MI.getOperand(4).getReg();
3655 if (Rm)
3656 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3657 : 4;
3658 return 3;
3659 }
3660
3661 case ARM::t2STRD_PRE:
3662 return 3;
3663
3664 case ARM::t2LDR_POST:
3665 case ARM::t2LDRB_POST:
3666 case ARM::t2LDRB_PRE:
3667 case ARM::t2LDRSBi12:
3668 case ARM::t2LDRSBi8:
3669 case ARM::t2LDRSBpci:
3670 case ARM::t2LDRSBs:
3671 case ARM::t2LDRH_POST:
3672 case ARM::t2LDRH_PRE:
3673 case ARM::t2LDRSBT:
3674 case ARM::t2LDRSB_POST:
3675 case ARM::t2LDRSB_PRE:
3676 case ARM::t2LDRSH_POST:
3677 case ARM::t2LDRSH_PRE:
3678 case ARM::t2LDRSHi12:
3679 case ARM::t2LDRSHi8:
3680 case ARM::t2LDRSHpci:
3681 case ARM::t2LDRSHs:
3682 return 2;
3683
3684 case ARM::t2LDRDi8: {
3685 Register Rt = MI.getOperand(0).getReg();
3686 Register Rn = MI.getOperand(2).getReg();
3687 return (Rt == Rn) ? 3 : 2;
3688 }
3689
3690 case ARM::t2STRB_POST:
3691 case ARM::t2STRB_PRE:
3692 case ARM::t2STRBs:
3693 case ARM::t2STRDi8:
3694 case ARM::t2STRH_POST:
3695 case ARM::t2STRH_PRE:
3696 case ARM::t2STRHs:
3697 case ARM::t2STR_POST:
3698 case ARM::t2STR_PRE:
3699 case ARM::t2STRs:
3700 return 2;
3701 }
3702}
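// Illustrative sketch, not part of the original file: the register-offset test
// repeated above, factored out as plain arithmetic. IsSub, ShImm and IsLsl are
// hypothetical stand-ins for the ARM_AM::getAM2Op/getAM2Offset/getAM2ShiftOpc
// queries used in the real code.
static bool isSingleUopSwiftAddrMode(bool IsSub, unsigned ShImm, bool IsLsl) {
  // Only an add with no shift, or an add with lsl #1..#3, keeps the cheaper
  // micro-op count; subtracted offsets or other shifts cost an extra uop.
  return !IsSub && (ShImm == 0 || (ShImm >= 1 && ShImm <= 3 && IsLsl));
}
// e.g. [r0, r1, lsl #2] stays at one uop for LDRrs, while [r0, -r1] or
// [r0, r1, lsr #1] costs two.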
3703
3704// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3705// can't be easily determined, return 0 (missing MachineMemOperand).
3706//
3707// FIXME: The current MachineInstr design does not support relying on machine
3708// mem operands to determine the width of a memory access. Instead, we expect
3709// the target to provide this information based on the instruction opcode and
3710// operands. However, using MachineMemOperand is the best solution now for
3711// two reasons:
3712//
3713// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3714// operands. This is much more dangerous than using the MachineMemOperand
3715// sizes because CodeGen passes can insert/remove optional machine operands. In
3716// fact, it's totally incorrect for preRA passes and appears to be wrong for
3717// postRA passes as well.
3718//
3719// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3720// machine model that calls this should handle the unknown (zero size) case.
3721//
3722// Long term, we should require a target hook that verifies MachineMemOperand
3723// sizes during MC lowering. That target hook should be local to MC lowering
3724// because we can't ensure that it is aware of other MI forms. Doing this will
3725// ensure that MachineMemOperands are correctly propagated through all passes.
3726unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3727 unsigned Size = 0;
3728 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3729 E = MI.memoperands_end();
3730 I != E; ++I) {
3731 Size += (*I)->getSize().getValue();
3732 }
3733 // FIXME: The scheduler currently can't handle values larger than 16. But
3734 // the values can actually go up to 32 for floating-point load/store
3735 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3736 // operations isn't right; we could end up with "extra" memory operands for
3737 // various reasons, like tail merge merging two memory operations.
3738 return std::min(Size / 4, 16U);
3739}
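// Illustrative sketch, not part of the original file: the same "sum the memory
// operand sizes, count 32-bit words, clamp for the scheduler" arithmetic in
// isolation. The container below is a hypothetical stand-in for the instruction's
// memoperand list.
#include <algorithm>
#include <cstdint>
#include <vector>

static unsigned countLdmWords(const std::vector<uint64_t> &MemOpSizesInBytes) {
  uint64_t Bytes = 0;
  for (uint64_t S : MemOpSizesInBytes)
    Bytes += S;                               // total bytes covered by the operands
  return std::min<uint64_t>(Bytes / 4, 16);   // 32-bit words, capped at 16
}
// e.g. countLdmWords({4, 4, 4}) == 3, and an empty list yields 0, the "unknown
// width" value the machine model must tolerate.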
3740
3741static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3742 unsigned NumRegs) {
3743 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3744 switch (Opc) {
3745 default:
3746 break;
3747 case ARM::VLDMDIA_UPD:
3748 case ARM::VLDMDDB_UPD:
3749 case ARM::VLDMSIA_UPD:
3750 case ARM::VLDMSDB_UPD:
3751 case ARM::VSTMDIA_UPD:
3752 case ARM::VSTMDDB_UPD:
3753 case ARM::VSTMSIA_UPD:
3754 case ARM::VSTMSDB_UPD:
3755 case ARM::LDMIA_UPD:
3756 case ARM::LDMDA_UPD:
3757 case ARM::LDMDB_UPD:
3758 case ARM::LDMIB_UPD:
3759 case ARM::STMIA_UPD:
3760 case ARM::STMDA_UPD:
3761 case ARM::STMDB_UPD:
3762 case ARM::STMIB_UPD:
3763 case ARM::tLDMIA_UPD:
3764 case ARM::tSTMIA_UPD:
3765 case ARM::t2LDMIA_UPD:
3766 case ARM::t2LDMDB_UPD:
3767 case ARM::t2STMIA_UPD:
3768 case ARM::t2STMDB_UPD:
3769 ++UOps; // One for base register writeback.
3770 break;
3771 case ARM::LDMIA_RET:
3772 case ARM::tPOP_RET:
3773 case ARM::t2LDMIA_RET:
3774 UOps += 2; // One for base reg wb, one for write to pc.
3775 break;
3776 }
3777 return UOps;
3778}
3779
3780unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3781 const MachineInstr &MI) const {
3782 if (!ItinData || ItinData->isEmpty())
3783 return 1;
3784
3785 const MCInstrDesc &Desc = MI.getDesc();
3786 unsigned Class = Desc.getSchedClass();
3787 int ItinUOps = ItinData->getNumMicroOps(Class);
3788 if (ItinUOps >= 0) {
3789 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3790 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3791
3792 return ItinUOps;
3793 }
3794
3795 unsigned Opc = MI.getOpcode();
3796 switch (Opc) {
3797 default:
3798 llvm_unreachable("Unexpected multi-uops instruction!");
3799 case ARM::VLDMQIA:
3800 case ARM::VSTMQIA:
3801 return 2;
3802
3803 // The number of uOps for load / store multiple is determined by the number
3804 // of registers.
3805 //
3806 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3807 // same cycle. The scheduling for the first load / store must be done
3808 // separately by assuming the address is not 64-bit aligned.
3809 //
3810 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3811 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3812 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3813 case ARM::VLDMDIA:
3814 case ARM::VLDMDIA_UPD:
3815 case ARM::VLDMDDB_UPD:
3816 case ARM::VLDMSIA:
3817 case ARM::VLDMSIA_UPD:
3818 case ARM::VLDMSDB_UPD:
3819 case ARM::VSTMDIA:
3820 case ARM::VSTMDIA_UPD:
3821 case ARM::VSTMDDB_UPD:
3822 case ARM::VSTMSIA:
3823 case ARM::VSTMSIA_UPD:
3824 case ARM::VSTMSDB_UPD: {
3825 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3826 return (NumRegs / 2) + (NumRegs % 2) + 1;
3827 }
3828
3829 case ARM::LDMIA_RET:
3830 case ARM::LDMIA:
3831 case ARM::LDMDA:
3832 case ARM::LDMDB:
3833 case ARM::LDMIB:
3834 case ARM::LDMIA_UPD:
3835 case ARM::LDMDA_UPD:
3836 case ARM::LDMDB_UPD:
3837 case ARM::LDMIB_UPD:
3838 case ARM::STMIA:
3839 case ARM::STMDA:
3840 case ARM::STMDB:
3841 case ARM::STMIB:
3842 case ARM::STMIA_UPD:
3843 case ARM::STMDA_UPD:
3844 case ARM::STMDB_UPD:
3845 case ARM::STMIB_UPD:
3846 case ARM::tLDMIA:
3847 case ARM::tLDMIA_UPD:
3848 case ARM::tSTMIA_UPD:
3849 case ARM::tPOP_RET:
3850 case ARM::tPOP:
3851 case ARM::tPUSH:
3852 case ARM::t2LDMIA_RET:
3853 case ARM::t2LDMIA:
3854 case ARM::t2LDMDB:
3855 case ARM::t2LDMIA_UPD:
3856 case ARM::t2LDMDB_UPD:
3857 case ARM::t2STMIA:
3858 case ARM::t2STMDB:
3859 case ARM::t2STMIA_UPD:
3860 case ARM::t2STMDB_UPD: {
3861 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3862 switch (Subtarget.getLdStMultipleTiming()) {
3863 case ARMSubtarget::SingleIssuePlusExtras:
3864 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3865 case ARMSubtarget::SingleIssue:
3866 // Assume the worst.
3867 return NumRegs;
3868 case ARMSubtarget::DoubleIssue: {
3869 if (NumRegs < 4)
3870 return 2;
3871 // 4 registers would be issued: 2, 2.
3872 // 5 registers would be issued: 2, 2, 1.
3873 unsigned UOps = (NumRegs / 2);
3874 if (NumRegs % 2)
3875 ++UOps;
3876 return UOps;
3877 }
3878 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3879 unsigned UOps = (NumRegs / 2);
3880 // If there are odd number of registers or if it's not 64-bit aligned,
3881 // then it takes an extra AGU (Address Generation Unit) cycle.
3882 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3883 (*MI.memoperands_begin())->getAlign() < Align(8))
3884 ++UOps;
3885 return UOps;
3886 }
3887 }
3888 }
3889 }
3890 llvm_unreachable("Didn't find the number of microops");
3891}
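// Illustrative sketch, not part of the original file: the Cortex-A9-style
// load/store-multiple uop estimate described in the comments above, as plain
// arithmetic. The parameter names are hypothetical stand-ins for the register
// count and the 64-bit alignment check used in the real code.
static unsigned estimateLdStMultipleUOps(unsigned NumRegs, bool Aligned64) {
  unsigned UOps = NumRegs / 2;  // one uop per pair of registers
  if ((NumRegs % 2) || !Aligned64)
    ++UOps;                     // odd register count or unaligned base: extra AGU cycle
  return UOps;
}
// e.g. estimateLdStMultipleUOps(5, true) == 3; a VFP/NEON VLDM/VSTM of 4 D-registers
// pays one additional uop on top of this, per the formula quoted above.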
3892
3893std::optional<unsigned>
3894ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3895 const MCInstrDesc &DefMCID, unsigned DefClass,
3896 unsigned DefIdx, unsigned DefAlign) const {
3897 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3898 if (RegNo <= 0)
3899 // Def is the address writeback.
3900 return ItinData->getOperandCycle(DefClass, DefIdx);
3901
3902 unsigned DefCycle;
3903 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3904 // (regno / 2) + (regno % 2) + 1
3905 DefCycle = RegNo / 2 + 1;
3906 if (RegNo % 2)
3907 ++DefCycle;
3908 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3909 DefCycle = RegNo;
3910 bool isSLoad = false;
3911
3912 switch (DefMCID.getOpcode()) {
3913 default: break;
3914 case ARM::VLDMSIA:
3915 case ARM::VLDMSIA_UPD:
3916 case ARM::VLDMSDB_UPD:
3917 isSLoad = true;
3918 break;
3919 }
3920
3921 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3922 // then it takes an extra cycle.
3923 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3924 ++DefCycle;
3925 } else {
3926 // Assume the worst.
3927 DefCycle = RegNo + 2;
3928 }
3929
3930 return DefCycle;
3931}
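// Illustrative sketch, not part of the original file: the Cortex-A8/A7 result
// latency used above for VLDM defs, in isolation. 'RegNo' is the position of the
// defined register within the transfer list, as computed above.
static unsigned vldmDefCycleA8(unsigned RegNo) {
  unsigned DefCycle = RegNo / 2 + 1;  // issue slot of the pair containing RegNo
  if (RegNo % 2)
    ++DefCycle;                       // second element of a pair arrives a cycle later
  return DefCycle;
}
// e.g. vldmDefCycleA8(3) == 3, matching the "(regno / 2) + (regno % 2) + 1" comment.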
3932
3933std::optional<unsigned>
3934ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3935 const MCInstrDesc &DefMCID, unsigned DefClass,
3936 unsigned DefIdx, unsigned DefAlign) const {
3937 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3938 if (RegNo <= 0)
3939 // Def is the address writeback.
3940 return ItinData->getOperandCycle(DefClass, DefIdx);
3941
3942 unsigned DefCycle;
3943 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3944 // 4 registers would be issued: 1, 2, 1.
3945 // 5 registers would be issued: 1, 2, 2.
3946 DefCycle = RegNo / 2;
3947 if (DefCycle < 1)
3948 DefCycle = 1;
3949 // Result latency is issue cycle + 2: E2.
3950 DefCycle += 2;
3951 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3952 DefCycle = (RegNo / 2);
3953 // If there are odd number of registers or if it's not 64-bit aligned,
3954 // then it takes an extra AGU (Address Generation Unit) cycle.
3955 if ((RegNo % 2) || DefAlign < 8)
3956 ++DefCycle;
3957 // Result latency is AGU cycles + 2.
3958 DefCycle += 2;
3959 } else {
3960 // Assume the worst.
3961 DefCycle = RegNo + 2;
3962 }
3963
3964 return DefCycle;
3965}
3966
3967std::optional<unsigned>
3968ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3969 const MCInstrDesc &UseMCID, unsigned UseClass,
3970 unsigned UseIdx, unsigned UseAlign) const {
3971 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3972 if (RegNo <= 0)
3973 return ItinData->getOperandCycle(UseClass, UseIdx);
3974
3975 unsigned UseCycle;
3976 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3977 // (regno / 2) + (regno % 2) + 1
3978 UseCycle = RegNo / 2 + 1;
3979 if (RegNo % 2)
3980 ++UseCycle;
3981 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3982 UseCycle = RegNo;
3983 bool isSStore = false;
3984
3985 switch (UseMCID.getOpcode()) {
3986 default: break;
3987 case ARM::VSTMSIA:
3988 case ARM::VSTMSIA_UPD:
3989 case ARM::VSTMSDB_UPD:
3990 isSStore = true;
3991 break;
3992 }
3993
3994 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3995 // then it takes an extra cycle.
3996 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3997 ++UseCycle;
3998 } else {
3999 // Assume the worst.
4000 UseCycle = RegNo + 2;
4001 }
4002
4003 return UseCycle;
4004}
4005
4006std::optional<unsigned>
4007ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
4008 const MCInstrDesc &UseMCID, unsigned UseClass,
4009 unsigned UseIdx, unsigned UseAlign) const {
4010 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
4011 if (RegNo <= 0)
4012 return ItinData->getOperandCycle(UseClass, UseIdx);
4013
4014 unsigned UseCycle;
4015 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
4016 UseCycle = RegNo / 2;
4017 if (UseCycle < 2)
4018 UseCycle = 2;
4019 // Read in E3.
4020 UseCycle += 2;
4021 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
4022 UseCycle = (RegNo / 2);
4023 // If there are odd number of registers or if it's not 64-bit aligned,
4024 // then it takes an extra AGU (Address Generation Unit) cycle.
4025 if ((RegNo % 2) || UseAlign < 8)
4026 ++UseCycle;
4027 } else {
4028 // Assume the worst.
4029 UseCycle = 1;
4030 }
4031 return UseCycle;
4032}
4033
4034std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4035 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
4036 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
4037 unsigned UseIdx, unsigned UseAlign) const {
4038 unsigned DefClass = DefMCID.getSchedClass();
4039 unsigned UseClass = UseMCID.getSchedClass();
4040
4041 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
4042 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4043
4044 // This may be a def / use of a variable_ops instruction, the operand
4045 // latency might be determinable dynamically. Let the target try to
4046 // figure it out.
4047 std::optional<unsigned> DefCycle;
4048 bool LdmBypass = false;
4049 switch (DefMCID.getOpcode()) {
4050 default:
4051 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4052 break;
4053
4054 case ARM::VLDMDIA:
4055 case ARM::VLDMDIA_UPD:
4056 case ARM::VLDMDDB_UPD:
4057 case ARM::VLDMSIA:
4058 case ARM::VLDMSIA_UPD:
4059 case ARM::VLDMSDB_UPD:
4060 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4061 break;
4062
4063 case ARM::LDMIA_RET:
4064 case ARM::LDMIA:
4065 case ARM::LDMDA:
4066 case ARM::LDMDB:
4067 case ARM::LDMIB:
4068 case ARM::LDMIA_UPD:
4069 case ARM::LDMDA_UPD:
4070 case ARM::LDMDB_UPD:
4071 case ARM::LDMIB_UPD:
4072 case ARM::tLDMIA:
4073 case ARM::tLDMIA_UPD:
4074 case ARM::tPUSH:
4075 case ARM::t2LDMIA_RET:
4076 case ARM::t2LDMIA:
4077 case ARM::t2LDMDB:
4078 case ARM::t2LDMIA_UPD:
4079 case ARM::t2LDMDB_UPD:
4080 LdmBypass = true;
4081 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4082 break;
4083 }
4084
4085 if (!DefCycle)
4086 // We can't seem to determine the result latency of the def, assume it's 2.
4087 DefCycle = 2;
4088
4089 std::optional<unsigned> UseCycle;
4090 switch (UseMCID.getOpcode()) {
4091 default:
4092 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4093 break;
4094
4095 case ARM::VSTMDIA:
4096 case ARM::VSTMDIA_UPD:
4097 case ARM::VSTMDDB_UPD:
4098 case ARM::VSTMSIA:
4099 case ARM::VSTMSIA_UPD:
4100 case ARM::VSTMSDB_UPD:
4101 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4102 break;
4103
4104 case ARM::STMIA:
4105 case ARM::STMDA:
4106 case ARM::STMDB:
4107 case ARM::STMIB:
4108 case ARM::STMIA_UPD:
4109 case ARM::STMDA_UPD:
4110 case ARM::STMDB_UPD:
4111 case ARM::STMIB_UPD:
4112 case ARM::tSTMIA_UPD:
4113 case ARM::tPOP_RET:
4114 case ARM::tPOP:
4115 case ARM::t2STMIA:
4116 case ARM::t2STMDB:
4117 case ARM::t2STMIA_UPD:
4118 case ARM::t2STMDB_UPD:
4119 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4120 break;
4121 }
4122
4123 if (!UseCycle)
4124 // Assume it's read in the first stage.
4125 UseCycle = 1;
4126
4127 if (UseCycle > *DefCycle + 1)
4128 return std::nullopt;
4129
4130 UseCycle = *DefCycle - *UseCycle + 1;
4131 if (UseCycle > 0u) {
4132 if (LdmBypass) {
4133 // It's a variable_ops instruction so we can't use DefIdx here. Just use
4134 // first def operand.
4135 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4136 UseClass, UseIdx))
4137 UseCycle = *UseCycle - 1;
4138 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4139 UseClass, UseIdx)) {
4140 UseCycle = *UseCycle - 1;
4141 }
4142 }
4143
4144 return UseCycle;
4145}
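// Illustrative sketch, not part of the original file: how the def and use cycles
// computed above combine into an operand latency. 'HasForwarding' is a
// hypothetical stand-in for the itinerary's pipeline-forwarding query.
#include <optional>

static std::optional<unsigned> combineCycles(unsigned DefCycle, unsigned UseCycle,
                                             bool HasForwarding) {
  if (UseCycle > DefCycle + 1)
    return std::nullopt;              // mirrors the bail-out above
  unsigned Latency = DefCycle - UseCycle + 1;
  if (Latency > 0 && HasForwarding)
    --Latency;                        // a forwarding path saves one cycle
  return Latency;
}
// e.g. a result produced in E3 (DefCycle 3) feeding an operand read at issue
// (UseCycle 1) gives a latency of 3, or 2 if a forwarding path exists.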
4146
4147static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4148 const MachineInstr *MI, unsigned Reg,
4149 unsigned &DefIdx, unsigned &Dist) {
4150 Dist = 0;
4151
4153 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4154 assert(II->isInsideBundle() && "Empty bundle?");
4155
4156 int Idx = -1;
4157 while (II->isInsideBundle()) {
4158 Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
4159 if (Idx != -1)
4160 break;
4161 --II;
4162 ++Dist;
4163 }
4164
4165 assert(Idx != -1 && "Cannot find bundled definition!");
4166 DefIdx = Idx;
4167 return &*II;
4168}
4169
4170static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4171 const MachineInstr &MI, unsigned Reg,
4172 unsigned &UseIdx, unsigned &Dist) {
4173 Dist = 0;
4174
4176 assert(II->isInsideBundle() && "Empty bundle?");
4177 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4178
4179 // FIXME: This doesn't properly handle multiple uses.
4180 int Idx = -1;
4181 while (II != E && II->isInsideBundle()) {
4182 Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
4183 if (Idx != -1)
4184 break;
4185 if (II->getOpcode() != ARM::t2IT)
4186 ++Dist;
4187 ++II;
4188 }
4189
4190 if (Idx == -1) {
4191 Dist = 0;
4192 return nullptr;
4193 }
4194
4195 UseIdx = Idx;
4196 return &*II;
4197}
4198
4199/// Return the number of cycles to add to (or subtract from) the static
4200/// itinerary based on the def opcode and alignment. The caller will ensure that
4201/// adjusted latency is at least one cycle.
4202static int adjustDefLatency(const ARMSubtarget &Subtarget,
4203 const MachineInstr &DefMI,
4204 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4205 int Adjust = 0;
4206 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4207 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4208 // variants are one cycle cheaper.
4209 switch (DefMCID.getOpcode()) {
4210 default: break;
4211 case ARM::LDRrs:
4212 case ARM::LDRBrs: {
4213 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4214 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4215 if (ShImm == 0 ||
4216 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4217 --Adjust;
4218 break;
4219 }
4220 case ARM::t2LDRs:
4221 case ARM::t2LDRBs:
4222 case ARM::t2LDRHs:
4223 case ARM::t2LDRSHs: {
4224 // Thumb2 mode: lsl only.
4225 unsigned ShAmt = DefMI.getOperand(3).getImm();
4226 if (ShAmt == 0 || ShAmt == 2)
4227 --Adjust;
4228 break;
4229 }
4230 }
4231 } else if (Subtarget.isSwift()) {
4232 // FIXME: Properly handle all of the latency adjustments for address
4233 // writeback.
4234 switch (DefMCID.getOpcode()) {
4235 default: break;
4236 case ARM::LDRrs:
4237 case ARM::LDRBrs: {
4238 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4239 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4240 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4241 if (!isSub &&
4242 (ShImm == 0 ||
4243 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4244 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4245 Adjust -= 2;
4246 else if (!isSub &&
4247 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4248 --Adjust;
4249 break;
4250 }
4251 case ARM::t2LDRs:
4252 case ARM::t2LDRBs:
4253 case ARM::t2LDRHs:
4254 case ARM::t2LDRSHs: {
4255 // Thumb2 mode: lsl only.
4256 unsigned ShAmt = DefMI.getOperand(3).getImm();
4257 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4258 Adjust -= 2;
4259 break;
4260 }
4261 }
4262 }
4263
4264 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4265 switch (DefMCID.getOpcode()) {
4266 default: break;
4267 case ARM::VLD1q8:
4268 case ARM::VLD1q16:
4269 case ARM::VLD1q32:
4270 case ARM::VLD1q64:
4271 case ARM::VLD1q8wb_fixed:
4272 case ARM::VLD1q16wb_fixed:
4273 case ARM::VLD1q32wb_fixed:
4274 case ARM::VLD1q64wb_fixed:
4275 case ARM::VLD1q8wb_register:
4276 case ARM::VLD1q16wb_register:
4277 case ARM::VLD1q32wb_register:
4278 case ARM::VLD1q64wb_register:
4279 case ARM::VLD2d8:
4280 case ARM::VLD2d16:
4281 case ARM::VLD2d32:
4282 case ARM::VLD2q8:
4283 case ARM::VLD2q16:
4284 case ARM::VLD2q32:
4285 case ARM::VLD2d8wb_fixed:
4286 case ARM::VLD2d16wb_fixed:
4287 case ARM::VLD2d32wb_fixed:
4288 case ARM::VLD2q8wb_fixed:
4289 case ARM::VLD2q16wb_fixed:
4290 case ARM::VLD2q32wb_fixed:
4291 case ARM::VLD2d8wb_register:
4292 case ARM::VLD2d16wb_register:
4293 case ARM::VLD2d32wb_register:
4294 case ARM::VLD2q8wb_register:
4295 case ARM::VLD2q16wb_register:
4296 case ARM::VLD2q32wb_register:
4297 case ARM::VLD3d8:
4298 case ARM::VLD3d16:
4299 case ARM::VLD3d32:
4300 case ARM::VLD1d64T:
4301 case ARM::VLD3d8_UPD:
4302 case ARM::VLD3d16_UPD:
4303 case ARM::VLD3d32_UPD:
4304 case ARM::VLD1d64Twb_fixed:
4305 case ARM::VLD1d64Twb_register:
4306 case ARM::VLD3q8_UPD:
4307 case ARM::VLD3q16_UPD:
4308 case ARM::VLD3q32_UPD:
4309 case ARM::VLD4d8:
4310 case ARM::VLD4d16:
4311 case ARM::VLD4d32:
4312 case ARM::VLD1d64Q:
4313 case ARM::VLD4d8_UPD:
4314 case ARM::VLD4d16_UPD:
4315 case ARM::VLD4d32_UPD:
4316 case ARM::VLD1d64Qwb_fixed:
4317 case ARM::VLD1d64Qwb_register:
4318 case ARM::VLD4q8_UPD:
4319 case ARM::VLD4q16_UPD:
4320 case ARM::VLD4q32_UPD:
4321 case ARM::VLD1DUPq8:
4322 case ARM::VLD1DUPq16:
4323 case ARM::VLD1DUPq32:
4324 case ARM::VLD1DUPq8wb_fixed:
4325 case ARM::VLD1DUPq16wb_fixed:
4326 case ARM::VLD1DUPq32wb_fixed:
4327 case ARM::VLD1DUPq8wb_register:
4328 case ARM::VLD1DUPq16wb_register:
4329 case ARM::VLD1DUPq32wb_register:
4330 case ARM::VLD2DUPd8:
4331 case ARM::VLD2DUPd16:
4332 case ARM::VLD2DUPd32:
4333 case ARM::VLD2DUPd8wb_fixed:
4334 case ARM::VLD2DUPd16wb_fixed:
4335 case ARM::VLD2DUPd32wb_fixed:
4336 case ARM::VLD2DUPd8wb_register:
4337 case ARM::VLD2DUPd16wb_register:
4338 case ARM::VLD2DUPd32wb_register:
4339 case ARM::VLD4DUPd8:
4340 case ARM::VLD4DUPd16:
4341 case ARM::VLD4DUPd32:
4342 case ARM::VLD4DUPd8_UPD:
4343 case ARM::VLD4DUPd16_UPD:
4344 case ARM::VLD4DUPd32_UPD:
4345 case ARM::VLD1LNd8:
4346 case ARM::VLD1LNd16:
4347 case ARM::VLD1LNd32:
4348 case ARM::VLD1LNd8_UPD:
4349 case ARM::VLD1LNd16_UPD:
4350 case ARM::VLD1LNd32_UPD:
4351 case ARM::VLD2LNd8:
4352 case ARM::VLD2LNd16:
4353 case ARM::VLD2LNd32:
4354 case ARM::VLD2LNq16:
4355 case ARM::VLD2LNq32:
4356 case ARM::VLD2LNd8_UPD:
4357 case ARM::VLD2LNd16_UPD:
4358 case ARM::VLD2LNd32_UPD:
4359 case ARM::VLD2LNq16_UPD:
4360 case ARM::VLD2LNq32_UPD:
4361 case ARM::VLD4LNd8:
4362 case ARM::VLD4LNd16:
4363 case ARM::VLD4LNd32:
4364 case ARM::VLD4LNq16:
4365 case ARM::VLD4LNq32:
4366 case ARM::VLD4LNd8_UPD:
4367 case ARM::VLD4LNd16_UPD:
4368 case ARM::VLD4LNd32_UPD:
4369 case ARM::VLD4LNq16_UPD:
4370 case ARM::VLD4LNq32_UPD:
4371 // If the address is not 64-bit aligned, the latencies of these
4372 // instructions increase by one.
4373 ++Adjust;
4374 break;
4375 }
4376 }
4377 return Adjust;
4378}
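// Illustrative sketch, not part of the original file: how the possibly negative
// adjustment computed above is applied by the callers further down
// (getOperandLatencyImpl and getInstrLatency) without pushing the latency below
// the itinerary's value of zero.
static unsigned applyLatencyAdjustment(unsigned Latency, int Adj) {
  if (Adj >= 0 || (int)Latency > -Adj)
    return Latency + Adj;  // adjusted value is still non-negative
  return Latency;          // otherwise keep the unadjusted itinerary latency
}
// e.g. applyLatencyAdjustment(4, -2) == 2, while applyLatencyAdjustment(1, -2) == 1.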
4379
4380std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4381 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4382 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4383 // No operand latency. The caller may fall back to getInstrLatency.
4384 if (!ItinData || ItinData->isEmpty())
4385 return std::nullopt;
4386
4387 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4388 Register Reg = DefMO.getReg();
4389
4390 const MachineInstr *ResolvedDefMI = &DefMI;
4391 unsigned DefAdj = 0;
4392 if (DefMI.isBundle())
4393 ResolvedDefMI =
4394 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4395 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4396 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4397 return 1;
4398 }
4399
4400 const MachineInstr *ResolvedUseMI = &UseMI;
4401 unsigned UseAdj = 0;
4402 if (UseMI.isBundle()) {
4403 ResolvedUseMI =
4404 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4405 if (!ResolvedUseMI)
4406 return std::nullopt;
4407 }
4408
4409 return getOperandLatencyImpl(
4410 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4411 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4412}
4413
4414std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4415 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4416 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4417 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4418 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4419 if (Reg == ARM::CPSR) {
4420 if (DefMI.getOpcode() == ARM::FMSTAT) {
4421 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4422 return Subtarget.isLikeA9() ? 1 : 20;
4423 }
4424
4425 // CPSR set and branch can be paired in the same cycle.
4426 if (UseMI.isBranch())
4427 return 0;
4428
4429 // Otherwise it takes the instruction latency (generally one).
4430 unsigned Latency = getInstrLatency(ItinData, DefMI);
4431
4432 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4433 // its uses. Instructions which are otherwise scheduled between them may
4434 // incur a code size penalty (not able to use the CPSR setting 16-bit
4435 // instructions).
4436 if (Latency > 0 && Subtarget.isThumb2()) {
4437 const MachineFunction *MF = DefMI.getParent()->getParent();
4438 // FIXME: Use Function::hasOptSize().
4439 if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4440 --Latency;
4441 }
4442 return Latency;
4443 }
4444
4445 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4446 return std::nullopt;
4447
4448 unsigned DefAlign = DefMI.hasOneMemOperand()
4449 ? (*DefMI.memoperands_begin())->getAlign().value()
4450 : 0;
4451 unsigned UseAlign = UseMI.hasOneMemOperand()
4452 ? (*UseMI.memoperands_begin())->getAlign().value()
4453 : 0;
4454
4455 // Get the itinerary's latency if possible, and handle variable_ops.
4456 std::optional<unsigned> Latency = getOperandLatency(
4457 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4458 // Unable to find operand latency. The caller may resort to getInstrLatency.
4459 if (!Latency)
4460 return std::nullopt;
4461
4462 // Adjust for IT block position.
4463 int Adj = DefAdj + UseAdj;
4464
4465 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4466 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4467 if (Adj >= 0 || (int)*Latency > -Adj) {
4468 return *Latency + Adj;
4469 }
4470 // Return the itinerary latency, which may be zero but not less than zero.
4471 return Latency;
4472}
4473
4474std::optional<unsigned>
4475ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4476 SDNode *DefNode, unsigned DefIdx,
4477 SDNode *UseNode, unsigned UseIdx) const {
4478 if (!DefNode->isMachineOpcode())
4479 return 1;
4480
4481 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4482
4483 if (isZeroCost(DefMCID.Opcode))
4484 return 0;
4485
4486 if (!ItinData || ItinData->isEmpty())
4487 return DefMCID.mayLoad() ? 3 : 1;
4488
4489 if (!UseNode->isMachineOpcode()) {
4490 std::optional<unsigned> Latency =
4491 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4492 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4493 int Threshold = 1 + Adj;
4494 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4495 }
4496
4497 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4498 auto *DefMN = cast<MachineSDNode>(DefNode);
4499 unsigned DefAlign = !DefMN->memoperands_empty()
4500 ? (*DefMN->memoperands_begin())->getAlign().value()
4501 : 0;
4502 auto *UseMN = cast<MachineSDNode>(UseNode);
4503 unsigned UseAlign = !UseMN->memoperands_empty()
4504 ? (*UseMN->memoperands_begin())->getAlign().value()
4505 : 0;
4506 std::optional<unsigned> Latency = getOperandLatency(
4507 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4508 if (!Latency)
4509 return std::nullopt;
4510
4511 if (Latency > 1U &&
4512 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4513 Subtarget.isCortexA7())) {
4514 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4515 // variants are one cycle cheaper.
4516 switch (DefMCID.getOpcode()) {
4517 default: break;
4518 case ARM::LDRrs:
4519 case ARM::LDRBrs: {
4520 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4521 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4522 if (ShImm == 0 ||
4523 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4524 Latency = *Latency - 1;
4525 break;
4526 }
4527 case ARM::t2LDRs:
4528 case ARM::t2LDRBs:
4529 case ARM::t2LDRHs:
4530 case ARM::t2LDRSHs: {
4531 // Thumb2 mode: lsl only.
4532 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4533 if (ShAmt == 0 || ShAmt == 2)
4534 Latency = *Latency - 1;
4535 break;
4536 }
4537 }
4538 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4539 // FIXME: Properly handle all of the latency adjustments for address
4540 // writeback.
4541 switch (DefMCID.getOpcode()) {
4542 default: break;
4543 case ARM::LDRrs:
4544 case ARM::LDRBrs: {
4545 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4546 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4547 if (ShImm == 0 ||
4548 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4550 Latency = *Latency - 2;
4551 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4552 Latency = *Latency - 1;
4553 break;
4554 }
4555 case ARM::t2LDRs:
4556 case ARM::t2LDRBs:
4557 case ARM::t2LDRHs:
4558 case ARM::t2LDRSHs:
4559 // Thumb2 mode: lsl 0-3 only.
4560 Latency = *Latency - 2;
4561 break;
4562 }
4563 }
4564
4565 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4566 switch (DefMCID.getOpcode()) {
4567 default: break;
4568 case ARM::VLD1q8:
4569 case ARM::VLD1q16:
4570 case ARM::VLD1q32:
4571 case ARM::VLD1q64:
4572 case ARM::VLD1q8wb_register:
4573 case ARM::VLD1q16wb_register:
4574 case ARM::VLD1q32wb_register:
4575 case ARM::VLD1q64wb_register:
4576 case ARM::VLD1q8wb_fixed:
4577 case ARM::VLD1q16wb_fixed:
4578 case ARM::VLD1q32wb_fixed:
4579 case ARM::VLD1q64wb_fixed:
4580 case ARM::VLD2d8:
4581 case ARM::VLD2d16:
4582 case ARM::VLD2d32:
4583 case ARM::VLD2q8Pseudo:
4584 case ARM::VLD2q16Pseudo:
4585 case ARM::VLD2q32Pseudo:
4586 case ARM::VLD2d8wb_fixed:
4587 case ARM::VLD2d16wb_fixed:
4588 case ARM::VLD2d32wb_fixed:
4589 case ARM::VLD2q8PseudoWB_fixed:
4590 case ARM::VLD2q16PseudoWB_fixed:
4591 case ARM::VLD2q32PseudoWB_fixed:
4592 case ARM::VLD2d8wb_register:
4593 case ARM::VLD2d16wb_register:
4594 case ARM::VLD2d32wb_register:
4595 case ARM::VLD2q8PseudoWB_register:
4596 case ARM::VLD2q16PseudoWB_register:
4597 case ARM::VLD2q32PseudoWB_register:
4598 case ARM::VLD3d8Pseudo:
4599 case ARM::VLD3d16Pseudo:
4600 case ARM::VLD3d32Pseudo:
4601 case ARM::VLD1d8TPseudo:
4602 case ARM::VLD1d16TPseudo:
4603 case ARM::VLD1d32TPseudo:
4604 case ARM::VLD1d64TPseudo:
4605 case ARM::VLD1d64TPseudoWB_fixed:
4606 case ARM::VLD1d64TPseudoWB_register:
4607 case ARM::VLD3d8Pseudo_UPD:
4608 case ARM::VLD3d16Pseudo_UPD:
4609 case ARM::VLD3d32Pseudo_UPD:
4610 case ARM::VLD3q8Pseudo_UPD:
4611 case ARM::VLD3q16Pseudo_UPD:
4612 case ARM::VLD3q32Pseudo_UPD:
4613 case ARM::VLD3q8oddPseudo:
4614 case ARM::VLD3q16oddPseudo:
4615 case ARM::VLD3q32oddPseudo:
4616 case ARM::VLD3q8oddPseudo_UPD:
4617 case ARM::VLD3q16oddPseudo_UPD:
4618 case ARM::VLD3q32oddPseudo_UPD:
4619 case ARM::VLD4d8Pseudo:
4620 case ARM::VLD4d16Pseudo:
4621 case ARM::VLD4d32Pseudo:
4622 case ARM::VLD1d8QPseudo:
4623 case ARM::VLD1d16QPseudo:
4624 case ARM::VLD1d32QPseudo:
4625 case ARM::VLD1d64QPseudo:
4626 case ARM::VLD1d64QPseudoWB_fixed:
4627 case ARM::VLD1d64QPseudoWB_register:
4628 case ARM::VLD1q8HighQPseudo:
4629 case ARM::VLD1q8LowQPseudo_UPD:
4630 case ARM::VLD1q8HighTPseudo:
4631 case ARM::VLD1q8LowTPseudo_UPD:
4632 case ARM::VLD1q16HighQPseudo:
4633 case ARM::VLD1q16LowQPseudo_UPD:
4634 case ARM::VLD1q16HighTPseudo:
4635 case ARM::VLD1q16LowTPseudo_UPD:
4636 case ARM::VLD1q32HighQPseudo:
4637 case ARM::VLD1q32LowQPseudo_UPD:
4638 case ARM::VLD1q32HighTPseudo:
4639 case ARM::VLD1q32LowTPseudo_UPD:
4640 case ARM::VLD1q64HighQPseudo:
4641 case ARM::VLD1q64LowQPseudo_UPD:
4642 case ARM::VLD1q64HighTPseudo:
4643 case ARM::VLD1q64LowTPseudo_UPD:
4644 case ARM::VLD4d8Pseudo_UPD:
4645 case ARM::VLD4d16Pseudo_UPD:
4646 case ARM::VLD4d32Pseudo_UPD:
4647 case ARM::VLD4q8Pseudo_UPD:
4648 case ARM::VLD4q16Pseudo_UPD:
4649 case ARM::VLD4q32Pseudo_UPD:
4650 case ARM::VLD4q8oddPseudo:
4651 case ARM::VLD4q16oddPseudo:
4652 case ARM::VLD4q32oddPseudo:
4653 case ARM::VLD4q8oddPseudo_UPD:
4654 case ARM::VLD4q16oddPseudo_UPD:
4655 case ARM::VLD4q32oddPseudo_UPD:
4656 case ARM::VLD1DUPq8:
4657 case ARM::VLD1DUPq16:
4658 case ARM::VLD1DUPq32:
4659 case ARM::VLD1DUPq8wb_fixed:
4660 case ARM::VLD1DUPq16wb_fixed:
4661 case ARM::VLD1DUPq32wb_fixed:
4662 case ARM::VLD1DUPq8wb_register:
4663 case ARM::VLD1DUPq16wb_register:
4664 case ARM::VLD1DUPq32wb_register:
4665 case ARM::VLD2DUPd8:
4666 case ARM::VLD2DUPd16:
4667 case ARM::VLD2DUPd32:
4668 case ARM::VLD2DUPd8wb_fixed:
4669 case ARM::VLD2DUPd16wb_fixed:
4670 case ARM::VLD2DUPd32wb_fixed:
4671 case ARM::VLD2DUPd8wb_register:
4672 case ARM::VLD2DUPd16wb_register:
4673 case ARM::VLD2DUPd32wb_register:
4674 case ARM::VLD2DUPq8EvenPseudo:
4675 case ARM::VLD2DUPq8OddPseudo:
4676 case ARM::VLD2DUPq16EvenPseudo:
4677 case ARM::VLD2DUPq16OddPseudo:
4678 case ARM::VLD2DUPq32EvenPseudo:
4679 case ARM::VLD2DUPq32OddPseudo:
4680 case ARM::VLD3DUPq8EvenPseudo:
4681 case ARM::VLD3DUPq8OddPseudo:
4682 case ARM::VLD3DUPq16EvenPseudo:
4683 case ARM::VLD3DUPq16OddPseudo:
4684 case ARM::VLD3DUPq32EvenPseudo:
4685 case ARM::VLD3DUPq32OddPseudo:
4686 case ARM::VLD4DUPd8Pseudo:
4687 case ARM::VLD4DUPd16Pseudo:
4688 case ARM::VLD4DUPd32Pseudo:
4689 case ARM::VLD4DUPd8Pseudo_UPD:
4690 case ARM::VLD4DUPd16Pseudo_UPD:
4691 case ARM::VLD4DUPd32Pseudo_UPD:
4692 case ARM::VLD4DUPq8EvenPseudo:
4693 case ARM::VLD4DUPq8OddPseudo:
4694 case ARM::VLD4DUPq16EvenPseudo:
4695 case ARM::VLD4DUPq16OddPseudo:
4696 case ARM::VLD4DUPq32EvenPseudo:
4697 case ARM::VLD4DUPq32OddPseudo:
4698 case ARM::VLD1LNq8Pseudo:
4699 case ARM::VLD1LNq16Pseudo:
4700 case ARM::VLD1LNq32Pseudo:
4701 case ARM::VLD1LNq8Pseudo_UPD:
4702 case ARM::VLD1LNq16Pseudo_UPD:
4703 case ARM::VLD1LNq32Pseudo_UPD:
4704 case ARM::VLD2LNd8Pseudo:
4705 case ARM::VLD2LNd16Pseudo:
4706 case ARM::VLD2LNd32Pseudo:
4707 case ARM::VLD2LNq16Pseudo:
4708 case ARM::VLD2LNq32Pseudo:
4709 case ARM::VLD2LNd8Pseudo_UPD:
4710 case ARM::VLD2LNd16Pseudo_UPD:
4711 case ARM::VLD2LNd32Pseudo_UPD:
4712 case ARM::VLD2LNq16Pseudo_UPD:
4713 case ARM::VLD2LNq32Pseudo_UPD:
4714 case ARM::VLD4LNd8Pseudo:
4715 case ARM::VLD4LNd16Pseudo:
4716 case ARM::VLD4LNd32Pseudo:
4717 case ARM::VLD4LNq16Pseudo:
4718 case ARM::VLD4LNq32Pseudo:
4719 case ARM::VLD4LNd8Pseudo_UPD:
4720 case ARM::VLD4LNd16Pseudo_UPD:
4721 case ARM::VLD4LNd32Pseudo_UPD:
4722 case ARM::VLD4LNq16Pseudo_UPD:
4723 case ARM::VLD4LNq32Pseudo_UPD:
4724 // If the address is not 64-bit aligned, the latencies of these
4725 // instructions increase by one.
4726 Latency = *Latency + 1;
4727 break;
4728 }
4729
4730 return Latency;
4731}
4732
4733unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4734 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4735 MI.isImplicitDef())
4736 return 0;
4737
4738 if (MI.isBundle())
4739 return 0;
4740
4741 const MCInstrDesc &MCID = MI.getDesc();
4742
4743 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4744 !Subtarget.cheapPredicableCPSRDef())) {
4745 // When predicated, CPSR is an additional source operand for CPSR updating
4746 // instructions; this apparently increases their latencies.
4747 return 1;
4748 }
4749 return 0;
4750}
4751
4752unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4753 const MachineInstr &MI,
4754 unsigned *PredCost) const {
4755 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4756 MI.isImplicitDef())
4757 return 1;
4758
4759 // An instruction scheduler typically runs on unbundled instructions, however
4760 // other passes may query the latency of a bundled instruction.
4761 if (MI.isBundle()) {
4762 unsigned Latency = 0;
4764 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4765 while (++I != E && I->isInsideBundle()) {
4766 if (I->getOpcode() != ARM::t2IT)
4767 Latency += getInstrLatency(ItinData, *I, PredCost);
4768 }
4769 return Latency;
4770 }
4771
4772 const MCInstrDesc &MCID = MI.getDesc();
4773 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4774 !Subtarget.cheapPredicableCPSRDef()))) {
4775 // When predicated, CPSR is an additional source operand for CPSR updating
4776 // instructions; this apparently increases their latencies.
4777 *PredCost = 1;
4778 }
4779 // Be sure to call getStageLatency for an empty itinerary in case it has a
4780 // valid MinLatency property.
4781 if (!ItinData)
4782 return MI.mayLoad() ? 3 : 1;
4783
4784 unsigned Class = MCID.getSchedClass();
4785
4786 // For instructions with variable uops, use uops as latency.
4787 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4788 return getNumMicroOps(ItinData, MI);
4789
4790 // For the common case, fall back on the itinerary's latency.
4791 unsigned Latency = ItinData->getStageLatency(Class);
4792
4793 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4794 unsigned DefAlign =
4795 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4796 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4797 if (Adj >= 0 || (int)Latency > -Adj) {
4798 return Latency + Adj;
4799 }
4800 return Latency;
4801}
4802
4803unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4804 SDNode *Node) const {
4805 if (!Node->isMachineOpcode())
4806 return 1;
4807
4808 if (!ItinData || ItinData->isEmpty())
4809 return 1;
4810
4811 unsigned Opcode = Node->getMachineOpcode();
4812 switch (Opcode) {
4813 default:
4814 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4815 case ARM::VLDMQIA:
4816 case ARM::VSTMQIA:
4817 return 2;
4818 }
4819}
4820
4821bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4822 const MachineRegisterInfo *MRI,
4823 const MachineInstr &DefMI,
4824 unsigned DefIdx,
4825 const MachineInstr &UseMI,
4826 unsigned UseIdx) const {
4827 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4828 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4829 if (Subtarget.nonpipelinedVFP() &&
4830 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4831 return true;
4832
4833 // Hoist VFP / NEON instructions with 4 or higher latency.
4834 unsigned Latency =
4835 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4836 if (Latency <= 3)
4837 return false;
4838 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4839 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4840}
4841
4842bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4843 const MachineInstr &DefMI,
4844 unsigned DefIdx) const {
4845 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4846 if (!ItinData || ItinData->isEmpty())
4847 return false;
4848
4849 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4850 if (DDomain == ARMII::DomainGeneral) {
4851 unsigned DefClass = DefMI.getDesc().getSchedClass();
4852 std::optional<unsigned> DefCycle =
4853 ItinData->getOperandCycle(DefClass, DefIdx);
4854 return DefCycle && DefCycle <= 2U;
4855 }
4856 return false;
4857}
4858
4859bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4860 StringRef &ErrInfo) const {
4861 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4862 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4863 return false;
4864 }
4865 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4866 // Make sure we don't generate a lo-lo mov that isn't supported.
4867 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4868 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4869 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4870 return false;
4871 }
4872 }
4873 if (MI.getOpcode() == ARM::tPUSH ||
4874 MI.getOpcode() == ARM::tPOP ||
4875 MI.getOpcode() == ARM::tPOP_RET) {
4876 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4877 if (MO.isImplicit() || !MO.isReg())
4878 continue;
4879 Register Reg = MO.getReg();
4880 if (Reg < ARM::R0 || Reg > ARM::R7) {
4881 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4882 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4883 ErrInfo = "Unsupported register in Thumb1 push/pop";
4884 return false;
4885 }
4886 }
4887 }
4888 }
4889 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4890 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4891 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4892 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4893 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4894 return false;
4895 }
4896 }
4897
4898 // Check the address model by taking the first Imm operand and checking it is
4899 // legal for that addressing mode.
4901 (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4902 switch (AddrMode) {
4903 default:
4904 break;
4912 case ARMII::AddrModeT2_i12: {
4913 uint32_t Imm = 0;
4914 for (auto Op : MI.operands()) {
4915 if (Op.isImm()) {
4916 Imm = Op.getImm();
4917 break;
4918 }
4919 }
4920 if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4921 ErrInfo = "Incorrect AddrMode Imm for instruction";
4922 return false;
4923 }
4924 break;
4925 }
4926 }
4927 return true;
4928}
4929
4930void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4931 unsigned LoadImmOpc,
4932 unsigned LoadOpc) const {
4933 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4934 "ROPI/RWPI not currently supported with stack guard");
4935
4936 MachineBasicBlock &MBB = *MI->getParent();
4937 DebugLoc DL = MI->getDebugLoc();
4938 Register Reg = MI->getOperand(0).getReg();
4940 unsigned int Offset = 0;
4941
4942 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4943 assert(!Subtarget.isReadTPSoft() &&
4944 "TLS stack protector requires hardware TLS register");
4945
4946 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4947 .addImm(15)
4948 .addImm(0)
4949 .addImm(13)
4950 .addImm(0)
4951 .addImm(3)
4953
4955 Offset = M.getStackProtectorGuardOffset();
4956 if (Offset & ~0xfffU) {
4957 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4958 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4959 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4960 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4961 BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4962 .addReg(Reg, RegState::Kill)
4963 .addImm(Offset & ~0xfffU)
4965 .addReg(0);
4966 Offset &= 0xfffU;
4967 }
4968 } else {
4969 const GlobalValue *GV =
4970 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4971 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4972
4973 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4974 if (Subtarget.isTargetMachO()) {
4975 TargetFlags |= ARMII::MO_NONLAZY;
4976 } else if (Subtarget.isTargetCOFF()) {
4977 if (GV->hasDLLImportStorageClass())
4978 TargetFlags |= ARMII::MO_DLLIMPORT;
4979 else if (IsIndirect)
4980 TargetFlags |= ARMII::MO_COFFSTUB;
4981 } else if (IsIndirect) {
4982 TargetFlags |= ARMII::MO_GOT;
4983 }
4984
4985 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4986 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4987 auto APSREncoding =
4988 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4989 BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4990 .addImm(APSREncoding)
4992 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4993 .addGlobalAddress(GV, 0, TargetFlags);
4994 BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4995 .addImm(APSREncoding)
4996 .addReg(CPSRSaveReg, RegState::Kill)
4998 } else {
4999 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
5000 .addGlobalAddress(GV, 0, TargetFlags);
5001 }
5002
5003 if (IsIndirect) {
5004 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
5005 MIB.addReg(Reg, RegState::Kill).addImm(0);
5006 auto Flags = MachineMemOperand::MOLoad |
5010 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
5012 }
5013 }
5014
5015 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
5016 MIB.addReg(Reg, RegState::Kill)
5017 .addImm(Offset)
5018 .cloneMemRefs(*MI)
5020}
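// Illustrative note, not part of the original file: in the hardware-TLS path above,
// the MRC is built with the immediates (15, 0, 13, 0, 3), i.e.
//   mrc p15, #0, Rd, c13, c0, #3
// which reads TPIDRURO, the read-only thread-ID register whose value (plus the
// optional ADD/LDR offset) locates the stack-protector guard slot.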
5021
5022bool
5023ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
5024 unsigned &AddSubOpc,
5025 bool &NegAcc, bool &HasLane) const {
5027 if (I == MLxEntryMap.end())
5028 return false;
5029
5030 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
5031 MulOpc = Entry.MulOpc;
5032 AddSubOpc = Entry.AddSubOpc;
5033 NegAcc = Entry.NegAcc;
5034 HasLane = Entry.HasLane;
5035 return true;
5036}
5037
5038//===----------------------------------------------------------------------===//
5039// Execution domains.
5040//===----------------------------------------------------------------------===//
5041//
5042// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
5043// and some can go down both. The vmov instructions go down the VFP pipeline,
5044// but they can be changed to vorr equivalents that are executed by the NEON
5045// pipeline.
5046//
5047// We use the following execution domain numbering:
5048//
5049enum ARMExeDomain {
5050 ExeGeneric = 0,
5051 ExeVFP = 1,
5052 ExeNEON = 2
5053};
5054
5055//
5056// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
5057//
5058std::pair<uint16_t, uint16_t>
5059ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
5060 // If we don't have access to NEON instructions then we won't be able
5061 // to swizzle anything to the NEON domain. Check to make sure.
5062 if (Subtarget.hasNEON()) {
5063 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
5064 // if they are not predicated.
5065 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
5066 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5067
5068 // CortexA9 is particularly picky about mixing the two and wants these
5069 // converted.
5070 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
5071 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
5072 MI.getOpcode() == ARM::VMOVS))
5073 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5074 }
5075 // No other instructions can be swizzled, so just determine their domain.
5076 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
5077
5078 if (Domain & ARMII::DomainNEON)
5079 return std::make_pair(ExeNEON, 0);
5080
5081 // Certain instructions can go either way on Cortex-A8.
5082 // Treat them as NEON instructions.
5083 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
5084 return std::make_pair(ExeNEON, 0);
5085
5086 if (Domain & ARMII::DomainVFP)
5087 return std::make_pair(ExeVFP, 0);
5088
5089 return std::make_pair(ExeGeneric, 0);
5090}
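// Illustrative sketch, not part of the original file: how a caller typically
// interprets the pair returned above. The first element is the current domain,
// the second a bitmask of domains the instruction could be re-encoded for
// (0 meaning it cannot be swizzled at all).
#include <cstdint>
#include <utility>

static bool canMoveToDomain(std::pair<uint16_t, uint16_t> DomainInfo,
                            unsigned TargetDomain) {
  return (DomainInfo.second & (1u << TargetDomain)) != 0;
}
// e.g. an unpredicated VMOVD yields {ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)}, so it
// may be re-encoded for the NEON pipeline by setExecutionDomain below.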
5091
5092static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
5093 unsigned SReg, unsigned &Lane) {
5094 unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
5095 Lane = 0;
5096
5097 if (DReg != ARM::NoRegister)
5098 return DReg;
5099
5100 Lane = 1;
5101 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
5102
5103 assert(DReg && "S-register with no D super-register?");
5104 return DReg;
5105}
5106
5107/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
5108/// set ImplicitSReg to a register number that must be marked as implicit-use or
5109/// zero if no register needs to be defined as implicit-use.
5110///
5111/// If the function cannot determine if an SPR should be marked implicit use or
5112/// not, it returns false.
5113///
5114/// This function handles cases where an instruction is being modified from taking
5115/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
5116/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
5117/// lane of the DPR).
5118///
5119/// If the other SPR is defined, an implicit-use of it should be added. Otherwise
5120/// (including the case where the DPR itself is defined), it should not.
5121///
5122static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
5123 MachineInstr &MI, unsigned DReg,
5124 unsigned Lane, unsigned &ImplicitSReg) {
5125 // If the DPR is defined or used already, the other SPR lane will be chained
5126 // correctly, so there is nothing to be done.
5127 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
5128 ImplicitSReg = 0;
5129 return true;
5130 }
5131
5132 // Otherwise we need to go searching to see if the SPR is set explicitly.
5133 ImplicitSReg = TRI->getSubReg(DReg,
5134 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
5136 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5137
5138 if (LQR == MachineBasicBlock::LQR_Live)
5139 return true;
5140 else if (LQR == MachineBasicBlock::LQR_Unknown)
5141 return false;
5142
5143 // If the register is known not to be live, there is no need to add an
5144 // implicit-use.
5145 ImplicitSReg = 0;
5146 return true;
5147}
5148
5149void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
5150 unsigned Domain) const {
5151 unsigned DstReg, SrcReg, DReg;
5152 unsigned Lane;
5153 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
5155 switch (MI.getOpcode()) {
5156 default:
5157 llvm_unreachable("cannot handle opcode!");
5158 break;
5159 case ARM::VMOVD:
5160 if (Domain != ExeNEON)
5161 break;
5162
5163 // Zap the predicate operands.
5164 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
5165
5166 // Make sure we've got NEON instructions.
5167 assert(Subtarget.hasNEON() && "VORRd requires NEON");
5168
5169 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5170 DstReg = MI.getOperand(0).getReg();
5171 SrcReg = MI.getOperand(1).getReg();
5172
5173 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5174 MI.removeOperand(i - 1);
5175
5176 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5177 MI.setDesc(get(ARM::VORRd));
5178 MIB.addReg(DstReg, RegState::Define)
5179 .addReg(SrcReg)
5180 .addReg(SrcReg)
5182 break;
5183 case ARM::VMOVRS:
5184 if (Domain != ExeNEON)
5185 break;
5186 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5187
5188 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5189 DstReg = MI.getOperand(0).getReg();
5190 SrcReg = MI.getOperand(1).getReg();
5191
5192 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5193 MI.removeOperand(i - 1);
5194
5195 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5196
5197 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5198 // Note that DSrc has been widened and the other lane may be undef, which
5199 // contaminates the entire register.
5200 MI.setDesc(get(ARM::VGETLNi32));
5201 MIB.addReg(DstReg, RegState::Define)
5202 .addReg(DReg, RegState::Undef)
5203 .addImm(Lane)
5205
5206 // The old source should be an implicit use, otherwise we might think it
5207 // was dead before here.
5208 MIB.addReg(SrcReg, RegState::Implicit);
5209 break;
5210 case ARM::VMOVSR: {
5211 if (Domain != ExeNEON)
5212 break;
5213 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5214
5215 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5216 DstReg = MI.getOperand(0).getReg();
5217 SrcReg = MI.getOperand(1).getReg();
5218
5219 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5220
5221 unsigned ImplicitSReg;
5222 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5223 break;
5224
5225 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5226 MI.removeOperand(i - 1);
5227
5228 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5229 // Again DDst may be undefined at the beginning of this instruction.
5230 MI.setDesc(get(ARM::VSETLNi32));
5231 MIB.addReg(DReg, RegState::Define)
5232 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5233 .addReg(SrcReg)
5234 .addImm(Lane)
5236
5237 // The narrower destination must be marked as set to keep previous chains
5238 // in place.
5240 if (ImplicitSReg != 0)
5241 MIB.addReg(ImplicitSReg, RegState::Implicit);
5242 break;
5243 }
5244 case ARM::VMOVS: {
5245 if (Domain != ExeNEON)
5246 break;
5247
5248 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5249 DstReg = MI.getOperand(0).getReg();
5250 SrcReg = MI.getOperand(1).getReg();
5251
5252 unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
5253 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5254 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5255
5256 unsigned ImplicitSReg;
5257 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5258 break;
5259
5260 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5261 MI.removeOperand(i - 1);
5262
5263 if (DSrc == DDst) {
5264 // Destination can be:
5265 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5266 MI.setDesc(get(ARM::VDUPLN32d));
5267 MIB.addReg(DDst, RegState::Define)
5268 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5269 .addImm(SrcLane)
5271
5272 // Neither the source nor the destination is naturally represented any
5273 // more, so add them in manually.
5275 MIB.addReg(SrcReg, RegState::Implicit);
5276 if (ImplicitSReg != 0)
5277 MIB.addReg(ImplicitSReg, RegState::Implicit);
5278 break;
5279 }
5280
5281 // In general there's no single instruction that can perform an S <-> S
5282 // move in NEON space, but a pair of VEXT instructions *can* do the
5283 // job. It turns out that the VEXTs needed will only use DSrc once, with
5284 // the position based purely on the combination of lane-0 and lane-1
5285 // involved. For example
5286 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5287 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5288 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5289 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5290 //
5291 // Pattern of the MachineInstrs is:
5292 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5293 MachineInstrBuilder NewMIB;
5294 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
5295 DDst);
5296
5297 // On the first instruction, both DSrc and DDst may be undef if present.
5298 // Specifically when the original instruction didn't have them as an
5299 // <imp-use>.
5300 unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5301 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5302 NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5303
5304 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5305 CurUndef = !MI.readsRegister(CurReg, TRI);
5306 NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
5307 .addImm(1)
5308 .add(predOps(ARMCC::AL));
5309
5310 if (SrcLane == DstLane)
5311 NewMIB.addReg(SrcReg, RegState::Implicit);
5312
5313 MI.setDesc(get(ARM::VEXTd32));
5314 MIB.addReg(DDst, RegState::Define);
5315
5316 // On the second instruction, DDst has definitely been defined above, so
5317 // it is not undef. DSrc, if present, can be undef as above.
5318 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5319 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5320 MIB.addReg(CurReg, getUndefRegState(CurUndef));
5321
5322 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5323 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5324 MIB.addReg(CurReg, getUndefRegState(CurUndef))
5325 .addImm(1)
5326 .add(predOps(ARMCC::AL));
5327
5328 if (SrcLane != DstLane)
5329 MIB.addReg(SrcReg, RegState::Implicit);
5330
5331 // As before, the original destination is no longer represented, so add it
5332 // implicitly.
5333 MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
5334 if (ImplicitSReg != 0)
5335 MIB.addReg(ImplicitSReg, RegState::Implicit);
5336 break;
5337 }
5338 }
5339}
5340
5341//===----------------------------------------------------------------------===//
5342// Partial register updates
5343//===----------------------------------------------------------------------===//
5344//
5345// Swift renames NEON registers with 64-bit granularity. That means any
5346// instruction writing an S-reg implicitly reads the containing D-reg. The
5347// problem is mostly avoided by translating f32 operations to v2f32 operations
5348// on D-registers, but f32 loads are still a problem.
5349//
5350// These instructions can load an f32 into a NEON register:
5351//
5352// VLDRS - Only writes S, partial D update.
5353// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5354// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5355//
5356// FCONSTD can be used as a dependency-breaking instruction.
5357unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
5358 const MachineInstr &MI, unsigned OpNum,
5359 const TargetRegisterInfo *TRI) const {
5360 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5361 if (!PartialUpdateClearance)
5362 return 0;
5363
5364 assert(TRI && "Need TRI instance");
5365
5366 const MachineOperand &MO = MI.getOperand(OpNum);
5367 if (MO.readsReg())
5368 return 0;
5369 Register Reg = MO.getReg();
5370 int UseOp = -1;
5371
5372 switch (MI.getOpcode()) {
5373 // Normal instructions writing only an S-register.
5374 case ARM::VLDRS:
5375 case ARM::FCONSTS:
5376 case ARM::VMOVSR:
5377 case ARM::VMOVv8i8:
5378 case ARM::VMOVv4i16:
5379 case ARM::VMOVv2i32:
5380 case ARM::VMOVv2f32:
5381 case ARM::VMOVv1i64:
5382 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
5383 break;
5384
5385 // Explicitly reads the dependency.
5386 case ARM::VLD1LNd32:
5387 UseOp = 3;
5388 break;
5389 default:
5390 return 0;
5391 }
5392
5393 // If this instruction actually reads a value from Reg, there is no unwanted
5394 // dependency.
5395 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5396 return 0;
5397
5398 // We must be able to clobber the whole D-reg.
5399 if (Reg.isVirtual()) {
5400 // Virtual register must be a def undef foo:ssub_0 operand.
5401 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5402 return 0;
5403 } else if (ARM::SPRRegClass.contains(Reg)) {
5404 // Physical register: MI must define the full D-reg.
5405 unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
5406 &ARM::DPRRegClass);
5407 if (!DReg || !MI.definesRegister(DReg, TRI))
5408 return 0;
5409 }
5410
5411 // MI has an unwanted D-register dependency.
5412 // Avoid defs in the previous N instructions.
5413 return PartialUpdateClearance;
5414}
5415
5416// Break a partial register dependency after getPartialRegUpdateClearance
5417// returned non-zero.
5418void ARMBaseInstrInfo::breakPartialRegDependency(
5419 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5420 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5421 assert(TRI && "Need TRI instance");
5422
5423 const MachineOperand &MO = MI.getOperand(OpNum);
5424 Register Reg = MO.getReg();
5425 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5426 unsigned DReg = Reg;
5427
5428 // If MI defines an S-reg, find the corresponding D super-register.
5429 if (ARM::SPRRegClass.contains(Reg)) {
5430 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5431 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5432 }
5433
5434 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5435 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5436
5437 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5438 // the full D-register by loading the same value to both lanes. The
5439 // instruction is micro-coded with 2 uops, so don't do this until we can
5440 // properly schedule micro-coded instructions. The dispatcher stalls cause
5441 // too big regressions.
5442
5443 // Insert the dependency-breaking FCONSTD before MI.
5444 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
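// The net effect is, for example, turning a partial-update load such as
// "vldr s0, [r0]" into "vmov.f64 d0, #0.5" followed by the same load, so the
// S-register write no longer carries a false dependence on the previous
// long-latency producer of d0.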
5445 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5446 .addImm(96)
5447 .add(predOps(ARMCC::AL));
5448 MI.addRegisterKilled(DReg, TRI, true);
5449}
5450
5451bool ARMBaseInstrInfo::hasNOP() const {
5452 return Subtarget.hasFeature(ARM::HasV6KOps);
5453}
5454
5455bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
5456 if (MI->getNumOperands() < 4)
5457 return true;
5458 unsigned ShOpVal = MI->getOperand(3).getImm();
5459 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5460 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5461 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5462 ((ShImm == 1 || ShImm == 2) &&
5463 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5464 return true;
5465
5466 return false;
5467}
5468
5469bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5470 const MachineInstr &MI, unsigned DefIdx,
5471 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5472 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5473 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5474
5475 switch (MI.getOpcode()) {
5476 case ARM::VMOVDRR:
5477 // dX = VMOVDRR rY, rZ
5478 // is the same as:
5479 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5480 // Populate the InputRegs accordingly.
5481 // rY
5482 const MachineOperand *MOReg = &MI.getOperand(1);
5483 if (!MOReg->isUndef())
5484 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5485 MOReg->getSubReg(), ARM::ssub_0));
5486 // rZ
5487 MOReg = &MI.getOperand(2);
5488 if (!MOReg->isUndef())
5489 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5490 MOReg->getSubReg(), ARM::ssub_1));
5491 return true;
5492 }
5493 llvm_unreachable("Target dependent opcode missing");
5494}
5495
5496bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5497 const MachineInstr &MI, unsigned DefIdx,
5498 RegSubRegPairAndIdx &InputReg) const {
5499 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5500 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5501
5502 switch (MI.getOpcode()) {
5503 case ARM::VMOVRRD:
5504 // rX, rY = VMOVRRD dZ
5505 // is the same as:
5506 // rX = EXTRACT_SUBREG dZ, ssub_0
5507 // rY = EXTRACT_SUBREG dZ, ssub_1
5508 const MachineOperand &MOReg = MI.getOperand(2);
5509 if (MOReg.isUndef())
5510 return false;
5511 InputReg.Reg = MOReg.getReg();
5512 InputReg.SubReg = MOReg.getSubReg();
5513 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5514 return true;
5515 }
5516 llvm_unreachable("Target dependent opcode missing");
5517}
5518
5519bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5520 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5521 RegSubRegPairAndIdx &InsertedReg) const {
5522 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5523 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5524
5525 switch (MI.getOpcode()) {
5526 case ARM::VSETLNi32:
5527 case ARM::MVE_VMOV_to_lane_32:
5528 // dX = VSETLNi32 dY, rZ, imm
5529 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5530 const MachineOperand &MOBaseReg = MI.getOperand(1);
5531 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5532 if (MOInsertedReg.isUndef())
5533 return false;
5534 const MachineOperand &MOIndex = MI.getOperand(3);
5535 BaseReg.Reg = MOBaseReg.getReg();
5536 BaseReg.SubReg = MOBaseReg.getSubReg();
5537
5538 InsertedReg.Reg = MOInsertedReg.getReg();
5539 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5540 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5541 return true;
5542 }
5543 llvm_unreachable("Target dependent opcode missing");
5544}
5545
5546std::pair<unsigned, unsigned>
5547ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5548 const unsigned Mask = ARMII::MO_OPTION_MASK;
5549 return std::make_pair(TF & Mask, TF & ~Mask);
5550}
5551
5552ArrayRef<std::pair<unsigned, const char *>>
5553ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5554 using namespace ARMII;
5555
5556 static const std::pair<unsigned, const char *> TargetFlags[] = {
5557 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5558 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5559 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5560 };
5561 return ArrayRef(TargetFlags);
5562}
5563
5564ArrayRef<std::pair<unsigned, const char *>>
5565ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5566 using namespace ARMII;
5567
5568 static const std::pair<unsigned, const char *> TargetFlags[] = {
5569 {MO_COFFSTUB, "arm-coffstub"},
5570 {MO_GOT, "arm-got"},
5571 {MO_SBREL, "arm-sbrel"},
5572 {MO_DLLIMPORT, "arm-dllimport"},
5573 {MO_SECREL, "arm-secrel"},
5574 {MO_NONLAZY, "arm-nonlazy"}};
5575 return ArrayRef(TargetFlags);
5576}
5577
5578std::optional<RegImmPair>
5579ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
5580 int Sign = 1;
5581 unsigned Opcode = MI.getOpcode();
5582 int64_t Offset = 0;
5583
5584 // TODO: Handle cases where Reg is a super- or sub-register of the
5585 // destination register.
5586 const MachineOperand &Op0 = MI.getOperand(0);
5587 if (!Op0.isReg() || Reg != Op0.getReg())
5588 return std::nullopt;
5589
5590 // We describe SUBri or ADDri instructions.
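// For example, "$r1 = ADDri $r0, 8, 14, $noreg, $noreg" is described as
// RegImmPair{r0, 8}; the SUBri form yields the same base register with a
// negated offset.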
5591 if (Opcode == ARM::SUBri)
5592 Sign = -1;
5593 else if (Opcode != ARM::ADDri)
5594 return std::nullopt;
5595
5596 // TODO: Third operand can be global address (usually some string). Since
5597 // strings can be relocated we cannot calculate their offsets for
5598 // now.
5599 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5600 return std::nullopt;
5601
5602 Offset = MI.getOperand(2).getImm() * Sign;
5603 return RegImmPair{MI.getOperand(1).getReg(), Offset};
5604}
5605
5606bool llvm::registerDefinedBetween(unsigned Reg,
5607 MachineBasicBlock::iterator From,
5608 MachineBasicBlock::iterator To,
5609 const TargetRegisterInfo *TRI) {
5610 for (auto I = From; I != To; ++I)
5611 if (I->modifiesRegister(Reg, TRI))
5612 return true;
5613 return false;
5614}
5615
5616MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
5617 const TargetRegisterInfo *TRI) {
5618 // Search backwards to the instruction that defines CPSR. This may or may
5619 // not be a CMP; we check that after this loop. If we find another
5620 // instruction that reads CPSR, we return nullptr.
5621 MachineBasicBlock::iterator CmpMI = Br;
5622 while (CmpMI != Br->getParent()->begin()) {
5623 --CmpMI;
5624 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5625 break;
5626 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5627 break;
5628 }
5629
5630 // Check that this inst is a CMP r[0-7], #0 and that the register
5631 // is not redefined between the cmp and the br.
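// A typical match is a Thumb "cmp r3, #0" (tCMPi8) feeding the conditional
// branch, with nothing in between that touches CPSR or redefines r3; the
// caller can then fold the pair into a single CBZ/CBNZ.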
5632 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5633 return nullptr;
5634 Register Reg = CmpMI->getOperand(0).getReg();
5635 Register PredReg;
5636 ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
5637 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5638 return nullptr;
5639 if (!isARMLowRegister(Reg))
5640 return nullptr;
5641 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5642 return nullptr;
5643
5644 return &*CmpMI;
5645}
5646
5647unsigned llvm::ConstantMaterializationCost(unsigned Val,
5648 const ARMSubtarget *Subtarget,
5649 bool ForCodesize) {
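// The result is measured in bytes when ForCodesize is true and in instruction
// count otherwise. For example, 0x00ff00ff is not a valid ARM modified
// immediate (nor is its complement) and is too wide for MOVW, but it splits
// into two modified immediates (0x00ff0000 + 0x000000ff), so in ARM mode it
// costs 8 bytes or 2 instructions via the isSOImmTwoPartVal path below.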
5650 if (Subtarget->isThumb()) {
5651 if (Val <= 255) // MOV
5652 return ForCodesize ? 2 : 1;
5653 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5654 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5655 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5656 return ForCodesize ? 4 : 1;
5657 if (Val <= 510) // MOV + ADDi8
5658 return ForCodesize ? 4 : 2;
5659 if (~Val <= 255) // MOV + MVN
5660 return ForCodesize ? 4 : 2;
5661 if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
5662 return ForCodesize ? 4 : 2;
5663 } else {
5664 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5665 return ForCodesize ? 4 : 1;
5666 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5667 return ForCodesize ? 4 : 1;
5668 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5669 return ForCodesize ? 4 : 1;
5670 if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
5671 return ForCodesize ? 8 : 2;
5672 if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
5673 return ForCodesize ? 8 : 2;
5674 }
5675 if (Subtarget->useMovt()) // MOVW + MOVT
5676 return ForCodesize ? 8 : 2;
5677 return ForCodesize ? 8 : 3; // Literal pool load
5678}
5679
5680bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5681 const ARMSubtarget *Subtarget,
5682 bool ForCodesize) {
5683 // Check with ForCodesize
5684 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5685 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5686 if (Cost1 < Cost2)
5687 return true;
5688 if (Cost1 > Cost2)
5689 return false;
5690
5691 // If they are equal, try with !ForCodesize
5692 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5693 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5694}
5695
5696/// Constants defining how certain sequences should be outlined.
5697/// This encompasses how an outlined function should be called, and what kind of
5698/// frame should be emitted for that outlined function.
5699///
5700/// \p MachineOutlinerTailCall implies that the function is being created from
5701/// a sequence of instructions ending in a return.
5702///
5703/// That is,
5704///
5705/// I1 OUTLINED_FUNCTION:
5706/// I2 --> B OUTLINED_FUNCTION I1
5707/// BX LR I2
5708/// BX LR
5709///
5710/// +-------------------------+--------+-----+
5711/// | | Thumb2 | ARM |
5712/// +-------------------------+--------+-----+
5713/// | Call overhead in Bytes | 4 | 4 |
5714/// | Frame overhead in Bytes | 0 | 0 |
5715/// | Stack fixup required | No | No |
5716/// +-------------------------+--------+-----+
5717///
5718/// \p MachineOutlinerThunk implies that the function is being created from
5719/// a sequence of instructions ending in a call. The outlined function is
5720/// called with a BL instruction, and the outlined function tail-calls the
5721/// original call destination.
5722///
5723/// That is,
5724///
5725/// I1 OUTLINED_FUNCTION:
5726/// I2 --> BL OUTLINED_FUNCTION I1
5727/// BL f I2
5728/// B f
5729///
5730/// +-------------------------+--------+-----+
5731/// | | Thumb2 | ARM |
5732/// +-------------------------+--------+-----+
5733/// | Call overhead in Bytes | 4 | 4 |
5734/// | Frame overhead in Bytes | 0 | 0 |
5735/// | Stack fixup required | No | No |
5736/// +-------------------------+--------+-----+
5737///
5738/// \p MachineOutlinerNoLRSave implies that the function should be called using
5739/// a BL instruction, but doesn't require LR to be saved and restored. This
5740/// happens when LR is known to be dead.
5741///
5742/// That is,
5743///
5744/// I1 OUTLINED_FUNCTION:
5745/// I2 --> BL OUTLINED_FUNCTION I1
5746/// I3 I2
5747/// I3
5748/// BX LR
5749///
5750/// +-------------------------+--------+-----+
5751/// | | Thumb2 | ARM |
5752/// +-------------------------+--------+-----+
5753/// | Call overhead in Bytes | 4 | 4 |
5754/// | Frame overhead in Bytes | 2 | 4 |
5755/// | Stack fixup required | No | No |
5756/// +-------------------------+--------+-----+
5757///
5758/// \p MachineOutlinerRegSave implies that the function should be called with a
5759/// save and restore of LR to an available register. This allows us to avoid
5760/// stack fixups. Note that this outlining variant is compatible with the
5761/// NoLRSave case.
5762///
5763/// That is,
5764///
5765/// I1 Save LR OUTLINED_FUNCTION:
5766/// I2 --> BL OUTLINED_FUNCTION I1
5767/// I3 Restore LR I2
5768/// I3
5769/// BX LR
5770///
5771/// +-------------------------+--------+-----+
5772/// | | Thumb2 | ARM |
5773/// +-------------------------+--------+-----+
5774/// | Call overhead in Bytes | 8 | 12 |
5775/// | Frame overhead in Bytes | 2 | 4 |
5776/// | Stack fixup required | No | No |
5777/// +-------------------------+--------+-----+
5778///
5779/// \p MachineOutlinerDefault implies that the function should be called with
5780/// a save and restore of LR to the stack.
5781///
5782/// That is,
5783///
5784/// I1 Save LR OUTLINED_FUNCTION:
5785/// I2 --> BL OUTLINED_FUNCTION I1
5786/// I3 Restore LR I2
5787/// I3
5788/// BX LR
5789///
5790/// +-------------------------+--------+-----+
5791/// | | Thumb2 | ARM |
5792/// +-------------------------+--------+-----+
5793/// | Call overhead in Bytes | 8 | 12 |
5794/// | Frame overhead in Bytes | 2 | 4 |
5795/// | Stack fixup required | Yes | Yes |
5796/// +-------------------------+--------+-----+
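///
/// As a rough illustration of the trade-off: a 12-byte Thumb2 sequence that
/// occurs four times costs 48 bytes inline; outlined with the default frame it
/// costs roughly 4 * 8 bytes of calls plus the 12-byte body and a 2-byte
/// frame, i.e. about 46 bytes. Short sequences therefore only pay off when one
/// of the cheaper call variants above applies or the repeat count is higher.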
5797
5798enum MachineOutlinerClass {
5799 MachineOutlinerTailCall,
5800 MachineOutlinerThunk,
5801 MachineOutlinerNoLRSave,
5802 MachineOutlinerRegSave,
5803 MachineOutlinerDefault
5804};
5805
5806enum MachineOutlinerMBBFlags {
5807 LRUnavailableSomewhere = 0x2,
5808 HasCalls = 0x4,
5809 UnsafeRegsDead = 0x8
5810};
5811
5812struct OutlinerCosts {
5813 int CallTailCall;
5814 int FrameTailCall;
5815 int CallThunk;
5816 int FrameThunk;
5817 int CallNoLRSave;
5818 int FrameNoLRSave;
5819 int CallRegSave;
5820 int FrameRegSave;
5821 int CallDefault;
5822 int FrameDefault;
5823 int SaveRestoreLROnStack;
5824
5825 OutlinerCosts(const ARMSubtarget &target)
5826 : CallTailCall(target.isThumb() ? 4 : 4),
5827 FrameTailCall(target.isThumb() ? 0 : 0),
5828 CallThunk(target.isThumb() ? 4 : 4),
5829 FrameThunk(target.isThumb() ? 0 : 0),
5830 CallNoLRSave(target.isThumb() ? 4 : 4),
5831 FrameNoLRSave(target.isThumb() ? 2 : 4),
5832 CallRegSave(target.isThumb() ? 8 : 12),
5833 FrameRegSave(target.isThumb() ? 2 : 4),
5834 CallDefault(target.isThumb() ? 8 : 12),
5835 FrameDefault(target.isThumb() ? 2 : 4),
5836 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5837};
5838
5839Register
5840ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5841 MachineFunction *MF = C.getMF();
5842 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
5843 const ARMBaseRegisterInfo *ARI =
5844 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5845
5846 BitVector regsReserved = ARI->getReservedRegs(*MF);
5847 // Check if there is an available register across the sequence that we can
5848 // use.
5849 for (Register Reg : ARM::rGPRRegClass) {
5850 if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
5851 Reg != ARM::LR && // LR is not reserved, but don't use it.
5852 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5853 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5854 C.isAvailableInsideSeq(Reg, TRI))
5855 return Reg;
5856 }
5857 return Register();
5858}
5859
5860// Compute liveness of LR at the point after the interval [I, E), which
5861// denotes a *backward* iteration through instructions. Used only for return
5862// basic blocks, which do not end with a tail call.
5863static bool isLRAvailable(const TargetRegisterInfo &TRI,
5864 MachineBasicBlock::reverse_iterator I,
5865 MachineBasicBlock::reverse_iterator E) {
5866 // At the end of the function LR is dead.
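// For instance, a return block ending in "bx lr" with no preceding def of LR
// reports LR as unavailable (the incoming value is still needed), whereas
// "mov lr, r3; bx lr" reports it available before the mov, because the return
// consumes the freshly written value.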
5867 bool Live = false;
5868 for (; I != E; ++I) {
5869 const MachineInstr &MI = *I;
5870
5871 // Check defs of LR.
5872 if (MI.modifiesRegister(ARM::LR, &TRI))
5873 Live = false;
5874
5875 // Check uses of LR.
5876 unsigned Opcode = MI.getOpcode();
5877 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5878 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5879 Opcode == ARM::tBXNS_RET) {
5880 // These instructions use LR, but it's not an (explicit or implicit)
5881 // operand.
5882 Live = true;
5883 continue;
5884 }
5885 if (MI.readsRegister(ARM::LR, &TRI))
5886 Live = true;
5887 }
5888 return !Live;
5889}
5890
5891std::optional<std::unique_ptr<outliner::OutlinedFunction>>
5892ARMBaseInstrInfo::getOutliningCandidateInfo(
5893 const MachineModuleInfo &MMI,
5894 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
5895 unsigned MinRepeats) const {
5896 unsigned SequenceSize = 0;
5897 for (auto &MI : RepeatedSequenceLocs[0])
5898 SequenceSize += getInstSizeInBytes(MI);
5899
5900 // Properties about candidate MBBs that hold for all of them.
5901 unsigned FlagsSetInAll = 0xF;
5902
5903 // Compute liveness information for each candidate, and set FlagsSetInAll.
5904 const TargetRegisterInfo &TRI = getRegisterInfo();
5905 for (outliner::Candidate &C : RepeatedSequenceLocs)
5906 FlagsSetInAll &= C.Flags;
5907
5908 // According to the ARM Procedure Call Standard, the following are
5909 // undefined on entry/exit from a function call:
5910 //
5911 // * Register R12(IP),
5912 // * Condition codes (and thus the CPSR register)
5913 //
5914 // Since we control the instructions which are part of the outlined regions
5915 // we don't need to be fully compliant with the AAPCS, but we have to
5916 // guarantee that if a veneer is inserted at link time the code is still
5917 // correct. Because of this, we can't outline any sequence of instructions
5918 // where one of these registers is live into/across it. Thus, we need to
5919 // delete those candidates.
5920 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5921 // If the unsafe registers in this block are all dead, then we don't need
5922 // to compute liveness here.
5923 if (C.Flags & UnsafeRegsDead)
5924 return false;
5925 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5926 };
5927
5928 // Are there any candidates where those registers are live?
5929 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5930 // Erase every candidate that violates the restrictions above. (It could be
5931 // true that we have viable candidates, so it's not worth bailing out in
5932 // the case that, say, 1 out of 20 candidates violates the restrictions.)
5933 llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
5934
5935 // If the sequence doesn't have enough candidates left, then we're done.
5936 if (RepeatedSequenceLocs.size() < MinRepeats)
5937 return std::nullopt;
5938 }
5939
5940 // We expect the majority of the outlining candidates to be in consensus with
5941 // regard to return address signing and authentication, and branch target
5942 // enforcement. In other words, partitioning according to all four possible
5943 // combinations of PAC-RET and BTI is going to yield one big subset and three
5944 // small (likely empty) subsets. That allows us to cull incompatible
5945 // candidates separately for PAC-RET and BTI.
5946
5947 // Partition the candidates in two sets: one with BTI enabled and one with BTI
5948 // disabled. Remove the candidates from the smaller set. If the sets are the
5949 // same size, prefer the non-BTI ones for outlining, since they have less
5950 // overhead.
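// For example, with seven candidates of which five come from non-BTI
// functions, the two BTI candidates are erased; the same filtering is then
// repeated for PAC-RET so the surviving set is homogeneous in both respects.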
5951 auto NoBTI =
5952 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5953 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5954 return AFI.branchTargetEnforcement();
5955 });
5956 if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
5957 std::distance(NoBTI, RepeatedSequenceLocs.end()))
5958 RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
5959 else
5960 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
5961
5962 if (RepeatedSequenceLocs.size() < MinRepeats)
5963 return std::nullopt;
5964
5965 // Likewise, partition the candidates according to PAC-RET enablement.
5966 auto NoPAC =
5967 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5968 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5969 // If the function happens to not spill the LR, do not disqualify it
5970 // from the outlining.
5971 return AFI.shouldSignReturnAddress(true);
5972 });
5973 if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
5974 std::distance(NoPAC, RepeatedSequenceLocs.end()))
5975 RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
5976 else
5977 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
5978
5979 if (RepeatedSequenceLocs.size() < MinRepeats)
5980 return std::nullopt;
5981
5982 // At this point, we have only "safe" candidates to outline. Figure out
5983 // frame + call instruction information.
5984
5985 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5986
5987 // Helper lambda which sets call information for every candidate.
5988 auto SetCandidateCallInfo =
5989 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5990 for (outliner::Candidate &C : RepeatedSequenceLocs)
5991 C.setCallInfo(CallID, NumBytesForCall);
5992 };
5993
5994 OutlinerCosts Costs(Subtarget);
5995
5996 const auto &SomeMFI =
5997 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
5998 // Adjust costs to account for the BTI instructions.
5999 if (SomeMFI.branchTargetEnforcement()) {
6000 Costs.FrameDefault += 4;
6001 Costs.FrameNoLRSave += 4;
6002 Costs.FrameRegSave += 4;
6003 Costs.FrameTailCall += 4;
6004 Costs.FrameThunk += 4;
6005 }
6006
6007 // Adjust costs to account for sign and authentication instructions.
6008 if (SomeMFI.shouldSignReturnAddress(true)) {
6009 Costs.CallDefault += 8; // +PAC instr, +AUT instr
6010 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
6011 }
6012
6013 unsigned FrameID = MachineOutlinerDefault;
6014 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
6015
6016 // If the last instruction in any candidate is a terminator, then we should
6017 // tail call all of the candidates.
6018 if (RepeatedSequenceLocs[0].back().isTerminator()) {
6019 FrameID = MachineOutlinerTailCall;
6020 NumBytesToCreateFrame = Costs.FrameTailCall;
6021 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
6022 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
6023 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
6024 LastInstrOpcode == ARM::tBLXr ||
6025 LastInstrOpcode == ARM::tBLXr_noip ||
6026 LastInstrOpcode == ARM::tBLXi) {
6027 FrameID = MachineOutlinerThunk;
6028 NumBytesToCreateFrame = Costs.FrameThunk;
6029 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
6030 } else {
6031 // We need to decide how to emit calls + frames. We can always emit the same
6032 // frame if we don't need to save to the stack. If we have to save to the
6033 // stack, then we need a different frame.
6034 unsigned NumBytesNoStackCalls = 0;
6035 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
6036
6037 for (outliner::Candidate &C : RepeatedSequenceLocs) {
6038 // LR liveness is overestimated in return blocks, unless they end with a
6039 // tail call.
6040 const auto Last = C.getMBB()->rbegin();
6041 const bool LRIsAvailable =
6042 C.getMBB()->isReturnBlock() && !Last->isCall()
6043 ? isLRAvailable(TRI, Last,
6044 (MachineBasicBlock::reverse_iterator)C.begin())
6045 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
6046 if (LRIsAvailable) {
6047 FrameID = MachineOutlinerNoLRSave;
6048 NumBytesNoStackCalls += Costs.CallNoLRSave;
6049 C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
6050 CandidatesWithoutStackFixups.push_back(C);
6051 }
6052
6053 // Is an unused register available? If so, we won't modify the stack, so
6054 // we can outline with the same frame type as those that don't save LR.
6055 else if (findRegisterToSaveLRTo(C)) {
6056 FrameID = MachineOutlinerRegSave;
6057 NumBytesNoStackCalls += Costs.CallRegSave;
6058 C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
6059 CandidatesWithoutStackFixups.push_back(C);
6060 }
6061
6062 // Is SP used in the sequence at all? If not, we don't have to modify
6063 // the stack, so we are guaranteed to get the same frame.
6064 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
6065 NumBytesNoStackCalls += Costs.CallDefault;
6066 C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6067 CandidatesWithoutStackFixups.push_back(C);
6068 }
6069
6070 // If we outline this, we need to modify the stack. Pretend we don't
6071 // outline this by saving all of its bytes.
6072 else
6073 NumBytesNoStackCalls += SequenceSize;
6074 }
6075
6076 // If there are no places where we have to save LR, then note that we don't
6077 // have to update the stack. Otherwise, give every candidate the default
6078 // call type.
6079 if (NumBytesNoStackCalls <=
6080 RepeatedSequenceLocs.size() * Costs.CallDefault) {
6081 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
6082 FrameID = MachineOutlinerNoLRSave;
6083 if (RepeatedSequenceLocs.size() < MinRepeats)
6084 return std::nullopt;
6085 } else
6086 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6087 }
6088
6089 // Does every candidate's MBB contain a call? If so, then we might have a
6090 // call in the range.
6091 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6092 // Check if the range contains a call. These require a save + restore of
6093 // the link register.
6094 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
6095 if (any_of(drop_end(FirstCand),
6096 [](const MachineInstr &MI) { return MI.isCall(); }))
6097 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6098
6099 // Handle the last instruction separately. If it is a tail call, then the
6100 // last instruction is a call; we don't want to save + restore in this
6101 // case. However, it could be possible that the last instruction is a
6102 // call without it being valid to tail call this sequence. We should
6103 // consider this as well.
6104 else if (FrameID != MachineOutlinerThunk &&
6105 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
6106 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6107 }
6108
6109 return std::make_unique<outliner::OutlinedFunction>(
6110 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
6111}
6112
6113bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
6114 int64_t Fixup,
6115 bool Updt) const {
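// For example, for a Thumb2 "ldr r0, [sp, #8]" (t2LDRi12) and Fixup == 8 (the
// extra SP displacement introduced by the outlined frame), the offset becomes
// 16; with Updt == false the routine only checks that the new offset is still
// encodable for the instruction's addressing mode.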
6116 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
6117 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
6118 if (SPIdx < 0)
6119 // No SP operand
6120 return true;
6121 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
6122 // If SP is not the base register we can't do much
6123 return false;
6124
6125 // Stack might be involved but addressing mode doesn't handle any offset.
6126 // Rq: AddrModeT1_[1|2|4] don't operate on SP
6127 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
6128 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
6129 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
6130 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
6131 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
6132 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
6133 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
6134 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
6135 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
6136 AddrMode == ARMII::AddrModeNone ||
6137 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
6138 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
6139 return false;
6140
6141 unsigned NumOps = MI->getDesc().getNumOperands();
6142 unsigned ImmIdx = NumOps - 3;
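// The offset immediate sits just before the two trailing predicate operands,
// hence NumOps - 3.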
6143
6144 const MachineOperand &Offset = MI->getOperand(ImmIdx);
6145 assert(Offset.isImm() && "Is not an immediate");
6146 int64_t OffVal = Offset.getImm();
6147
6148 if (OffVal < 0)
6149 // Don't override data if they are below SP.
6150 return false;
6151
6152 unsigned NumBits = 0;
6153 unsigned Scale = 1;
6154
6155 switch (AddrMode) {
6156 case ARMII::AddrMode3:
6157 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
6158 return false;
6159 OffVal = ARM_AM::getAM3Offset(OffVal);
6160 NumBits = 8;
6161 break;
6162 case ARMII::AddrMode5:
6163 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
6164 return false;
6165 OffVal = ARM_AM::getAM5Offset(OffVal);
6166 NumBits = 8;
6167 Scale = 4;
6168 break;
6169 case ARMII::AddrMode5FP16:
6170 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
6171 return false;
6172 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
6173 NumBits = 8;
6174 Scale = 2;
6175 break;
6176 case ARMII::AddrModeT2_i8pos:
6177 NumBits = 8;
6178 break;
6179 case ARMII::AddrModeT2_i8s4:
6180 // FIXME: Values are already scaled in this addressing mode.
6181 assert((Fixup & 3) == 0 && "Can't encode this offset!");
6182 NumBits = 10;
6183 break;
6184 case ARMII::AddrModeT2_ldrex:
6185 NumBits = 8;
6186 Scale = 4;
6187 break;
6188 case ARMII::AddrModeT2_i12:
6189 case ARMII::AddrMode_i12:
6190 NumBits = 12;
6191 break;
6192 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6193 NumBits = 8;
6194 Scale = 4;
6195 break;
6196 default:
6197 llvm_unreachable("Unsupported addressing mode!");
6198 }
6199 // Make sure the offset is encodable for instructions that scale the
6200 // immediate.
6201 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6202 "Can't encode this offset!");
6203 OffVal += Fixup / Scale;
6204
6205 unsigned Mask = (1 << NumBits) - 1;
6206
6207 if (OffVal <= Mask) {
6208 if (Updt)
6209 MI->getOperand(ImmIdx).setImm(OffVal);
6210 return true;
6211 }
6212
6213 return false;
6214}
6215
6216void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
6217 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6218 outliner::Candidate &C = Candidates.front();
6219 // branch-target-enforcement is guaranteed to be consistent between all
6220 // candidates, so we only need to look at one.
6221 const Function &CFn = C.getMF()->getFunction();
6222 if (CFn.hasFnAttribute("branch-target-enforcement"))
6223 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
6224
6225 if (CFn.hasFnAttribute("sign-return-address"))
6226 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
6227
6228 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6229}
6230
6231bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
6232 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6233 const Function &F = MF.getFunction();
6234
6235 // Can F be deduplicated by the linker? If it can, don't outline from it.
6236 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6237 return false;
6238
6239 // Don't outline from functions with section markings; the program could
6240 // expect that all the code is in the named section.
6241 // FIXME: Allow outlining from multiple functions with the same section
6242 // marking.
6243 if (F.hasSection())
6244 return false;
6245
6246 // FIXME: Thumb1 outlining is not handled
6247 if (Subtarget.isThumb1Only())
6248 return false;
6249
6250 // It's safe to outline from MF.
6251 return true;
6252}
6253
6254bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
6255 unsigned &Flags) const {
6256 // Check if LR is available through all of the MBB. If it's not, then set
6257 // a flag.
6259 "Suitable Machine Function for outlining must track liveness");
6260
6262
6264 LRU.accumulate(MI);
6265
6266 // Check if each of the unsafe registers are available...
6267 bool R12AvailableInBlock = LRU.available(ARM::R12);
6268 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6269
6270 // If all of these are dead (and not live out), we know we don't have to check
6271 // them later.
6272 if (R12AvailableInBlock && CPSRAvailableInBlock)
6273 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6274
6275 // Now, add the live outs to the set.
6276 LRU.addLiveOuts(MBB);
6277
6278 // If any of these registers is available in the MBB, but also a live out of
6279 // the block, then we know outlining is unsafe.
6280 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6281 return false;
6282 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6283 return false;
6284
6285 // Check if there's a call inside this MachineBasicBlock. If there is, then
6286 // set a flag.
6287 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6288 Flags |= MachineOutlinerMBBFlags::HasCalls;
6289
6290 // LR liveness is overestimated in return blocks.
6291
6292 bool LRIsAvailable =
6293 MBB.isReturnBlock() && !MBB.back().isCall()
6294 ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
6295 : LRU.available(ARM::LR);
6296 if (!LRIsAvailable)
6297 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6298
6299 return true;
6300}
6301
6302outliner::InstrType
6303ARMBaseInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
6304 MachineBasicBlock::iterator &MIT,
6305 unsigned Flags) const {
6306 MachineInstr &MI = *MIT;
6307 const TargetRegisterInfo *TRI = &getRegisterInfo();
6308
6309 // PIC instructions contain labels; outlining them would break offset
6310 // computing.
6311 unsigned Opc = MI.getOpcode();
6312 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6313 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6314 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6315 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6316 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6317 Opc == ARM::t2MOV_ga_pcrel)
6318 return outliner::InstrType::Illegal;
6319
6320 // Be conservative with ARMv8.1 MVE instructions.
6321 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6322 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6323 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6324 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6325 Opc == ARM::t2LoopEndDec)
6326 return outliner::InstrType::Illegal;
6327
6328 const MCInstrDesc &MCID = MI.getDesc();
6329 uint64_t MIFlags = MCID.TSFlags;
6330 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
6331 return outliner::InstrType::Illegal;
6332
6333 // Is this a terminator for a basic block?
6334 if (MI.isTerminator())
6335 // TargetInstrInfo::getOutliningType has already filtered out anything
6336 // that would break this, so we can allow it here.
6337 return outliner::InstrType::Legal;
6338
6339 // Don't outline if link register or program counter value are used.
6340 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
6341 return outliner::InstrType::Illegal;
6342
6343 if (MI.isCall()) {
6344 // Get the function associated with the call. Look at each operand and find
6345 // the one that represents the callee and get its name.
6346 const Function *Callee = nullptr;
6347 for (const MachineOperand &MOP : MI.operands()) {
6348 if (MOP.isGlobal()) {
6349 Callee = dyn_cast<Function>(MOP.getGlobal());
6350 break;
6351 }
6352 }
6353
6354 // Don't outline calls to "mcount"-like functions; in particular, Linux
6355 // kernel function tracing relies on them.
6356 if (Callee &&
6357 (Callee->getName() == "\01__gnu_mcount_nc" ||
6358 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6359 return outliner::InstrType::Illegal;
6360
6361 // If we don't know anything about the callee, assume it depends on the
6362 // stack layout of the caller. In that case, it's only legal to outline
6363 // as a tail-call. Explicitly list the call instructions we know about so
6364 // we don't get unexpected results with call pseudo-instructions.
6365 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6366 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6367 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6368 Opc == ARM::tBLXi)
6369 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6370
6371 if (!Callee)
6372 return UnknownCallOutlineType;
6373
6374 // We have a function we have information about. Check if it's something we
6375 // can safely outline.
6376 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
6377
6378 // We don't know what's going on with the callee at all. Don't touch it.
6379 if (!CalleeMF)
6380 return UnknownCallOutlineType;
6381
6382 // Check if we know anything about the callee saves on the function. If we
6383 // don't, then don't touch it, since that implies that we haven't computed
6384 // anything about its stack frame yet.
6385 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6386 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6387 MFI.getNumObjects() > 0)
6388 return UnknownCallOutlineType;
6389
6390 // At this point, we can say that CalleeMF ought to not pass anything on the
6391 // stack. Therefore, we can outline it.
6392 return outliner::InstrType::Legal;
6393 }
6394
6395 // Since calls are handled, don't touch LR or PC
6396 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
6397 return outliner::InstrType::Illegal;
6398
6399 // Does this use the stack?
6400 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6401 // True if there is no chance that any outlined candidate from this range
6402 // could require stack fixups. That is, both
6403 // * LR is available in the range (No save/restore around call)
6404 // * The range doesn't include calls (No save/restore in outlined frame)
6405 // are true.
6406 // These conditions also ensure correctness of the return address
6407 // authentication - we insert sign and authentication instructions only if
6408 // we save/restore LR on stack, but then this condition ensures that the
6409 // outlined range does not modify the SP, therefore the SP value used for
6410 // signing is the same as the one used for authentication.
6411 // FIXME: This is very restrictive; the flags check the whole block,
6412 // not just the bit we will try to outline.
6413 bool MightNeedStackFixUp =
6414 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
6415 MachineOutlinerMBBFlags::HasCalls));
6416
6417 if (!MightNeedStackFixUp)
6418 return outliner::InstrType::Legal;
6419
6420 // Any modification of SP will break our code to save/restore LR.
6421 // FIXME: We could handle some instructions which add a constant offset to
6422 // SP, with a bit more work.
6423 if (MI.modifiesRegister(ARM::SP, TRI))
6424 return outliner::InstrType::Illegal;
6425
6426 // At this point, we have a stack instruction that we might need to fix
6427 // up. We'll handle it if it's a load or store.
6428 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6429 false))
6430 return outliner::InstrType::Legal;
6431
6432 // We can't fix it up, so don't outline it.
6433 return outliner::InstrType::Illegal;
6434 }
6435
6436 // Be conservative with IT blocks.
6437 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6438 MI.modifiesRegister(ARM::ITSTATE, TRI))
6439 return outliner::InstrType::Illegal;
6440
6441 // Don't outline CFI instructions.
6442 if (MI.isCFIInstruction())
6443 return outliner::InstrType::Illegal;
6444
6445 return outliner::InstrType::Legal;
6446}
6447
6448void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6449 for (MachineInstr &MI : MBB) {
6450 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6451 }
6452}
6453
6454void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6455 MachineBasicBlock::iterator It, bool CFI,
6456 bool Auth) const {
6457 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6458 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6459 assert(Align >= 8 && Align <= 256);
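// With Auth this emits "pac r12, lr, sp" followed by
// "strd r12, lr, [sp, #-Align]!"; otherwise a single pre-indexed store of LR,
// e.g. "str lr, [sp, #-Align]!".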
6460 if (Auth) {
6461 assert(Subtarget.isThumb2());
6462 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6463 // sequence.
6464 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6465 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6466 .addReg(ARM::R12, RegState::Kill)
6467 .addReg(ARM::LR, RegState::Kill)
6468 .addReg(ARM::SP)
6469 .addImm(-Align)
6470 .add(predOps(ARMCC::AL))
6471 .setMIFlags(MIFlags);
6472 } else {
6473 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6474 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6475 .addReg(ARM::LR, RegState::Kill)
6476 .addReg(ARM::SP)
6477 .addImm(-Align)
6478 .add(predOps(ARMCC::AL))
6479 .setMIFlags(MIFlags);
6480 }
6481
6482 if (!CFI)
6483 return;
6484
6485 MachineFunction &MF = *MBB.getParent();
6486
6487 // Add a CFI, saying CFA is offset by Align bytes from SP.
6488 int64_t StackPosEntry =
6489 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align));
6490 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6491 .addCFIIndex(StackPosEntry)
6492 .setMIFlags(MachineInstr::FrameSetup);
6493
6494 // Add a CFI saying that the LR that we want to find is now higher than
6495 // before.
6496 int LROffset = Auth ? Align - 4 : Align;
6497 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6498 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6499 int64_t LRPosEntry = MF.addFrameInst(
6500 MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset));
6501 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6502 .addCFIIndex(LRPosEntry)
6503 .setMIFlags(MachineInstr::FrameSetup);
6504 if (Auth) {
6505 // Add a CFI for the location of the return address PAC.
6506 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6507 int64_t RACPosEntry = MF.addFrameInst(
6508 MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align));
6509 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6510 .addCFIIndex(RACPosEntry)
6511 .setMIFlags(MachineInstr::FrameSetup);
6512 }
6513}
6514
6515void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
6516 MachineBasicBlock::iterator It,
6517 Register Reg) const {
6518 MachineFunction &MF = *MBB.getParent();
6519 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6520 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6521 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
6522
6523 int64_t LRPosEntry = MF.addFrameInst(
6524 MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
6525 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6526 .addCFIIndex(LRPosEntry)
6527 .setMIFlags(MachineInstr::FrameSetup);
6528}
6529
6530void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6531 MachineBasicBlock::iterator It,
6532 bool CFI, bool Auth) const {
6533 int Align = Subtarget.getStackAlignment().value();
6534 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
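// Mirror of saveLROnStack: with Auth this emits "ldrd r12, lr, [sp], #Align"
// and, after the CFI updates, "aut r12, lr, sp"; otherwise a single
// post-indexed load of LR.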
6535 if (Auth) {
6536 assert(Subtarget.isThumb2());
6537 // Restore return address PAC and LR.
6538 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6539 .addReg(ARM::R12, RegState::Define)
6540 .addReg(ARM::LR, RegState::Define)
6541 .addReg(ARM::SP, RegState::Define)
6542 .addReg(ARM::SP)
6543 .addImm(Align)
6544 .add(predOps(ARMCC::AL))
6545 .setMIFlags(MIFlags);
6546 // LR authentication is after the CFI instructions, below.
6547 } else {
6548 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6549 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6550 .addReg(ARM::SP, RegState::Define)
6551 .addReg(ARM::SP);
6552 if (!Subtarget.isThumb())
6553 MIB.addReg(0);
6554 MIB.addImm(Subtarget.getStackAlignment().value())
6555 .add(predOps(ARMCC::AL))
6556 .setMIFlags(MIFlags);
6557 }
6558
6559 if (CFI) {
6560 // Now stack has moved back up...
6561 MachineFunction &MF = *MBB.getParent();
6562 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6563 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6564 int64_t StackPosEntry =
6565 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
6566 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6567 .addCFIIndex(StackPosEntry)
6568 .setMIFlags(MachineInstr::FrameDestroy);
6569
6570 // ... and we have restored LR.
6571 int64_t LRPosEntry =
6572 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6573 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6574 .addCFIIndex(LRPosEntry)
6575 .setMIFlags(MachineInstr::FrameDestroy);
6576
6577 if (Auth) {
6578 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6579 int64_t Entry =
6580 MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, DwarfRAC));
6581 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6582 .addCFIIndex(Entry)
6583 .setMIFlags(MachineInstr::FrameDestroy);
6584 }
6585 }
6586
6587 if (Auth)
6588 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6589}
6590
6591void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
6592 MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
6593 MachineFunction &MF = *MBB.getParent();
6594 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6595 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6596
6597 int64_t LRPosEntry =
6598 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6599 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6600 .addCFIIndex(LRPosEntry)
6601 .setMIFlags(MachineInstr::FrameDestroy);
6602}
6603
6604void ARMBaseInstrInfo::buildOutlinedFrame(
6605 MachineBasicBlock &MBB, MachineFunction &MF,
6606 const outliner::OutlinedFunction &OF) const {
6607 // For thunk outlining, rewrite the last instruction from a call to a
6608 // tail-call.
6609 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6610 MachineInstr *Call = &*--MBB.instr_end();
6611 bool isThumb = Subtarget.isThumb();
6612 unsigned FuncOp = isThumb ? 2 : 0;
6613 unsigned Opc = Call->getOperand(FuncOp).isReg()
6614 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6615 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6616 : ARM::tTAILJMPdND
6617 : ARM::TAILJMPd;
6618 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6619 .add(Call->getOperand(FuncOp));
6620 if (isThumb && !Call->getOperand(FuncOp).isReg())
6621 MIB.add(predOps(ARMCC::AL));
6622 Call->eraseFromParent();
6623 }
6624
6625 // Is there a call in the outlined range?
6626 auto IsNonTailCall = [](MachineInstr &MI) {
6627 return MI.isCall() && !MI.isReturn();
6628 };
6629 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6632
6635 Et = std::prev(MBB.end());
6636
6637 // We have to save and restore LR, we need to add it to the liveins if it
6638 // is not already part of the set. This is sufficient since outlined
6639 // functions only have one block.
6640 if (!MBB.isLiveIn(ARM::LR))
6641 MBB.addLiveIn(ARM::LR);
6642
6643 // Insert a save before the outlined region
6644 bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true);
6645 saveLROnStack(MBB, It, true, Auth);
6646
6647 // Fix up the instructions in the range, since we're going to modify the
6648 // stack.
6650 "Can only fix up stack references once");
6651 fixupPostOutline(MBB);
6652
6653 // Insert a restore before the terminator for the function. Restore LR.
6654 restoreLRFromStack(MBB, Et, true, Auth);
6655 }
6656
6657 // If this is a tail call outlined function, then there's already a return.
6658 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6659 OF.FrameConstructionID == MachineOutlinerThunk)
6660 return;
6661
6662 // Here we have to insert the return ourselves. Get the correct opcode from
6663 // current feature set.
6664 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
6665 .add(predOps(ARMCC::AL));
6666
6667 // Did we have to modify the stack by saving the link register?
6668 if (OF.FrameConstructionID != MachineOutlinerDefault &&
6669 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6670 return;
6671
6672 // We modified the stack.
6673 // Walk over the basic block and fix up all the stack accesses.
6674 fixupPostOutline(MBB);
6675}
6676
6677MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
6678 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6679 MachineFunction &MF, outliner::Candidate &C) const {
6680 MachineInstrBuilder MIB;
6681 MachineBasicBlock::iterator CallPt;
6682 unsigned Opc;
6683 bool isThumb = Subtarget.isThumb();
6684
6685 // Are we tail calling?
6686 if (C.CallConstructionID == MachineOutlinerTailCall) {
6687 // If yes, then we can just branch to the label.
6688 Opc = isThumb
6689 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6690 : ARM::TAILJMPd;
6691 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6692 .addGlobalAddress(M.getNamedValue(MF.getName()));
6693 if (isThumb)
6694 MIB.add(predOps(ARMCC::AL));
6695 It = MBB.insert(It, MIB);
6696 return It;
6697 }
6698
6699 // Create the call instruction.
6700 Opc = isThumb ? ARM::tBL : ARM::BL;
6701 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6702 if (isThumb)
6703 CallMIB.add(predOps(ARMCC::AL));
6704 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6705
6706 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6707 C.CallConstructionID == MachineOutlinerThunk) {
6708 // No, so just insert the call.
6709 It = MBB.insert(It, CallMIB);
6710 return It;
6711 }
6712
6713 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6714 // Can we save to a register?
6715 if (C.CallConstructionID == MachineOutlinerRegSave) {
6716 Register Reg = findRegisterToSaveLRTo(C);
6717 assert(Reg != 0 && "No callee-saved register available?");
6718
6719 // Save and restore LR from that register.
6720 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6721 if (!AFI.isLRSpilled())
6722 emitCFIForLRSaveToReg(MBB, It, Reg);
6723 CallPt = MBB.insert(It, CallMIB);
6724 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6725 if (!AFI.isLRSpilled())
6726 emitCFIForLRRestoreFromReg(MBB, It);
6727 It--;
6728 return CallPt;
6729 }
6730 // We have the default case. Save and restore from SP.
6731 if (!MBB.isLiveIn(ARM::LR))
6732 MBB.addLiveIn(ARM::LR);
6733 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6734 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6735 CallPt = MBB.insert(It, CallMIB);
6736 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6737 It--;
6738 return CallPt;
6739}
6740
6741bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
6742 MachineFunction &MF) const {
6743 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6744}
6745
6746bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
6747 const MachineInstr &MI) const {
6748 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6749 // the tail predication conversion. This means that the element count
6750 // register has to be live for longer, but that has to be better than
6751 // spill/restore and VPT predication.
6752 return (isVCTP(&MI) && !isPredicated(MI)) ||
6753 TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
6754}
6755
6756unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
6757 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6758 : ARM::BLX;
6759}
6760
6761unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
6762 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6763 : ARM::tBLXr;
6764}
6765
6766unsigned llvm::getBlxOpcode(const MachineFunction &MF) {
6767 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6768 : ARM::BLX_pred;
6769}
6770
6771namespace {
6772class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6773 MachineInstr *EndLoop, *LoopCount;
6774 MachineFunction *MF;
6775 const TargetInstrInfo *TII;
6776
6777 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6778 // [LAST_IS_USE] : last reference to register in schedule is a use
6779 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6780 static int constexpr MAX_STAGES = 30;
6781 static int constexpr LAST_IS_USE = MAX_STAGES;
6782 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6783 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6784 typedef std::map<unsigned, IterNeed> IterNeeds;
6785
6786 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6787 const IterNeeds &CIN);
6788 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6789
6790 // Meanings of the various stuff with loop types:
6791 // t2Bcc:
6792 // EndLoop = branch at end of original BB that will become a kernel
6793 // LoopCount = CC setter live into branch
6794 // t2LoopEnd:
6795 // EndLoop = branch at end of original BB
6796 // LoopCount = t2LoopDec
6797public:
6798 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6799 : EndLoop(EndLoop), LoopCount(LoopCount),
6800 MF(EndLoop->getParent()->getParent()),
6801 TII(MF->getSubtarget().getInstrInfo()) {}
6802
6803 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6804 // Only ignore the terminator.
6805 return MI == EndLoop || MI == LoopCount;
6806 }
6807
6808 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
6809 if (tooMuchRegisterPressure(SSD, SMS))
6810 return false;
6811
6812 return true;
6813 }
6814
6815 std::optional<bool> createTripCountGreaterCondition(
6816 int TC, MachineBasicBlock &MBB,
6817 SmallVectorImpl<MachineOperand> &Cond) override {
6818
6819 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6820 Cond.push_back(EndLoop->getOperand(1));
6821 Cond.push_back(EndLoop->getOperand(2));
6822 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6823 TII->reverseBranchCondition(Cond);
6824 }
6825 return {};
6826 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6827 // General case just lets the unrolled t2LoopDec do the subtraction and
6828 // therefore just needs to check if zero has been reached.
6829 MachineInstr *LoopDec = nullptr;
6830 for (auto &I : MBB.instrs())
6831 if (I.getOpcode() == ARM::t2LoopDec)
6832 LoopDec = &I;
6833 assert(LoopDec && "Unable to find copied LoopDec");
6834 // Check if we're done with the loop.
6835 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6836 .addReg(LoopDec->getOperand(0).getReg())
6837 .addImm(0)
6838 .addImm(ARMCC::AL)
6839 .addReg(ARM::NoRegister);
6840 Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
6841 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6842 return {};
6843 } else
6844 llvm_unreachable("Unknown EndLoop");
6845 }
6846
6847 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6848
6849 void adjustTripCount(int TripCountAdjust) override {}
6850
6851 void disposed() override {}
6852};
6853
6854void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6855 const IterNeeds &CIN) {
6856 // Increase pressure by the amounts in CrossIterationNeeds
6857 for (const auto &N : CIN) {
6858 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6859 for (int I = 0; I < Cnt; ++I)
6860 RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
6861 LaneBitmask::getAll());
6862 }
6863 // Decrease pressure by the amounts in CrossIterationNeeds
6864 for (const auto &N : CIN) {
6865 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6866 for (int I = 0; I < Cnt; ++I)
6867 RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
6868 LaneBitmask::getNone());
6869 }
6870}
6871
6872bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6873 SMSchedule &SMS) {
6874 IterNeeds CrossIterationNeeds;
6875
6876 // Determine which values will be loop-carried after the schedule is
6877 // applied
6878
6879 for (auto &SU : SSD.SUnits) {
6880 const MachineInstr *MI = SU.getInstr();
6881 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6882 for (auto &S : SU.Succs)
6883 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6884 Register Reg = S.getReg();
6885 if (Reg.isVirtual())
6886 CrossIterationNeeds[Reg.id()].set(0);
6887 } else if (S.isAssignedRegDep()) {
6888 int OStg = SMS.stageScheduled(S.getSUnit());
6889 if (OStg >= 0 && OStg != Stg) {
6890 Register Reg = S.getReg();
6891 if (Reg.isVirtual())
6892 CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
6893 }
6894 }
6895 }
6896
6897 // Determine more-or-less what the proposed schedule (reversed) is going to
6898 // be; it might not be quite the same because the within-cycle ordering
6899 // created by SMSchedule depends upon changes to help with address offsets and
6900 // the like.
6901 std::vector<SUnit *> ProposedSchedule;
6902 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6903 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6904 ++Stage) {
6905 std::deque<SUnit *> Instrs =
6906 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6907 std::sort(Instrs.begin(), Instrs.end(),
6908 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6909 for (SUnit *SU : Instrs)
6910 ProposedSchedule.push_back(SU);
6911 }
6912
6913 // Learn whether the last use/def of each cross-iteration register is a use or
6914 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6915 // and we do not have to add the pressure.
6916 for (auto *SU : ProposedSchedule)
6917 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6918 ++OperI) {
6919 auto MO = *OperI;
6920 if (!MO.isReg() || !MO.getReg())
6921 continue;
6922 Register Reg = MO.getReg();
6923 auto CIter = CrossIterationNeeds.find(Reg.id());
6924 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6925 CIter->second[SEEN_AS_LIVE])
6926 continue;
6927 if (MO.isDef() && !MO.isDead())
6928 CIter->second.set(SEEN_AS_LIVE);
6929 else if (MO.isUse())
6930 CIter->second.set(LAST_IS_USE);
6931 }
6932 for (auto &CI : CrossIterationNeeds)
6933 CI.second.reset(LAST_IS_USE);
6934
6935 RegionPressure RecRegPressure;
6936 RegPressureTracker RPTracker(RecRegPressure);
6937 RegisterClassInfo RegClassInfo;
6938 RegClassInfo.runOnMachineFunction(*MF);
6939 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6940 EndLoop->getParent()->end(), false, false);
6941
6942 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6943
6944 for (auto *SU : ProposedSchedule) {
6945 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6946 RPTracker.setPos(std::next(CurInstI));
6947 RPTracker.recede();
6948
6949 // Track what cross-iteration registers would be seen as live
6950 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6951 auto MO = *OperI;
6952 if (!MO.isReg() || !MO.getReg())
6953 continue;
6954 Register Reg = MO.getReg();
6955 if (MO.isDef() && !MO.isDead()) {
6956 auto CIter = CrossIterationNeeds.find(Reg.id());
6957 if (CIter != CrossIterationNeeds.end()) {
6958 CIter->second.reset(0);
6959 CIter->second.reset(SEEN_AS_LIVE);
6960 }
6961 }
6962 }
6963 for (auto &S : SU->Preds) {
6964 auto Stg = SMS.stageScheduled(SU);
6965 if (S.isAssignedRegDep()) {
6966 Register Reg = S.getReg();
6967 auto CIter = CrossIterationNeeds.find(Reg.id());
6968 if (CIter != CrossIterationNeeds.end()) {
6969 auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
6970 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6971 if (Stg - Stg2 < MAX_STAGES)
6972 CIter->second.set(Stg - Stg2);
6973 CIter->second.set(SEEN_AS_LIVE);
6974 }
6975 }
6976 }
6977
6978 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6979 }
6980
6981 auto &P = RPTracker.getPressure().MaxSetPressure;
6982 for (unsigned I = 0, E = P.size(); I < E; ++I) {
6983 // Exclude some Neon register classes.
6984 if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
6985 I == ARM::DTriple_with_qsub_0_in_QPR)
6986 continue;
6987
6988 if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
6989 return true;
6990 }
6991 }
6992 return false;
6993}
6994
6995} // namespace
6996
6997std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6998 ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
6999 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
7000 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
7001 if (Preheader == LoopBB)
7002 Preheader = *std::next(LoopBB->pred_begin());
7003
7004 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
7005 // If the branch is a Bcc, then the CPSR should be set somewhere within the
7006 // block. We need to determine the reaching definition of CPSR so that
7007 // it can be marked as non-pipelineable, allowing the pipeliner to force
7008 // it into stage 0 or give up if it cannot or will not do so.
7009 MachineInstr *CCSetter = nullptr;
7010 for (auto &L : LoopBB->instrs()) {
7011 if (L.isCall())
7012 return nullptr;
7013 if (isCPSRDefined(L))
7014 CCSetter = &L;
7015 }
7016 if (CCSetter)
7017 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
7018 else
7019 return nullptr; // Unable to find the CC setter, so unable to guarantee
7020 // that the pipeline will work
7021 }
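// Illustrative shape of a loop this path accepts (schematic, hedged, not
// taken from the original source), in the same loose notation as the
// comment below:
//   loop:
//     %cc = t2SUBri %count, 1, ... (sets CPSR; becomes CCSetter)
//     ...
//     t2Bcc %loop, ne, CPSR
// Any call inside the block makes the analysis give up and return nullptr.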
7022
7023 // Recognize:
7024 // preheader:
7025 // %1 = t2DoLoopStart %0
7026 // loop:
7027 // %2 = phi %1, <not loop>, %..., %loop
7028 // %3 = t2LoopDec %2, <imm>
7029 // t2LoopEnd %3, %loop
7030
7031 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
7032 for (auto &L : LoopBB->instrs())
7033 if (L.isCall())
7034 return nullptr;
7035 else if (isVCTP(&L))
7036 return nullptr;
7037 Register LoopDecResult = I->getOperand(0).getReg();
7038 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
7039 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
7040 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
7041 return nullptr;
7042 MachineInstr *LoopStart = nullptr;
7043 for (auto &J : Preheader->instrs())
7044 if (J.getOpcode() == ARM::t2DoLoopStart)
7045 LoopStart = &J;
7046 if (!LoopStart)
7047 return nullptr;
7048 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
7049 }
7050 return nullptr;
7051}