1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
19#include "ARMSubtarget.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/DebugLoc.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/GlobalValue.h"
51#include "llvm/IR/Module.h"
52#include "llvm/MC/MCAsmInfo.h"
53#include "llvm/MC/MCInstrDesc.h"
59#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <new>
68#include <utility>
69#include <vector>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "arm-instrinfo"
74
75#define GET_INSTRINFO_CTOR_DTOR
76#include "ARMGenInstrInfo.inc"
77
78static cl::opt<bool>
79EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
80 cl::desc("Enable ARM 2-addr to 3-addr conv"));
81
82/// ARM_MLxEntry - Record information about MLA / MLS instructions.
83struct ARM_MLxEntry {
84 uint16_t MLxOpc; // MLA / MLS opcode
85 uint16_t MulOpc; // Expanded multiplication opcode
86 uint16_t AddSubOpc; // Expanded add / sub opcode
87 bool NegAcc; // True if the acc is negated before the add / sub.
88 bool HasLane; // True if instruction has an extra "lane" operand.
89};
90
91static const ARM_MLxEntry ARM_MLxTable[] = {
92 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
93 // fp scalar ops
94 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
95 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
96 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
97 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
98 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
99 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
100 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
101 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
102
103 // fp SIMD ops
104 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
105 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
106 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
107 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
108 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
109 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
110 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
111 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
112};
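// Illustrative reading of one row above (not upstream commentary): the entry
// { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false } says a VMLAS computing
// dst = acc + a * b may be expanded into tmp = VMULS a, b followed by
// dst = VADDS acc, tmp; NegAcc and HasLane mark the negated-accumulator and
// lane-indexed variants.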
113
114ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
115 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
116 Subtarget(STI) {
117 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
118 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
119 llvm_unreachable("Duplicated entries?");
120 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
121 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
122 }
123}
124
125// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
126// currently defaults to no prepass hazard recognizer.
127ScheduleHazardRecognizer *
128ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
129 const ScheduleDAG *DAG) const {
130 if (usePreRAHazardRecognizer()) {
131 const InstrItineraryData *II =
132 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
133 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
134 }
135 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
136}
137
138// Called during:
139// - pre-RA scheduling
140// - post-RA scheduling when FeatureUseMISched is set
141ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
142 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
143 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
144
145 // We would like to restrict this hazard recognizer to only
146 // post-RA scheduling; we can tell that we're post-RA because we don't
147 // track VRegLiveness.
148 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
149 // banks banked on bit 2. Assume that TCMs are in use.
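 // (The 0x4 bank mask passed below corresponds to address bit 2, i.e. it
 // selects which of the two DTCM banks an access falls into.)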
150 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
151 MHR->AddHazardRecognizer(
152 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
153
154 // Not inserting ARMHazardRecognizerFPMLx because that would change
155 // legacy behavior
156
157 auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
158 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
159 return MHR;
160}
161
162// Called during post-RA scheduling when FeatureUseMISched is not set
163ScheduleHazardRecognizer *ARMBaseInstrInfo::
164CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
165 const ScheduleDAG *DAG) const {
166 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
167
168 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
169 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
170
171 auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
172 if (BHR)
173 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
174 return MHR;
175}
176
177MachineInstr *
178ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
179 LiveIntervals *LIS) const {
180 // FIXME: Thumb2 support.
181
182 if (!EnableARM3Addr)
183 return nullptr;
184
185 MachineFunction &MF = *MI.getParent()->getParent();
186 uint64_t TSFlags = MI.getDesc().TSFlags;
187 bool isPre = false;
188 switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
189 default: return nullptr;
190 case ARMII::IndexModePre:
191 isPre = true;
192 break;
193 case ARMII::IndexModePost:
194 break;
195 }
196
197 // Try splitting an indexed load/store to an un-indexed one plus an add/sub
198 // operation.
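 // For example (illustrative): a pre-indexed "ldr r0, [r1, #4]!" becomes
 // "add r1, r1, #4" followed by "ldr r0, [r1]", while a post-indexed form
 // keeps the load first and emits the add/sub of the offset afterwards.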
199 unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
200 if (MemOpc == 0)
201 return nullptr;
202
203 MachineInstr *UpdateMI = nullptr;
204 MachineInstr *MemMI = nullptr;
205 unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
206 const MCInstrDesc &MCID = MI.getDesc();
207 unsigned NumOps = MCID.getNumOperands();
208 bool isLoad = !MI.mayStore();
209 const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
210 const MachineOperand &Base = MI.getOperand(2);
211 const MachineOperand &Offset = MI.getOperand(NumOps - 3);
212 Register WBReg = WB.getReg();
213 Register BaseReg = Base.getReg();
214 Register OffReg = Offset.getReg();
215 unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
216 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
217 switch (AddrMode) {
218 default: llvm_unreachable("Unknown indexed op!");
219 case ARMII::AddrMode2: {
220 bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
221 unsigned Amt = ARM_AM::getAM2Offset(OffImm);
222 if (OffReg == 0) {
223 if (ARM_AM::getSOImmVal(Amt) == -1)
224 // Can't encode it in a so_imm operand. This transformation will
225 // add more than 1 instruction. Abandon!
226 return nullptr;
227 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
228 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
229 .addReg(BaseReg)
230 .addImm(Amt)
231 .add(predOps(Pred))
232 .add(condCodeOp());
233 } else if (Amt != 0) {
234 ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
235 unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
236 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
237 get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
238 .addReg(BaseReg)
239 .addReg(OffReg)
240 .addReg(0)
241 .addImm(SOOpc)
242 .add(predOps(Pred))
243 .add(condCodeOp());
244 } else
245 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
246 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
247 .addReg(BaseReg)
248 .addReg(OffReg)
249 .add(predOps(Pred))
250 .add(condCodeOp());
251 break;
252 }
253 case ARMII::AddrMode3 : {
254 bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
255 unsigned Amt = ARM_AM::getAM3Offset(OffImm);
256 if (OffReg == 0)
257 // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
258 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
259 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
260 .addReg(BaseReg)
261 .addImm(Amt)
262 .add(predOps(Pred))
263 .add(condCodeOp());
264 else
265 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
266 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
267 .addReg(BaseReg)
268 .addReg(OffReg)
269 .add(predOps(Pred))
270 .add(condCodeOp());
271 break;
272 }
273 }
274
275 std::vector<MachineInstr*> NewMIs;
276 if (isPre) {
277 if (isLoad)
278 MemMI =
279 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
280 .addReg(WBReg)
281 .addImm(0)
282 .addImm(Pred);
283 else
284 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
285 .addReg(MI.getOperand(1).getReg())
286 .addReg(WBReg)
287 .addReg(0)
288 .addImm(0)
289 .addImm(Pred);
290 NewMIs.push_back(MemMI);
291 NewMIs.push_back(UpdateMI);
292 } else {
293 if (isLoad)
294 MemMI =
295 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
296 .addReg(BaseReg)
297 .addImm(0)
298 .addImm(Pred);
299 else
300 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
301 .addReg(MI.getOperand(1).getReg())
302 .addReg(BaseReg)
303 .addReg(0)
304 .addImm(0)
305 .addImm(Pred);
306 if (WB.isDead())
307 UpdateMI->getOperand(0).setIsDead();
308 NewMIs.push_back(UpdateMI);
309 NewMIs.push_back(MemMI);
310 }
311
312 // Transfer LiveVariables states, kill / dead info.
313 if (LV) {
314 for (const MachineOperand &MO : MI.operands()) {
315 if (MO.isReg() && MO.getReg().isVirtual()) {
316 Register Reg = MO.getReg();
317
318 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
319 if (MO.isDef()) {
320 MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
321 if (MO.isDead())
322 LV->addVirtualRegisterDead(Reg, *NewMI);
323 }
324 if (MO.isUse() && MO.isKill()) {
325 for (unsigned j = 0; j < 2; ++j) {
326 // Look at the two new MI's in reverse order.
327 MachineInstr *NewMI = NewMIs[j];
328 if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr))
329 continue;
330 LV->addVirtualRegisterKilled(Reg, *NewMI);
331 if (VI.removeKill(MI))
332 VI.Kills.push_back(NewMI);
333 break;
334 }
335 }
336 }
337 }
338 }
339
340 MachineBasicBlock &MBB = *MI.getParent();
341 MBB.insert(MI, NewMIs[1]);
342 MBB.insert(MI, NewMIs[0]);
343 return NewMIs[0];
344}
345
346// Branch analysis.
347// Cond vector output format:
348// 0 elements indicates an unconditional branch
349// 2 elements indicates a conditional branch; the elements are
350// the condition to check and the CPSR.
351// 3 elements indicates a hardware loop end; the elements
352// are the opcode, the operand value to test, and a dummy
353// operand used to pad out to 3 operands.
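// Illustrative examples of the resulting Cond vector: a "bne .LBB0_1"
// terminator sets TBB to .LBB0_1 and Cond to { ARMCC::NE, <CPSR operand> };
// a t2LoopEnd terminator produces { ARM::t2LoopEnd, <counter operand>, 0 }.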
354bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
355 MachineBasicBlock *&TBB,
356 MachineBasicBlock *&FBB,
357 SmallVectorImpl<MachineOperand> &Cond,
358 bool AllowModify) const {
359 TBB = nullptr;
360 FBB = nullptr;
361
362 MachineBasicBlock::instr_iterator I = MBB.instr_end();
363 if (I == MBB.instr_begin())
364 return false; // Empty blocks are easy.
365 --I;
366
367 // Walk backwards from the end of the basic block until the branch is
368 // analyzed or we give up.
369 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
370 // Flag to be raised on unanalyzeable instructions. This is useful in cases
371 // where we want to clean up on the end of the basic block before we bail
372 // out.
373 bool CantAnalyze = false;
374
375 // Skip over DEBUG values, predicated nonterminators and speculation
376 // barrier terminators.
377 while (I->isDebugInstr() || !I->isTerminator() ||
378 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
379 I->getOpcode() == ARM::t2DoLoopStartTP){
380 if (I == MBB.instr_begin())
381 return false;
382 --I;
383 }
384
385 if (isIndirectBranchOpcode(I->getOpcode()) ||
386 isJumpTableBranchOpcode(I->getOpcode())) {
387 // Indirect branches and jump tables can't be analyzed, but we still want
388 // to clean up any instructions at the tail of the basic block.
389 CantAnalyze = true;
390 } else if (isUncondBranchOpcode(I->getOpcode())) {
391 TBB = I->getOperand(0).getMBB();
392 } else if (isCondBranchOpcode(I->getOpcode())) {
393 // Bail out if we encounter multiple conditional branches.
394 if (!Cond.empty())
395 return true;
396
397 assert(!FBB && "FBB should have been null.");
398 FBB = TBB;
399 TBB = I->getOperand(0).getMBB();
400 Cond.push_back(I->getOperand(1));
401 Cond.push_back(I->getOperand(2));
402 } else if (I->isReturn()) {
403 // Returns can't be analyzed, but we should run cleanup.
404 CantAnalyze = true;
405 } else if (I->getOpcode() == ARM::t2LoopEnd &&
406 MBB.getParent()
407 ->getSubtarget<ARMSubtarget>()
408 .enableMachinePipeliner()) {
409 if (!Cond.empty())
410 return true;
411 FBB = TBB;
412 TBB = I->getOperand(1).getMBB();
413 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
414 Cond.push_back(I->getOperand(0));
415 Cond.push_back(MachineOperand::CreateImm(0));
416 } else {
417 // We encountered an unrecognized terminator. Bail out immediately.
418 return true;
419 }
420
421 // Cleanup code - to be run for unpredicated unconditional branches and
422 // returns.
423 if (!isPredicated(*I) &&
424 (isUncondBranchOpcode(I->getOpcode()) ||
425 isIndirectBranchOpcode(I->getOpcode()) ||
426 isJumpTableBranchOpcode(I->getOpcode()) ||
427 I->isReturn())) {
428 // Forget any previous conditional branch information - it no longer applies.
429 Cond.clear();
430 FBB = nullptr;
431
432 // If we can modify the function, delete everything below this
433 // unconditional branch.
434 if (AllowModify) {
435 MachineBasicBlock::iterator DI = std::next(I);
436 while (DI != MBB.instr_end()) {
437 MachineInstr &InstToDelete = *DI;
438 ++DI;
439 // Speculation barriers must not be deleted.
440 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
441 continue;
442 InstToDelete.eraseFromParent();
443 }
444 }
445 }
446
447 if (CantAnalyze) {
448 // We may not be able to analyze the block, but we could still have
449 // an unconditional branch as the last instruction in the block, which
450 // just branches to layout successor. If this is the case, then just
451 // remove it if we're allowed to make modifications.
452 if (AllowModify && !isPredicated(MBB.back()) &&
453 isUncondBranchOpcode(MBB.back().getOpcode()) &&
454 TBB && MBB.isLayoutSuccessor(TBB))
455 removeBranch(MBB);
456 return true;
457 }
458
459 if (I == MBB.instr_begin())
460 return false;
461
462 --I;
463 }
464
465 // We made it past the terminators without bailing out - we must have
466 // analyzed this branch successfully.
467 return false;
468}
469
470unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
471 int *BytesRemoved) const {
472 assert(!BytesRemoved && "code size not handled");
473
474 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
475 if (I == MBB.end())
476 return 0;
477
478 if (!isUncondBranchOpcode(I->getOpcode()) &&
479 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
480 return 0;
481
482 // Remove the branch.
483 I->eraseFromParent();
484
485 I = MBB.end();
486
487 if (I == MBB.begin()) return 1;
488 --I;
489 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
490 return 1;
491
492 // Remove the branch.
493 I->eraseFromParent();
494 return 2;
495}
496
497unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
498 MachineBasicBlock *TBB,
499 MachineBasicBlock *FBB,
500 ArrayRef<MachineOperand> Cond,
501 const DebugLoc &DL,
502 int *BytesAdded) const {
503 assert(!BytesAdded && "code size not handled");
504 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
505 int BOpc = !AFI->isThumbFunction()
506 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
507 int BccOpc = !AFI->isThumbFunction()
508 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
509 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
510
511 // Shouldn't be a fall through.
512 assert(TBB && "insertBranch must not be told to insert a fallthrough");
513 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
514 "ARM branch conditions have two or three components!");
515
516 // For conditional branches, we use addOperand to preserve CPSR flags.
517
518 if (!FBB) {
519 if (Cond.empty()) { // Unconditional branch?
520 if (isThumb)
521 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
522 else
523 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
524 } else if (Cond.size() == 2) {
525 BuildMI(&MBB, DL, get(BccOpc))
526 .addMBB(TBB)
527 .addImm(Cond[0].getImm())
528 .add(Cond[1]);
529 } else
530 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
531 return 1;
532 }
533
534 // Two-way conditional branch.
535 if (Cond.size() == 2)
536 BuildMI(&MBB, DL, get(BccOpc))
537 .addMBB(TBB)
538 .addImm(Cond[0].getImm())
539 .add(Cond[1]);
540 else if (Cond.size() == 3)
541 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
542 if (isThumb)
543 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
544 else
545 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
546 return 2;
547}
548
549bool ARMBaseInstrInfo::
550reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
551 if (Cond.size() == 2) {
552 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
553 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
554 return false;
555 }
556 return true;
557}
558
559bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
560 if (MI.isBundle()) {
561 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
562 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
563 while (++I != E && I->isInsideBundle()) {
564 int PIdx = I->findFirstPredOperandIdx();
565 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
566 return true;
567 }
568 return false;
569 }
570
571 int PIdx = MI.findFirstPredOperandIdx();
572 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
573}
574
575std::string ARMBaseInstrInfo::createMIROperandComment(
576 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
577 const TargetRegisterInfo *TRI) const {
578
579 // First, let's see if there is a generic comment for this operand
580 std::string GenericComment =
581 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
582 if (!GenericComment.empty())
583 return GenericComment;
584
585 // If not, check if we have an immediate operand.
586 if (!Op.isImm())
587 return std::string();
588
589 // And print its corresponding condition code if the immediate is a
590 // predicate.
591 int FirstPredOp = MI.findFirstPredOperandIdx();
592 if (FirstPredOp != (int) OpIdx)
593 return std::string();
594
595 std::string CC = "CC::";
596 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
597 return CC;
598}
599
600bool ARMBaseInstrInfo::PredicateInstruction(
601 MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
602 unsigned Opc = MI.getOpcode();
603 if (isUncondBranchOpcode(Opc)) {
604 MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
605 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
606 .addImm(Pred[0].getImm())
607 .addReg(Pred[1].getReg());
608 return true;
609 }
610
611 int PIdx = MI.findFirstPredOperandIdx();
612 if (PIdx != -1) {
613 MachineOperand &PMO = MI.getOperand(PIdx);
614 PMO.setImm(Pred[0].getImm());
615 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
616
617 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
618 // IT block. This affects how they are printed.
619 const MCInstrDesc &MCID = MI.getDesc();
620 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
621 assert(MCID.operands()[1].isOptionalDef() &&
622 "CPSR def isn't expected operand");
623 assert((MI.getOperand(1).isDead() ||
624 MI.getOperand(1).getReg() != ARM::CPSR) &&
625 "if conversion tried to stop defining used CPSR");
626 MI.getOperand(1).setReg(ARM::NoRegister);
627 }
628
629 return true;
630 }
631 return false;
632}
633
634bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
635 ArrayRef<MachineOperand> Pred2) const {
636 if (Pred1.size() > 2 || Pred2.size() > 2)
637 return false;
638
639 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
640 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
641 if (CC1 == CC2)
642 return true;
643
644 switch (CC1) {
645 default:
646 return false;
647 case ARMCC::AL:
648 return true;
649 case ARMCC::HS:
650 return CC2 == ARMCC::HI;
651 case ARMCC::LS:
652 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
653 case ARMCC::GE:
654 return CC2 == ARMCC::GT;
655 case ARMCC::LE:
656 return CC2 == ARMCC::LT;
657 }
658}
659
660bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
661 std::vector<MachineOperand> &Pred,
662 bool SkipDead) const {
663 bool Found = false;
664 for (const MachineOperand &MO : MI.operands()) {
665 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
666 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
667 if (ClobbersCPSR || IsCPSR) {
668
669 // Filter out T1 instructions that have a dead CPSR,
670 // allowing IT blocks to be generated containing T1 instructions
671 const MCInstrDesc &MCID = MI.getDesc();
672 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
673 SkipDead)
674 continue;
675
676 Pred.push_back(MO);
677 Found = true;
678 }
679 }
680
681 return Found;
682}
683
684bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
685 for (const auto &MO : MI.operands())
686 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
687 return true;
688 return false;
689}
690
691static bool isEligibleForITBlock(const MachineInstr *MI) {
692 switch (MI->getOpcode()) {
693 default: return true;
694 case ARM::tADC: // ADC (register) T1
695 case ARM::tADDi3: // ADD (immediate) T1
696 case ARM::tADDi8: // ADD (immediate) T2
697 case ARM::tADDrr: // ADD (register) T1
698 case ARM::tAND: // AND (register) T1
699 case ARM::tASRri: // ASR (immediate) T1
700 case ARM::tASRrr: // ASR (register) T1
701 case ARM::tBIC: // BIC (register) T1
702 case ARM::tEOR: // EOR (register) T1
703 case ARM::tLSLri: // LSL (immediate) T1
704 case ARM::tLSLrr: // LSL (register) T1
705 case ARM::tLSRri: // LSR (immediate) T1
706 case ARM::tLSRrr: // LSR (register) T1
707 case ARM::tMUL: // MUL T1
708 case ARM::tMVN: // MVN (register) T1
709 case ARM::tORR: // ORR (register) T1
710 case ARM::tROR: // ROR (register) T1
711 case ARM::tRSB: // RSB (immediate) T1
712 case ARM::tSBC: // SBC (register) T1
713 case ARM::tSUBi3: // SUB (immediate) T1
714 case ARM::tSUBi8: // SUB (immediate) T2
715 case ARM::tSUBrr: // SUB (register) T1
716 return !ARMBaseInstrInfo::isCPSRDefined(*MI);
717 }
718}
719
720/// isPredicable - Return true if the specified instruction can be predicated.
721/// By default, this returns true for every instruction with a
722/// PredicateOperand.
723bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
724 if (!MI.isPredicable())
725 return false;
726
727 if (MI.isBundle())
728 return false;
729
730 if (!isEligibleForITBlock(&MI))
731 return false;
732
733 const MachineFunction *MF = MI.getParent()->getParent();
734 const ARMFunctionInfo *AFI =
735 MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
736
737 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
738 // In their ARM encoding, they can't be encoded in a conditional form.
739 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
740 return false;
741
742 // Make indirect control flow changes unpredicable when SLS mitigation is
743 // enabled.
744 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
745 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
746 return false;
747 if (ST.hardenSlsBlr() && isIndirectCall(MI))
748 return false;
749
750 if (AFI->isThumb2Function()) {
751 if (getSubtarget().restrictIT())
752 return isV8EligibleForIT(&MI);
753 }
754
755 return true;
756}
757
758namespace llvm {
759
760template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
761 for (const MachineOperand &MO : MI->operands()) {
762 if (!MO.isReg() || MO.isUndef() || MO.isUse())
763 continue;
764 if (MO.getReg() != ARM::CPSR)
765 continue;
766 if (!MO.isDead())
767 return false;
768 }
769 // all definitions of CPSR are dead
770 return true;
771}
772
773} // end namespace llvm
774
775/// GetInstSize - Return the size of the specified MachineInstr.
776///
777unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
778 const MachineBasicBlock &MBB = *MI.getParent();
779 const MachineFunction *MF = MBB.getParent();
780 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
781
782 const MCInstrDesc &MCID = MI.getDesc();
783
784 switch (MI.getOpcode()) {
785 default:
786 // Return the size specified in .td file. If there's none, return 0, as we
787 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
788 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
789 // contrast to AArch64 instructions which have a default size of 4 bytes for
790 // example.
791 return MCID.getSize();
792 case TargetOpcode::BUNDLE:
793 return getInstBundleLength(MI);
794 case ARM::CONSTPOOL_ENTRY:
795 case ARM::JUMPTABLE_INSTS:
796 case ARM::JUMPTABLE_ADDRS:
797 case ARM::JUMPTABLE_TBB:
798 case ARM::JUMPTABLE_TBH:
799 // If this machine instr is a constant pool entry, its size is recorded as
800 // operand #2.
801 return MI.getOperand(2).getImm();
802 case ARM::SPACE:
803 return MI.getOperand(1).getImm();
804 case ARM::INLINEASM:
805 case ARM::INLINEASM_BR: {
806 // If this machine instr is an inline asm, measure it.
807 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
808 if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
809 Size = alignTo(Size, 4);
810 return Size;
811 }
812 }
813}
814
815unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
816 unsigned Size = 0;
817 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
818 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
819 while (++I != E && I->isInsideBundle()) {
820 assert(!I->isBundle() && "No nested bundle!");
821 Size += getInstSizeInBytes(*I);
822 }
823 return Size;
824}
825
826void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
827 MachineBasicBlock::iterator I,
828 MCRegister DestReg, bool KillSrc,
829 const ARMSubtarget &Subtarget) const {
830 unsigned Opc = Subtarget.isThumb()
831 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
832 : ARM::MRS;
833
834 MachineInstrBuilder MIB =
835 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
836
837 // There is only 1 A/R class MRS instruction, and it always refers to
838 // APSR. However, there are lots of other possibilities on M-class cores.
839 if (Subtarget.isMClass())
840 MIB.addImm(0x800);
841
842 MIB.add(predOps(ARMCC::AL))
843 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
844}
845
846void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
847 MachineBasicBlock::iterator I,
848 MCRegister SrcReg, bool KillSrc,
849 const ARMSubtarget &Subtarget) const {
850 unsigned Opc = Subtarget.isThumb()
851 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
852 : ARM::MSR;
853
854 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
855
856 if (Subtarget.isMClass())
857 MIB.addImm(0x800);
858 else
859 MIB.addImm(8);
860
861 MIB.addReg(SrcReg, getKillRegState(KillSrc))
862 .add(predOps(ARMCC::AL))
863 .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
864}
865
866void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
867 MIB.addImm(ARMVCC::None);
868 MIB.addReg(0);
869 MIB.addReg(0); // tp_reg
870}
871
872void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
873 Register DestReg) {
874 addUnpredicatedMveVpredNOp(MIB);
875 MIB.addReg(DestReg, RegState::Undef);
876}
877
878void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
879 MIB.addImm(Cond);
880 MIB.addReg(ARM::VPR, RegState::Implicit);
881 MIB.addReg(0); // tp_reg
882}
883
884void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
885 unsigned Cond, unsigned Inactive) {
886 addPredicatedMveVpredNOp(MIB, Cond);
887 MIB.addReg(Inactive);
888}
889
890void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
891 MachineBasicBlock::iterator I,
892 const DebugLoc &DL, MCRegister DestReg,
893 MCRegister SrcReg, bool KillSrc,
894 bool RenamableDest,
895 bool RenamableSrc) const {
896 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
897 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
898
899 if (GPRDest && GPRSrc) {
900 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
901 .addReg(SrcReg, getKillRegState(KillSrc))
902 .add(predOps(ARMCC::AL))
903 .add(condCodeOp());
904 return;
905 }
906
907 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
908 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
909
910 unsigned Opc = 0;
911 if (SPRDest && SPRSrc)
912 Opc = ARM::VMOVS;
913 else if (GPRDest && SPRSrc)
914 Opc = ARM::VMOVRS;
915 else if (SPRDest && GPRSrc)
916 Opc = ARM::VMOVSR;
917 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
918 Opc = ARM::VMOVD;
919 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
920 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
921
922 if (Opc) {
923 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
924 MIB.addReg(SrcReg, getKillRegState(KillSrc));
925 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
926 MIB.addReg(SrcReg, getKillRegState(KillSrc));
927 if (Opc == ARM::MVE_VORR)
928 addUnpredicatedMveVpredROp(MIB, DestReg);
929 else if (Opc != ARM::MQPRCopy)
930 MIB.add(predOps(ARMCC::AL));
931 return;
932 }
933
934 // Handle register classes that require multiple instructions.
935 unsigned BeginIdx = 0;
936 unsigned SubRegs = 0;
937 int Spacing = 1;
938
939 // Use VORRq when possible.
940 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
941 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
942 BeginIdx = ARM::qsub_0;
943 SubRegs = 2;
944 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
945 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
946 BeginIdx = ARM::qsub_0;
947 SubRegs = 4;
948 // Fall back to VMOVD.
949 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
950 Opc = ARM::VMOVD;
951 BeginIdx = ARM::dsub_0;
952 SubRegs = 2;
953 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
954 Opc = ARM::VMOVD;
955 BeginIdx = ARM::dsub_0;
956 SubRegs = 3;
957 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
958 Opc = ARM::VMOVD;
959 BeginIdx = ARM::dsub_0;
960 SubRegs = 4;
961 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
962 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
963 BeginIdx = ARM::gsub_0;
964 SubRegs = 2;
965 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
966 Opc = ARM::VMOVD;
967 BeginIdx = ARM::dsub_0;
968 SubRegs = 2;
969 Spacing = 2;
970 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
971 Opc = ARM::VMOVD;
972 BeginIdx = ARM::dsub_0;
973 SubRegs = 3;
974 Spacing = 2;
975 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
976 Opc = ARM::VMOVD;
977 BeginIdx = ARM::dsub_0;
978 SubRegs = 4;
979 Spacing = 2;
980 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
981 !Subtarget.hasFP64()) {
982 Opc = ARM::VMOVS;
983 BeginIdx = ARM::ssub_0;
984 SubRegs = 2;
985 } else if (SrcReg == ARM::CPSR) {
986 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
987 return;
988 } else if (DestReg == ARM::CPSR) {
989 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
990 return;
991 } else if (DestReg == ARM::VPR) {
992 assert(ARM::GPRRegClass.contains(SrcReg));
993 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
994 .addReg(SrcReg, getKillRegState(KillSrc))
995 .add(predOps(ARMCC::AL));
996 return;
997 } else if (SrcReg == ARM::VPR) {
998 assert(ARM::GPRRegClass.contains(DestReg));
999 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
1000 .addReg(SrcReg, getKillRegState(KillSrc))
1001 .add(predOps(ARMCC::AL));
1002 return;
1003 } else if (DestReg == ARM::FPSCR_NZCV) {
1004 assert(ARM::GPRRegClass.contains(SrcReg));
1005 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1006 .addReg(SrcReg, getKillRegState(KillSrc))
1007 .add(predOps(ARMCC::AL));
1008 return;
1009 } else if (SrcReg == ARM::FPSCR_NZCV) {
1010 assert(ARM::GPRRegClass.contains(DestReg));
1011 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1012 .addReg(SrcReg, getKillRegState(KillSrc))
1013 .add(predOps(ARMCC::AL));
1014 return;
1015 }
1016
1017 assert(Opc && "Impossible reg-to-reg copy");
1018
1019 const TargetRegisterInfo *TRI = &getRegisterInfo();
1020 MachineInstrBuilder Mov;
1021
1022 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
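 // (Illustrative: copying q0_q1 into q1_q2 would clobber q1, a still-needed
 // source sub-register, if done front-to-back, so the per-sub-register copies
 // are emitted in reverse order in that case.)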
1023 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1024 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1025 Spacing = -Spacing;
1026 }
1027#ifndef NDEBUG
1028 SmallSet<unsigned, 4> DstRegs;
1029#endif
1030 for (unsigned i = 0; i != SubRegs; ++i) {
1031 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1032 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1033 assert(Dst && Src && "Bad sub-register");
1034#ifndef NDEBUG
1035 assert(!DstRegs.count(Src) && "destructive vector copy");
1036 DstRegs.insert(Dst);
1037#endif
1038 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1039 // VORR (NEON or MVE) takes two source operands.
1040 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
1041 Mov.addReg(Src);
1042 }
1043 // MVE VORR takes predicate operands in place of an ordinary condition.
1044 if (Opc == ARM::MVE_VORR)
1045 addUnpredicatedMveVpredROp(Mov, Dst);
1046 else
1047 Mov = Mov.add(predOps(ARMCC::AL));
1048 // MOVr can set CC.
1049 if (Opc == ARM::MOVr)
1050 Mov = Mov.add(condCodeOp());
1051 }
1052 // Add implicit super-register defs and kills to the last instruction.
1053 Mov->addRegisterDefined(DestReg, TRI);
1054 if (KillSrc)
1055 Mov->addRegisterKilled(SrcReg, TRI);
1056}
1057
1058std::optional<DestSourcePair>
1059ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1060 // VMOVRRD is also a copy instruction but it requires
1061 // special handling. It is a more complex form of copy and so it is
1062 // not considered here. For recognition of such instructions the
1063 // isExtractSubregLike MI interface function could be used.
1064 //
1065 // VORRq is considered as a move only if two inputs are
1066 // the same register.
1067 if (!MI.isMoveReg() ||
1068 (MI.getOpcode() == ARM::VORRq &&
1069 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1070 return std::nullopt;
1071 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1072}
1073
1074std::optional<ParamLoadedValue>
1075ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
1076 Register Reg) const {
1077 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
1078 Register DstReg = DstSrcPair->Destination->getReg();
1079
1080 // TODO: We don't handle cases where the forwarding reg is narrower/wider
1081 // than the copy registers. Consider for example:
1082 //
1083 // s16 = VMOVS s0
1084 // s17 = VMOVS s1
1085 // call @callee(d0)
1086 //
1087 // We'd like to describe the call site value of d0 as d8, but this requires
1088 // gathering and merging the descriptions for the two VMOVS instructions.
1089 //
1090 // We also don't handle the reverse situation, where the forwarding reg is
1091 // narrower than the copy destination:
1092 //
1093 // d8 = VMOVD d0
1094 // call @callee(s1)
1095 //
1096 // We need to produce a fragment description (the call site value of s1 is
1097 // /not/ just d8).
1098 if (DstReg != Reg)
1099 return std::nullopt;
1100 }
1101 return TargetInstrInfo::describeLoadedValue(MI, Reg);
1102}
1103
1104const MachineInstrBuilder &
1105ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
1106 unsigned SubIdx, unsigned State,
1107 const TargetRegisterInfo *TRI) const {
1108 if (!SubIdx)
1109 return MIB.addReg(Reg, State);
1110
1111 if (Register::isPhysicalRegister(Reg))
1112 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1113 return MIB.addReg(Reg, State, SubIdx);
1114}
1115
1116void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
1117 MachineBasicBlock::iterator I,
1118 Register SrcReg, bool isKill, int FI,
1119 const TargetRegisterClass *RC,
1120 const TargetRegisterInfo *TRI,
1121 Register VReg,
1122 MachineInstr::MIFlag Flags) const {
1123 MachineFunction &MF = *MBB.getParent();
1124 MachineFrameInfo &MFI = MF.getFrameInfo();
1125 Align Alignment = MFI.getObjectAlign(FI);
1126
1127 MachineMemOperand *MMO = MF.getMachineMemOperand(
1128 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
1129 MFI.getObjectSize(FI), Alignment);
1130
1131 switch (TRI->getSpillSize(*RC)) {
1132 case 2:
1133 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1134 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
1135 .addReg(SrcReg, getKillRegState(isKill))
1136 .addFrameIndex(FI)
1137 .addImm(0)
1138 .addMemOperand(MMO)
1139 .add(predOps(ARMCC::AL));
1140 } else
1141 llvm_unreachable("Unknown reg class!");
1142 break;
1143 case 4:
1144 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1145 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1146 .addReg(SrcReg, getKillRegState(isKill))
1147 .addFrameIndex(FI)
1148 .addImm(0)
1149 .addMemOperand(MMO)
1150 .add(predOps(ARMCC::AL));
1151 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1152 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1153 .addReg(SrcReg, getKillRegState(isKill))
1154 .addFrameIndex(FI)
1155 .addImm(0)
1156 .addMemOperand(MMO)
1157 .add(predOps(ARMCC::AL));
1158 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1159 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
1160 .addReg(SrcReg, getKillRegState(isKill))
1161 .addFrameIndex(FI)
1162 .addImm(0)
1163 .addMemOperand(MMO)
1164 .add(predOps(ARMCC::AL));
1165 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1166 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
1167 .addReg(SrcReg, getKillRegState(isKill))
1168 .addFrameIndex(FI)
1169 .addImm(0)
1170 .addMemOperand(MMO)
1171 .add(predOps(ARMCC::AL));
1172 } else
1173 llvm_unreachable("Unknown reg class!");
1174 break;
1175 case 8:
1176 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1177 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1178 .addReg(SrcReg, getKillRegState(isKill))
1179 .addFrameIndex(FI)
1180 .addImm(0)
1181 .addMemOperand(MMO)
1183 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1184 if (Subtarget.hasV5TEOps()) {
1185 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1186 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1187 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1188 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1190 } else {
1191 // Fallback to STM instruction, which has existed since the dawn of
1192 // time.
1193 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1194 .addFrameIndex(FI)
1195 .addMemOperand(MMO)
1197 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1198 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1199 }
1200 } else
1201 llvm_unreachable("Unknown reg class!");
1202 break;
1203 case 16:
1204 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1205 // Use aligned spills if the stack can be realigned.
1206 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1207 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1208 .addFrameIndex(FI)
1209 .addImm(16)
1210 .addReg(SrcReg, getKillRegState(isKill))
1211 .addMemOperand(MMO)
1213 } else {
1214 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1215 .addReg(SrcReg, getKillRegState(isKill))
1216 .addFrameIndex(FI)
1217 .addMemOperand(MMO)
1219 }
1220 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1221 Subtarget.hasMVEIntegerOps()) {
1222 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1223 MIB.addReg(SrcReg, getKillRegState(isKill))
1224 .addFrameIndex(FI)
1225 .addImm(0)
1226 .addMemOperand(MMO);
1228 } else
1229 llvm_unreachable("Unknown reg class!");
1230 break;
1231 case 24:
1232 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1233 // Use aligned spills if the stack can be realigned.
1234 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1235 Subtarget.hasNEON()) {
1236 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1237 .addFrameIndex(FI)
1238 .addImm(16)
1239 .addReg(SrcReg, getKillRegState(isKill))
1240 .addMemOperand(MMO)
1242 } else {
1244 get(ARM::VSTMDIA))
1245 .addFrameIndex(FI)
1247 .addMemOperand(MMO);
1248 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1249 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1250 AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1251 }
1252 } else
1253 llvm_unreachable("Unknown reg class!");
1254 break;
1255 case 32:
1256 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1257 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1258 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1259 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1260 Subtarget.hasNEON()) {
1261 // FIXME: It's possible to only store part of the QQ register if the
1262 // spilled def has a sub-register index.
1263 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1264 .addFrameIndex(FI)
1265 .addImm(16)
1266 .addReg(SrcReg, getKillRegState(isKill))
1267 .addMemOperand(MMO)
1269 } else if (Subtarget.hasMVEIntegerOps()) {
1270 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1271 .addReg(SrcReg, getKillRegState(isKill))
1272 .addFrameIndex(FI)
1273 .addMemOperand(MMO);
1274 } else {
1276 get(ARM::VSTMDIA))
1277 .addFrameIndex(FI)
1279 .addMemOperand(MMO);
1280 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1281 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1282 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1283 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1284 }
1285 } else
1286 llvm_unreachable("Unknown reg class!");
1287 break;
1288 case 64:
1289 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1290 Subtarget.hasMVEIntegerOps()) {
1291 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1292 .addReg(SrcReg, getKillRegState(isKill))
1293 .addFrameIndex(FI)
1294 .addMemOperand(MMO);
1295 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1296 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1297 .addFrameIndex(FI)
1299 .addMemOperand(MMO);
1300 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1301 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1302 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1303 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1304 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1305 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1306 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1307 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1308 } else
1309 llvm_unreachable("Unknown reg class!");
1310 break;
1311 default:
1312 llvm_unreachable("Unknown reg class!");
1313 }
1314}
1315
1316Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1317 int &FrameIndex) const {
1318 switch (MI.getOpcode()) {
1319 default: break;
1320 case ARM::STRrs:
1321 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1322 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1323 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1324 MI.getOperand(3).getImm() == 0) {
1325 FrameIndex = MI.getOperand(1).getIndex();
1326 return MI.getOperand(0).getReg();
1327 }
1328 break;
1329 case ARM::STRi12:
1330 case ARM::t2STRi12:
1331 case ARM::tSTRspi:
1332 case ARM::VSTRD:
1333 case ARM::VSTRS:
1334 case ARM::VSTRH:
1335 case ARM::VSTR_P0_off:
1336 case ARM::VSTR_FPSCR_NZCVQC_off:
1337 case ARM::MVE_VSTRWU32:
1338 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1339 MI.getOperand(2).getImm() == 0) {
1340 FrameIndex = MI.getOperand(1).getIndex();
1341 return MI.getOperand(0).getReg();
1342 }
1343 break;
1344 case ARM::VST1q64:
1345 case ARM::VST1d64TPseudo:
1346 case ARM::VST1d64QPseudo:
1347 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1348 FrameIndex = MI.getOperand(0).getIndex();
1349 return MI.getOperand(2).getReg();
1350 }
1351 break;
1352 case ARM::VSTMQIA:
1353 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1354 FrameIndex = MI.getOperand(1).getIndex();
1355 return MI.getOperand(0).getReg();
1356 }
1357 break;
1358 case ARM::MQQPRStore:
1359 case ARM::MQQQQPRStore:
1360 if (MI.getOperand(1).isFI()) {
1361 FrameIndex = MI.getOperand(1).getIndex();
1362 return MI.getOperand(0).getReg();
1363 }
1364 break;
1365 }
1366
1367 return 0;
1368}
1369
1370bool ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1371 int &FrameIndex) const {
1372 SmallVector<const MachineMemOperand *, 1> Accesses;
1373 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1374 Accesses.size() == 1) {
1375 FrameIndex =
1376 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1377 ->getFrameIndex();
1378 return true;
1379 }
1380 return false;
1381}
1382
1383void ARMBaseInstrInfo::loadRegFromStackSlot(
1384 MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
1385 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
1386 Register VReg, MachineInstr::MIFlag Flags) const {
1387 DebugLoc DL;
1388 if (I != MBB.end()) DL = I->getDebugLoc();
1389 MachineFunction &MF = *MBB.getParent();
1390 MachineFrameInfo &MFI = MF.getFrameInfo();
1391 const Align Alignment = MFI.getObjectAlign(FI);
1392 MachineMemOperand *MMO = MF.getMachineMemOperand(
1393 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1394 MFI.getObjectSize(FI), Alignment);
1395
1396 switch (TRI->getSpillSize(*RC)) {
1397 case 2:
1398 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1399 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1400 .addFrameIndex(FI)
1401 .addImm(0)
1402 .addMemOperand(MMO)
1404 } else
1405 llvm_unreachable("Unknown reg class!");
1406 break;
1407 case 4:
1408 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1409 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1410 .addFrameIndex(FI)
1411 .addImm(0)
1412 .addMemOperand(MMO)
1414 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1415 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1416 .addFrameIndex(FI)
1417 .addImm(0)
1418 .addMemOperand(MMO)
1420 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1421 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1422 .addFrameIndex(FI)
1423 .addImm(0)
1424 .addMemOperand(MMO)
1426 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1427 BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
1428 .addFrameIndex(FI)
1429 .addImm(0)
1430 .addMemOperand(MMO)
1432 } else
1433 llvm_unreachable("Unknown reg class!");
1434 break;
1435 case 8:
1436 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1437 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1438 .addFrameIndex(FI)
1439 .addImm(0)
1440 .addMemOperand(MMO)
1442 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1443 MachineInstrBuilder MIB;
1444
1445 if (Subtarget.hasV5TEOps()) {
1446 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1447 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1448 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1449 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1451 } else {
1452 // Fallback to LDM instruction, which has existed since the dawn of
1453 // time.
1454 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1455 .addFrameIndex(FI)
1456 .addMemOperand(MMO)
1458 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1459 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1460 }
1461
1462 if (DestReg.isPhysical())
1463 MIB.addReg(DestReg, RegState::ImplicitDefine);
1464 } else
1465 llvm_unreachable("Unknown reg class!");
1466 break;
1467 case 16:
1468 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1469 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1470 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1471 .addFrameIndex(FI)
1472 .addImm(16)
1473 .addMemOperand(MMO)
1475 } else {
1476 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1477 .addFrameIndex(FI)
1478 .addMemOperand(MMO)
1480 }
1481 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1482 Subtarget.hasMVEIntegerOps()) {
1483 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1484 MIB.addFrameIndex(FI)
1485 .addImm(0)
1486 .addMemOperand(MMO);
1488 } else
1489 llvm_unreachable("Unknown reg class!");
1490 break;
1491 case 24:
1492 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1493 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1494 Subtarget.hasNEON()) {
1495 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1496 .addFrameIndex(FI)
1497 .addImm(16)
1498 .addMemOperand(MMO)
1500 } else {
1501 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1502 .addFrameIndex(FI)
1503 .addMemOperand(MMO)
1505 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1506 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1507 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1508 if (DestReg.isPhysical())
1509 MIB.addReg(DestReg, RegState::ImplicitDefine);
1510 }
1511 } else
1512 llvm_unreachable("Unknown reg class!");
1513 break;
1514 case 32:
1515 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1516 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1517 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1518 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1519 Subtarget.hasNEON()) {
1520 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1521 .addFrameIndex(FI)
1522 .addImm(16)
1523 .addMemOperand(MMO)
1525 } else if (Subtarget.hasMVEIntegerOps()) {
1526 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1527 .addFrameIndex(FI)
1528 .addMemOperand(MMO);
1529 } else {
1530 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1531 .addFrameIndex(FI)
1533 .addMemOperand(MMO);
1534 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1535 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1536 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1537 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1538 if (DestReg.isPhysical())
1539 MIB.addReg(DestReg, RegState::ImplicitDefine);
1540 }
1541 } else
1542 llvm_unreachable("Unknown reg class!");
1543 break;
1544 case 64:
1545 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1546 Subtarget.hasMVEIntegerOps()) {
1547 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1548 .addFrameIndex(FI)
1549 .addMemOperand(MMO);
1550 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1551 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1552 .addFrameIndex(FI)
1554 .addMemOperand(MMO);
1555 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1556 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1557 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1558 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1559 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1560 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1561 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1562 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1563 if (DestReg.isPhysical())
1564 MIB.addReg(DestReg, RegState::ImplicitDefine);
1565 } else
1566 llvm_unreachable("Unknown reg class!");
1567 break;
1568 default:
1569 llvm_unreachable("Unknown regclass!");
1570 }
1571}
1572
1573Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1574 int &FrameIndex) const {
1575 switch (MI.getOpcode()) {
1576 default: break;
1577 case ARM::LDRrs:
1578 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1579 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1580 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1581 MI.getOperand(3).getImm() == 0) {
1582 FrameIndex = MI.getOperand(1).getIndex();
1583 return MI.getOperand(0).getReg();
1584 }
1585 break;
1586 case ARM::LDRi12:
1587 case ARM::t2LDRi12:
1588 case ARM::tLDRspi:
1589 case ARM::VLDRD:
1590 case ARM::VLDRS:
1591 case ARM::VLDRH:
1592 case ARM::VLDR_P0_off:
1593 case ARM::VLDR_FPSCR_NZCVQC_off:
1594 case ARM::MVE_VLDRWU32:
1595 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1596 MI.getOperand(2).getImm() == 0) {
1597 FrameIndex = MI.getOperand(1).getIndex();
1598 return MI.getOperand(0).getReg();
1599 }
1600 break;
1601 case ARM::VLD1q64:
1602 case ARM::VLD1d8TPseudo:
1603 case ARM::VLD1d16TPseudo:
1604 case ARM::VLD1d32TPseudo:
1605 case ARM::VLD1d64TPseudo:
1606 case ARM::VLD1d8QPseudo:
1607 case ARM::VLD1d16QPseudo:
1608 case ARM::VLD1d32QPseudo:
1609 case ARM::VLD1d64QPseudo:
1610 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1611 FrameIndex = MI.getOperand(1).getIndex();
1612 return MI.getOperand(0).getReg();
1613 }
1614 break;
1615 case ARM::VLDMQIA:
1616 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1617 FrameIndex = MI.getOperand(1).getIndex();
1618 return MI.getOperand(0).getReg();
1619 }
1620 break;
1621 case ARM::MQQPRLoad:
1622 case ARM::MQQQQPRLoad:
1623 if (MI.getOperand(1).isFI()) {
1624 FrameIndex = MI.getOperand(1).getIndex();
1625 return MI.getOperand(0).getReg();
1626 }
1627 break;
1628 }
1629
1630 return 0;
1631}
1632
1633bool ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1634 int &FrameIndex) const {
1635 SmallVector<const MachineMemOperand *, 1> Accesses;
1636 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1637 Accesses.size() == 1) {
1638 FrameIndex =
1639 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1640 ->getFrameIndex();
1641 return true;
1642 }
1643 return false;
1644}
1645
1646/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1647/// depending on whether the result is used.
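/// For example (illustrative), a MEMCPY pseudo with four scratch registers
/// may be lowered to "ldmia r1!, {r4, r5, r6, r7}" followed by
/// "stmia r0!, {r4, r5, r6, r7}" when the updated pointers are still live.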
1648void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1649 bool isThumb1 = Subtarget.isThumb1Only();
1650 bool isThumb2 = Subtarget.isThumb2();
1651 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1652
1653 DebugLoc dl = MI->getDebugLoc();
1654 MachineBasicBlock *BB = MI->getParent();
1655
1656 MachineInstrBuilder LDM, STM;
1657 if (isThumb1 || !MI->getOperand(1).isDead()) {
1658 MachineOperand LDWb(MI->getOperand(1));
1659 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1660 : isThumb1 ? ARM::tLDMIA_UPD
1661 : ARM::LDMIA_UPD))
1662 .add(LDWb);
1663 } else {
1664 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1665 }
1666
1667 if (isThumb1 || !MI->getOperand(0).isDead()) {
1668 MachineOperand STWb(MI->getOperand(0));
1669 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1670 : isThumb1 ? ARM::tSTMIA_UPD
1671 : ARM::STMIA_UPD))
1672 .add(STWb);
1673 } else {
1674 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1675 }
1676
1677 MachineOperand LDBase(MI->getOperand(3));
1678 LDM.add(LDBase).add(predOps(ARMCC::AL));
1679
1680 MachineOperand STBase(MI->getOperand(2));
1681 STM.add(STBase).add(predOps(ARMCC::AL));
1682
1683 // Sort the scratch registers into ascending order.
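 // (LDM/STM encode their register list as a bitmask and always transfer
 // registers in ascending order, so the scratch registers must be sorted by
 // encoding before being appended to the two instructions below.)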
1684 const TargetRegisterInfo &TRI = getRegisterInfo();
1685 SmallVector<unsigned, 6> ScratchRegs;
1686 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1687 ScratchRegs.push_back(MO.getReg());
1688 llvm::sort(ScratchRegs,
1689 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1690 return TRI.getEncodingValue(Reg1) <
1691 TRI.getEncodingValue(Reg2);
1692 });
1693
1694 for (const auto &Reg : ScratchRegs) {
1695 LDM.addReg(Reg, RegState::Define);
1696 STM.addReg(Reg, RegState::Kill);
1697 }
1698
1699 BB->erase(MI);
1700}
1701
1702bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1703 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1704 expandLoadStackGuard(MI);
1705 MI.getParent()->erase(MI);
1706 return true;
1707 }
1708
1709 if (MI.getOpcode() == ARM::MEMCPY) {
1710 expandMEMCPY(MI);
1711 return true;
1712 }
1713
1714 // This hook gets to expand COPY instructions before they become
1715 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1716 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1717 // changed into a VORR that can go down the NEON pipeline.
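 // For example (illustrative): "s0 = COPY s2" may be rewritten as
 // "d0 = VMOVD d1" once the copy is known to define all of d0, since the
 // D-register move can be turned into a VORR on the NEON pipeline.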
1718 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1719 return false;
1720
1721 // Look for a copy between even S-registers. That is where we keep floats
1722 // when using NEON v2f32 instructions for f32 arithmetic.
1723 Register DstRegS = MI.getOperand(0).getReg();
1724 Register SrcRegS = MI.getOperand(1).getReg();
1725 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1726 return false;
1727
1728 const TargetRegisterInfo *TRI = &getRegisterInfo();
1729 MCRegister DstRegD =
1730 TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, &ARM::DPRRegClass);
1731 MCRegister SrcRegD =
1732 TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, &ARM::DPRRegClass);
1733 if (!DstRegD || !SrcRegD)
1734 return false;
1735
1736 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1737 // legal if the COPY already defines the full DstRegD, and it isn't a
1738 // sub-register insertion.
1739 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1740 return false;
1741
1742 // A dead copy shouldn't show up here, but reject it just in case.
1743 if (MI.getOperand(0).isDead())
1744 return false;
1745
1746 // All clear, widen the COPY.
1747 LLVM_DEBUG(dbgs() << "widening: " << MI);
1748 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1749
1750 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1751 // or some other super-register.
1752 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1753 if (ImpDefIdx != -1)
1754 MI.removeOperand(ImpDefIdx);
1755
1756 // Change the opcode and operands.
1757 MI.setDesc(get(ARM::VMOVD));
1758 MI.getOperand(0).setReg(DstRegD);
1759 MI.getOperand(1).setReg(SrcRegD);
1760 MIB.add(predOps(ARMCC::AL));
1761
1762 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1763 // register scavenger and machine verifier, so we need to indicate that we
1764 // are reading an undefined value from SrcRegD, but a proper value from
1765 // SrcRegS.
1766 MI.getOperand(1).setIsUndef();
1767 MIB.addReg(SrcRegS, RegState::Implicit);
1768
1769 // SrcRegD may actually contain an unrelated value in the ssub_1
1770 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1771 if (MI.getOperand(1).isKill()) {
1772 MI.getOperand(1).setIsKill(false);
1773 MI.addRegisterKilled(SrcRegS, TRI, true);
1774 }
1775
1776 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1777 return true;
1778}
1779
1780/// Create a copy of a const pool value. Update CPI to the new index and return
1781/// the label UID.
1782static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1783 MachineConstantPool *MCP = MF.getConstantPool();
1784 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1785
1786 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1787 assert(MCPE.isMachineConstantPoolEntry() &&
1788 "Expecting a machine constantpool entry!");
1789 ARMConstantPoolValue *ACPV =
1790 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1791
1792 unsigned PCLabelId = AFI->createPICLabelUId();
1793 ARMConstantPoolValue *NewCPV = nullptr;
1794
1795 // FIXME: The below assumes PIC relocation model and that the function
1796 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1797 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1798 // instructions, so that's probably OK, but is PIC always correct when
1799 // we get here?
1800 if (ACPV->isGlobalValue())
1801 NewCPV = ARMConstantPoolConstant::Create(
1802 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1803 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1804 else if (ACPV->isExtSymbol())
1805 NewCPV = ARMConstantPoolSymbol::
1806 Create(MF.getFunction().getContext(),
1807 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1808 else if (ACPV->isBlockAddress())
1809 NewCPV = ARMConstantPoolConstant::
1810 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1811 ARMCP::CPBlockAddress, 4);
1812 else if (ACPV->isLSDA())
1813 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1814 ARMCP::CPLSDA, 4);
1815 else if (ACPV->isMachineBasicBlock())
1816 NewCPV = ARMConstantPoolMBB::
1817 Create(MF.getFunction().getContext(),
1818 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1819 else
1820 llvm_unreachable("Unexpected ARM constantpool value type!!");
1821 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1822 return PCLabelId;
1823}
1824
1825void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1826 MachineBasicBlock::iterator I,
1827 Register DestReg, unsigned SubIdx,
1828 const MachineInstr &Orig,
1829 const TargetRegisterInfo &TRI) const {
1830 unsigned Opcode = Orig.getOpcode();
1831 switch (Opcode) {
1832 default: {
1833 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1834 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1835 MBB.insert(I, MI);
1836 break;
1837 }
1838 case ARM::tLDRpci_pic:
1839 case ARM::t2LDRpci_pic: {
1840 MachineFunction &MF = *MBB.getParent();
1841 unsigned CPI = Orig.getOperand(1).getIndex();
1842 unsigned PCLabelId = duplicateCPV(MF, CPI);
1843 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1844 .addConstantPoolIndex(CPI)
1845 .addImm(PCLabelId)
1846 .cloneMemRefs(Orig);
1847 break;
1848 }
1849 }
1850}
1851
1852MachineInstr &
1853ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1854 MachineBasicBlock::iterator InsertBefore,
1855 const MachineInstr &Orig) const {
1856 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1857 MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1858 for (;;) {
1859 switch (I->getOpcode()) {
1860 case ARM::tLDRpci_pic:
1861 case ARM::t2LDRpci_pic: {
1862 MachineFunction &MF = *MBB.getParent();
1863 unsigned CPI = I->getOperand(1).getIndex();
1864 unsigned PCLabelId = duplicateCPV(MF, CPI);
1865 I->getOperand(1).setIndex(CPI);
1866 I->getOperand(2).setImm(PCLabelId);
1867 break;
1868 }
1869 }
1870 if (!I->isBundledWithSucc())
1871 break;
1872 ++I;
1873 }
1874 return Cloned;
1875}
1876
1877 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1878 const MachineInstr &MI1,
1879 const MachineRegisterInfo *MRI) const {
1880 unsigned Opcode = MI0.getOpcode();
1881 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1882 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1883 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1884 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1885 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1886 Opcode == ARM::t2MOV_ga_pcrel) {
1887 if (MI1.getOpcode() != Opcode)
1888 return false;
1889 if (MI0.getNumOperands() != MI1.getNumOperands())
1890 return false;
1891
1892 const MachineOperand &MO0 = MI0.getOperand(1);
1893 const MachineOperand &MO1 = MI1.getOperand(1);
1894 if (MO0.getOffset() != MO1.getOffset())
1895 return false;
1896
1897 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1898 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1899 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1900 Opcode == ARM::t2MOV_ga_pcrel)
1901 // Ignore the PC labels.
1902 return MO0.getGlobal() == MO1.getGlobal();
1903
1904 const MachineFunction *MF = MI0.getParent()->getParent();
1905 const MachineConstantPool *MCP = MF->getConstantPool();
1906 int CPI0 = MO0.getIndex();
1907 int CPI1 = MO1.getIndex();
1908 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1909 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1910 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1911 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1912 if (isARMCP0 && isARMCP1) {
1913 ARMConstantPoolValue *ACPV0 =
1914 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1915 ARMConstantPoolValue *ACPV1 =
1916 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1917 return ACPV0->hasSameValue(ACPV1);
1918 } else if (!isARMCP0 && !isARMCP1) {
1919 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1920 }
1921 return false;
1922 } else if (Opcode == ARM::PICLDR) {
1923 if (MI1.getOpcode() != Opcode)
1924 return false;
1925 if (MI0.getNumOperands() != MI1.getNumOperands())
1926 return false;
1927
1928 Register Addr0 = MI0.getOperand(1).getReg();
1929 Register Addr1 = MI1.getOperand(1).getReg();
1930 if (Addr0 != Addr1) {
1931 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1932 return false;
1933
1934 // This assumes SSA form.
1935 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1936 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1937 // Check if the loaded values, e.g. a constantpool or a global address, are
1938 // the same.
1939 if (!produceSameValue(*Def0, *Def1, MRI))
1940 return false;
1941 }
1942
1943 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1944 // %12 = PICLDR %11, 0, 14, %noreg
1945 const MachineOperand &MO0 = MI0.getOperand(i);
1946 const MachineOperand &MO1 = MI1.getOperand(i);
1947 if (!MO0.isIdenticalTo(MO1))
1948 return false;
1949 }
1950 return true;
1951 }
1952
1953 return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1954}
1955
1956/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1957/// determine if two loads are loading from the same base address. It should
1958/// only return true if the base pointers are the same and the only differences
1959/// between the two addresses is the offset. It also returns the offsets by
1960/// reference.
1961///
1962/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1963/// is permanently disabled.
1964 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1965 int64_t &Offset1,
1966 int64_t &Offset2) const {
1967 // Don't worry about Thumb: just ARM and Thumb2.
1968 if (Subtarget.isThumb1Only()) return false;
1969
1970 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1971 return false;
1972
1973 auto IsLoadOpcode = [&](unsigned Opcode) {
1974 switch (Opcode) {
1975 default:
1976 return false;
1977 case ARM::LDRi12:
1978 case ARM::LDRBi12:
1979 case ARM::LDRD:
1980 case ARM::LDRH:
1981 case ARM::LDRSB:
1982 case ARM::LDRSH:
1983 case ARM::VLDRD:
1984 case ARM::VLDRS:
1985 case ARM::t2LDRi8:
1986 case ARM::t2LDRBi8:
1987 case ARM::t2LDRDi8:
1988 case ARM::t2LDRSHi8:
1989 case ARM::t2LDRi12:
1990 case ARM::t2LDRBi12:
1991 case ARM::t2LDRSHi12:
1992 return true;
1993 }
1994 };
1995
1996 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1997 !IsLoadOpcode(Load2->getMachineOpcode()))
1998 return false;
1999
2000 // Check if base addresses and chain operands match.
2001 if (Load1->getOperand(0) != Load2->getOperand(0) ||
2002 Load1->getOperand(4) != Load2->getOperand(4))
2003 return false;
2004
2005 // Index should be Reg0.
2006 if (Load1->getOperand(3) != Load2->getOperand(3))
2007 return false;
2008
2009 // Determine the offsets.
2010 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
2011 isa<ConstantSDNode>(Load2->getOperand(1))) {
2012 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
2013 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
2014 return true;
2015 }
2016
2017 return false;
2018}
2019
2020/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
2021/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
2022/// be scheduled together. On some targets if two loads are loading from
2023/// addresses in the same cache line, it's better if they are scheduled
2024/// together. This function takes two integers that represent the load offsets
2025/// from the common base address. It returns true if it decides it's desirable
2026/// to schedule the two loads together. "NumLoads" is the number of loads that
2027/// have already been scheduled after Load1.
2028///
2029/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2030/// is permanently disabled.
2031 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2032 int64_t Offset1, int64_t Offset2,
2033 unsigned NumLoads) const {
2034 // Don't worry about Thumb: just ARM and Thumb2.
2035 if (Subtarget.isThumb1Only()) return false;
2036
2037 assert(Offset2 > Offset1);
2038
2039 if ((Offset2 - Offset1) / 8 > 64)
2040 return false;
2041
2042 // Check if the machine opcodes are different. If they are different
2043 // then we consider them to not be of the same base address,
2044 // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
2045 // In this case, they are considered to be the same because they are different
2046 // encoding forms of the same basic instruction.
2047 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2048 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2049 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2050 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2051 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2052 return false; // FIXME: overly conservative?
2053
2054 // Four loads in a row should be sufficient.
2055 if (NumLoads >= 3)
2056 return false;
2057
2058 return true;
2059}
2060
2061 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2062 const MachineBasicBlock *MBB,
2063 const MachineFunction &MF) const {
2064 // Debug info is never a scheduling boundary. It's necessary to be explicit
2065 // due to the special treatment of IT instructions below, otherwise a
2066 // dbg_value followed by an IT will result in the IT instruction being
2067 // considered a scheduling hazard, which is wrong. It should be the actual
2068 // instruction preceding the dbg_value instruction(s), just like it is
2069 // when debug info is not present.
2070 if (MI.isDebugInstr())
2071 return false;
2072
2073 // Terminators and labels can't be scheduled around.
2074 if (MI.isTerminator() || MI.isPosition())
2075 return true;
2076
2077 // INLINEASM_BR can jump to another block
2078 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2079 return true;
2080
2081 if (isSEHInstruction(MI))
2082 return true;
2083
2084 // Treat the start of the IT block as a scheduling boundary, but schedule
2085 // t2IT along with all instructions following it.
2086 // FIXME: This is a big hammer. But the alternative is to add all potential
2087 // true and anti dependencies to IT block instructions as implicit operands
2088 // to the t2IT instruction. The added compile time and complexity does not
2089 // seem worth it.
2090 MachineBasicBlock::const_iterator I = MI;
2091 // Make sure to skip any debug instructions
2092 while (++I != MBB->end() && I->isDebugInstr())
2093 ;
2094 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2095 return true;
2096
2097 // Don't attempt to schedule around any instruction that defines
2098 // a stack-oriented pointer, as it's unlikely to be profitable. This
2099 // saves compile time, because it doesn't require every single
2100 // stack slot reference to depend on the instruction that does the
2101 // modification.
2102 // Calls don't actually change the stack pointer, even if they have imp-defs.
2103 // No ARM calling conventions change the stack pointer. (X86 calling
2104 // conventions sometimes do).
2105 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
2106 return true;
2107
2108 return false;
2109}
2110
2111 bool ARMBaseInstrInfo::
2112 isProfitableToIfCvt(MachineBasicBlock &MBB,
2113 unsigned NumCycles, unsigned ExtraPredCycles,
2114 BranchProbability Probability) const {
2115 if (!NumCycles)
2116 return false;
2117
2118 // If we are optimizing for size, see if the branch in the predecessor can be
2119 // lowered to cbn?z by the constant island lowering pass, and return false if
2120 // so. This results in a shorter instruction sequence.
2121 if (MBB.getParent()->getFunction().hasOptSize()) {
2122 MachineBasicBlock *Pred = *MBB.pred_begin();
2123 if (!Pred->empty()) {
2124 MachineInstr *LastMI = &*Pred->rbegin();
2125 if (LastMI->getOpcode() == ARM::t2Bcc) {
2126 const TargetRegisterInfo *TRI = &getRegisterInfo();
2127 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2128 if (CmpMI)
2129 return false;
2130 }
2131 }
2132 }
2133 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2134 MBB, 0, 0, Probability);
2135}
2136
2137 bool ARMBaseInstrInfo::
2138 isProfitableToIfCvt(MachineBasicBlock &TBB,
2139 unsigned TCycles, unsigned TExtra,
2140 MachineBasicBlock &FBB,
2141 unsigned FCycles, unsigned FExtra,
2142 BranchProbability Probability) const {
2143 if (!TCycles)
2144 return false;
2145
2146 // In Thumb code we often end up trading one branch for an IT block, and
2147 // if we end up cloning, the instructions can increase code size. Prevent
2148 // blocks with multiple predecessors from being ifcvted to prevent this
2149 // cloning.
2150 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2151 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2152 return false;
2153 }
2154
2155 // Attempt to estimate the relative costs of predication versus branching.
2156 // Here we scale up each component of UnpredCost to avoid precision issues when
2157 // scaling TCycles/FCycles by Probability.
2158 const unsigned ScalingUpFactor = 1024;
2159
2160 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2161 unsigned UnpredCost;
2162 if (!Subtarget.hasBranchPredictor()) {
2163 // When we don't have a branch predictor it's always cheaper to not take a
2164 // branch than take it, so we have to take that into account.
2165 unsigned NotTakenBranchCost = 1;
2166 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2167 unsigned TUnpredCycles, FUnpredCycles;
2168 if (!FCycles) {
2169 // Triangle: TBB is the fallthrough
2170 TUnpredCycles = TCycles + NotTakenBranchCost;
2171 FUnpredCycles = TakenBranchCost;
2172 } else {
2173 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2174 TUnpredCycles = TCycles + TakenBranchCost;
2175 FUnpredCycles = FCycles + NotTakenBranchCost;
2176 // The branch at the end of FBB will disappear when it's predicated, so
2177 // discount it from PredCost.
2178 PredCost -= 1 * ScalingUpFactor;
2179 }
2180 // The total cost is the cost of each path scaled by its probability.
2181 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2182 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2183 UnpredCost = TUnpredCost + FUnpredCost;
2184 // When predicating, assume that the first IT can be folded away but later
2185 // ones cost one cycle each.
2186 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2187 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2188 }
2189 } else {
2190 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2191 unsigned FUnpredCost =
2192 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2193 UnpredCost = TUnpredCost + FUnpredCost;
2194 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2195 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2196 }
2197
2198 return PredCost <= UnpredCost;
2199}
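// Illustrative cost check (all numbers assumed, not taken from any particular
// core): on the branch-predictor path with TCycles = FCycles = 2,
// TExtra = FExtra = 0, Probability = 50% and a misprediction penalty of 10:
//   PredCost   = (2 + 2 + 0 + 0) * 1024                        = 4096
//   UnpredCost = 0.5*2*1024 + 0.5*2*1024 + 1024 + 10*1024/10   = 4096
// so PredCost <= UnpredCost and if-conversion is considered profitable.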
2200
2201unsigned
2202 ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2203 unsigned NumInsts) const {
2204 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2205 // ARM has a condition code field in every predicable instruction, so using
2206 // it doesn't change code size.
2207 if (!Subtarget.isThumb2())
2208 return 0;
2209
2210 // The IT block may be restricted to a single instruction (restrictIT).
2211 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2212 return divideCeil(NumInsts, MaxInsts) * 2;
2213}
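// For example, predicating 6 instructions in Thumb2 needs divideCeil(6, 4) = 2
// IT instructions, i.e. 4 extra bytes; with restrictIT (one instruction per IT
// block) it needs 6 ITs, i.e. 12 bytes. Non-Thumb2 code returns 0 because the
// condition field is already part of every predicable instruction.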
2214
2215unsigned
2216 ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2217 // If this branch is likely to be folded into the comparison to form a
2218 // CB(N)Z, then removing it won't reduce code size at all, because that will
2219 // just replace the CB(N)Z with a CMP.
2220 if (MI.getOpcode() == ARM::t2Bcc &&
2221 findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2222 return 0;
2223
2224 unsigned Size = getInstSizeInBytes(MI);
2225
2226 // For Thumb2, all branches are 32-bit instructions during the if conversion
2227 // pass, but may be replaced with 16-bit instructions during size reduction.
2228 // Since the branches considered by if conversion tend to be forward branches
2229 // over small basic blocks, they are very likely to be in range for the
2230 // narrow instructions, so we assume the final code size will be half what it
2231 // currently is.
2232 if (Subtarget.isThumb2())
2233 Size /= 2;
2234
2235 return Size;
2236}
2237
2238bool
2239 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2240 MachineBasicBlock &FMBB) const {
2241 // Reduce false anti-dependencies to let the target's out-of-order execution
2242 // engine do its thing.
2243 return Subtarget.isProfitableToUnpredicate();
2244}
2245
2246/// getInstrPredicate - If instruction is predicated, returns its predicate
2247/// condition, otherwise returns AL. It also returns the condition code
2248/// register by reference.
2249 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2250 Register &PredReg) {
2251 int PIdx = MI.findFirstPredOperandIdx();
2252 if (PIdx == -1) {
2253 PredReg = 0;
2254 return ARMCC::AL;
2255 }
2256
2257 PredReg = MI.getOperand(PIdx+1).getReg();
2258 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2259}
2260
2261 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2262 if (Opc == ARM::B)
2263 return ARM::Bcc;
2264 if (Opc == ARM::tB)
2265 return ARM::tBcc;
2266 if (Opc == ARM::t2B)
2267 return ARM::t2Bcc;
2268
2269 llvm_unreachable("Unknown unconditional branch opcode!");
2270}
2271
2272 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2273 bool NewMI,
2274 unsigned OpIdx1,
2275 unsigned OpIdx2) const {
2276 switch (MI.getOpcode()) {
2277 case ARM::MOVCCr:
2278 case ARM::t2MOVCCr: {
2279 // MOVCC can be commuted by inverting the condition.
2280 Register PredReg;
2281 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2282 // MOVCC AL can't be inverted. Shouldn't happen.
2283 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2284 return nullptr;
2285 MachineInstr *CommutedMI =
2286 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2287 if (!CommutedMI)
2288 return nullptr;
2289 // After swapping the MOVCC operands, also invert the condition.
2290 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2291 .setImm(ARMCC::getOppositeCondition(CC));
2292 return CommutedMI;
2293 }
2294 }
2295 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2296}
2297
2298/// Identify instructions that can be folded into a MOVCC instruction, and
2299/// return the defining instruction.
2300 MachineInstr *
2301ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2302 const TargetInstrInfo *TII) const {
2303 if (!Reg.isVirtual())
2304 return nullptr;
2305 if (!MRI.hasOneNonDBGUse(Reg))
2306 return nullptr;
2307 MachineInstr *MI = MRI.getVRegDef(Reg);
2308 if (!MI)
2309 return nullptr;
2310 // Check if MI can be predicated and folded into the MOVCC.
2311 if (!isPredicable(*MI))
2312 return nullptr;
2313 // Check if MI has any non-dead defs or physreg uses. This also detects
2314 // predicated instructions which will be reading CPSR.
2315 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2316 // Reject frame index operands, PEI can't handle the predicated pseudos.
2317 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2318 return nullptr;
2319 if (!MO.isReg())
2320 continue;
2321 // MI can't have any tied operands, that would conflict with predication.
2322 if (MO.isTied())
2323 return nullptr;
2324 if (MO.getReg().isPhysical())
2325 return nullptr;
2326 if (MO.isDef() && !MO.isDead())
2327 return nullptr;
2328 }
2329 bool DontMoveAcrossStores = true;
2330 if (!MI->isSafeToMove(DontMoveAcrossStores))
2331 return nullptr;
2332 return MI;
2333}
2334
2335 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2336 SmallVectorImpl<MachineOperand> &Cond,
2337 unsigned &TrueOp, unsigned &FalseOp,
2338 bool &Optimizable) const {
2339 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2340 "Unknown select instruction");
2341 // MOVCC operands:
2342 // 0: Def.
2343 // 1: True use.
2344 // 2: False use.
2345 // 3: Condition code.
2346 // 4: CPSR use.
2347 TrueOp = 1;
2348 FalseOp = 2;
2349 Cond.push_back(MI.getOperand(3));
2350 Cond.push_back(MI.getOperand(4));
2351 // We can always fold a def.
2352 Optimizable = true;
2353 return false;
2354}
2355
2356 MachineInstr *
2357 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2358 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2359 bool PreferFalse) const {
2360 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2361 "Unknown select instruction");
2362 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2363 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2364 bool Invert = !DefMI;
2365 if (!DefMI)
2366 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2367 if (!DefMI)
2368 return nullptr;
2369
2370 // Find new register class to use.
2371 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2372 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2373 Register DestReg = MI.getOperand(0).getReg();
2374 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2375 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2376 if (!MRI.constrainRegClass(DestReg, FalseClass))
2377 return nullptr;
2378 if (!MRI.constrainRegClass(DestReg, TrueClass))
2379 return nullptr;
2380
2381 // Create a new predicated version of DefMI.
2382 // Rfalse is the first use.
2383 MachineInstrBuilder NewMI =
2384 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2385
2386 // Copy all the DefMI operands, excluding its (null) predicate.
2387 const MCInstrDesc &DefDesc = DefMI->getDesc();
2388 for (unsigned i = 1, e = DefDesc.getNumOperands();
2389 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2390 NewMI.add(DefMI->getOperand(i));
2391
2392 unsigned CondCode = MI.getOperand(3).getImm();
2393 if (Invert)
2394 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2395 else
2396 NewMI.addImm(CondCode);
2397 NewMI.add(MI.getOperand(4));
2398
2399 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2400 if (NewMI->hasOptionalDef())
2401 NewMI.add(condCodeOp());
2402
2403 // The output register value when the predicate is false is an implicit
2404 // register operand tied to the first def.
2405 // The tie makes the register allocator ensure the FalseReg is allocated the
2406 // same register as operand 0.
2407 FalseReg.setImplicit();
2408 NewMI.add(FalseReg);
2409 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2410
2411 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2412 SeenMIs.insert(NewMI);
2413 SeenMIs.erase(DefMI);
2414
2415 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2416 // DefMI would be invalid when transferred inside the loop. Checking for a
2417 // loop is expensive, but at least remove kill flags if they are in different
2418 // BBs.
2419 if (DefMI->getParent() != MI.getParent())
2420 NewMI->clearKillInfo();
2421
2422 // The caller will erase MI, but not DefMI.
2423 DefMI->eraseFromParent();
2424 return NewMI;
2425}
2426
2427/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2428/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2429/// def operand.
2430///
2431/// This will go away once we can teach tblgen how to set the optional CPSR def
2432/// operand itself.
2433 struct AddSubFlagsOpcodePair {
2434 uint16_t PseudoOpc;
2435 uint16_t MachineOpc;
2436};
2437
2438 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2439 {ARM::ADDSri, ARM::ADDri},
2440 {ARM::ADDSrr, ARM::ADDrr},
2441 {ARM::ADDSrsi, ARM::ADDrsi},
2442 {ARM::ADDSrsr, ARM::ADDrsr},
2443
2444 {ARM::SUBSri, ARM::SUBri},
2445 {ARM::SUBSrr, ARM::SUBrr},
2446 {ARM::SUBSrsi, ARM::SUBrsi},
2447 {ARM::SUBSrsr, ARM::SUBrsr},
2448
2449 {ARM::RSBSri, ARM::RSBri},
2450 {ARM::RSBSrsi, ARM::RSBrsi},
2451 {ARM::RSBSrsr, ARM::RSBrsr},
2452
2453 {ARM::tADDSi3, ARM::tADDi3},
2454 {ARM::tADDSi8, ARM::tADDi8},
2455 {ARM::tADDSrr, ARM::tADDrr},
2456 {ARM::tADCS, ARM::tADC},
2457
2458 {ARM::tSUBSi3, ARM::tSUBi3},
2459 {ARM::tSUBSi8, ARM::tSUBi8},
2460 {ARM::tSUBSrr, ARM::tSUBrr},
2461 {ARM::tSBCS, ARM::tSBC},
2462 {ARM::tRSBS, ARM::tRSB},
2463 {ARM::tLSLSri, ARM::tLSLri},
2464
2465 {ARM::t2ADDSri, ARM::t2ADDri},
2466 {ARM::t2ADDSrr, ARM::t2ADDrr},
2467 {ARM::t2ADDSrs, ARM::t2ADDrs},
2468
2469 {ARM::t2SUBSri, ARM::t2SUBri},
2470 {ARM::t2SUBSrr, ARM::t2SUBrr},
2471 {ARM::t2SUBSrs, ARM::t2SUBrs},
2472
2473 {ARM::t2RSBSri, ARM::t2RSBri},
2474 {ARM::t2RSBSrs, ARM::t2RSBrs},
2475};
2476
2477unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2478 for (const auto &Entry : AddSubFlagsOpcodeMap)
2479 if (OldOpc == Entry.PseudoOpc)
2480 return Entry.MachineOpc;
2481 return 0;
2482}
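// For example, convertAddSubFlagsOpcode(ARM::t2SUBSri) returns ARM::t2SUBri,
// while an opcode that is not one of these pseudos (e.g. ARM::ADDri) returns 0.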
2483
2484 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2485 MachineBasicBlock::iterator &MBBI,
2486 const DebugLoc &dl, Register DestReg,
2487 Register BaseReg, int NumBytes,
2488 ARMCC::CondCodes Pred, Register PredReg,
2489 const ARMBaseInstrInfo &TII,
2490 unsigned MIFlags) {
2491 if (NumBytes == 0 && DestReg != BaseReg) {
2492 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2493 .addReg(BaseReg, RegState::Kill)
2494 .add(predOps(Pred, PredReg))
2495 .add(condCodeOp())
2496 .setMIFlags(MIFlags);
2497 return;
2498 }
2499
2500 bool isSub = NumBytes < 0;
2501 if (isSub) NumBytes = -NumBytes;
2502
2503 while (NumBytes) {
2504 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2505 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2506 assert(ThisVal && "Didn't extract field correctly");
2507
2508 // We will handle these bits from offset, clear them.
2509 NumBytes &= ~ThisVal;
2510
2511 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2512
2513 // Build the new ADD / SUB.
2514 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2515 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2516 .addReg(BaseReg, RegState::Kill)
2517 .addImm(ThisVal)
2518 .add(predOps(Pred, PredReg))
2519 .add(condCodeOp())
2520 .setMIFlags(MIFlags);
2521 BaseReg = DestReg;
2522 }
2523}
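// Worked example (illustrative): NumBytes = 257 cannot be encoded as a single
// rotated 8-bit immediate (bits 0 and 8 are too far apart), so the loop emits
//   ADDri DestReg, BaseReg, #1
//   ADDri DestReg, DestReg, #256
// each chunk being a valid so_imm value extracted via getSOImmValRotate.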
2524
2525 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2526 MachineFunction &MF, MachineInstr *MI,
2527 unsigned NumBytes) {
2528 // This optimisation potentially adds lots of load and store
2529 // micro-operations, so it's only really a benefit to code-size.
2530 if (!Subtarget.hasMinSize())
2531 return false;
2532
2533 // If only one register is pushed/popped, LLVM can use an LDR/STR
2534 // instead. We can't modify those so make sure we're dealing with an
2535 // instruction we understand.
2536 bool IsPop = isPopOpcode(MI->getOpcode());
2537 bool IsPush = isPushOpcode(MI->getOpcode());
2538 if (!IsPush && !IsPop)
2539 return false;
2540
2541 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2542 MI->getOpcode() == ARM::VLDMDIA_UPD;
2543 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2544 MI->getOpcode() == ARM::tPOP ||
2545 MI->getOpcode() == ARM::tPOP_RET;
2546
2547 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2548 MI->getOperand(1).getReg() == ARM::SP)) &&
2549 "trying to fold sp update into non-sp-updating push/pop");
2550
2551 // The VFP push & pop act on D-registers, so we can only fold in an adjustment
2552 // that is a multiple of 8 bytes. Similarly, each GPR is 4 bytes. Don't try
2553 // if this is violated.
2554 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2555 return false;
2556
2557 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2558 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2559 int RegListIdx = IsT1PushPop ? 2 : 4;
2560
2561 // Calculate the space we'll need in terms of registers.
2562 unsigned RegsNeeded;
2563 const TargetRegisterClass *RegClass;
2564 if (IsVFPPushPop) {
2565 RegsNeeded = NumBytes / 8;
2566 RegClass = &ARM::DPRRegClass;
2567 } else {
2568 RegsNeeded = NumBytes / 4;
2569 RegClass = &ARM::GPRRegClass;
2570 }
2571
2572 // We're going to have to strip all list operands off before
2573 // re-adding them since the order matters, so save the existing ones
2574 // for later.
2575 SmallVector<MachineOperand, 4> RegList;
2576
2577 // We're also going to need the first register transferred by this
2578 // instruction, which won't necessarily be the first register in the list.
2579 unsigned FirstRegEnc = -1;
2580
2581 const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2582 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2583 MachineOperand &MO = MI->getOperand(i);
2584 RegList.push_back(MO);
2585
2586 if (MO.isReg() && !MO.isImplicit() &&
2587 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2588 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2589 }
2590
2591 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2592
2593 // Now try to find enough space in the reglist to allocate NumBytes.
2594 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2595 --CurRegEnc) {
2596 MCRegister CurReg = RegClass->getRegister(CurRegEnc);
2597 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2598 continue;
2599 if (!IsPop) {
2600 // Pushing any register is completely harmless; mark the register involved
2601 // as undef since we don't care about its value and must not restore it
2602 // during stack unwinding.
2603 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2604 false, false, true));
2605 --RegsNeeded;
2606 continue;
2607 }
2608
2609 // However, we can only pop an extra register if it's not live. For
2610 // registers live within the function we might clobber a return value
2611 // register; the other way a register can be live here is if it's
2612 // callee-saved.
2613 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2614 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2615 MachineBasicBlock::LQR_Dead) {
2616 // VFP pops don't allow holes in the register list, so any skip is fatal
2617 // for our transformation. GPR pops do, so we should just keep looking.
2618 if (IsVFPPushPop)
2619 return false;
2620 else
2621 continue;
2622 }
2623
2624 // Mark the unimportant registers as <def,dead> in the POP.
2625 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2626 true));
2627 --RegsNeeded;
2628 }
2629
2630 if (RegsNeeded > 0)
2631 return false;
2632
2633 // Finally we know we can profitably perform the optimisation so go
2634 // ahead: strip all existing registers off and add them back again
2635 // in the right order.
2636 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2637 MI->removeOperand(i);
2638
2639 // Add the complete list back in.
2640 MachineInstrBuilder MIB(MF, &*MI);
2641 for (const MachineOperand &MO : llvm::reverse(RegList))
2642 MIB.add(MO);
2643
2644 return true;
2645}
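// Sketch of the effect (register numbers are illustrative): folding an 8-byte
// SP decrement into "push {r4, r5}" appends two otherwise-unused registers with
// encodings below r4, giving "push {r2, r3, r4, r5}" with r2/r3 marked undef,
// so the caller can drop the separate SP adjustment.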
2646
2647bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2648 Register FrameReg, int &Offset,
2649 const ARMBaseInstrInfo &TII) {
2650 unsigned Opcode = MI.getOpcode();
2651 const MCInstrDesc &Desc = MI.getDesc();
2652 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2653 bool isSub = false;
2654
2655 // Memory operands in inline assembly always use AddrMode2.
2656 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2658
2659 if (Opcode == ARM::ADDri) {
2660 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2661 if (Offset == 0) {
2662 // Turn it into a move.
2663 MI.setDesc(TII.get(ARM::MOVr));
2664 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2665 MI.removeOperand(FrameRegIdx+1);
2666 Offset = 0;
2667 return true;
2668 } else if (Offset < 0) {
2669 Offset = -Offset;
2670 isSub = true;
2671 MI.setDesc(TII.get(ARM::SUBri));
2672 }
2673
2674 // Common case: small offset, fits into instruction.
2675 if (ARM_AM::getSOImmVal(Offset) != -1) {
2676 // Replace the FrameIndex with sp / fp
2677 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2678 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2679 Offset = 0;
2680 return true;
2681 }
2682
2683 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2684 // as possible.
2685 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2686 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2687
2688 // We will handle these bits from offset, clear them.
2689 Offset &= ~ThisImmVal;
2690
2691 // Get the properly encoded SOImmVal field.
2692 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2693 "Bit extraction didn't work?");
2694 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2695 } else {
2696 unsigned ImmIdx = 0;
2697 int InstrOffs = 0;
2698 unsigned NumBits = 0;
2699 unsigned Scale = 1;
2700 switch (AddrMode) {
2701 case ARMII::AddrMode_i12:
2702 ImmIdx = FrameRegIdx + 1;
2703 InstrOffs = MI.getOperand(ImmIdx).getImm();
2704 NumBits = 12;
2705 break;
2706 case ARMII::AddrMode2:
2707 ImmIdx = FrameRegIdx+2;
2708 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2709 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2710 InstrOffs *= -1;
2711 NumBits = 12;
2712 break;
2713 case ARMII::AddrMode3:
2714 ImmIdx = FrameRegIdx+2;
2715 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2716 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2717 InstrOffs *= -1;
2718 NumBits = 8;
2719 break;
2720 case ARMII::AddrMode4:
2721 case ARMII::AddrMode6:
2722 // Can't fold any offset even if it's zero.
2723 return false;
2724 case ARMII::AddrMode5:
2725 ImmIdx = FrameRegIdx+1;
2726 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2727 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2728 InstrOffs *= -1;
2729 NumBits = 8;
2730 Scale = 4;
2731 break;
2732 case ARMII::AddrMode5FP16:
2733 ImmIdx = FrameRegIdx+1;
2734 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2735 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2736 InstrOffs *= -1;
2737 NumBits = 8;
2738 Scale = 2;
2739 break;
2740 case ARMII::AddrModeT2_i7:
2741 case ARMII::AddrModeT2_i7s2:
2742 case ARMII::AddrModeT2_i7s4:
2743 ImmIdx = FrameRegIdx+1;
2744 InstrOffs = MI.getOperand(ImmIdx).getImm();
2745 NumBits = 7;
2746 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2747 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2748 break;
2749 default:
2750 llvm_unreachable("Unsupported addressing mode!");
2751 }
2752
2753 Offset += InstrOffs * Scale;
2754 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2755 if (Offset < 0) {
2756 Offset = -Offset;
2757 isSub = true;
2758 }
2759
2760 // Attempt to fold the address computation if the opcode has offset bits.
2761 if (NumBits > 0) {
2762 // Common case: small offset, fits into instruction.
2763 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2764 int ImmedOffset = Offset / Scale;
2765 unsigned Mask = (1 << NumBits) - 1;
2766 if ((unsigned)Offset <= Mask * Scale) {
2767 // Replace the FrameIndex with sp
2768 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2769 // FIXME: When addrmode2 goes away, this will simplify (like the
2770 // T2 version), as the LDR.i12 versions don't need the encoding
2771 // tricks for the offset value.
2772 if (isSub) {
2773 if (AddrMode == ARMII::AddrMode_i12)
2774 ImmedOffset = -ImmedOffset;
2775 else
2776 ImmedOffset |= 1 << NumBits;
2777 }
2778 ImmOp.ChangeToImmediate(ImmedOffset);
2779 Offset = 0;
2780 return true;
2781 }
2782
2783 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2784 ImmedOffset = ImmedOffset & Mask;
2785 if (isSub) {
2786 if (AddrMode == ARMII::AddrMode_i12)
2787 ImmedOffset = -ImmedOffset;
2788 else
2789 ImmedOffset |= 1 << NumBits;
2790 }
2791 ImmOp.ChangeToImmediate(ImmedOffset);
2792 Offset &= ~(Mask*Scale);
2793 }
2794 }
2795
2796 Offset = (isSub) ? -Offset : Offset;
2797 return Offset == 0;
2798}
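// Worked example (illustrative): for an AddrMode_i12 load (NumBits = 12,
// Scale = 1) with a frame offset of 5000, Mask = 4095, so only 5000 & 4095 = 904
// is folded into the instruction; the function returns false with Offset = 4096
// left for the caller to materialize separately.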
2799
2800/// analyzeCompare - For a comparison instruction, return the source registers
2801/// in SrcReg and SrcReg2 if it has two register operands, and the value it
2802/// compares against in CmpValue. Return true if the comparison instruction
2803/// can be analyzed.
2804 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2805 Register &SrcReg2, int64_t &CmpMask,
2806 int64_t &CmpValue) const {
2807 switch (MI.getOpcode()) {
2808 default: break;
2809 case ARM::CMPri:
2810 case ARM::t2CMPri:
2811 case ARM::tCMPi8:
2812 SrcReg = MI.getOperand(0).getReg();
2813 SrcReg2 = 0;
2814 CmpMask = ~0;
2815 CmpValue = MI.getOperand(1).getImm();
2816 return true;
2817 case ARM::CMPrr:
2818 case ARM::t2CMPrr:
2819 case ARM::tCMPr:
2820 SrcReg = MI.getOperand(0).getReg();
2821 SrcReg2 = MI.getOperand(1).getReg();
2822 CmpMask = ~0;
2823 CmpValue = 0;
2824 return true;
2825 case ARM::TSTri:
2826 case ARM::t2TSTri:
2827 SrcReg = MI.getOperand(0).getReg();
2828 SrcReg2 = 0;
2829 CmpMask = MI.getOperand(1).getImm();
2830 CmpValue = 0;
2831 return true;
2832 }
2833
2834 return false;
2835}
2836
2837/// isSuitableForMask - Identify a suitable 'and' instruction that
2838/// operates on the given source register and applies the same mask
2839/// as a 'tst' instruction. Provide a limited look-through for copies.
2840/// When successful, MI will hold the found instruction.
2841 static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2842 int CmpMask, bool CommonUse) {
2843 switch (MI->getOpcode()) {
2844 case ARM::ANDri:
2845 case ARM::t2ANDri:
2846 if (CmpMask != MI->getOperand(2).getImm())
2847 return false;
2848 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2849 return true;
2850 break;
2851 }
2852
2853 return false;
2854}
2855
2856/// getCmpToAddCondition - assuming the flags are set by CMP(a,b), return the
2857/// equivalent condition code to use if we modify the instructions such that
2858/// the flags are instead set by ADD(a,b,X).
2859 inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2860 switch (CC) {
2861 default: return ARMCC::AL;
2862 case ARMCC::HS: return ARMCC::LO;
2863 case ARMCC::LO: return ARMCC::HS;
2864 case ARMCC::VS: return ARMCC::VS;
2865 case ARMCC::VC: return ARMCC::VC;
2866 }
2867}
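// For example, getCmpToAddCondition(ARMCC::HS) == ARMCC::LO: when a CMP of an
// ADD's result against one of the ADD's operands is folded into the ADDS itself
// (see isRedundantFlagInstr below), the carry flag is complemented, so HS and LO
// swap while VS/VC stay the same. Conditions that read N or Z map to AL, which
// the caller treats as "not convertible".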
2868
2869/// isRedundantFlagInstr - check whether the first instruction, whose only
2870/// purpose is to update flags, can be made redundant.
2871/// CMPrr can be made redundant by SUBrr if the operands are the same.
2872/// CMPri can be made redundant by SUBri if the operands are the same.
2873/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2874/// This function can be extended later on.
2875inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2876 Register SrcReg, Register SrcReg2,
2877 int64_t ImmValue,
2878 const MachineInstr *OI,
2879 bool &IsThumb1) {
2880 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2881 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2882 ((OI->getOperand(1).getReg() == SrcReg &&
2883 OI->getOperand(2).getReg() == SrcReg2) ||
2884 (OI->getOperand(1).getReg() == SrcReg2 &&
2885 OI->getOperand(2).getReg() == SrcReg))) {
2886 IsThumb1 = false;
2887 return true;
2888 }
2889
2890 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2891 ((OI->getOperand(2).getReg() == SrcReg &&
2892 OI->getOperand(3).getReg() == SrcReg2) ||
2893 (OI->getOperand(2).getReg() == SrcReg2 &&
2894 OI->getOperand(3).getReg() == SrcReg))) {
2895 IsThumb1 = true;
2896 return true;
2897 }
2898
2899 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2900 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2901 OI->getOperand(1).getReg() == SrcReg &&
2902 OI->getOperand(2).getImm() == ImmValue) {
2903 IsThumb1 = false;
2904 return true;
2905 }
2906
2907 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2908 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2909 OI->getOperand(2).getReg() == SrcReg &&
2910 OI->getOperand(3).getImm() == ImmValue) {
2911 IsThumb1 = true;
2912 return true;
2913 }
2914
2915 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2916 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2917 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2918 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2919 OI->getOperand(0).getReg() == SrcReg &&
2920 OI->getOperand(1).getReg() == SrcReg2) {
2921 IsThumb1 = false;
2922 return true;
2923 }
2924
2925 if (CmpI->getOpcode() == ARM::tCMPr &&
2926 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2927 OI->getOpcode() == ARM::tADDrr) &&
2928 OI->getOperand(0).getReg() == SrcReg &&
2929 OI->getOperand(2).getReg() == SrcReg2) {
2930 IsThumb1 = true;
2931 return true;
2932 }
2933
2934 return false;
2935}
2936
2937static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2938 switch (MI->getOpcode()) {
2939 default: return false;
2940 case ARM::tLSLri:
2941 case ARM::tLSRri:
2942 case ARM::tLSLrr:
2943 case ARM::tLSRrr:
2944 case ARM::tSUBrr:
2945 case ARM::tADDrr:
2946 case ARM::tADDi3:
2947 case ARM::tADDi8:
2948 case ARM::tSUBi3:
2949 case ARM::tSUBi8:
2950 case ARM::tMUL:
2951 case ARM::tADC:
2952 case ARM::tSBC:
2953 case ARM::tRSB:
2954 case ARM::tAND:
2955 case ARM::tORR:
2956 case ARM::tEOR:
2957 case ARM::tBIC:
2958 case ARM::tMVN:
2959 case ARM::tASRri:
2960 case ARM::tASRrr:
2961 case ARM::tROR:
2962 IsThumb1 = true;
2963 [[fallthrough]];
2964 case ARM::RSBrr:
2965 case ARM::RSBri:
2966 case ARM::RSCrr:
2967 case ARM::RSCri:
2968 case ARM::ADDrr:
2969 case ARM::ADDri:
2970 case ARM::ADCrr:
2971 case ARM::ADCri:
2972 case ARM::SUBrr:
2973 case ARM::SUBri:
2974 case ARM::SBCrr:
2975 case ARM::SBCri:
2976 case ARM::t2RSBri:
2977 case ARM::t2ADDrr:
2978 case ARM::t2ADDri:
2979 case ARM::t2ADCrr:
2980 case ARM::t2ADCri:
2981 case ARM::t2SUBrr:
2982 case ARM::t2SUBri:
2983 case ARM::t2SBCrr:
2984 case ARM::t2SBCri:
2985 case ARM::ANDrr:
2986 case ARM::ANDri:
2987 case ARM::ANDrsr:
2988 case ARM::ANDrsi:
2989 case ARM::t2ANDrr:
2990 case ARM::t2ANDri:
2991 case ARM::t2ANDrs:
2992 case ARM::ORRrr:
2993 case ARM::ORRri:
2994 case ARM::ORRrsr:
2995 case ARM::ORRrsi:
2996 case ARM::t2ORRrr:
2997 case ARM::t2ORRri:
2998 case ARM::t2ORRrs:
2999 case ARM::EORrr:
3000 case ARM::EORri:
3001 case ARM::EORrsr:
3002 case ARM::EORrsi:
3003 case ARM::t2EORrr:
3004 case ARM::t2EORri:
3005 case ARM::t2EORrs:
3006 case ARM::BICri:
3007 case ARM::BICrr:
3008 case ARM::BICrsi:
3009 case ARM::BICrsr:
3010 case ARM::t2BICri:
3011 case ARM::t2BICrr:
3012 case ARM::t2BICrs:
3013 case ARM::t2LSRri:
3014 case ARM::t2LSRrr:
3015 case ARM::t2LSLri:
3016 case ARM::t2LSLrr:
3017 case ARM::MOVsr:
3018 case ARM::MOVsi:
3019 return true;
3020 }
3021}
3022
3023/// optimizeCompareInstr - Convert the instruction supplying the argument to the
3024/// comparison into one that sets the zero bit in the flags register;
3025/// Remove a redundant Compare instruction if an earlier instruction can set the
3026/// flags in the same way as Compare.
3027/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
3028/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
3029/// condition code of instructions which use the flags.
3030 bool ARMBaseInstrInfo::optimizeCompareInstr(
3031 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
3032 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
3033 // Get the unique definition of SrcReg.
3034 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3035 if (!MI) return false;
3036
3037 // Masked compares sometimes use the same register as the corresponding 'and'.
3038 if (CmpMask != ~0) {
3039 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
3040 MI = nullptr;
3041 for (MachineRegisterInfo::use_instr_iterator
3042 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3043 UI != UE; ++UI) {
3044 if (UI->getParent() != CmpInstr.getParent())
3045 continue;
3046 MachineInstr *PotentialAND = &*UI;
3047 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
3048 isPredicated(*PotentialAND))
3049 continue;
3050 MI = PotentialAND;
3051 break;
3052 }
3053 if (!MI) return false;
3054 }
3055 }
3056
3057 // Get ready to iterate backward from CmpInstr.
3058 MachineBasicBlock::iterator I = CmpInstr, E = MI,
3059 B = CmpInstr.getParent()->begin();
3060
3061 // Early exit if CmpInstr is at the beginning of the BB.
3062 if (I == B) return false;
3063
3064 // There are two possible candidates which can be changed to set CPSR:
3065 // One is MI, the other is a SUB or ADD instruction.
3066 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3067 // ADDr[ri](r1, r2, X).
3068 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3069 MachineInstr *SubAdd = nullptr;
3070 if (SrcReg2 != 0)
3071 // MI is not a candidate for CMPrr.
3072 MI = nullptr;
3073 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3074 // Conservatively refuse to convert an instruction which isn't in the same
3075 // BB as the comparison.
3076 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3077 // Thus we cannot return here.
3078 if (CmpInstr.getOpcode() == ARM::CMPri ||
3079 CmpInstr.getOpcode() == ARM::t2CMPri ||
3080 CmpInstr.getOpcode() == ARM::tCMPi8)
3081 MI = nullptr;
3082 else
3083 return false;
3084 }
3085
3086 bool IsThumb1 = false;
3087 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3088 return false;
3089
3090 // We also want to do this peephole for cases like this: if (a*b == 0),
3091 // and optimise away the CMP instruction from the generated code sequence:
3092 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3093 // resulting from the select instruction, but these MOVS instructions for
3094 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3095 // However, if we only have MOVS instructions in between the CMP and the
3096 // other instruction (the MULS in this example), then the CPSR is dead so we
3097 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3098 // reordering and then continue the analysis hoping we can eliminate the
3099 // CMP. This peephole works on the vregs, so is still in SSA form. As a
3100 // consequence, the movs won't redefine/kill the MUL operands which would
3101 // make this reordering illegal.
3102 const TargetRegisterInfo *TRI = &getRegisterInfo();
3103 if (MI && IsThumb1) {
3104 --I;
3105 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3106 bool CanReorder = true;
3107 for (; I != E; --I) {
3108 if (I->getOpcode() != ARM::tMOVi8) {
3109 CanReorder = false;
3110 break;
3111 }
3112 }
3113 if (CanReorder) {
3114 MI = MI->removeFromParent();
3115 E = CmpInstr;
3116 CmpInstr.getParent()->insert(E, MI);
3117 }
3118 }
3119 I = CmpInstr;
3120 E = MI;
3121 }
3122
3123 // Check that CPSR isn't set between the comparison instruction and the one we
3124 // want to change. At the same time, search for SubAdd.
3125 bool SubAddIsThumb1 = false;
3126 do {
3127 const MachineInstr &Instr = *--I;
3128
3129 // Check whether CmpInstr can be made redundant by the current instruction.
3130 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3131 SubAddIsThumb1)) {
3132 SubAdd = &*I;
3133 break;
3134 }
3135
3136 // Allow E (which was initially MI) to be SubAdd but do not search before E.
3137 if (I == E)
3138 break;
3139
3140 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3141 Instr.readsRegister(ARM::CPSR, TRI))
3142 // This instruction modifies or uses CPSR after the one we want to
3143 // change. We can't do this transformation.
3144 return false;
3145
3146 if (I == B) {
3147 // In some cases, we scan the use-list of an instruction for an AND;
3148 // that AND is in the same BB, but may not be scheduled before the
3149 // corresponding TST. In that case, bail out.
3150 //
3151 // FIXME: We could try to reschedule the AND.
3152 return false;
3153 }
3154 } while (true);
3155
3156 // Return false if no candidates exist.
3157 if (!MI && !SubAdd)
3158 return false;
3159
3160 // If we found a SubAdd, use it as it will be closer to the CMP
3161 if (SubAdd) {
3162 MI = SubAdd;
3163 IsThumb1 = SubAddIsThumb1;
3164 }
3165
3166 // We can't use a predicated instruction - it doesn't always write the flags.
3167 if (isPredicated(*MI))
3168 return false;
3169
3170 // Scan forward for the use of CPSR
3171 // When checking against MI: if it's a conditional code that requires
3172 // checking of the V bit or C bit, then this is not safe to do.
3173 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3174 // If we are done with the basic block, we need to check whether CPSR is
3175 // live-out.
3176 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3177 OperandsToUpdate;
3178 bool isSafe = false;
3179 I = CmpInstr;
3180 E = CmpInstr.getParent()->end();
3181 while (!isSafe && ++I != E) {
3182 const MachineInstr &Instr = *I;
3183 for (unsigned IO = 0, EO = Instr.getNumOperands();
3184 !isSafe && IO != EO; ++IO) {
3185 const MachineOperand &MO = Instr.getOperand(IO);
3186 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3187 isSafe = true;
3188 break;
3189 }
3190 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3191 continue;
3192 if (MO.isDef()) {
3193 isSafe = true;
3194 break;
3195 }
3196 // Condition code is after the operand before CPSR except for VSELs.
3197 ARMCC::CondCodes CC;
3198 bool IsInstrVSel = true;
3199 switch (Instr.getOpcode()) {
3200 default:
3201 IsInstrVSel = false;
3202 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3203 break;
3204 case ARM::VSELEQD:
3205 case ARM::VSELEQS:
3206 case ARM::VSELEQH:
3207 CC = ARMCC::EQ;
3208 break;
3209 case ARM::VSELGTD:
3210 case ARM::VSELGTS:
3211 case ARM::VSELGTH:
3212 CC = ARMCC::GT;
3213 break;
3214 case ARM::VSELGED:
3215 case ARM::VSELGES:
3216 case ARM::VSELGEH:
3217 CC = ARMCC::GE;
3218 break;
3219 case ARM::VSELVSD:
3220 case ARM::VSELVSS:
3221 case ARM::VSELVSH:
3222 CC = ARMCC::VS;
3223 break;
3224 }
3225
3226 if (SubAdd) {
3227 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3228 // on CMP needs to be updated to be based on SUB.
3229 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3230 // needs to be modified.
3231 // Push the condition code operands to OperandsToUpdate.
3232 // If it is safe to remove CmpInstr, the condition code of these
3233 // operands will be modified.
3234 unsigned Opc = SubAdd->getOpcode();
3235 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3236 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3237 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3238 Opc == ARM::tSUBi8;
3239 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3240 if (!IsSub ||
3241 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3242 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3243 // VSel doesn't support condition code update.
3244 if (IsInstrVSel)
3245 return false;
3246 // Ensure we can swap the condition.
3247 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3248 if (NewCC == ARMCC::AL)
3249 return false;
3250 OperandsToUpdate.push_back(
3251 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3252 }
3253 } else {
3254 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3255 switch (CC) {
3256 case ARMCC::EQ: // Z
3257 case ARMCC::NE: // Z
3258 case ARMCC::MI: // N
3259 case ARMCC::PL: // N
3260 case ARMCC::AL: // none
3261 // CPSR can be used multiple times, we should continue.
3262 break;
3263 case ARMCC::HS: // C
3264 case ARMCC::LO: // C
3265 case ARMCC::VS: // V
3266 case ARMCC::VC: // V
3267 case ARMCC::HI: // C Z
3268 case ARMCC::LS: // C Z
3269 case ARMCC::GE: // N V
3270 case ARMCC::LT: // N V
3271 case ARMCC::GT: // Z N V
3272 case ARMCC::LE: // Z N V
3273 // The instruction uses the V bit or C bit which is not safe.
3274 return false;
3275 }
3276 }
3277 }
3278 }
3279
3280 // If CPSR is not killed nor re-defined, we should check whether it is
3281 // live-out. If it is live-out, do not optimize.
3282 if (!isSafe) {
3283 MachineBasicBlock *MBB = CmpInstr.getParent();
3284 for (MachineBasicBlock *Succ : MBB->successors())
3285 if (Succ->isLiveIn(ARM::CPSR))
3286 return false;
3287 }
3288
3289 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3290 // set CPSR so this is represented as an explicit output)
3291 if (!IsThumb1) {
3292 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3293 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3294 MI->getOperand(CPSRRegNum).setIsDef(true);
3295 }
3296 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3297 CmpInstr.eraseFromParent();
3298
3299 // Modify the condition code of operands in OperandsToUpdate.
3300 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3301 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3302 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3303 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3304
3305 MI->clearRegisterDeads(ARM::CPSR);
3306
3307 return true;
3308}
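// Typical effect (a sketch, not tied to a specific test): given
//   %2 = SUBrr %0, %1      ; optional cc_out operand is %noreg, so no flags
//   CMPrr %0, %1           ; only exists to set CPSR
// the CMP is erased and the SUB's optional CPSR operand is enabled, so the
// subtraction itself now produces the flags its users read.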
3309
3310 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3311 // Do not sink MI if it might be used to optimize a redundant compare.
3312 // We heuristically only look at the instruction immediately following MI to
3313 // avoid potentially searching the entire basic block.
3314 if (isPredicated(MI))
3315 return true;
3316 MachineBasicBlock::const_iterator Next = &MI;
3317 ++Next;
3318 Register SrcReg, SrcReg2;
3319 int64_t CmpMask, CmpValue;
3320 bool IsThumb1;
3321 if (Next != MI.getParent()->end() &&
3322 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3323 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3324 return false;
3325 return true;
3326}
3327
3328 bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3329 Register Reg,
3330 MachineRegisterInfo *MRI) const {
3331 // Fold large immediates into add, sub, or, xor.
3332 unsigned DefOpc = DefMI.getOpcode();
3333 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3334 DefOpc != ARM::tMOVi32imm)
3335 return false;
3336 if (!DefMI.getOperand(1).isImm())
3337 // Could be t2MOVi32imm @xx
3338 return false;
3339
3340 if (!MRI->hasOneNonDBGUse(Reg))
3341 return false;
3342
3343 const MCInstrDesc &DefMCID = DefMI.getDesc();
3344 if (DefMCID.hasOptionalDef()) {
3345 unsigned NumOps = DefMCID.getNumOperands();
3346 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3347 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3348 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3349 // to delete DefMI.
3350 return false;
3351 }
3352
3353 const MCInstrDesc &UseMCID = UseMI.getDesc();
3354 if (UseMCID.hasOptionalDef()) {
3355 unsigned NumOps = UseMCID.getNumOperands();
3356 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3357 // If the instruction sets the flag, do not attempt this optimization
3358 // since it may change the semantics of the code.
3359 return false;
3360 }
3361
3362 unsigned UseOpc = UseMI.getOpcode();
3363 unsigned NewUseOpc = 0;
3364 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3365 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3366 bool Commute = false;
3367 switch (UseOpc) {
3368 default: return false;
3369 case ARM::SUBrr:
3370 case ARM::ADDrr:
3371 case ARM::ORRrr:
3372 case ARM::EORrr:
3373 case ARM::t2SUBrr:
3374 case ARM::t2ADDrr:
3375 case ARM::t2ORRrr:
3376 case ARM::t2EORrr: {
3377 Commute = UseMI.getOperand(2).getReg() != Reg;
3378 switch (UseOpc) {
3379 default: break;
3380 case ARM::ADDrr:
3381 case ARM::SUBrr:
3382 if (UseOpc == ARM::SUBrr && Commute)
3383 return false;
3384
3385 // ADD/SUB are special because they're essentially the same operation, so
3386 // we can handle a larger range of immediates.
3387 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3388 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3389 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3390 ImmVal = -ImmVal;
3391 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3392 } else
3393 return false;
3394 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3395 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3396 break;
3397 case ARM::ORRrr:
3398 case ARM::EORrr:
3399 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3400 return false;
3401 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3402 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3403 switch (UseOpc) {
3404 default: break;
3405 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3406 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3407 }
3408 break;
3409 case ARM::t2ADDrr:
3410 case ARM::t2SUBrr: {
3411 if (UseOpc == ARM::t2SUBrr && Commute)
3412 return false;
3413
3414 // ADD/SUB are special because they're essentially the same operation, so
3415 // we can handle a larger range of immediates.
3416 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3417 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3418 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3419 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3420 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3421 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3422 ImmVal = -ImmVal;
3423 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3424 } else
3425 return false;
3426 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3427 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3428 break;
3429 }
3430 case ARM::t2ORRrr:
3431 case ARM::t2EORrr:
3432 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3433 return false;
3434 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3435 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3436 switch (UseOpc) {
3437 default: break;
3438 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3439 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3440 }
3441 break;
3442 }
3443 }
3444 }
3445
3446 unsigned OpIdx = Commute ? 2 : 1;
3447 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3448 bool isKill = UseMI.getOperand(OpIdx).isKill();
3449 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3450 Register NewReg = MRI->createVirtualRegister(TRC);
3451 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3452 NewReg)
3453 .addReg(Reg1, getKillRegState(isKill))
3454 .addImm(SOImmValV1)
3455 .add(predOps(ARMCC::AL))
3456 .add(condCodeOp());
3457 UseMI.setDesc(get(NewUseOpc));
3458 UseMI.getOperand(1).setReg(NewReg);
3459 UseMI.getOperand(1).setIsKill();
3460 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3461 DefMI.eraseFromParent();
3462 // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
3463 // just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3464 // Then the below code will not be needed, as the input/output register
3465 // classes will be rgpr or gprSP.
3466 // For now, we fix the UseMI operand explicitly here:
3467 switch(NewUseOpc){
3468 case ARM::t2ADDspImm:
3469 case ARM::t2SUBspImm:
3470 case ARM::t2ADDri:
3471 case ARM::t2SUBri:
3472 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3473 }
3474 return true;
3475}
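// Illustrative fold (virtual register numbers assumed): with
//   %1 = MOVi32imm 257
//   %2 = ADDrr %0, %1
// 257 is a two-part so_imm (1 + 256), so this becomes
//   %tmp = ADDri %0, 1
//   %2   = ADDri %tmp, 256
// and the MOVi32imm is erased.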
3476
3477static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3478 const MachineInstr &MI) {
3479 switch (MI.getOpcode()) {
3480 default: {
3481 const MCInstrDesc &Desc = MI.getDesc();
3482 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3483 assert(UOps >= 0 && "bad # UOps");
3484 return UOps;
3485 }
3486
3487 case ARM::LDRrs:
3488 case ARM::LDRBrs:
3489 case ARM::STRrs:
3490 case ARM::STRBrs: {
3491 unsigned ShOpVal = MI.getOperand(3).getImm();
3492 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3493 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3494 if (!isSub &&
3495 (ShImm == 0 ||
3496 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3497 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3498 return 1;
3499 return 2;
3500 }
3501
3502 case ARM::LDRH:
3503 case ARM::STRH: {
3504 if (!MI.getOperand(2).getReg())
3505 return 1;
3506
3507 unsigned ShOpVal = MI.getOperand(3).getImm();
3508 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3509 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3510 if (!isSub &&
3511 (ShImm == 0 ||
3512 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3513 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3514 return 1;
3515 return 2;
3516 }
3517
3518 case ARM::LDRSB:
3519 case ARM::LDRSH:
3520 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3521
3522 case ARM::LDRSB_POST:
3523 case ARM::LDRSH_POST: {
3524 Register Rt = MI.getOperand(0).getReg();
3525 Register Rm = MI.getOperand(3).getReg();
3526 return (Rt == Rm) ? 4 : 3;
3527 }
3528
3529 case ARM::LDR_PRE_REG:
3530 case ARM::LDRB_PRE_REG: {
3531 Register Rt = MI.getOperand(0).getReg();
3532 Register Rm = MI.getOperand(3).getReg();
3533 if (Rt == Rm)
3534 return 3;
3535 unsigned ShOpVal = MI.getOperand(4).getImm();
3536 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3537 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3538 if (!isSub &&
3539 (ShImm == 0 ||
3540 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3541 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3542 return 2;
3543 return 3;
3544 }
3545
3546 case ARM::STR_PRE_REG:
3547 case ARM::STRB_PRE_REG: {
3548 unsigned ShOpVal = MI.getOperand(4).getImm();
3549 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3550 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3551 if (!isSub &&
3552 (ShImm == 0 ||
3553 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3554 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3555 return 2;
3556 return 3;
3557 }
3558
3559 case ARM::LDRH_PRE:
3560 case ARM::STRH_PRE: {
3561 Register Rt = MI.getOperand(0).getReg();
3562 Register Rm = MI.getOperand(3).getReg();
3563 if (!Rm)
3564 return 2;
3565 if (Rt == Rm)
3566 return 3;
3567 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3568 }
3569
3570 case ARM::LDR_POST_REG:
3571 case ARM::LDRB_POST_REG:
3572 case ARM::LDRH_POST: {
3573 Register Rt = MI.getOperand(0).getReg();
3574 Register Rm = MI.getOperand(3).getReg();
3575 return (Rt == Rm) ? 3 : 2;
3576 }
3577
3578 case ARM::LDR_PRE_IMM:
3579 case ARM::LDRB_PRE_IMM:
3580 case ARM::LDR_POST_IMM:
3581 case ARM::LDRB_POST_IMM:
3582 case ARM::STRB_POST_IMM:
3583 case ARM::STRB_POST_REG:
3584 case ARM::STRB_PRE_IMM:
3585 case ARM::STRH_POST:
3586 case ARM::STR_POST_IMM:
3587 case ARM::STR_POST_REG:
3588 case ARM::STR_PRE_IMM:
3589 return 2;
3590
3591 case ARM::LDRSB_PRE:
3592 case ARM::LDRSH_PRE: {
3593 Register Rm = MI.getOperand(3).getReg();
3594 if (Rm == 0)
3595 return 3;
3596 Register Rt = MI.getOperand(0).getReg();
3597 if (Rt == Rm)
3598 return 4;
3599 unsigned ShOpVal = MI.getOperand(4).getImm();
3600 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3601 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3602 if (!isSub &&
3603 (ShImm == 0 ||
3604 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3605 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3606 return 3;
3607 return 4;
3608 }
3609
3610 case ARM::LDRD: {
3611 Register Rt = MI.getOperand(0).getReg();
3612 Register Rn = MI.getOperand(2).getReg();
3613 Register Rm = MI.getOperand(3).getReg();
3614 if (Rm)
3615 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3616 : 3;
3617 return (Rt == Rn) ? 3 : 2;
3618 }
3619
3620 case ARM::STRD: {
3621 Register Rm = MI.getOperand(3).getReg();
3622 if (Rm)
3623 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3624 : 3;
3625 return 2;
3626 }
3627
3628 case ARM::LDRD_POST:
3629 case ARM::t2LDRD_POST:
3630 return 3;
3631
3632 case ARM::STRD_POST:
3633 case ARM::t2STRD_POST:
3634 return 4;
3635
3636 case ARM::LDRD_PRE: {
3637 Register Rt = MI.getOperand(0).getReg();
3638 Register Rn = MI.getOperand(3).getReg();
3639 Register Rm = MI.getOperand(4).getReg();
3640 if (Rm)
3641 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3642 : 4;
3643 return (Rt == Rn) ? 4 : 3;
3644 }
3645
3646 case ARM::t2LDRD_PRE: {
3647 Register Rt = MI.getOperand(0).getReg();
3648 Register Rn = MI.getOperand(3).getReg();
3649 return (Rt == Rn) ? 4 : 3;
3650 }
3651
3652 case ARM::STRD_PRE: {
3653 Register Rm = MI.getOperand(4).getReg();
3654 if (Rm)
3655 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3656 : 4;
3657 return 3;
3658 }
3659
3660 case ARM::t2STRD_PRE:
3661 return 3;
3662
3663 case ARM::t2LDR_POST:
3664 case ARM::t2LDRB_POST:
3665 case ARM::t2LDRB_PRE:
3666 case ARM::t2LDRSBi12:
3667 case ARM::t2LDRSBi8:
3668 case ARM::t2LDRSBpci:
3669 case ARM::t2LDRSBs:
3670 case ARM::t2LDRH_POST:
3671 case ARM::t2LDRH_PRE:
3672 case ARM::t2LDRSBT:
3673 case ARM::t2LDRSB_POST:
3674 case ARM::t2LDRSB_PRE:
3675 case ARM::t2LDRSH_POST:
3676 case ARM::t2LDRSH_PRE:
3677 case ARM::t2LDRSHi12:
3678 case ARM::t2LDRSHi8:
3679 case ARM::t2LDRSHpci:
3680 case ARM::t2LDRSHs:
3681 return 2;
3682
3683 case ARM::t2LDRDi8: {
3684 Register Rt = MI.getOperand(0).getReg();
3685 Register Rn = MI.getOperand(2).getReg();
3686 return (Rt == Rn) ? 3 : 2;
3687 }
3688
3689 case ARM::t2STRB_POST:
3690 case ARM::t2STRB_PRE:
3691 case ARM::t2STRBs:
3692 case ARM::t2STRDi8:
3693 case ARM::t2STRH_POST:
3694 case ARM::t2STRH_PRE:
3695 case ARM::t2STRHs:
3696 case ARM::t2STR_POST:
3697 case ARM::t2STR_PRE:
3698 case ARM::t2STRs:
3699 return 2;
3700 }
3701}
3702
3703// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3704// can't be easily determined, return 0 (missing MachineMemOperand).
3705//
3706// FIXME: The current MachineInstr design does not support relying on machine
3707// mem operands to determine the width of a memory access. Instead, we expect
3708// the target to provide this information based on the instruction opcode and
3709// operands. However, using MachineMemOperand is the best solution now for
3710// two reasons:
3711//
3712// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3713// operands. This is much more dangerous than using the MachineMemOperand
3714// sizes because CodeGen passes can insert/remove optional machine operands. In
3715// fact, it's totally incorrect for preRA passes and appears to be wrong for
3716// postRA passes as well.
3717//
3718// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3719// machine model that calls this should handle the unknown (zero size) case.
3720//
3721// Long term, we should require a target hook that verifies MachineMemOperand
3722// sizes during MC lowering. That target hook should be local to MC lowering
3723// because we can't ensure that it is aware of other MI forms. Doing this will
3724// ensure that MachineMemOperands are correctly propagated through all passes.
3725unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3726 unsigned Size = 0;
3727 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3728 E = MI.memoperands_end();
3729 I != E; ++I) {
3730 Size += (*I)->getSize().getValue();
3731 }
3732 // FIXME: The scheduler currently can't handle values larger than 16. But
3733 // the values can actually go up to 32 for floating-point load/store
3734 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3735 // operations isn't right; we could end up with "extra" memory operands for
3736 // various reasons, like tail merge merging two memory operations.
3737 return std::min(Size / 4, 16U);
3738}
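
// A minimal standalone sketch of the clamp performed above (the helper name is
// illustrative only, not part of the LLVM API): memory-operand sizes are summed
// in bytes, converted to 32-bit words, and capped at the scheduler limit of 16.
static constexpr unsigned sketchNumLDMWords(unsigned TotalBytes) {
  return TotalBytes / 4 < 16u ? TotalBytes / 4 : 16u;
}
static_assert(sketchNumLDMWords(12) == 3, "three 32-bit words");
static_assert(sketchNumLDMWords(128) == 16, "clamped to the scheduler limit");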
3739
3740static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3741 unsigned NumRegs) {
3742 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3743 switch (Opc) {
3744 default:
3745 break;
3746 case ARM::VLDMDIA_UPD:
3747 case ARM::VLDMDDB_UPD:
3748 case ARM::VLDMSIA_UPD:
3749 case ARM::VLDMSDB_UPD:
3750 case ARM::VSTMDIA_UPD:
3751 case ARM::VSTMDDB_UPD:
3752 case ARM::VSTMSIA_UPD:
3753 case ARM::VSTMSDB_UPD:
3754 case ARM::LDMIA_UPD:
3755 case ARM::LDMDA_UPD:
3756 case ARM::LDMDB_UPD:
3757 case ARM::LDMIB_UPD:
3758 case ARM::STMIA_UPD:
3759 case ARM::STMDA_UPD:
3760 case ARM::STMDB_UPD:
3761 case ARM::STMIB_UPD:
3762 case ARM::tLDMIA_UPD:
3763 case ARM::tSTMIA_UPD:
3764 case ARM::t2LDMIA_UPD:
3765 case ARM::t2LDMDB_UPD:
3766 case ARM::t2STMIA_UPD:
3767 case ARM::t2STMDB_UPD:
3768 ++UOps; // One for base register writeback.
3769 break;
3770 case ARM::LDMIA_RET:
3771 case ARM::tPOP_RET:
3772 case ARM::t2LDMIA_RET:
3773 UOps += 2; // One for base reg wb, one for write to pc.
3774 break;
3775 }
3776 return UOps;
3777}
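
// A minimal sketch of the single-issue formula above (the helper name is
// illustrative only, not part of this file): one micro-op for the address
// computation plus one per transferred register, plus one extra for base
// register writeback and one more for a write to pc.
static constexpr unsigned sketchSingleIssueUOps(unsigned NumRegs,
                                                bool Writeback, bool WritesPC) {
  return 1 + NumRegs + (Writeback ? 1u : 0u) + (WritesPC ? 1u : 0u);
}
static_assert(sketchSingleIssueUOps(4, true, false) == 6, "e.g. LDMIA_UPD of 4 regs");
static_assert(sketchSingleIssueUOps(4, true, true) == 7, "e.g. LDMIA_RET of 4 regs");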
3778
3779unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3780 const MachineInstr &MI) const {
3781 if (!ItinData || ItinData->isEmpty())
3782 return 1;
3783
3784 const MCInstrDesc &Desc = MI.getDesc();
3785 unsigned Class = Desc.getSchedClass();
3786 int ItinUOps = ItinData->getNumMicroOps(Class);
3787 if (ItinUOps >= 0) {
3788 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3789 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3790
3791 return ItinUOps;
3792 }
3793
3794 unsigned Opc = MI.getOpcode();
3795 switch (Opc) {
3796 default:
3797 llvm_unreachable("Unexpected multi-uops instruction!");
3798 case ARM::VLDMQIA:
3799 case ARM::VSTMQIA:
3800 return 2;
3801
3802 // The number of uOps for load / store multiple is determined by the number
3803 // of registers.
3804 //
3805 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3806 // same cycle. The scheduling for the first load / store must be done
3807 // separately by assuming the address is not 64-bit aligned.
3808 //
3809 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3810 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3811 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3812 case ARM::VLDMDIA:
3813 case ARM::VLDMDIA_UPD:
3814 case ARM::VLDMDDB_UPD:
3815 case ARM::VLDMSIA:
3816 case ARM::VLDMSIA_UPD:
3817 case ARM::VLDMSDB_UPD:
3818 case ARM::VSTMDIA:
3819 case ARM::VSTMDIA_UPD:
3820 case ARM::VSTMDDB_UPD:
3821 case ARM::VSTMSIA:
3822 case ARM::VSTMSIA_UPD:
3823 case ARM::VSTMSDB_UPD: {
3824 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3825 return (NumRegs / 2) + (NumRegs % 2) + 1;
3826 }
3827
3828 case ARM::LDMIA_RET:
3829 case ARM::LDMIA:
3830 case ARM::LDMDA:
3831 case ARM::LDMDB:
3832 case ARM::LDMIB:
3833 case ARM::LDMIA_UPD:
3834 case ARM::LDMDA_UPD:
3835 case ARM::LDMDB_UPD:
3836 case ARM::LDMIB_UPD:
3837 case ARM::STMIA:
3838 case ARM::STMDA:
3839 case ARM::STMDB:
3840 case ARM::STMIB:
3841 case ARM::STMIA_UPD:
3842 case ARM::STMDA_UPD:
3843 case ARM::STMDB_UPD:
3844 case ARM::STMIB_UPD:
3845 case ARM::tLDMIA:
3846 case ARM::tLDMIA_UPD:
3847 case ARM::tSTMIA_UPD:
3848 case ARM::tPOP_RET:
3849 case ARM::tPOP:
3850 case ARM::tPUSH:
3851 case ARM::t2LDMIA_RET:
3852 case ARM::t2LDMIA:
3853 case ARM::t2LDMDB:
3854 case ARM::t2LDMIA_UPD:
3855 case ARM::t2LDMDB_UPD:
3856 case ARM::t2STMIA:
3857 case ARM::t2STMDB:
3858 case ARM::t2STMIA_UPD:
3859 case ARM::t2STMDB_UPD: {
3860 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3860 switch (Subtarget.getLdStMultipleTiming()) {
3861 case ARMSubtarget::SingleIssuePlusExtras:
3863 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3864 case ARMSubtarget::SingleIssue:
3865 // Assume the worst.
3866 return NumRegs;
3867 case ARMSubtarget::DoubleIssue: {
3868 if (NumRegs < 4)
3869 return 2;
3870 // 4 registers would be issued: 2, 2.
3871 // 5 registers would be issued: 2, 2, 1.
3872 unsigned UOps = (NumRegs / 2);
3873 if (NumRegs % 2)
3874 ++UOps;
3875 return UOps;
3876 }
3877 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3878 unsigned UOps = (NumRegs / 2);
3879 // If there is an odd number of registers or if it's not 64-bit aligned,
3880 // then it takes an extra AGU (Address Generation Unit) cycle.
3881 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3882 (*MI.memoperands_begin())->getAlign() < Align(8))
3883 ++UOps;
3884 return UOps;
3885 }
3886 }
3887 }
3888 }
3889 llvm_unreachable("Didn't find the number of microops");
3890}
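
// A minimal sketch (illustrative name, not part of this file) of the
// unaligned-access-checking LDM/STM count above, used for Cortex-A9-like
// cores: register pairs dual-issue, with one extra micro-op when the register
// count is odd or the address is not 64-bit aligned.
static constexpr unsigned sketchA9LdStMultipleUOps(unsigned NumRegs,
                                                   bool Aligned64) {
  return NumRegs / 2 + ((NumRegs % 2 || !Aligned64) ? 1u : 0u);
}
static_assert(sketchA9LdStMultipleUOps(4, true) == 2, "4 regs, aligned");
static_assert(sketchA9LdStMultipleUOps(5, true) == 3, "odd count costs one");
static_assert(sketchA9LdStMultipleUOps(4, false) == 3, "misaligned costs one");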
3891
3892std::optional<unsigned>
3893ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3894 const MCInstrDesc &DefMCID, unsigned DefClass,
3895 unsigned DefIdx, unsigned DefAlign) const {
3896 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3897 if (RegNo <= 0)
3898 // Def is the address writeback.
3899 return ItinData->getOperandCycle(DefClass, DefIdx);
3900
3901 unsigned DefCycle;
3902 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3903 // (regno / 2) + (regno % 2) + 1
3904 DefCycle = RegNo / 2 + 1;
3905 if (RegNo % 2)
3906 ++DefCycle;
3907 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3908 DefCycle = RegNo;
3909 bool isSLoad = false;
3910
3911 switch (DefMCID.getOpcode()) {
3912 default: break;
3913 case ARM::VLDMSIA:
3914 case ARM::VLDMSIA_UPD:
3915 case ARM::VLDMSDB_UPD:
3916 isSLoad = true;
3917 break;
3918 }
3919
3920 // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3921 // then it takes an extra cycle.
3922 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3923 ++DefCycle;
3924 } else {
3925 // Assume the worst.
3926 DefCycle = RegNo + 2;
3927 }
3928
3929 return DefCycle;
3930}
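
// A minimal sketch (illustrative only) of the Cortex-A8/A7 VLDM rule above:
// the defined register becomes available at RegNo/2 + 1, one cycle later when
// RegNo is odd.
static constexpr unsigned sketchA8VLDMDefCycle(unsigned RegNo) {
  return RegNo / 2 + 1 + (RegNo % 2 ? 1u : 0u);
}
static_assert(sketchA8VLDMDefCycle(2) == 2, "even register number");
static_assert(sketchA8VLDMDefCycle(3) == 3, "odd register number adds a cycle");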
3931
3932std::optional<unsigned>
3933ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3934 const MCInstrDesc &DefMCID, unsigned DefClass,
3935 unsigned DefIdx, unsigned DefAlign) const {
3936 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3937 if (RegNo <= 0)
3938 // Def is the address writeback.
3939 return ItinData->getOperandCycle(DefClass, DefIdx);
3940
3941 unsigned DefCycle;
3942 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3943 // 4 registers would be issued: 1, 2, 1.
3944 // 5 registers would be issued: 1, 2, 2.
3945 DefCycle = RegNo / 2;
3946 if (DefCycle < 1)
3947 DefCycle = 1;
3948 // Result latency is issue cycle + 2: E2.
3949 DefCycle += 2;
3950 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3951 DefCycle = (RegNo / 2);
3952 // If there is an odd number of registers or if it's not 64-bit aligned,
3953 // then it takes an extra AGU (Address Generation Unit) cycle.
3954 if ((RegNo % 2) || DefAlign < 8)
3955 ++DefCycle;
3956 // Result latency is AGU cycles + 2.
3957 DefCycle += 2;
3958 } else {
3959 // Assume the worst.
3960 DefCycle = RegNo + 2;
3961 }
3962
3963 return DefCycle;
3964}
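
// A minimal sketch (illustrative only) of the Cortex-A8/A7 LDM rule above:
// result latency is the issue cycle of the register's pair (at least 1) plus
// two more stages until E2.
static constexpr unsigned sketchA8LDMDefCycle(unsigned RegNo) {
  return (RegNo / 2 < 1 ? 1u : RegNo / 2) + 2;
}
static_assert(sketchA8LDMDefCycle(1) == 3, "single register");
static_assert(sketchA8LDMDefCycle(4) == 4, "fourth register, issue cycle 2");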
3965
3966std::optional<unsigned>
3967ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3968 const MCInstrDesc &UseMCID, unsigned UseClass,
3969 unsigned UseIdx, unsigned UseAlign) const {
3970 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3971 if (RegNo <= 0)
3972 return ItinData->getOperandCycle(UseClass, UseIdx);
3973
3974 unsigned UseCycle;
3975 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3976 // (regno / 2) + (regno % 2) + 1
3977 UseCycle = RegNo / 2 + 1;
3978 if (RegNo % 2)
3979 ++UseCycle;
3980 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3981 UseCycle = RegNo;
3982 bool isSStore = false;
3983
3984 switch (UseMCID.getOpcode()) {
3985 default: break;
3986 case ARM::VSTMSIA:
3987 case ARM::VSTMSIA_UPD:
3988 case ARM::VSTMSDB_UPD:
3989 isSStore = true;
3990 break;
3991 }
3992
3993 // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3994 // then it takes an extra cycle.
3995 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3996 ++UseCycle;
3997 } else {
3998 // Assume the worst.
3999 UseCycle = RegNo + 2;
4000 }
4001
4002 return UseCycle;
4003}
4004
4005std::optional<unsigned>
4006ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
4007 const MCInstrDesc &UseMCID, unsigned UseClass,
4008 unsigned UseIdx, unsigned UseAlign) const {
4009 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
4010 if (RegNo <= 0)
4011 return ItinData->getOperandCycle(UseClass, UseIdx);
4012
4013 unsigned UseCycle;
4014 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
4015 UseCycle = RegNo / 2;
4016 if (UseCycle < 2)
4017 UseCycle = 2;
4018 // Read in E3.
4019 UseCycle += 2;
4020 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
4021 UseCycle = (RegNo / 2);
4022 // If there is an odd number of registers or if it's not 64-bit aligned,
4023 // then it takes an extra AGU (Address Generation Unit) cycle.
4024 if ((RegNo % 2) || UseAlign < 8)
4025 ++UseCycle;
4026 } else {
4027 // Assume the worst.
4028 UseCycle = 1;
4029 }
4030 return UseCycle;
4031}
4032
4033std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4034 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
4035 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
4036 unsigned UseIdx, unsigned UseAlign) const {
4037 unsigned DefClass = DefMCID.getSchedClass();
4038 unsigned UseClass = UseMCID.getSchedClass();
4039
4040 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
4041 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4042
4043 // This may be a def / use of a variable_ops instruction; the operand
4044 // latency might be determinable dynamically. Let the target try to
4045 // figure it out.
4046 std::optional<unsigned> DefCycle;
4047 bool LdmBypass = false;
4048 switch (DefMCID.getOpcode()) {
4049 default:
4050 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4051 break;
4052
4053 case ARM::VLDMDIA:
4054 case ARM::VLDMDIA_UPD:
4055 case ARM::VLDMDDB_UPD:
4056 case ARM::VLDMSIA:
4057 case ARM::VLDMSIA_UPD:
4058 case ARM::VLDMSDB_UPD:
4059 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4060 break;
4061
4062 case ARM::LDMIA_RET:
4063 case ARM::LDMIA:
4064 case ARM::LDMDA:
4065 case ARM::LDMDB:
4066 case ARM::LDMIB:
4067 case ARM::LDMIA_UPD:
4068 case ARM::LDMDA_UPD:
4069 case ARM::LDMDB_UPD:
4070 case ARM::LDMIB_UPD:
4071 case ARM::tLDMIA:
4072 case ARM::tLDMIA_UPD:
4073 case ARM::tPUSH:
4074 case ARM::t2LDMIA_RET:
4075 case ARM::t2LDMIA:
4076 case ARM::t2LDMDB:
4077 case ARM::t2LDMIA_UPD:
4078 case ARM::t2LDMDB_UPD:
4079 LdmBypass = true;
4080 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4081 break;
4082 }
4083
4084 if (!DefCycle)
4085 // We can't seem to determine the result latency of the def; assume it's 2.
4086 DefCycle = 2;
4087
4088 std::optional<unsigned> UseCycle;
4089 switch (UseMCID.getOpcode()) {
4090 default:
4091 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4092 break;
4093
4094 case ARM::VSTMDIA:
4095 case ARM::VSTMDIA_UPD:
4096 case ARM::VSTMDDB_UPD:
4097 case ARM::VSTMSIA:
4098 case ARM::VSTMSIA_UPD:
4099 case ARM::VSTMSDB_UPD:
4100 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4101 break;
4102
4103 case ARM::STMIA:
4104 case ARM::STMDA:
4105 case ARM::STMDB:
4106 case ARM::STMIB:
4107 case ARM::STMIA_UPD:
4108 case ARM::STMDA_UPD:
4109 case ARM::STMDB_UPD:
4110 case ARM::STMIB_UPD:
4111 case ARM::tSTMIA_UPD:
4112 case ARM::tPOP_RET:
4113 case ARM::tPOP:
4114 case ARM::t2STMIA:
4115 case ARM::t2STMDB:
4116 case ARM::t2STMIA_UPD:
4117 case ARM::t2STMDB_UPD:
4118 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4119 break;
4120 }
4121
4122 if (!UseCycle)
4123 // Assume it's read in the first stage.
4124 UseCycle = 1;
4125
4126 if (UseCycle > *DefCycle + 1)
4127 return std::nullopt;
4128
4129 UseCycle = *DefCycle - *UseCycle + 1;
4130 if (UseCycle > 0u) {
4131 if (LdmBypass) {
4132 // It's a variable_ops instruction so we can't use DefIdx here. Just use
4133 // first def operand.
4134 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4135 UseClass, UseIdx))
4136 UseCycle = *UseCycle - 1;
4137 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4138 UseClass, UseIdx)) {
4139 UseCycle = *UseCycle - 1;
4140 }
4141 }
4142
4143 return UseCycle;
4144}
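
// A minimal sketch (illustrative only) of the final hazard math above, for the
// in-range case where the use is not scheduled past the def's availability:
// the latency is DefCycle - UseCycle + 1, reduced by one when the itinerary
// records a pipeline forward between the two operands.
static constexpr unsigned sketchLdStMultipleLatency(unsigned DefCycle,
                                                    unsigned UseCycle,
                                                    bool Forwarded) {
  return DefCycle - UseCycle + 1 -
         ((Forwarded && DefCycle + 1 > UseCycle) ? 1u : 0u);
}
static_assert(sketchLdStMultipleLatency(4, 1, false) == 4, "read at stage 1");
static_assert(sketchLdStMultipleLatency(4, 1, true) == 3, "bypass saves one");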
4145
4146static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4147 const MachineInstr *MI, unsigned Reg,
4148 unsigned &DefIdx, unsigned &Dist) {
4149 Dist = 0;
4150
4152 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4153 assert(II->isInsideBundle() && "Empty bundle?");
4154
4155 int Idx = -1;
4156 while (II->isInsideBundle()) {
4157 Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
4158 if (Idx != -1)
4159 break;
4160 --II;
4161 ++Dist;
4162 }
4163
4164 assert(Idx != -1 && "Cannot find bundled definition!");
4165 DefIdx = Idx;
4166 return &*II;
4167}
4168
4169static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4170 const MachineInstr &MI, unsigned Reg,
4171 unsigned &UseIdx, unsigned &Dist) {
4172 Dist = 0;
4173
4175 assert(II->isInsideBundle() && "Empty bundle?");
4176 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4177
4178 // FIXME: This doesn't properly handle multiple uses.
4179 int Idx = -1;
4180 while (II != E && II->isInsideBundle()) {
4181 Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
4182 if (Idx != -1)
4183 break;
4184 if (II->getOpcode() != ARM::t2IT)
4185 ++Dist;
4186 ++II;
4187 }
4188
4189 if (Idx == -1) {
4190 Dist = 0;
4191 return nullptr;
4192 }
4193
4194 UseIdx = Idx;
4195 return &*II;
4196}
4197
4198/// Return the number of cycles to add to (or subtract from) the static
4199/// itinerary based on the def opcode and alignment. The caller will ensure that
4200/// adjusted latency is at least one cycle.
4201static int adjustDefLatency(const ARMSubtarget &Subtarget,
4202 const MachineInstr &DefMI,
4203 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4204 int Adjust = 0;
4205 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4206 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4207 // variants are one cycle cheaper.
4208 switch (DefMCID.getOpcode()) {
4209 default: break;
4210 case ARM::LDRrs:
4211 case ARM::LDRBrs: {
4212 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4213 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4214 if (ShImm == 0 ||
4215 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4216 --Adjust;
4217 break;
4218 }
4219 case ARM::t2LDRs:
4220 case ARM::t2LDRBs:
4221 case ARM::t2LDRHs:
4222 case ARM::t2LDRSHs: {
4223 // Thumb2 mode: lsl only.
4224 unsigned ShAmt = DefMI.getOperand(3).getImm();
4225 if (ShAmt == 0 || ShAmt == 2)
4226 --Adjust;
4227 break;
4228 }
4229 }
4230 } else if (Subtarget.isSwift()) {
4231 // FIXME: Properly handle all of the latency adjustments for address
4232 // writeback.
4233 switch (DefMCID.getOpcode()) {
4234 default: break;
4235 case ARM::LDRrs:
4236 case ARM::LDRBrs: {
4237 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4238 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4239 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4240 if (!isSub &&
4241 (ShImm == 0 ||
4242 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4243 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4244 Adjust -= 2;
4245 else if (!isSub &&
4246 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4247 --Adjust;
4248 break;
4249 }
4250 case ARM::t2LDRs:
4251 case ARM::t2LDRBs:
4252 case ARM::t2LDRHs:
4253 case ARM::t2LDRSHs: {
4254 // Thumb2 mode: lsl only.
4255 unsigned ShAmt = DefMI.getOperand(3).getImm();
4256 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4257 Adjust -= 2;
4258 break;
4259 }
4260 }
4261 }
4262
4263 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4264 switch (DefMCID.getOpcode()) {
4265 default: break;
4266 case ARM::VLD1q8:
4267 case ARM::VLD1q16:
4268 case ARM::VLD1q32:
4269 case ARM::VLD1q64:
4270 case ARM::VLD1q8wb_fixed:
4271 case ARM::VLD1q16wb_fixed:
4272 case ARM::VLD1q32wb_fixed:
4273 case ARM::VLD1q64wb_fixed:
4274 case ARM::VLD1q8wb_register:
4275 case ARM::VLD1q16wb_register:
4276 case ARM::VLD1q32wb_register:
4277 case ARM::VLD1q64wb_register:
4278 case ARM::VLD2d8:
4279 case ARM::VLD2d16:
4280 case ARM::VLD2d32:
4281 case ARM::VLD2q8:
4282 case ARM::VLD2q16:
4283 case ARM::VLD2q32:
4284 case ARM::VLD2d8wb_fixed:
4285 case ARM::VLD2d16wb_fixed:
4286 case ARM::VLD2d32wb_fixed:
4287 case ARM::VLD2q8wb_fixed:
4288 case ARM::VLD2q16wb_fixed:
4289 case ARM::VLD2q32wb_fixed:
4290 case ARM::VLD2d8wb_register:
4291 case ARM::VLD2d16wb_register:
4292 case ARM::VLD2d32wb_register:
4293 case ARM::VLD2q8wb_register:
4294 case ARM::VLD2q16wb_register:
4295 case ARM::VLD2q32wb_register:
4296 case ARM::VLD3d8:
4297 case ARM::VLD3d16:
4298 case ARM::VLD3d32:
4299 case ARM::VLD1d64T:
4300 case ARM::VLD3d8_UPD:
4301 case ARM::VLD3d16_UPD:
4302 case ARM::VLD3d32_UPD:
4303 case ARM::VLD1d64Twb_fixed:
4304 case ARM::VLD1d64Twb_register:
4305 case ARM::VLD3q8_UPD:
4306 case ARM::VLD3q16_UPD:
4307 case ARM::VLD3q32_UPD:
4308 case ARM::VLD4d8:
4309 case ARM::VLD4d16:
4310 case ARM::VLD4d32:
4311 case ARM::VLD1d64Q:
4312 case ARM::VLD4d8_UPD:
4313 case ARM::VLD4d16_UPD:
4314 case ARM::VLD4d32_UPD:
4315 case ARM::VLD1d64Qwb_fixed:
4316 case ARM::VLD1d64Qwb_register:
4317 case ARM::VLD4q8_UPD:
4318 case ARM::VLD4q16_UPD:
4319 case ARM::VLD4q32_UPD:
4320 case ARM::VLD1DUPq8:
4321 case ARM::VLD1DUPq16:
4322 case ARM::VLD1DUPq32:
4323 case ARM::VLD1DUPq8wb_fixed:
4324 case ARM::VLD1DUPq16wb_fixed:
4325 case ARM::VLD1DUPq32wb_fixed:
4326 case ARM::VLD1DUPq8wb_register:
4327 case ARM::VLD1DUPq16wb_register:
4328 case ARM::VLD1DUPq32wb_register:
4329 case ARM::VLD2DUPd8:
4330 case ARM::VLD2DUPd16:
4331 case ARM::VLD2DUPd32:
4332 case ARM::VLD2DUPd8wb_fixed:
4333 case ARM::VLD2DUPd16wb_fixed:
4334 case ARM::VLD2DUPd32wb_fixed:
4335 case ARM::VLD2DUPd8wb_register:
4336 case ARM::VLD2DUPd16wb_register:
4337 case ARM::VLD2DUPd32wb_register:
4338 case ARM::VLD4DUPd8:
4339 case ARM::VLD4DUPd16:
4340 case ARM::VLD4DUPd32:
4341 case ARM::VLD4DUPd8_UPD:
4342 case ARM::VLD4DUPd16_UPD:
4343 case ARM::VLD4DUPd32_UPD:
4344 case ARM::VLD1LNd8:
4345 case ARM::VLD1LNd16:
4346 case ARM::VLD1LNd32:
4347 case ARM::VLD1LNd8_UPD:
4348 case ARM::VLD1LNd16_UPD:
4349 case ARM::VLD1LNd32_UPD:
4350 case ARM::VLD2LNd8:
4351 case ARM::VLD2LNd16:
4352 case ARM::VLD2LNd32:
4353 case ARM::VLD2LNq16:
4354 case ARM::VLD2LNq32:
4355 case ARM::VLD2LNd8_UPD:
4356 case ARM::VLD2LNd16_UPD:
4357 case ARM::VLD2LNd32_UPD:
4358 case ARM::VLD2LNq16_UPD:
4359 case ARM::VLD2LNq32_UPD:
4360 case ARM::VLD4LNd8:
4361 case ARM::VLD4LNd16:
4362 case ARM::VLD4LNd32:
4363 case ARM::VLD4LNq16:
4364 case ARM::VLD4LNq32:
4365 case ARM::VLD4LNd8_UPD:
4366 case ARM::VLD4LNd16_UPD:
4367 case ARM::VLD4LNd32_UPD:
4368 case ARM::VLD4LNq16_UPD:
4369 case ARM::VLD4LNq32_UPD:
4370 // If the address is not 64-bit aligned, the latencies of these
4371 // instructions increase by one.
4372 ++Adjust;
4373 break;
4374 }
4375 }
4376 return Adjust;
4377}
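
// A minimal sketch (illustrative only) of how the callers below fold this
// adjustment in: the adjusted value is used only while it stays positive,
// otherwise the unadjusted itinerary latency is kept.
static constexpr unsigned sketchApplyAdjust(unsigned Latency, int Adjust) {
  return (Adjust >= 0 || (int)Latency > -Adjust)
             ? (unsigned)((int)Latency + Adjust)
             : Latency;
}
static_assert(sketchApplyAdjust(3, -2) == 1, "cheap shifter variant");
static_assert(sketchApplyAdjust(1, -2) == 1, "never driven below the base value");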
4378
4379std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4380 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4381 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4382 // No operand latency. The caller may fall back to getInstrLatency.
4383 if (!ItinData || ItinData->isEmpty())
4384 return std::nullopt;
4385
4386 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4387 Register Reg = DefMO.getReg();
4388
4389 const MachineInstr *ResolvedDefMI = &DefMI;
4390 unsigned DefAdj = 0;
4391 if (DefMI.isBundle())
4392 ResolvedDefMI =
4393 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4394 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4395 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4396 return 1;
4397 }
4398
4399 const MachineInstr *ResolvedUseMI = &UseMI;
4400 unsigned UseAdj = 0;
4401 if (UseMI.isBundle()) {
4402 ResolvedUseMI =
4403 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4404 if (!ResolvedUseMI)
4405 return std::nullopt;
4406 }
4407
4408 return getOperandLatencyImpl(
4409 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4410 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4411}
4412
4413std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4414 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4415 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4416 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4417 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4418 if (Reg == ARM::CPSR) {
4419 if (DefMI.getOpcode() == ARM::FMSTAT) {
4420 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4421 return Subtarget.isLikeA9() ? 1 : 20;
4422 }
4423
4424 // CPSR set and branch can be paired in the same cycle.
4425 if (UseMI.isBranch())
4426 return 0;
4427
4428 // Otherwise it takes the instruction latency (generally one).
4429 unsigned Latency = getInstrLatency(ItinData, DefMI);
4430
4431 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4432 // its uses. Instructions which are otherwise scheduled between them may
4433 // incur a code size penalty (not able to use the CPSR setting 16-bit
4434 // instructions).
4435 if (Latency > 0 && Subtarget.isThumb2()) {
4436 const MachineFunction *MF = DefMI.getParent()->getParent();
4437 // FIXME: Use Function::hasOptSize().
4438 if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4439 --Latency;
4440 }
4441 return Latency;
4442 }
4443
4444 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4445 return std::nullopt;
4446
4447 unsigned DefAlign = DefMI.hasOneMemOperand()
4448 ? (*DefMI.memoperands_begin())->getAlign().value()
4449 : 0;
4450 unsigned UseAlign = UseMI.hasOneMemOperand()
4451 ? (*UseMI.memoperands_begin())->getAlign().value()
4452 : 0;
4453
4454 // Get the itinerary's latency if possible, and handle variable_ops.
4455 std::optional<unsigned> Latency = getOperandLatency(
4456 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4457 // Unable to find operand latency. The caller may resort to getInstrLatency.
4458 if (!Latency)
4459 return std::nullopt;
4460
4461 // Adjust for IT block position.
4462 int Adj = DefAdj + UseAdj;
4463
4464 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4465 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4466 if (Adj >= 0 || (int)*Latency > -Adj) {
4467 return *Latency + Adj;
4468 }
4469 // Return the itinerary latency, which may be zero but not less than zero.
4470 return Latency;
4471}
4472
4473std::optional<unsigned>
4474ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4475 SDNode *DefNode, unsigned DefIdx,
4476 SDNode *UseNode, unsigned UseIdx) const {
4477 if (!DefNode->isMachineOpcode())
4478 return 1;
4479
4480 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4481
4482 if (isZeroCost(DefMCID.Opcode))
4483 return 0;
4484
4485 if (!ItinData || ItinData->isEmpty())
4486 return DefMCID.mayLoad() ? 3 : 1;
4487
4488 if (!UseNode->isMachineOpcode()) {
4489 std::optional<unsigned> Latency =
4490 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4491 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4492 int Threshold = 1 + Adj;
4493 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4494 }
4495
4496 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4497 auto *DefMN = cast<MachineSDNode>(DefNode);
4498 unsigned DefAlign = !DefMN->memoperands_empty()
4499 ? (*DefMN->memoperands_begin())->getAlign().value()
4500 : 0;
4501 auto *UseMN = cast<MachineSDNode>(UseNode);
4502 unsigned UseAlign = !UseMN->memoperands_empty()
4503 ? (*UseMN->memoperands_begin())->getAlign().value()
4504 : 0;
4505 std::optional<unsigned> Latency = getOperandLatency(
4506 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4507 if (!Latency)
4508 return std::nullopt;
4509
4510 if (Latency > 1U &&
4511 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4512 Subtarget.isCortexA7())) {
4513 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4514 // variants are one cycle cheaper.
4515 switch (DefMCID.getOpcode()) {
4516 default: break;
4517 case ARM::LDRrs:
4518 case ARM::LDRBrs: {
4519 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4520 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4521 if (ShImm == 0 ||
4522 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4523 Latency = *Latency - 1;
4524 break;
4525 }
4526 case ARM::t2LDRs:
4527 case ARM::t2LDRBs:
4528 case ARM::t2LDRHs:
4529 case ARM::t2LDRSHs: {
4530 // Thumb2 mode: lsl only.
4531 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4532 if (ShAmt == 0 || ShAmt == 2)
4533 Latency = *Latency - 1;
4534 break;
4535 }
4536 }
4537 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4538 // FIXME: Properly handle all of the latency adjustments for address
4539 // writeback.
4540 switch (DefMCID.getOpcode()) {
4541 default: break;
4542 case ARM::LDRrs:
4543 case ARM::LDRBrs: {
4544 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4545 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4546 if (ShImm == 0 ||
4547 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4548 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4549 Latency = *Latency - 2;
4550 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4551 Latency = *Latency - 1;
4552 break;
4553 }
4554 case ARM::t2LDRs:
4555 case ARM::t2LDRBs:
4556 case ARM::t2LDRHs:
4557 case ARM::t2LDRSHs:
4558 // Thumb2 mode: lsl 0-3 only.
4559 Latency = *Latency - 2;
4560 break;
4561 }
4562 }
4563
4564 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4565 switch (DefMCID.getOpcode()) {
4566 default: break;
4567 case ARM::VLD1q8:
4568 case ARM::VLD1q16:
4569 case ARM::VLD1q32:
4570 case ARM::VLD1q64:
4571 case ARM::VLD1q8wb_register:
4572 case ARM::VLD1q16wb_register:
4573 case ARM::VLD1q32wb_register:
4574 case ARM::VLD1q64wb_register:
4575 case ARM::VLD1q8wb_fixed:
4576 case ARM::VLD1q16wb_fixed:
4577 case ARM::VLD1q32wb_fixed:
4578 case ARM::VLD1q64wb_fixed:
4579 case ARM::VLD2d8:
4580 case ARM::VLD2d16:
4581 case ARM::VLD2d32:
4582 case ARM::VLD2q8Pseudo:
4583 case ARM::VLD2q16Pseudo:
4584 case ARM::VLD2q32Pseudo:
4585 case ARM::VLD2d8wb_fixed:
4586 case ARM::VLD2d16wb_fixed:
4587 case ARM::VLD2d32wb_fixed:
4588 case ARM::VLD2q8PseudoWB_fixed:
4589 case ARM::VLD2q16PseudoWB_fixed:
4590 case ARM::VLD2q32PseudoWB_fixed:
4591 case ARM::VLD2d8wb_register:
4592 case ARM::VLD2d16wb_register:
4593 case ARM::VLD2d32wb_register:
4594 case ARM::VLD2q8PseudoWB_register:
4595 case ARM::VLD2q16PseudoWB_register:
4596 case ARM::VLD2q32PseudoWB_register:
4597 case ARM::VLD3d8Pseudo:
4598 case ARM::VLD3d16Pseudo:
4599 case ARM::VLD3d32Pseudo:
4600 case ARM::VLD1d8TPseudo:
4601 case ARM::VLD1d16TPseudo:
4602 case ARM::VLD1d32TPseudo:
4603 case ARM::VLD1d64TPseudo:
4604 case ARM::VLD1d64TPseudoWB_fixed:
4605 case ARM::VLD1d64TPseudoWB_register:
4606 case ARM::VLD3d8Pseudo_UPD:
4607 case ARM::VLD3d16Pseudo_UPD:
4608 case ARM::VLD3d32Pseudo_UPD:
4609 case ARM::VLD3q8Pseudo_UPD:
4610 case ARM::VLD3q16Pseudo_UPD:
4611 case ARM::VLD3q32Pseudo_UPD:
4612 case ARM::VLD3q8oddPseudo:
4613 case ARM::VLD3q16oddPseudo:
4614 case ARM::VLD3q32oddPseudo:
4615 case ARM::VLD3q8oddPseudo_UPD:
4616 case ARM::VLD3q16oddPseudo_UPD:
4617 case ARM::VLD3q32oddPseudo_UPD:
4618 case ARM::VLD4d8Pseudo:
4619 case ARM::VLD4d16Pseudo:
4620 case ARM::VLD4d32Pseudo:
4621 case ARM::VLD1d8QPseudo:
4622 case ARM::VLD1d16QPseudo:
4623 case ARM::VLD1d32QPseudo:
4624 case ARM::VLD1d64QPseudo:
4625 case ARM::VLD1d64QPseudoWB_fixed:
4626 case ARM::VLD1d64QPseudoWB_register:
4627 case ARM::VLD1q8HighQPseudo:
4628 case ARM::VLD1q8LowQPseudo_UPD:
4629 case ARM::VLD1q8HighTPseudo:
4630 case ARM::VLD1q8LowTPseudo_UPD:
4631 case ARM::VLD1q16HighQPseudo:
4632 case ARM::VLD1q16LowQPseudo_UPD:
4633 case ARM::VLD1q16HighTPseudo:
4634 case ARM::VLD1q16LowTPseudo_UPD:
4635 case ARM::VLD1q32HighQPseudo:
4636 case ARM::VLD1q32LowQPseudo_UPD:
4637 case ARM::VLD1q32HighTPseudo:
4638 case ARM::VLD1q32LowTPseudo_UPD:
4639 case ARM::VLD1q64HighQPseudo:
4640 case ARM::VLD1q64LowQPseudo_UPD:
4641 case ARM::VLD1q64HighTPseudo:
4642 case ARM::VLD1q64LowTPseudo_UPD:
4643 case ARM::VLD4d8Pseudo_UPD:
4644 case ARM::VLD4d16Pseudo_UPD:
4645 case ARM::VLD4d32Pseudo_UPD:
4646 case ARM::VLD4q8Pseudo_UPD:
4647 case ARM::VLD4q16Pseudo_UPD:
4648 case ARM::VLD4q32Pseudo_UPD:
4649 case ARM::VLD4q8oddPseudo:
4650 case ARM::VLD4q16oddPseudo:
4651 case ARM::VLD4q32oddPseudo:
4652 case ARM::VLD4q8oddPseudo_UPD:
4653 case ARM::VLD4q16oddPseudo_UPD:
4654 case ARM::VLD4q32oddPseudo_UPD:
4655 case ARM::VLD1DUPq8:
4656 case ARM::VLD1DUPq16:
4657 case ARM::VLD1DUPq32:
4658 case ARM::VLD1DUPq8wb_fixed:
4659 case ARM::VLD1DUPq16wb_fixed:
4660 case ARM::VLD1DUPq32wb_fixed:
4661 case ARM::VLD1DUPq8wb_register:
4662 case ARM::VLD1DUPq16wb_register:
4663 case ARM::VLD1DUPq32wb_register:
4664 case ARM::VLD2DUPd8:
4665 case ARM::VLD2DUPd16:
4666 case ARM::VLD2DUPd32:
4667 case ARM::VLD2DUPd8wb_fixed:
4668 case ARM::VLD2DUPd16wb_fixed:
4669 case ARM::VLD2DUPd32wb_fixed:
4670 case ARM::VLD2DUPd8wb_register:
4671 case ARM::VLD2DUPd16wb_register:
4672 case ARM::VLD2DUPd32wb_register:
4673 case ARM::VLD2DUPq8EvenPseudo:
4674 case ARM::VLD2DUPq8OddPseudo:
4675 case ARM::VLD2DUPq16EvenPseudo:
4676 case ARM::VLD2DUPq16OddPseudo:
4677 case ARM::VLD2DUPq32EvenPseudo:
4678 case ARM::VLD2DUPq32OddPseudo:
4679 case ARM::VLD3DUPq8EvenPseudo:
4680 case ARM::VLD3DUPq8OddPseudo:
4681 case ARM::VLD3DUPq16EvenPseudo:
4682 case ARM::VLD3DUPq16OddPseudo:
4683 case ARM::VLD3DUPq32EvenPseudo:
4684 case ARM::VLD3DUPq32OddPseudo:
4685 case ARM::VLD4DUPd8Pseudo:
4686 case ARM::VLD4DUPd16Pseudo:
4687 case ARM::VLD4DUPd32Pseudo:
4688 case ARM::VLD4DUPd8Pseudo_UPD:
4689 case ARM::VLD4DUPd16Pseudo_UPD:
4690 case ARM::VLD4DUPd32Pseudo_UPD:
4691 case ARM::VLD4DUPq8EvenPseudo:
4692 case ARM::VLD4DUPq8OddPseudo:
4693 case ARM::VLD4DUPq16EvenPseudo:
4694 case ARM::VLD4DUPq16OddPseudo:
4695 case ARM::VLD4DUPq32EvenPseudo:
4696 case ARM::VLD4DUPq32OddPseudo:
4697 case ARM::VLD1LNq8Pseudo:
4698 case ARM::VLD1LNq16Pseudo:
4699 case ARM::VLD1LNq32Pseudo:
4700 case ARM::VLD1LNq8Pseudo_UPD:
4701 case ARM::VLD1LNq16Pseudo_UPD:
4702 case ARM::VLD1LNq32Pseudo_UPD:
4703 case ARM::VLD2LNd8Pseudo:
4704 case ARM::VLD2LNd16Pseudo:
4705 case ARM::VLD2LNd32Pseudo:
4706 case ARM::VLD2LNq16Pseudo:
4707 case ARM::VLD2LNq32Pseudo:
4708 case ARM::VLD2LNd8Pseudo_UPD:
4709 case ARM::VLD2LNd16Pseudo_UPD:
4710 case ARM::VLD2LNd32Pseudo_UPD:
4711 case ARM::VLD2LNq16Pseudo_UPD:
4712 case ARM::VLD2LNq32Pseudo_UPD:
4713 case ARM::VLD4LNd8Pseudo:
4714 case ARM::VLD4LNd16Pseudo:
4715 case ARM::VLD4LNd32Pseudo:
4716 case ARM::VLD4LNq16Pseudo:
4717 case ARM::VLD4LNq32Pseudo:
4718 case ARM::VLD4LNd8Pseudo_UPD:
4719 case ARM::VLD4LNd16Pseudo_UPD:
4720 case ARM::VLD4LNd32Pseudo_UPD:
4721 case ARM::VLD4LNq16Pseudo_UPD:
4722 case ARM::VLD4LNq32Pseudo_UPD:
4723 // If the address is not 64-bit aligned, the latencies of these
4724 // instructions increase by one.
4725 Latency = *Latency + 1;
4726 break;
4727 }
4728
4729 return Latency;
4730}
4731
4732unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4733 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4734 MI.isImplicitDef())
4735 return 0;
4736
4737 if (MI.isBundle())
4738 return 0;
4739
4740 const MCInstrDesc &MCID = MI.getDesc();
4741
4742 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4743 !Subtarget.cheapPredicableCPSRDef())) {
4744 // When predicated, CPSR is an additional source operand for CPSR updating
4745 // instructions, this apparently increases their latencies.
4746 return 1;
4747 }
4748 return 0;
4749}
4750
4751unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4752 const MachineInstr &MI,
4753 unsigned *PredCost) const {
4754 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4755 MI.isImplicitDef())
4756 return 1;
4757
4758 // An instruction scheduler typically runs on unbundled instructions, however
4759 // other passes may query the latency of a bundled instruction.
4760 if (MI.isBundle()) {
4761 unsigned Latency = 0;
4762 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4763 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4764 while (++I != E && I->isInsideBundle()) {
4765 if (I->getOpcode() != ARM::t2IT)
4766 Latency += getInstrLatency(ItinData, *I, PredCost);
4767 }
4768 return Latency;
4769 }
4770
4771 const MCInstrDesc &MCID = MI.getDesc();
4772 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4773 !Subtarget.cheapPredicableCPSRDef()))) {
4774 // When predicated, CPSR is an additional source operand for CPSR updating
4775 // instructions, this apparently increases their latencies.
4776 *PredCost = 1;
4777 }
4778 // Be sure to call getStageLatency for an empty itinerary in case it has a
4779 // valid MinLatency property.
4780 if (!ItinData)
4781 return MI.mayLoad() ? 3 : 1;
4782
4783 unsigned Class = MCID.getSchedClass();
4784
4785 // For instructions with variable uops, use uops as latency.
4786 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4787 return getNumMicroOps(ItinData, MI);
4788
4789 // For the common case, fall back on the itinerary's latency.
4790 unsigned Latency = ItinData->getStageLatency(Class);
4791
4792 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4793 unsigned DefAlign =
4794 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4795 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4796 if (Adj >= 0 || (int)Latency > -Adj) {
4797 return Latency + Adj;
4798 }
4799 return Latency;
4800}
4801
4802unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4803 SDNode *Node) const {
4804 if (!Node->isMachineOpcode())
4805 return 1;
4806
4807 if (!ItinData || ItinData->isEmpty())
4808 return 1;
4809
4810 unsigned Opcode = Node->getMachineOpcode();
4811 switch (Opcode) {
4812 default:
4813 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4814 case ARM::VLDMQIA:
4815 case ARM::VSTMQIA:
4816 return 2;
4817 }
4818}
4819
4820bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4821 const MachineRegisterInfo *MRI,
4822 const MachineInstr &DefMI,
4823 unsigned DefIdx,
4824 const MachineInstr &UseMI,
4825 unsigned UseIdx) const {
4826 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4827 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4828 if (Subtarget.nonpipelinedVFP() &&
4829 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4830 return true;
4831
4832 // Hoist VFP / NEON instructions with 4 or higher latency.
4833 unsigned Latency =
4834 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4835 if (Latency <= 3)
4836 return false;
4837 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4838 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4839}
4840
4841bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4842 const MachineInstr &DefMI,
4843 unsigned DefIdx) const {
4844 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4845 if (!ItinData || ItinData->isEmpty())
4846 return false;
4847
4848 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4849 if (DDomain == ARMII::DomainGeneral) {
4850 unsigned DefClass = DefMI.getDesc().getSchedClass();
4851 std::optional<unsigned> DefCycle =
4852 ItinData->getOperandCycle(DefClass, DefIdx);
4853 return DefCycle && DefCycle <= 2U;
4854 }
4855 return false;
4856}
4857
4858bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4859 StringRef &ErrInfo) const {
4860 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4861 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4862 return false;
4863 }
4864 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4865 // Make sure we don't generate a lo-lo mov that isn't supported.
4866 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4867 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4868 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4869 return false;
4870 }
4871 }
4872 if (MI.getOpcode() == ARM::tPUSH ||
4873 MI.getOpcode() == ARM::tPOP ||
4874 MI.getOpcode() == ARM::tPOP_RET) {
4875 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4876 if (MO.isImplicit() || !MO.isReg())
4877 continue;
4878 Register Reg = MO.getReg();
4879 if (Reg < ARM::R0 || Reg > ARM::R7) {
4880 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4881 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4882 ErrInfo = "Unsupported register in Thumb1 push/pop";
4883 return false;
4884 }
4885 }
4886 }
4887 }
4888 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4889 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4890 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4891 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4892 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4893 return false;
4894 }
4895 }
4896
4897 // Check the address model by taking the first Imm operand and checking it is
4898 // legal for that addressing mode.
4899 ARMII::AddrMode AddrMode =
4900 (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4901 switch (AddrMode) {
4902 default:
4903 break;
4904 case ARMII::AddrModeT2_i7:
4905 case ARMII::AddrModeT2_i7s2:
4906 case ARMII::AddrModeT2_i7s4:
4907 case ARMII::AddrModeT2_i8:
4908 case ARMII::AddrModeT2_i8pos:
4909 case ARMII::AddrModeT2_i8neg:
4910 case ARMII::AddrModeT2_i8s4:
4911 case ARMII::AddrModeT2_i12: {
4912 uint32_t Imm = 0;
4913 for (auto Op : MI.operands()) {
4914 if (Op.isImm()) {
4915 Imm = Op.getImm();
4916 break;
4917 }
4918 }
4919 if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4920 ErrInfo = "Incorrect AddrMode Imm for instruction";
4921 return false;
4922 }
4923 break;
4924 }
4925 }
4926 return true;
4927}
4928
4929void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4930 unsigned LoadImmOpc,
4931 unsigned LoadOpc) const {
4932 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4933 "ROPI/RWPI not currently supported with stack guard");
4934
4935 MachineBasicBlock &MBB = *MI->getParent();
4936 DebugLoc DL = MI->getDebugLoc();
4937 Register Reg = MI->getOperand(0).getReg();
4939 unsigned int Offset = 0;
4940
4941 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4942 assert(!Subtarget.isReadTPSoft() &&
4943 "TLS stack protector requires hardware TLS register");
4944
4945 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4946 .addImm(15)
4947 .addImm(0)
4948 .addImm(13)
4949 .addImm(0)
4950 .addImm(3)
4951 .add(predOps(ARMCC::AL));
4952
4953 Module &M = *MBB.getParent()->getFunction().getParent();
4954 Offset = M.getStackProtectorGuardOffset();
4955 if (Offset & ~0xfffU) {
4956 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4957 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4958 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4959 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4960 BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4961 .addReg(Reg, RegState::Kill)
4962 .addImm(Offset & ~0xfffU)
4964 .addReg(0);
4965 Offset &= 0xfffU;
4966 }
4967 } else {
4968 const GlobalValue *GV =
4969 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4970 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4971
4972 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4973 if (Subtarget.isTargetMachO()) {
4974 TargetFlags |= ARMII::MO_NONLAZY;
4975 } else if (Subtarget.isTargetCOFF()) {
4976 if (GV->hasDLLImportStorageClass())
4977 TargetFlags |= ARMII::MO_DLLIMPORT;
4978 else if (IsIndirect)
4979 TargetFlags |= ARMII::MO_COFFSTUB;
4980 } else if (IsIndirect) {
4981 TargetFlags |= ARMII::MO_GOT;
4982 }
4983
4984 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4985 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4986 auto APSREncoding =
4987 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4988 BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4989 .addImm(APSREncoding)
4991 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4992 .addGlobalAddress(GV, 0, TargetFlags);
4993 BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4994 .addImm(APSREncoding)
4995 .addReg(CPSRSaveReg, RegState::Kill)
4997 } else {
4998 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4999 .addGlobalAddress(GV, 0, TargetFlags);
5000 }
5001
5002 if (IsIndirect) {
5003 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
5004 MIB.addReg(Reg, RegState::Kill).addImm(0);
5005 auto Flags = MachineMemOperand::MOLoad |
5009 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
5011 }
5012 }
5013
5014 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
5015 MIB.addReg(Reg, RegState::Kill)
5016 .addImm(Offset)
5017 .cloneMemRefs(*MI)
5019}
5020
5021bool
5022ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
5023 unsigned &AddSubOpc,
5024 bool &NegAcc, bool &HasLane) const {
5025 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
5026 if (I == MLxEntryMap.end())
5027 return false;
5028
5029 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
5030 MulOpc = Entry.MulOpc;
5031 AddSubOpc = Entry.AddSubOpc;
5032 NegAcc = Entry.NegAcc;
5033 HasLane = Entry.HasLane;
5034 return true;
5035}
5036
5037//===----------------------------------------------------------------------===//
5038// Execution domains.
5039//===----------------------------------------------------------------------===//
5040//
5041// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
5042// and some can go down both. The vmov instructions go down the VFP pipeline,
5043// but they can be changed to vorr equivalents that are executed by the NEON
5044// pipeline.
5045//
5046// We use the following execution domain numbering:
5047//
5048enum ARMExeDomain {
5049 ExeGeneric = 0,
5050 ExeVFP = 1,
5051 ExeNEON = 2
5052};
5053
5054//
5055// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
5056//
5057std::pair<uint16_t, uint16_t>
5058ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
5059 // If we don't have access to NEON instructions then we won't be able
5060 // to swizzle anything to the NEON domain. Check to make sure.
5061 if (Subtarget.hasNEON()) {
5062 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
5063 // if they are not predicated.
5064 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
5065 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5066
5067 // CortexA9 is particularly picky about mixing the two and wants these
5068 // converted.
5069 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
5070 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
5071 MI.getOpcode() == ARM::VMOVS))
5072 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5073 }
5074 // No other instructions can be swizzled, so just determine their domain.
5075 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
5076
5077 if (Domain & ARMII::DomainNEON)
5078 return std::make_pair(ExeNEON, 0);
5079
5080 // Certain instructions can go either way on Cortex-A8.
5081 // Treat them as NEON instructions.
5082 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
5083 return std::make_pair(ExeNEON, 0);
5084
5085 if (Domain & ARMII::DomainVFP)
5086 return std::make_pair(ExeVFP, 0);
5087
5088 return std::make_pair(ExeGeneric, 0);
5089}
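
// A minimal sketch (illustrative only) of the pair returned above: the first
// element is the domain the instruction currently executes in, the second a
// bitmask of domains it could be rewritten into (0 means not swizzlable).
static constexpr bool sketchCanSwitchDomain(unsigned PossibleMask,
                                            unsigned Domain) {
  return (PossibleMask & (1u << Domain)) != 0;
}
// e.g. an unpredicated VMOVD reports (ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)),
// so a switch to ExeNEON (rewriting it as a VORRd) is allowed:
static_assert(sketchCanSwitchDomain((1u << 1) | (1u << 2), 2),
              "VFP vmov may be rewritten as a NEON vorr");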
5090
5092 unsigned SReg, unsigned &Lane) {
5093 MCRegister DReg =
5094 TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
5095 Lane = 0;
5096
5097 if (DReg)
5098 return DReg;
5099
5100 Lane = 1;
5101 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
5102
5103 assert(DReg && "S-register with no D super-register?");
5104 return DReg;
5105}
5106
5107/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
5108/// set ImplicitSReg to a register number that must be marked as implicit-use or
5109/// zero if no register needs to be defined as implicit-use.
5110///
5111/// If the function cannot determine if an SPR should be marked implicit use or
5112/// not, it returns false.
5113///
5114/// This function handles cases where an instruction is being modified from taking
5115/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
5116/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
5117/// lane of the DPR).
5118///
5119/// If the other SPR is defined, an implicit-use of it should be added. Else,
5120/// (including the case where the DPR itself is defined), it should not.
5121///
5122static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
5123 MachineInstr &MI, MCRegister DReg,
5124 unsigned Lane,
5125 MCRegister &ImplicitSReg) {
5126 // If the DPR is defined or used already, the other SPR lane will be chained
5127 // correctly, so there is nothing to be done.
5128 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
5129 ImplicitSReg = MCRegister();
5130 return true;
5131 }
5132
5133 // Otherwise we need to go searching to see if the SPR is set explicitly.
5134 ImplicitSReg = TRI->getSubReg(DReg,
5135 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
5136 MachineBasicBlock::LivenessQueryResult LQR =
5137 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5138
5139 if (LQR == MachineBasicBlock::LQR_Live)
5140 return true;
5141 else if (LQR == MachineBasicBlock::LQR_Unknown)
5142 return false;
5143
5144 // If the register is known not to be live, there is no need to add an
5145 // implicit-use.
5146 ImplicitSReg = MCRegister();
5147 return true;
5148}
5149
5150void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
5151 unsigned Domain) const {
5152 unsigned DstReg, SrcReg;
5153 MCRegister DReg;
5154 unsigned Lane;
5155 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
5156 const TargetRegisterInfo *TRI = &getRegisterInfo();
5157 switch (MI.getOpcode()) {
5158 default:
5159 llvm_unreachable("cannot handle opcode!");
5160 break;
5161 case ARM::VMOVD:
5162 if (Domain != ExeNEON)
5163 break;
5164
5165 // Zap the predicate operands.
5166 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
5167
5168 // Make sure we've got NEON instructions.
5169 assert(Subtarget.hasNEON() && "VORRd requires NEON");
5170
5171 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5172 DstReg = MI.getOperand(0).getReg();
5173 SrcReg = MI.getOperand(1).getReg();
5174
5175 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5176 MI.removeOperand(i - 1);
5177
5178 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5179 MI.setDesc(get(ARM::VORRd));
5180 MIB.addReg(DstReg, RegState::Define)
5181 .addReg(SrcReg)
5182 .addReg(SrcReg)
5184 break;
5185 case ARM::VMOVRS:
5186 if (Domain != ExeNEON)
5187 break;
5188 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5189
5190 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5191 DstReg = MI.getOperand(0).getReg();
5192 SrcReg = MI.getOperand(1).getReg();
5193
5194 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5195 MI.removeOperand(i - 1);
5196
5197 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5198
5199 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5200 // Note that DSrc has been widened and the other lane may be undef, which
5201 // contaminates the entire register.
5202 MI.setDesc(get(ARM::VGETLNi32));
5203 MIB.addReg(DstReg, RegState::Define)
5204 .addReg(DReg, RegState::Undef)
5205 .addImm(Lane)
5207
5208 // The old source should be an implicit use, otherwise we might think it
5209 // was dead before here.
5210 MIB.addReg(SrcReg, RegState::Implicit);
5211 break;
5212 case ARM::VMOVSR: {
5213 if (Domain != ExeNEON)
5214 break;
5215 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5216
5217 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5218 DstReg = MI.getOperand(0).getReg();
5219 SrcReg = MI.getOperand(1).getReg();
5220
5221 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5222
5223 MCRegister ImplicitSReg;
5224 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5225 break;
5226
5227 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5228 MI.removeOperand(i - 1);
5229
5230 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5231 // Again DDst may be undefined at the beginning of this instruction.
5232 MI.setDesc(get(ARM::VSETLNi32));
5233 MIB.addReg(DReg, RegState::Define)
5234 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5235 .addReg(SrcReg)
5236 .addImm(Lane)
5238
5239 // The narrower destination must be marked as set to keep previous chains
5240 // in place.
5242 if (ImplicitSReg)
5243 MIB.addReg(ImplicitSReg, RegState::Implicit);
5244 break;
5245 }
5246 case ARM::VMOVS: {
5247 if (Domain != ExeNEON)
5248 break;
5249
5250 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5251 DstReg = MI.getOperand(0).getReg();
5252 SrcReg = MI.getOperand(1).getReg();
5253
5254 unsigned DstLane = 0, SrcLane = 0;
5255 MCRegister DDst, DSrc;
5256 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5257 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5258
5259 MCRegister ImplicitSReg;
5260 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5261 break;
5262
5263 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5264 MI.removeOperand(i - 1);
5265
5266 if (DSrc == DDst) {
5267 // Destination can be:
5268 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5269 MI.setDesc(get(ARM::VDUPLN32d));
5270 MIB.addReg(DDst, RegState::Define)
5271 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5272 .addImm(SrcLane)
5274
5275 // Neither the source nor the destination is naturally represented any
5276 // more, so add them in manually.
5278 MIB.addReg(SrcReg, RegState::Implicit);
5279 if (ImplicitSReg)
5280 MIB.addReg(ImplicitSReg, RegState::Implicit);
5281 break;
5282 }
5283
5284 // In general there's no single instruction that can perform an S <-> S
5285 // move in NEON space, but a pair of VEXT instructions *can* do the
5286 // job. It turns out that the VEXTs needed will only use DSrc once, with
5287 // the position based purely on the combination of lane-0 and lane-1
5288 // involved. For example
5289 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5290 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5291 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5292 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5293 //
5294 // Pattern of the MachineInstrs is:
5295 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5296 MachineInstrBuilder NewMIB;
5297 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
5298 DDst);
5299
5300 // On the first instruction, both DSrc and DDst may be undef if present.
5301 // Specifically when the original instruction didn't have them as an
5302 // <imp-use>.
5303 MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5304 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5305 NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5306
5307 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5308 CurUndef = !MI.readsRegister(CurReg, TRI);
5309 NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
5310 .addImm(1)
5312
5313 if (SrcLane == DstLane)
5314 NewMIB.addReg(SrcReg, RegState::Implicit);
5315
5316 MI.setDesc(get(ARM::VEXTd32));
5317 MIB.addReg(DDst, RegState::Define);
5318
5319 // On the second instruction, DDst has definitely been defined above, so
5320 // it is not undef. DSrc, if present, can be undef as above.
5321 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5322 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5323 MIB.addReg(CurReg, getUndefRegState(CurUndef));
5324
5325 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5326 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5327 MIB.addReg(CurReg, getUndefRegState(CurUndef))
5328 .addImm(1)
5330
5331 if (SrcLane != DstLane)
5332 MIB.addReg(SrcReg, RegState::Implicit);
5333
5334 // As before, the original destination is no longer represented; add it
5335 // implicitly.
5337 if (ImplicitSReg != 0)
5338 MIB.addReg(ImplicitSReg, RegState::Implicit);
5339 break;
5340 }
5341 }
5342}
5343
5344//===----------------------------------------------------------------------===//
5345// Partial register updates
5346//===----------------------------------------------------------------------===//
5347//
5348// Swift renames NEON registers with 64-bit granularity. That means any
5349// instruction writing an S-reg implicitly reads the containing D-reg. The
5350// problem is mostly avoided by translating f32 operations to v2f32 operations
5351// on D-registers, but f32 loads are still a problem.
5352//
5353// These instructions can load an f32 into a NEON register:
5354//
5355// VLDRS - Only writes S, partial D update.
5356// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5357// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5358//
5359// FCONSTD can be used as a dependency-breaking instruction.
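// Illustrative example (not from the original source; register choice is
// arbitrary): on a Swift-like core the sequence
//
//     vadd.f64 d0, d1, d2   @ long-latency producer of d0
//     vldr     s0, [r0]     @ writes only s0, but renames all of d0
//
// forces the load to wait for the VADD even though it overwrites the value it
// appears to depend on. Emitting the dependency-breaking FCONSTD first,
//
//     vmov.f64 d0, #0.5     @ FCONSTD: cheap full-width def of d0
//     vldr     s0, [r0]
//
// gives the load a freshly renamed D-register and removes the stall.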
5361 const MachineInstr &MI, unsigned OpNum,
5362 const TargetRegisterInfo *TRI) const {
5363 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5364 if (!PartialUpdateClearance)
5365 return 0;
5366
5367 assert(TRI && "Need TRI instance");
5368
5369 const MachineOperand &MO = MI.getOperand(OpNum);
5370 if (MO.readsReg())
5371 return 0;
5372 Register Reg = MO.getReg();
5373 int UseOp = -1;
5374
5375 switch (MI.getOpcode()) {
5376 // Normal instructions writing only an S-register.
5377 case ARM::VLDRS:
5378 case ARM::FCONSTS:
5379 case ARM::VMOVSR:
5380 case ARM::VMOVv8i8:
5381 case ARM::VMOVv4i16:
5382 case ARM::VMOVv2i32:
5383 case ARM::VMOVv2f32:
5384 case ARM::VMOVv1i64:
5385 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
5386 break;
5387
5388 // Explicitly reads the dependency.
5389 case ARM::VLD1LNd32:
5390 UseOp = 3;
5391 break;
5392 default:
5393 return 0;
5394 }
5395
5396 // If this instruction actually reads a value from Reg, there is no unwanted
5397 // dependency.
5398 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5399 return 0;
5400
5401 // We must be able to clobber the whole D-reg.
5402 if (Reg.isVirtual()) {
5403 // Virtual register must be a def undef foo:ssub_0 operand.
5404 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5405 return 0;
5406 } else if (ARM::SPRRegClass.contains(Reg)) {
5407 // Physical register: MI must define the full D-reg.
5408 MCRegister DReg =
5409 TRI->getMatchingSuperReg(Reg, ARM::ssub_0, &ARM::DPRRegClass);
5410 if (!DReg || !MI.definesRegister(DReg, TRI))
5411 return 0;
5412 }
5413
5414 // MI has an unwanted D-register dependency.
5415 // Avoid defs in the previous N instructions.
5416 return PartialUpdateClearance;
5417}
5418
5419// Break a partial register dependency after getPartialRegUpdateClearance
5420// returned non-zero.
5422 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5423 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5424 assert(TRI && "Need TRI instance");
5425
5426 const MachineOperand &MO = MI.getOperand(OpNum);
5427 Register Reg = MO.getReg();
5428 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5429 unsigned DReg = Reg;
5430
5431 // If MI defines an S-reg, find the corresponding D super-register.
5432 if (ARM::SPRRegClass.contains(Reg)) {
5433 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5434 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5435 }
5436
5437 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5438 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5439
5440 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5441 // the full D-register by loading the same value to both lanes. The
5442 // instruction is micro-coded with 2 uops, so don't do this until we can
5443 // properly schedule micro-coded instructions. The dispatcher stalls cause
5444 // too big regressions.
5445
5446 // Insert the dependency-breaking FCONSTD before MI.
5447 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5448 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5449 .addImm(96)
5451 MI.addRegisterKilled(DReg, TRI, true);
5452}
5453
5455 return Subtarget.hasFeature(ARM::HasV6KOps);
5456}
5457
5459 if (MI->getNumOperands() < 4)
5460 return true;
5461 unsigned ShOpVal = MI->getOperand(3).getImm();
5462 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5463 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5464 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5465 ((ShImm == 1 || ShImm == 2) &&
5466 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5467 return true;
5468
5469 return false;
5470}
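// Illustrative examples (hypothetical encodings, not from the original
// source), using the shifted-operand layout assumed above:
//     add r0, r1, r2, lsl #2   -> fast on Swift (returns true)
//     add r0, r1, r2, lsr #1   -> fast on Swift (returns true)
//     add r0, r1, r2, lsl #3   -> not a fast shift (returns false)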
5471
5473 const MachineInstr &MI, unsigned DefIdx,
5474 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5475 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5476 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5477
5478 switch (MI.getOpcode()) {
5479 case ARM::VMOVDRR:
5480 // dX = VMOVDRR rY, rZ
5481 // is the same as:
5482 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5483 // Populate the InputRegs accordingly.
5484 // rY
5485 const MachineOperand *MOReg = &MI.getOperand(1);
5486 if (!MOReg->isUndef())
5487 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5488 MOReg->getSubReg(), ARM::ssub_0));
5489 // rZ
5490 MOReg = &MI.getOperand(2);
5491 if (!MOReg->isUndef())
5492 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5493 MOReg->getSubReg(), ARM::ssub_1));
5494 return true;
5495 }
5496 llvm_unreachable("Target dependent opcode missing");
5497}
5498
5500 const MachineInstr &MI, unsigned DefIdx,
5501 RegSubRegPairAndIdx &InputReg) const {
5502 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5503 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5504
5505 switch (MI.getOpcode()) {
5506 case ARM::VMOVRRD:
5507 // rX, rY = VMOVRRD dZ
5508 // is the same as:
5509 // rX = EXTRACT_SUBREG dZ, ssub_0
5510 // rY = EXTRACT_SUBREG dZ, ssub_1
5511 const MachineOperand &MOReg = MI.getOperand(2);
5512 if (MOReg.isUndef())
5513 return false;
5514 InputReg.Reg = MOReg.getReg();
5515 InputReg.SubReg = MOReg.getSubReg();
5516 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5517 return true;
5518 }
5519 llvm_unreachable("Target dependent opcode missing");
5520}
5521
5523 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5524 RegSubRegPairAndIdx &InsertedReg) const {
5525 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5526 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5527
5528 switch (MI.getOpcode()) {
5529 case ARM::VSETLNi32:
5530 case ARM::MVE_VMOV_to_lane_32:
5531 // dX = VSETLNi32 dY, rZ, imm
5532 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5533 const MachineOperand &MOBaseReg = MI.getOperand(1);
5534 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5535 if (MOInsertedReg.isUndef())
5536 return false;
5537 const MachineOperand &MOIndex = MI.getOperand(3);
5538 BaseReg.Reg = MOBaseReg.getReg();
5539 BaseReg.SubReg = MOBaseReg.getSubReg();
5540
5541 InsertedReg.Reg = MOInsertedReg.getReg();
5542 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5543 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5544 return true;
5545 }
5546 llvm_unreachable("Target dependent opcode missing");
5547}
5548
5549std::pair<unsigned, unsigned>
5551 const unsigned Mask = ARMII::MO_OPTION_MASK;
5552 return std::make_pair(TF & Mask, TF & ~Mask);
5553}
5554
5557 using namespace ARMII;
5558
5559 static const std::pair<unsigned, const char *> TargetFlags[] = {
5560 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5561 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5562 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5563 };
5564 return ArrayRef(TargetFlags);
5565}
5566
5569 using namespace ARMII;
5570
5571 static const std::pair<unsigned, const char *> TargetFlags[] = {
5572 {MO_COFFSTUB, "arm-coffstub"},
5573 {MO_GOT, "arm-got"},
5574 {MO_SBREL, "arm-sbrel"},
5575 {MO_DLLIMPORT, "arm-dllimport"},
5576 {MO_SECREL, "arm-secrel"},
5577 {MO_NONLAZY, "arm-nonlazy"}};
5578 return ArrayRef(TargetFlags);
5579}
5580
5581std::optional<RegImmPair>
5583 int Sign = 1;
5584 unsigned Opcode = MI.getOpcode();
5585 int64_t Offset = 0;
5586
5587 // TODO: Handle cases where Reg is a super- or sub-register of the
5588 // destination register.
5589 const MachineOperand &Op0 = MI.getOperand(0);
5590 if (!Op0.isReg() || Reg != Op0.getReg())
5591 return std::nullopt;
5592
5593 // We describe SUBri or ADDri instructions.
5594 if (Opcode == ARM::SUBri)
5595 Sign = -1;
5596 else if (Opcode != ARM::ADDri)
5597 return std::nullopt;
5598
5599 // TODO: Third operand can be global address (usually some string). Since
5600 // strings can be relocated we cannot calculate their offsets for
5601 // now.
5602 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5603 return std::nullopt;
5604
5605 Offset = MI.getOperand(2).getImm() * Sign;
5606 return RegImmPair{MI.getOperand(1).getReg(), Offset};
5607}
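// Illustrative example (hypothetical MIR, not from the original source):
// describing $r0 for
//     $r0 = ADDri $r1, 16, 14, $noreg, $noreg
// yields RegImmPair{$r1, +16}, while the SUBri form
//     $r0 = SUBri $r1, 16, 14, $noreg, $noreg
// yields RegImmPair{$r1, -16}. Any other opcode, or a non-immediate third
// operand, makes the function return std::nullopt.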
5608
5612 const TargetRegisterInfo *TRI) {
5613 for (auto I = From; I != To; ++I)
5614 if (I->modifiesRegister(Reg, TRI))
5615 return true;
5616 return false;
5617}
5618
5620 const TargetRegisterInfo *TRI) {
5621 // Search backwards to the instruction that defines CPSR. This may or may not
5622 // be a CMP; we check that after this loop. If we find another instruction
5623 // that reads CPSR, we return nullptr.
5624 MachineBasicBlock::iterator CmpMI = Br;
5625 while (CmpMI != Br->getParent()->begin()) {
5626 --CmpMI;
5627 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5628 break;
5629 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5630 break;
5631 }
5632
5633 // Check that this inst is a CMP r[0-7], #0 and that the register
5634 // is not redefined between the cmp and the br.
5635 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5636 return nullptr;
5637 Register Reg = CmpMI->getOperand(0).getReg();
5638 Register PredReg;
5639 ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
5640 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5641 return nullptr;
5642 if (!isARMLowRegister(Reg))
5643 return nullptr;
5644 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5645 return nullptr;
5646
5647 return &*CmpMI;
5648}
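// Illustrative example (hypothetical MIR, not from the original source):
//     tCMPi8 $r3, 0, 14, $noreg        ; cmp r3, #0, unpredicated
//     ...                              ; nothing redefines r3 or CPSR here
//     tBcc %bb.exit, 0, $cpsr          ; branch on that compare
// The CMP is returned so the caller can fold the pair into CBZ/CBNZ. A
// predicated CMP, a non-zero immediate, a high register, or an intervening
// redefinition of r3 makes the function return nullptr.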
5649
5651 const ARMSubtarget *Subtarget,
5652 bool ForCodesize) {
5653 if (Subtarget->isThumb()) {
5654 if (Val <= 255) // MOV
5655 return ForCodesize ? 2 : 1;
5656 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5657 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5658 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5659 return ForCodesize ? 4 : 1;
5660 if (Val <= 510) // MOV + ADDi8
5661 return ForCodesize ? 4 : 2;
5662 if (~Val <= 255) // MOV + MVN
5663 return ForCodesize ? 4 : 2;
5664 if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
5665 return ForCodesize ? 4 : 2;
5666 } else {
5667 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5668 return ForCodesize ? 4 : 1;
5669 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5670 return ForCodesize ? 4 : 1;
5671 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5672 return ForCodesize ? 4 : 1;
5673 if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
5674 return ForCodesize ? 8 : 2;
5675 if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
5676 return ForCodesize ? 8 : 2;
5677 }
5678 if (Subtarget->useMovt()) // MOVW + MOVT
5679 return ForCodesize ? 8 : 2;
5680 return ForCodesize ? 8 : 3; // Literal pool load
5681}
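// Worked examples (illustrative, assuming a Thumb2 subtarget with MOVW/MOVT):
//   Val = 0x000000AB : MOV imm8          -> cost 2 (ForCodesize) or 1 (else)
//   Val = 0x0000FFFF : single MOVW       -> cost 4 (ForCodesize) or 1 (else)
//   Val = 0x00012345 : MOVW + MOVT pair  -> cost 8 (ForCodesize) or 2 (else)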
5682
5683bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5684 const ARMSubtarget *Subtarget,
5685 bool ForCodesize) {
5686 // Check with ForCodesize
5687 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5688 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5689 if (Cost1 < Cost2)
5690 return true;
5691 if (Cost1 > Cost2)
5692 return false;
5693
5694 // If they are equal, try with !ForCodesize
5695 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5696 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5697}
5698
5699/// Constants defining how certain sequences should be outlined.
5700/// This encompasses how an outlined function should be called, and what kind of
5701/// frame should be emitted for that outlined function.
5702///
5703/// \p MachineOutlinerTailCall implies that the function is being created from
5704/// a sequence of instructions ending in a return.
5705///
5706/// That is,
5707///
5708/// I1 OUTLINED_FUNCTION:
5709/// I2 --> B OUTLINED_FUNCTION I1
5710/// BX LR I2
5711/// BX LR
5712///
5713/// +-------------------------+--------+-----+
5714/// | | Thumb2 | ARM |
5715/// +-------------------------+--------+-----+
5716/// | Call overhead in Bytes | 4 | 4 |
5717/// | Frame overhead in Bytes | 0 | 0 |
5718/// | Stack fixup required | No | No |
5719/// +-------------------------+--------+-----+
5720///
5721/// \p MachineOutlinerThunk implies that the function is being created from
5722/// a sequence of instructions ending in a call. The outlined function is
5723/// called with a BL instruction, and the outlined function tail-calls the
5724/// original call destination.
5725///
5726/// That is,
5727///
5728/// I1 OUTLINED_FUNCTION:
5729/// I2 --> BL OUTLINED_FUNCTION I1
5730/// BL f I2
5731/// B f
5732///
5733/// +-------------------------+--------+-----+
5734/// | | Thumb2 | ARM |
5735/// +-------------------------+--------+-----+
5736/// | Call overhead in Bytes | 4 | 4 |
5737/// | Frame overhead in Bytes | 0 | 0 |
5738/// | Stack fixup required | No | No |
5739/// +-------------------------+--------+-----+
5740///
5741/// \p MachineOutlinerNoLRSave implies that the function should be called using
5742/// a BL instruction, but doesn't require LR to be saved and restored. This
5743/// happens when LR is known to be dead.
5744///
5745/// That is,
5746///
5747/// I1 OUTLINED_FUNCTION:
5748/// I2 --> BL OUTLINED_FUNCTION I1
5749/// I3 I2
5750/// I3
5751/// BX LR
5752///
5753/// +-------------------------+--------+-----+
5754/// | | Thumb2 | ARM |
5755/// +-------------------------+--------+-----+
5756/// | Call overhead in Bytes | 4 | 4 |
5757/// | Frame overhead in Bytes | 2 | 4 |
5758/// | Stack fixup required | No | No |
5759/// +-------------------------+--------+-----+
5760///
5761/// \p MachineOutlinerRegSave implies that the function should be called with a
5762/// save and restore of LR to an available register. This allows us to avoid
5763/// stack fixups. Note that this outlining variant is compatible with the
5764/// NoLRSave case.
5765///
5766/// That is,
5767///
5768/// I1 Save LR OUTLINED_FUNCTION:
5769/// I2 --> BL OUTLINED_FUNCTION I1
5770/// I3 Restore LR I2
5771/// I3
5772/// BX LR
5773///
5774/// +-------------------------+--------+-----+
5775/// | | Thumb2 | ARM |
5776/// +-------------------------+--------+-----+
5777/// | Call overhead in Bytes | 8 | 12 |
5778/// | Frame overhead in Bytes | 2 | 4 |
5779/// | Stack fixup required | No | No |
5780/// +-------------------------+--------+-----+
5781///
5782/// \p MachineOutlinerDefault implies that the function should be called with
5783/// a save and restore of LR to the stack.
5784///
5785/// That is,
5786///
5787/// I1 Save LR OUTLINED_FUNCTION:
5788/// I2 --> BL OUTLINED_FUNCTION I1
5789/// I3 Restore LR I2
5790/// I3
5791/// BX LR
5792///
5793/// +-------------------------+--------+-----+
5794/// | | Thumb2 | ARM |
5795/// +-------------------------+--------+-----+
5796/// | Call overhead in Bytes | 8 | 12 |
5797/// | Frame overhead in Bytes | 2 | 4 |
5798/// | Stack fixup required | Yes | Yes |
5799/// +-------------------------+--------+-----+
5800
5808
5812 UnsafeRegsDead = 0x8
5814
5827
5829 : CallTailCall(target.isThumb() ? 4 : 4),
5830 FrameTailCall(target.isThumb() ? 0 : 0),
5831 CallThunk(target.isThumb() ? 4 : 4),
5832 FrameThunk(target.isThumb() ? 0 : 0),
5833 CallNoLRSave(target.isThumb() ? 4 : 4),
5834 FrameNoLRSave(target.isThumb() ? 2 : 4),
5835 CallRegSave(target.isThumb() ? 8 : 12),
5836 FrameRegSave(target.isThumb() ? 2 : 4),
5837 CallDefault(target.isThumb() ? 8 : 12),
5838 FrameDefault(target.isThumb() ? 2 : 4),
5839 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5840};
5841
5843ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5844 MachineFunction *MF = C.getMF();
5846 const ARMBaseRegisterInfo *ARI =
5847 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5848
5849 BitVector regsReserved = ARI->getReservedRegs(*MF);
5850 // Check if there is an available register across the sequence that we can
5851 // use.
5852 for (Register Reg : ARM::rGPRRegClass) {
5853 if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
5854 Reg != ARM::LR && // LR is not reserved, but don't use it.
5855 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5856 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5857 C.isAvailableInsideSeq(Reg, TRI))
5858 return Reg;
5859 }
5860 return Register();
5861}
5862
5863// Compute liveness of LR at the point after the interval [I, E), which
5864// denotes a *backward* iteration through instructions. Used only for return
5865// basic blocks, which do not end with a tail call.
5870 // At the end of the function LR is dead.
5870 bool Live = false;
5871 for (; I != E; ++I) {
5872 const MachineInstr &MI = *I;
5873
5874 // Check defs of LR.
5875 if (MI.modifiesRegister(ARM::LR, &TRI))
5876 Live = false;
5877
5878 // Check uses of LR.
5879 unsigned Opcode = MI.getOpcode();
5880 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5881 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5882 Opcode == ARM::tBXNS_RET) {
5883 // These instructions use LR, but it's not an (explicit or implicit)
5884 // operand.
5885 Live = true;
5886 continue;
5887 }
5888 if (MI.readsRegister(ARM::LR, &TRI))
5889 Live = true;
5890 }
5891 return !Live;
5892}
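// Illustrative example (not from the original source): scanning a return
// block that ends with
//     mov lr, r3
//     bx  lr
// backwards, the BX first marks LR live, and the MOV's definition of LR then
// clears it, so LR is reported dead (available) above the MOV. Without such a
// redefinition, the return keeps LR live through the whole scanned range.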
5893
5894std::optional<std::unique_ptr<outliner::OutlinedFunction>>
5896 const MachineModuleInfo &MMI,
5897 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
5898 unsigned MinRepeats) const {
5899 unsigned SequenceSize = 0;
5900 for (auto &MI : RepeatedSequenceLocs[0])
5901 SequenceSize += getInstSizeInBytes(MI);
5902
5903 // Properties about candidate MBBs that hold for all of them.
5904 unsigned FlagsSetInAll = 0xF;
5905
5906 // Compute liveness information for each candidate, and set FlagsSetInAll.
5908 for (outliner::Candidate &C : RepeatedSequenceLocs)
5909 FlagsSetInAll &= C.Flags;
5910
5911 // According to the ARM Procedure Call Standard, the following are
5912 // undefined on entry/exit from a function call:
5913 //
5914 // * Register R12(IP),
5915 // * Condition codes (and thus the CPSR register)
5916 //
5917 // Since we control the instructions which are part of the outlined regions
5918 // we don't need to be fully compliant with the AAPCS, but we have to
5919 // guarantee that if a veneer is inserted at link time the code is still
5920 // correct. Because of this, we can't outline any sequence of instructions
5921 // where one of these registers is live into/across it. Thus, we need to
5922 // delete those candidates.
5923 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5924 // If the unsafe registers in this block are all dead, then we don't need
5925 // to compute liveness here.
5926 if (C.Flags & UnsafeRegsDead)
5927 return false;
5928 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5929 };
5930
5931 // Are there any candidates where those registers are live?
5932 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5933 // Erase every candidate that violates the restrictions above. (It could be
5934 // true that we have viable candidates, so it's not worth bailing out in
5935 // the case that, say, 1 out of 20 candidates violates the restrictions.)
5936 llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
5937
5938 // If the sequence doesn't have enough candidates left, then we're done.
5939 if (RepeatedSequenceLocs.size() < MinRepeats)
5940 return std::nullopt;
5941 }
5942
5943 // We expect the majority of the outlining candidates to be in consensus with
5944 // regard to return address signing and authentication, and branch target
5945 // enforcement; in other words, partitioning according to all four
5946 // possible combinations of PAC-RET and BTI is going to yield one big subset
5947 // and three small (likely empty) subsets. That allows us to cull incompatible
5948 // candidates separately for PAC-RET and BTI.
5949
5950 // Partition the candidates in two sets: one with BTI enabled and one with BTI
5951 // disabled. Remove the candidates from the smaller set. If the sets are the
5952 // same size, prefer the non-BTI ones for outlining, since they have less
5953 // overhead.
5954 auto NoBTI =
5955 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5956 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5957 return AFI.branchTargetEnforcement();
5958 });
5959 if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
5960 std::distance(NoBTI, RepeatedSequenceLocs.end()))
5961 RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
5962 else
5963 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
5964
5965 if (RepeatedSequenceLocs.size() < MinRepeats)
5966 return std::nullopt;
5967
5968 // Likewise, partition the candidates according to PAC-RET enablement.
5969 auto NoPAC =
5970 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5971 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5972 // If the function happens to not spill the LR, do not disqualify it
5973 // from the outlining.
5974 return AFI.shouldSignReturnAddress(true);
5975 });
5976 if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
5977 std::distance(NoPAC, RepeatedSequenceLocs.end()))
5978 RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
5979 else
5980 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
5981
5982 if (RepeatedSequenceLocs.size() < MinRepeats)
5983 return std::nullopt;
5984
5985 // At this point, we have only "safe" candidates to outline. Figure out
5986 // frame + call instruction information.
5987
5988 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5989
5990 // Helper lambda which sets call information for every candidate.
5991 auto SetCandidateCallInfo =
5992 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5993 for (outliner::Candidate &C : RepeatedSequenceLocs)
5994 C.setCallInfo(CallID, NumBytesForCall);
5995 };
5996
5997 OutlinerCosts Costs(Subtarget);
5998
5999 const auto &SomeMFI =
6000 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
6001 // Adjust costs to account for the BTI instructions.
6002 if (SomeMFI.branchTargetEnforcement()) {
6003 Costs.FrameDefault += 4;
6004 Costs.FrameNoLRSave += 4;
6005 Costs.FrameRegSave += 4;
6006 Costs.FrameTailCall += 4;
6007 Costs.FrameThunk += 4;
6008 }
6009
6010 // Adjust costs to account for sign and authentication instructions.
6011 if (SomeMFI.shouldSignReturnAddress(true)) {
6012 Costs.CallDefault += 8; // +PAC instr, +AUT instr
6013 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
6014 }
6015
6016 unsigned FrameID = MachineOutlinerDefault;
6017 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
6018
6019 // If the last instruction in any candidate is a terminator, then we should
6020 // tail call all of the candidates.
6021 if (RepeatedSequenceLocs[0].back().isTerminator()) {
6022 FrameID = MachineOutlinerTailCall;
6023 NumBytesToCreateFrame = Costs.FrameTailCall;
6024 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
6025 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
6026 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
6027 LastInstrOpcode == ARM::tBLXr ||
6028 LastInstrOpcode == ARM::tBLXr_noip ||
6029 LastInstrOpcode == ARM::tBLXi) {
6030 FrameID = MachineOutlinerThunk;
6031 NumBytesToCreateFrame = Costs.FrameThunk;
6032 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
6033 } else {
6034 // We need to decide how to emit calls + frames. We can always emit the same
6035 // frame if we don't need to save to the stack. If we have to save to the
6036 // stack, then we need a different frame.
6037 unsigned NumBytesNoStackCalls = 0;
6038 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
6039
6040 for (outliner::Candidate &C : RepeatedSequenceLocs) {
6041 // LR liveness is overestimated in return blocks, unless they end with a
6042 // tail call.
6043 const auto Last = C.getMBB()->rbegin();
6044 const bool LRIsAvailable =
6045 C.getMBB()->isReturnBlock() && !Last->isCall()
6048 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
6049 if (LRIsAvailable) {
6050 FrameID = MachineOutlinerNoLRSave;
6051 NumBytesNoStackCalls += Costs.CallNoLRSave;
6052 C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
6053 CandidatesWithoutStackFixups.push_back(C);
6054 }
6055
6056 // Is an unused register available? If so, we won't modify the stack, so
6057 // we can outline with the same frame type as those that don't save LR.
6058 else if (findRegisterToSaveLRTo(C)) {
6059 FrameID = MachineOutlinerRegSave;
6060 NumBytesNoStackCalls += Costs.CallRegSave;
6061 C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
6062 CandidatesWithoutStackFixups.push_back(C);
6063 }
6064
6065 // Is SP used in the sequence at all? If not, we don't have to modify
6066 // the stack, so we are guaranteed to get the same frame.
6067 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
6068 NumBytesNoStackCalls += Costs.CallDefault;
6069 C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6070 CandidatesWithoutStackFixups.push_back(C);
6071 }
6072
6073 // If we outline this, we need to modify the stack. Pretend we don't
6074 // outline this by saving all of its bytes.
6075 else
6076 NumBytesNoStackCalls += SequenceSize;
6077 }
6078
6079 // If there are no places where we have to save LR, then note that we don't
6080 // have to update the stack. Otherwise, give every candidate the default
6081 // call type.
6082 if (NumBytesNoStackCalls <=
6083 RepeatedSequenceLocs.size() * Costs.CallDefault) {
6084 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
6085 FrameID = MachineOutlinerNoLRSave;
6086 if (RepeatedSequenceLocs.size() < MinRepeats)
6087 return std::nullopt;
6088 } else
6089 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6090 }
6091
6092 // Does every candidate's MBB contain a call? If so, then we might have a
6093 // call in the range.
6094 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6095 // Check if the range contains a call. Calls require a save + restore of
6096 // the link register.
6097 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
6098 if (any_of(drop_end(FirstCand),
6099 [](const MachineInstr &MI) { return MI.isCall(); }))
6100 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6101
6102 // Handle the last instruction separately. If it is a tail call, then the
6103 // last instruction is a call; we don't want to save + restore in this
6104 // case. However, it could be possible that the last instruction is a
6105 // call without it being valid to tail call this sequence. We should
6106 // consider this as well.
6107 else if (FrameID != MachineOutlinerThunk &&
6108 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
6109 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6110 }
6111
6112 return std::make_unique<outliner::OutlinedFunction>(
6113 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
6114}
6115
6116bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
6117 int64_t Fixup,
6118 bool Updt) const {
6119 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
6120 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
6121 if (SPIdx < 0)
6122 // No SP operand
6123 return true;
6124 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
6125 // If SP is not the base register we can't do much
6126 return false;
6127
6128 // Stack might be involved, but the addressing mode doesn't handle any offset.
6129 // Note: AddrModeT1_[1|2|4] don't operate on SP
6130 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
6131 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
6132 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
6133 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
6134 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
6135 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
6136 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
6137 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
6138 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
6140 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
6141 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
6142 return false;
6143
6144 unsigned NumOps = MI->getDesc().getNumOperands();
6145 unsigned ImmIdx = NumOps - 3;
6146
6147 const MachineOperand &Offset = MI->getOperand(ImmIdx);
6148 assert(Offset.isImm() && "Is not an immediate");
6149 int64_t OffVal = Offset.getImm();
6150
6151 if (OffVal < 0)
6152 // Don't override data if they are below SP.
6153 return false;
6154
6155 unsigned NumBits = 0;
6156 unsigned Scale = 1;
6157
6158 switch (AddrMode) {
6159 case ARMII::AddrMode3:
6160 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
6161 return false;
6162 OffVal = ARM_AM::getAM3Offset(OffVal);
6163 NumBits = 8;
6164 break;
6165 case ARMII::AddrMode5:
6166 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
6167 return false;
6168 OffVal = ARM_AM::getAM5Offset(OffVal);
6169 NumBits = 8;
6170 Scale = 4;
6171 break;
6173 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
6174 return false;
6175 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
6176 NumBits = 8;
6177 Scale = 2;
6178 break;
6180 NumBits = 8;
6181 break;
6183 // FIXME: Values are already scaled in this addressing mode.
6184 assert((Fixup & 3) == 0 && "Can't encode this offset!");
6185 NumBits = 10;
6186 break;
6188 NumBits = 8;
6189 Scale = 4;
6190 break;
6193 NumBits = 12;
6194 break;
6195 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6196 NumBits = 8;
6197 Scale = 4;
6198 break;
6199 default:
6200 llvm_unreachable("Unsupported addressing mode!");
6201 }
6202 // Make sure the offset is encodable for instructions that scale the
6203 // immediate.
6204 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6205 "Can't encode this offset!");
6206 OffVal += Fixup / Scale;
6207
6208 unsigned Mask = (1 << NumBits) - 1;
6209
6210 if (OffVal <= Mask) {
6211 if (Updt)
6212 MI->getOperand(ImmIdx).setImm(OffVal);
6213 return true;
6214 }
6215
6216 return false;
6217}
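// Worked example (illustrative, not from the original source): for a Thumb2
// SP-relative load such as
//     t2LDRi12 $r0, $sp, 8, 14, $noreg
// the 12-bit immediate case applies (NumBits = 12, Scale = 1). With
// Fixup = 16 the offset becomes 8 + 16 = 24, which still fits, so the
// function returns true and, when Updt is set, rewrites the immediate to 24.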
6218
6220 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6221 outliner::Candidate &C = Candidates.front();
6222 // branch-target-enforcement is guaranteed to be consistent between all
6223 // candidates, so we only need to look at one.
6224 const Function &CFn = C.getMF()->getFunction();
6225 if (CFn.hasFnAttribute("branch-target-enforcement"))
6226 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
6227
6228 if (CFn.hasFnAttribute("sign-return-address"))
6229 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
6230
6231 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6232}
6233
6235 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6236 const Function &F = MF.getFunction();
6237
6238 // Can F be deduplicated by the linker? If it can, don't outline from it.
6239 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6240 return false;
6241
6242 // Don't outline from functions with section markings; the program could
6243 // expect that all the code is in the named section.
6244 // FIXME: Allow outlining from multiple functions with the same section
6245 // marking.
6246 if (F.hasSection())
6247 return false;
6248
6249 // FIXME: Thumb1 outlining is not handled
6251 return false;
6252
6253 // It's safe to outline from MF.
6254 return true;
6255}
6256
6258 unsigned &Flags) const {
6259 // Check if LR is available through all of the MBB. If it's not, then set
6260 // a flag.
6262 "Suitable Machine Function for outlining must track liveness");
6263
6265
6267 LRU.accumulate(MI);
6268
6269 // Check if each of the unsafe registers is available...
6270 bool R12AvailableInBlock = LRU.available(ARM::R12);
6271 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6272
6273 // If all of these are dead (and not live out), we know we don't have to check
6274 // them later.
6275 if (R12AvailableInBlock && CPSRAvailableInBlock)
6276 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6277
6278 // Now, add the live outs to the set.
6279 LRU.addLiveOuts(MBB);
6280
6281 // If any of these registers is available in the MBB, but is also live out of
6282 // the block, then we know outlining is unsafe.
6283 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6284 return false;
6285 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6286 return false;
6287
6288 // Check if there's a call inside this MachineBasicBlock. If there is, then
6289 // set a flag.
6290 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6291 Flags |= MachineOutlinerMBBFlags::HasCalls;
6292
6293 // LR liveness is overestimated in return blocks.
6294
6295 bool LRIsAvailable =
6296 MBB.isReturnBlock() && !MBB.back().isCall()
6298 : LRU.available(ARM::LR);
6299 if (!LRIsAvailable)
6300 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6301
6302 return true;
6303}
6304
6308 unsigned Flags) const {
6309 MachineInstr &MI = *MIT;
6311
6312 // PIC instructions contain labels; outlining them would break offset
6313 // computing.
6314 unsigned Opc = MI.getOpcode();
6315 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6316 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6317 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6318 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6319 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6320 Opc == ARM::t2MOV_ga_pcrel)
6322
6323 // Be conservative with ARMv8.1 MVE instructions.
6324 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6325 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6326 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6327 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6328 Opc == ARM::t2LoopEndDec)
6330
6331 const MCInstrDesc &MCID = MI.getDesc();
6332 uint64_t MIFlags = MCID.TSFlags;
6333 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
6335
6336 // Is this a terminator for a basic block?
6337 if (MI.isTerminator())
6338 // TargetInstrInfo::getOutliningType has already filtered out anything
6339 // that would break this, so we can allow it here.
6341
6342 // Don't outline if link register or program counter value are used.
6343 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
6345
6346 if (MI.isCall()) {
6347 // Get the function associated with the call. Look at each operand and find
6348 // the one that represents the callee and get its name.
6349 const Function *Callee = nullptr;
6350 for (const MachineOperand &MOP : MI.operands()) {
6351 if (MOP.isGlobal()) {
6352 Callee = dyn_cast<Function>(MOP.getGlobal());
6353 break;
6354 }
6355 }
6356
6357 // Don't outline calls to "mcount"-like functions; in particular, Linux
6358 // kernel function tracing relies on them.
6359 if (Callee &&
6360 (Callee->getName() == "\01__gnu_mcount_nc" ||
6361 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6363
6364 // If we don't know anything about the callee, assume it depends on the
6365 // stack layout of the caller. In that case, it's only legal to outline
6366 // as a tail-call. Explicitly list the call instructions we know about so
6367 // we don't get unexpected results with call pseudo-instructions.
6368 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6369 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6370 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6371 Opc == ARM::tBLXi)
6372 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6373
6374 if (!Callee)
6375 return UnknownCallOutlineType;
6376
6377 // We have a function we have information about. Check if it's something we
6378 // can safely outline.
6379 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
6380
6381 // We don't know what's going on with the callee at all. Don't touch it.
6382 if (!CalleeMF)
6383 return UnknownCallOutlineType;
6384
6385 // Check if we know anything about the callee saves on the function. If we
6386 // don't, then don't touch it, since that implies that we haven't computed
6387 // anything about its stack frame yet.
6388 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6389 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6390 MFI.getNumObjects() > 0)
6391 return UnknownCallOutlineType;
6392
6393 // At this point, we can say that CalleeMF ought to not pass anything on the
6394 // stack. Therefore, we can outline it.
6396 }
6397
6398 // Since calls are handled, don't touch LR or PC
6399 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
6401
6402 // Does this use the stack?
6403 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6404 // True if there is no chance that any outlined candidate from this range
6405 // could require stack fixups. That is, both
6406 // * LR is available in the range (No save/restore around call)
6407 // * The range doesn't include calls (No save/restore in outlined frame)
6408 // are true.
6409 // These conditions also ensure correctness of the return address
6410 // authentication - we insert sign and authentication instructions only if
6411 // we save/restore LR on stack, but then this condition ensures that the
6412 // outlined range does not modify the SP, therefore the SP value used for
6413 // signing is the same as the one used for authentication.
6414 // FIXME: This is very restrictive; the flags check the whole block,
6415 // not just the bit we will try to outline.
6416 bool MightNeedStackFixUp =
6417 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
6418 MachineOutlinerMBBFlags::HasCalls));
6419
6420 if (!MightNeedStackFixUp)
6422
6423 // Any modification of SP will break our code to save/restore LR.
6424 // FIXME: We could handle some instructions which add a constant offset to
6425 // SP, with a bit more work.
6426 if (MI.modifiesRegister(ARM::SP, TRI))
6428
6429 // At this point, we have a stack instruction that we might need to fix
6430 // up. We'll handle it if it's a load or store.
6431 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6432 false))
6434
6435 // We can't fix it up, so don't outline it.
6437 }
6438
6439 // Be conservative with IT blocks.
6440 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6441 MI.modifiesRegister(ARM::ITSTATE, TRI))
6443
6444 // Don't outline CFI instructions.
6445 if (MI.isCFIInstruction())
6447
6449}
6450
6451void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6452 for (MachineInstr &MI : MBB) {
6453 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6454 }
6455}
6456
6457void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6458 MachineBasicBlock::iterator It, bool CFI,
6459 bool Auth) const {
6460 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6461 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6462 assert(Align >= 8 && Align <= 256);
6463 if (Auth) {
6464 assert(Subtarget.isThumb2());
6465 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6466 // sequence.
6467 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6468 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6469 .addReg(ARM::R12, RegState::Kill)
6470 .addReg(ARM::LR, RegState::Kill)
6471 .addReg(ARM::SP)
6472 .addImm(-Align)
6474 .setMIFlags(MIFlags);
6475 } else {
6476 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6477 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6478 .addReg(ARM::LR, RegState::Kill)
6479 .addReg(ARM::SP)
6480 .addImm(-Align)
6482 .setMIFlags(MIFlags);
6483 }
6484
6485 if (!CFI)
6486 return;
6487
6488 MachineFunction &MF = *MBB.getParent();
6489
6490 // Add a CFI, saying CFA is offset by Align bytes from SP.
6491 int64_t StackPosEntry =
6493 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6494 .addCFIIndex(StackPosEntry)
6496
6497 // Add a CFI saying that the LR that we want to find is now higher than
6498 // before.
6499 int LROffset = Auth ? Align - 4 : Align;
6500 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6501 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6502 int64_t LRPosEntry = MF.addFrameInst(
6503 MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset));
6504 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6505 .addCFIIndex(LRPosEntry)
6507 if (Auth) {
6508 // Add a CFI for the location of the return address PAC.
6509 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6510 int64_t RACPosEntry = MF.addFrameInst(
6511 MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align));
6512 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6513 .addCFIIndex(RACPosEntry)
6515 }
6516}
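// Illustrative result (not from the original source): on a Thumb2 target the
// plain path emits, in assembly terms,
//     str  lr, [sp, #-Align]!          @ t2STR_PRE, Align >= 8
// while the Auth path emits
//     pac  r12, lr, sp                 @ t2PAC
//     strd r12, lr, [sp, #-Align]!     @ t2STRD_PRE
// each followed, when CFI is requested, by .cfi directives describing the new
// CFA offset, the saved LR and, for Auth, the RA_AUTH_CODE location.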
6517
6518void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
6520 Register Reg) const {
6521 MachineFunction &MF = *MBB.getParent();
6522 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6523 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6524 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
6525
6526 int64_t LRPosEntry = MF.addFrameInst(
6527 MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
6528 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6529 .addCFIIndex(LRPosEntry)
6531}
6532
6533void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6535 bool CFI, bool Auth) const {
6536 int Align = Subtarget.getStackAlignment().value();
6537 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6538 if (Auth) {
6539 assert(Subtarget.isThumb2());
6540 // Restore return address PAC and LR.
6541 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6542 .addReg(ARM::R12, RegState::Define)
6543 .addReg(ARM::LR, RegState::Define)
6544 .addReg(ARM::SP, RegState::Define)
6545 .addReg(ARM::SP)
6546 .addImm(Align)
6548 .setMIFlags(MIFlags);
6549 // LR authentication is after the CFI instructions, below.
6550 } else {
6551 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6552 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6553 .addReg(ARM::SP, RegState::Define)
6554 .addReg(ARM::SP);
6555 if (!Subtarget.isThumb())
6556 MIB.addReg(0);
6557 MIB.addImm(Subtarget.getStackAlignment().value())
6559 .setMIFlags(MIFlags);
6560 }
6561
6562 if (CFI) {
6563 // Now stack has moved back up...
6564 MachineFunction &MF = *MBB.getParent();
6565 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6566 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6567 int64_t StackPosEntry =
6569 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6570 .addCFIIndex(StackPosEntry)
6572
6573 // ... and we have restored LR.
6574 int64_t LRPosEntry =
6575 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6576 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6577 .addCFIIndex(LRPosEntry)
6579
6580 if (Auth) {
6581 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6582 int64_t Entry =
6583 MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, DwarfRAC));
6584 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6585 .addCFIIndex(Entry)
6587 }
6588 }
6589
6590 if (Auth)
6591 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6592}
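// Illustrative result (not from the original source): the mirror image of the
// save sequence above. On Thumb2 the Auth path is roughly
//     ldrd r12, lr, [sp], #Align       @ t2LDRD_POST
//     aut  r12, lr, sp                 @ t2AUT, emitted after the CFI updates
// and the plain path is a single post-indexed load of LR from the stack.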
6593
6594void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
6596 MachineFunction &MF = *MBB.getParent();
6597 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6598 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6599
6600 int64_t LRPosEntry =
6601 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6602 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6603 .addCFIIndex(LRPosEntry)
6605}
6606
6609 const outliner::OutlinedFunction &OF) const {
6610 // For thunk outlining, rewrite the last instruction from a call to a
6611 // tail-call.
6613 MachineInstr *Call = &*--MBB.instr_end();
6614 bool isThumb = Subtarget.isThumb();
6615 unsigned FuncOp = isThumb ? 2 : 0;
6616 unsigned Opc = Call->getOperand(FuncOp).isReg()
6617 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6618 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6619 : ARM::tTAILJMPdND
6620 : ARM::TAILJMPd;
6621 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6622 .add(Call->getOperand(FuncOp));
6623 if (isThumb && !Call->getOperand(FuncOp).isReg())
6624 MIB.add(predOps(ARMCC::AL));
6625 Call->eraseFromParent();
6626 }
6627
6628 // Is there a call in the outlined range?
6629 auto IsNonTailCall = [](MachineInstr &MI) {
6630 return MI.isCall() && !MI.isReturn();
6631 };
6632 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6635
6638 Et = std::prev(MBB.end());
6639
6640 // We have to save and restore LR, so we need to add it to the liveins if it
6641 // is not already part of the set. This is sufficient since outlined
6642 // functions only have one block.
6643 if (!MBB.isLiveIn(ARM::LR))
6644 MBB.addLiveIn(ARM::LR);
6645
6646 // Insert a save before the outlined region
6647 bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true);
6648 saveLROnStack(MBB, It, true, Auth);
6649
6650 // Fix up the instructions in the range, since we're going to modify the
6651 // stack.
6653 "Can only fix up stack references once");
6654 fixupPostOutline(MBB);
6655
6656 // Insert a restore before the terminator for the function. Restore LR.
6657 restoreLRFromStack(MBB, Et, true, Auth);
6658 }
6659
6660 // If this is a tail call outlined function, then there's already a return.
6663 return;
6664
6665 // Here we have to insert the return ourselves. Get the correct opcode from
6666 // the current feature set.
6667 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
6669
6670 // Did we have to modify the stack by saving the link register?
6672 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6673 return;
6674
6675 // We modified the stack.
6676 // Walk over the basic block and fix up all the stack accesses.
6677 fixupPostOutline(MBB);
6678}
6679
6685 unsigned Opc;
6686 bool isThumb = Subtarget.isThumb();
6687
6688 // Are we tail calling?
6689 if (C.CallConstructionID == MachineOutlinerTailCall) {
6690 // If yes, then we can just branch to the label.
6691 Opc = isThumb
6692 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6693 : ARM::TAILJMPd;
6694 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6695 .addGlobalAddress(M.getNamedValue(MF.getName()));
6696 if (isThumb)
6697 MIB.add(predOps(ARMCC::AL));
6698 It = MBB.insert(It, MIB);
6699 return It;
6700 }
6701
6702 // Create the call instruction.
6703 Opc = isThumb ? ARM::tBL : ARM::BL;
6704 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6705 if (isThumb)
6706 CallMIB.add(predOps(ARMCC::AL));
6707 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6708
6709 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6710 C.CallConstructionID == MachineOutlinerThunk) {
6711 // No, so just insert the call.
6712 It = MBB.insert(It, CallMIB);
6713 return It;
6714 }
6715
6716 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6717 // Can we save to a register?
6718 if (C.CallConstructionID == MachineOutlinerRegSave) {
6719 Register Reg = findRegisterToSaveLRTo(C);
6720 assert(Reg != 0 && "No callee-saved register available?");
6721
6722 // Save and restore LR from that register.
6723 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6724 if (!AFI.isLRSpilled())
6725 emitCFIForLRSaveToReg(MBB, It, Reg);
6726 CallPt = MBB.insert(It, CallMIB);
6727 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6728 if (!AFI.isLRSpilled())
6729 emitCFIForLRRestoreFromReg(MBB, It);
6730 It--;
6731 return CallPt;
6732 }
6733 // We have the default case. Save and restore from SP.
6734 if (!MBB.isLiveIn(ARM::LR))
6735 MBB.addLiveIn(ARM::LR);
6736 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6737 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6738 CallPt = MBB.insert(It, CallMIB);
6739 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6740 It--;
6741 return CallPt;
6742}
6743
6745 MachineFunction &MF) const {
6746 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6747}
6748
6749bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
6750 const MachineInstr &MI) const {
6751 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6752 // the tail predication conversion. This means that the element count
6753 // register has to be live for longer, but that has to be better than
6754 // spill/restore and VPT predication.
6755 return (isVCTP(&MI) && !isPredicated(MI)) ||
6757}
6758
6760 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6761 : ARM::BLX;
6762}
6763
6765 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6766 : ARM::tBLXr;
6767}
6768
6770 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6771 : ARM::BLX_pred;
6772}
6773
6774namespace {
6775class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6776 MachineInstr *EndLoop, *LoopCount;
6777 MachineFunction *MF;
6778 const TargetInstrInfo *TII;
6779
6780 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6781 // [LAST_IS_USE] : last reference to register in schedule is a use
6782 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6783 static int constexpr MAX_STAGES = 30;
6784 static int constexpr LAST_IS_USE = MAX_STAGES;
6785 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6786 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6787 typedef std::map<unsigned, IterNeed> IterNeeds;
6788
6789 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6790 const IterNeeds &CIN);
6791 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6792
6793 // Meanings of the fields for the various loop types:
6794 // t2Bcc:
6795 // EndLoop = branch at end of original BB that will become a kernel
6796 // LoopCount = CC setter live into branch
6797 // t2LoopEnd:
6798 // EndLoop = branch at end of original BB
6799 // LoopCount = t2LoopDec
6800public:
6801 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6802 : EndLoop(EndLoop), LoopCount(LoopCount),
6803 MF(EndLoop->getParent()->getParent()),
6804 TII(MF->getSubtarget().getInstrInfo()) {}
6805
6806 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6807 // Only ignore the terminator.
6808 return MI == EndLoop || MI == LoopCount;
6809 }
6810
6811 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
6812 if (tooMuchRegisterPressure(SSD, SMS))
6813 return false;
6814
6815 return true;
6816 }
6817
6818 std::optional<bool> createTripCountGreaterCondition(
6819 int TC, MachineBasicBlock &MBB,
6821
6822 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6823 Cond.push_back(EndLoop->getOperand(1));
6824 Cond.push_back(EndLoop->getOperand(2));
6825 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6827 }
6828 return {};
6829 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6830 // General case just lets the unrolled t2LoopDec do the subtraction and
6831 // therefore just needs to check if zero has been reached.
6832 MachineInstr *LoopDec = nullptr;
6833 for (auto &I : MBB.instrs())
6834 if (I.getOpcode() == ARM::t2LoopDec)
6835 LoopDec = &I;
6836 assert(LoopDec && "Unable to find copied LoopDec");
6837 // Check if we're done with the loop.
6838 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6839 .addReg(LoopDec->getOperand(0).getReg())
6840 .addImm(0)
6842 .addReg(ARM::NoRegister);
6844 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6845 return {};
6846 } else
6847 llvm_unreachable("Unknown EndLoop");
6848 }
6849
6850 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6851
6852 void adjustTripCount(int TripCountAdjust) override {}
6853};
6854
6855void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6856 const IterNeeds &CIN) {
6857 // Increase pressure by the amounts in CrossIterationNeeds
6858 for (const auto &N : CIN) {
6859 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6860 for (int I = 0; I < Cnt; ++I)
6863 }
6864 // Decrease pressure by the amounts in CrossIterationNeeds
6865 for (const auto &N : CIN) {
6866 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6867 for (int I = 0; I < Cnt; ++I)
6870 }
6871}
6872
6873bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6874 SMSchedule &SMS) {
6875 IterNeeds CrossIterationNeeds;
6876
6877 // Determine which values will be loop-carried after the schedule is
6878 // applied
6879
6880 for (auto &SU : SSD.SUnits) {
6881 const MachineInstr *MI = SU.getInstr();
6882 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6883 for (auto &S : SU.Succs)
6884 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6885 Register Reg = S.getReg();
6886 if (Reg.isVirtual())
6887 CrossIterationNeeds[Reg.id()].set(0);
6888 } else if (S.isAssignedRegDep()) {
6889 int OStg = SMS.stageScheduled(S.getSUnit());
6890 if (OStg >= 0 && OStg != Stg) {
6891 Register Reg = S.getReg();
6892 if (Reg.isVirtual())
6893 CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
6894 }
6895 }
6896 }
6897
6898 // Determine more-or-less what the proposed schedule (reversed) is going to
6899 // be; it might not be quite the same because the within-cycle ordering
6900 // created by SMSchedule depends upon changes to help with address offsets and
6901 // the like.
6902 std::vector<SUnit *> ProposedSchedule;
6903 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6904 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6905 ++Stage) {
6906 std::deque<SUnit *> Instrs =
6907 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6908 std::sort(Instrs.begin(), Instrs.end(),
6909 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6910 for (SUnit *SU : Instrs)
6911 ProposedSchedule.push_back(SU);
6912 }
6913
6914 // Learn whether the last use/def of each cross-iteration register is a use or
6915 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6916 // and we do not have to add the pressure.
6917 for (auto *SU : ProposedSchedule)
6918 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6919 ++OperI) {
6920 auto MO = *OperI;
6921 if (!MO.isReg() || !MO.getReg())
6922 continue;
6923 Register Reg = MO.getReg();
6924 auto CIter = CrossIterationNeeds.find(Reg.id());
6925 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6926 CIter->second[SEEN_AS_LIVE])
6927 continue;
6928 if (MO.isDef() && !MO.isDead())
6929 CIter->second.set(SEEN_AS_LIVE);
6930 else if (MO.isUse())
6931 CIter->second.set(LAST_IS_USE);
6932 }
6933 for (auto &CI : CrossIterationNeeds)
6934 CI.second.reset(LAST_IS_USE);
6935
6936 RegionPressure RecRegPressure;
6937 RegPressureTracker RPTracker(RecRegPressure);
6938 RegisterClassInfo RegClassInfo;
6939 RegClassInfo.runOnMachineFunction(*MF);
6940 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6941 EndLoop->getParent()->end(), false, false);
6942
6943 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6944
6945 for (auto *SU : ProposedSchedule) {
6946 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6947 RPTracker.setPos(std::next(CurInstI));
6948 RPTracker.recede();
6949
6950 // Track what cross-iteration registers would be seen as live
6951 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6952 auto MO = *OperI;
6953 if (!MO.isReg() || !MO.getReg())
6954 continue;
6955 Register Reg = MO.getReg();
6956 if (MO.isDef() && !MO.isDead()) {
6957 auto CIter = CrossIterationNeeds.find(Reg.id());
6958 if (CIter != CrossIterationNeeds.end()) {
6959 CIter->second.reset(0);
6960 CIter->second.reset(SEEN_AS_LIVE);
6961 }
6962 }
6963 }
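      // Re-mark values this instruction consumes from an earlier stage: the
      // producer/consumer stage distance is recorded as a bit in
      // CrossIterationNeeds, and SEEN_AS_LIVE notes that the value is live here.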
6964 for (auto &S : SU->Preds) {
6965 auto Stg = SMS.stageScheduled(SU);
6966 if (S.isAssignedRegDep()) {
6967 Register Reg = S.getReg();
6968 auto CIter = CrossIterationNeeds.find(Reg.id());
6969 if (CIter != CrossIterationNeeds.end()) {
6970 auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
6971 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6972 if (Stg - Stg2 < MAX_STAGES)
6973 CIter->second.set(Stg - Stg2);
6974 CIter->second.set(SEEN_AS_LIVE);
6975 }
6976 }
6977 }
6978
6979 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6980 }
6981
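  // Compare the recorded high-water marks against each pressure set's limit;
  // a single overflowing set is enough to reject the proposed schedule.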
6982 auto &P = RPTracker.getPressure().MaxSetPressure;
6983 for (unsigned I = 0, E = P.size(); I < E; ++I) {
6984 // Exclude some Neon register classes.
6985 if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
6986 I == ARM::DTriple_with_qsub_0_in_QPR)
6987 continue;
6988
6989 if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
6990 return true;
6991 }
6992 }
6993 return false;
6994}
6995
6996} // namespace
6997
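// Two single-block loop shapes are accepted: a loop ending in a conditional
// t2Bcc, for which the in-block CPSR setter must be found so the pipeliner can
// pin it to stage 0, and a low-overhead loop ending in t2LoopEnd fed by
// t2LoopDec and t2DoLoopStart. Any other shape returns nullptr.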
6998std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6999 ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
7000 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
7001 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
7002 if (Preheader == LoopBB)
7003 Preheader = *std::next(LoopBB->pred_begin());
7004
7005 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
7006 // If the branch is a Bcc, then the CPSR should be set somewhere within the
7007 // block. We need to determine the reaching definition of CPSR so that
7008 // it can be marked as non-pipelineable, allowing the pipeliner to force
7009 // it into stage 0 or give up if it cannot or will not do so.
7010 MachineInstr *CCSetter = nullptr;
7011 for (auto &L : LoopBB->instrs()) {
7012 if (L.isCall())
7013 return nullptr;
7014 if (isCPSRDefined(L))
7015 CCSetter = &L;
7016 }
7017 if (CCSetter)
7018 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
7019 else
7020 return nullptr; // Unable to find the CC setter, so unable to guarantee
7021 // that pipeline will work
7022 }
7023
7024 // Recognize:
7025 // preheader:
7026 // %1 = t2DoLoopStart %0
7027 // loop:
7028 // %2 = phi %1, <not loop>, %..., %loop
7029 // %3 = t2LoopDec %2, <imm>
7030 // t2LoopEnd %3, %loop
7031
7032 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
7033 for (auto &L : LoopBB->instrs())
7034 if (L.isCall())
7035 return nullptr;
7036 else if (isVCTP(&L))
7037 return nullptr;
7038 Register LoopDecResult = I->getOperand(0).getReg();
7039 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
7040 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
7041 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
7042 return nullptr;
7043 MachineInstr *LoopStart = nullptr;
7044 for (auto &J : Preheader->instrs())
7045 if (J.getOpcode() == ARM::t2DoLoopStart)
7046 LoopStart = &J;
7047 if (!LoopStart)
7048 return nullptr;
7049 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
7050 }
7051 return nullptr;
7052}