LLVM 23.0.0git
ARMBaseInstrInfo.cpp
Go to the documentation of this file.
1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
19#include "ARMSubtarget.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/Module.h"
53#include "llvm/MC/MCAsmInfo.h"
54#include "llvm/MC/MCInstrDesc.h"
59#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <new>
68#include <utility>
69#include <vector>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "arm-instrinfo"
74
75#define GET_INSTRINFO_CTOR_DTOR
76#include "ARMGenInstrInfo.inc"
77
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
///
/// Each entry pairs a fused multiply-accumulate opcode (MLxOpc) with the
/// discrete multiply + add/sub opcodes it can be expanded into when a
/// VFP MLx pipeline hazard must be broken.
/// NOTE(review): the `struct ARM_MLxEntry {` line was lost in this
/// extraction and has been restored here.
struct ARM_MLxEntry {
  uint16_t MLxOpc;    // MLA / MLS opcode
  uint16_t MulOpc;    // Expanded multiplication opcode
  uint16_t AddSubOpc; // Expanded add / sub opcode
  bool NegAcc;        // True if the acc is negated before the add / sub.
  bool HasLane;       // True if instruction has an extra "lane" operand.
};
86
// Table of MLA/MLS instructions and the multiply + add/sub opcode pair each
// one expands to. The constructor below indexes this table by MLxOpc via
// MLxEntryMap and collects MulOpc/AddSubOpc into MLxHazardOpcodes.
87 static const ARM_MLxEntry ARM_MLxTable[] = {
88  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
89  // fp scalar ops
90  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98
99  // fp SIMD ops
100  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108};
109
// ARMBaseInstrInfo constructor.
// NOTE(review): the signature line(s) (doxygen lines 110-111) are missing
// from this extraction; the member-initializer list below is visible.
// Builds MLxEntryMap (MLx opcode -> index into ARM_MLxTable) and the set of
// opcodes that participate in MLx pipeline hazards.
112 : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
113 Subtarget(STI) {
114 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
      // Each MLx opcode must appear in the table exactly once.
115 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
116 llvm_unreachable("Duplicated entries?");
117 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
118 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
119 }
120}
121
122// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
123// currently defaults to no prepass hazard recognizer.
// NOTE(review): doxygen lines 124-125 (function signature) and 132 (the
// fall-through return, presumably delegating to the base-class
// implementation — confirm against upstream) are missing from this dump.
126 const ScheduleDAG *DAG) const {
127 if (usePreRAHazardRecognizer()) {
128 const InstrItineraryData *II =
129 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
130 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
131 }
133}
134
135// Called during:
136// - pre-RA scheduling
137// - post-RA scheduling when FeatureUseMISched is set
// NOTE(review): doxygen lines 138 (signature start), 140, 148 and 154 are
// missing from this extraction; lines 141/150/153 below are blank in the
// original. The visible logic adds a Cortex-M7 bank-conflict recognizer to
// a composite recognizer when scheduling post-RA (no VReg liveness).
139 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
141
142 // We would like to restrict this hazard recognizer to only
143 // post-RA scheduling; we can tell that we're post-RA because we don't
144 // track VRegLiveness.
145 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
146 // banks banked on bit 2. Assume that TCMs are in use.
147 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
149 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
150
151 // Not inserting ARMHazardRecognizerFPMLx because that would change
152 // legacy behavior
153
155 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
156 return MHR;
157}
158
159// Called during post-RA scheduling when FeatureUseMISched is not set
// NOTE(review): doxygen lines 160-161 (signature), 163 and 168 are missing
// from this extraction. Visible logic: build a composite recognizer, add the
// FP MLx hazard recognizer for Thumb2/VFP2 targets, then append a base
// recognizer (BHR) if one was created.
162 const ScheduleDAG *DAG) const {
164
165 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
166 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
167
169 if (BHR)
170 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
171 return MHR;
172}
173
174// Branch analysis.
175// Cond vector output format:
176// 0 elements indicates an unconditional branch
177// 2 elements indicates a conditional branch; the elements are
178// the condition to check and the CPSR.
179// 3 elements indicates a hardware loop end; the elements
180// are the opcode, the operand value to test, and a dummy
181// operand used to pad out to 3 operands.
// NOTE(review): doxygen lines 182-183 (signature), 185, 190 (iterator
// initialization, presumably I = MBB.instr_end() — confirm), 236 and 283
// are missing from this extraction. Returns false on successful analysis,
// true when the terminators cannot be understood.
184 MachineBasicBlock *&FBB,
186 bool AllowModify) const {
187 TBB = nullptr;
188 FBB = nullptr;
189
191 if (I == MBB.instr_begin())
192 return false; // Empty blocks are easy.
193 --I;
194
195 // Walk backwards from the end of the basic block until the branch is
196 // analyzed or we give up.
197 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
198 // Flag to be raised on unanalyzeable instructions. This is useful in cases
199 // where we want to clean up on the end of the basic block before we bail
200 // out.
201 bool CantAnalyze = false;
202
203 // Skip over DEBUG values, predicated nonterminators and speculation
204 // barrier terminators.
205 while (I->isDebugInstr() || !I->isTerminator() ||
206 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
207 I->getOpcode() == ARM::t2DoLoopStartTP){
208 if (I == MBB.instr_begin())
209 return false;
210 --I;
211 }
212
213 if (isIndirectBranchOpcode(I->getOpcode()) ||
214 isJumpTableBranchOpcode(I->getOpcode())) {
215 // Indirect branches and jump tables can't be analyzed, but we still want
216 // to clean up any instructions at the tail of the basic block.
217 CantAnalyze = true;
218 } else if (isUncondBranchOpcode(I->getOpcode())) {
219 TBB = I->getOperand(0).getMBB();
220 } else if (isCondBranchOpcode(I->getOpcode())) {
221 // Bail out if we encounter multiple conditional branches.
222 if (!Cond.empty())
223 return true;
224
225 assert(!FBB && "FBB should have been null.");
226 FBB = TBB;
227 TBB = I->getOperand(0).getMBB();
228 Cond.push_back(I->getOperand(1));
229 Cond.push_back(I->getOperand(2));
230 } else if (I->isReturn()) {
231 // Returns can't be analyzed, but we should run cleanup.
232 CantAnalyze = true;
233 } else if (I->getOpcode() == ARM::t2LoopEnd &&
234 MBB.getParent()
235 ->getSubtarget<ARMSubtarget>()
            // NOTE(review): line 236 (condition tail, presumably
            // .hasLOB()) { — confirm) is missing from this extraction.
237 if (!Cond.empty())
238 return true;
239 FBB = TBB;
240 TBB = I->getOperand(1).getMBB();
          // Hardware loop end: 3-element Cond (opcode, test operand, pad).
241 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
242 Cond.push_back(I->getOperand(0));
243 Cond.push_back(MachineOperand::CreateImm(0));
244 } else {
245 // We encountered other unrecognized terminator. Bail out immediately.
246 return true;
247 }
248
249 // Cleanup code - to be run for unpredicated unconditional branches and
250 // returns.
251 if (!isPredicated(*I) &&
252 (isUncondBranchOpcode(I->getOpcode()) ||
253 isIndirectBranchOpcode(I->getOpcode()) ||
254 isJumpTableBranchOpcode(I->getOpcode()) ||
255 I->isReturn())) {
256 // Forget any previous condition branch information - it no longer applies.
257 Cond.clear();
258 FBB = nullptr;
259
260 // If we can modify the function, delete everything below this
261 // unconditional branch.
262 if (AllowModify) {
263 MachineBasicBlock::iterator DI = std::next(I);
264 while (DI != MBB.instr_end()) {
265 MachineInstr &InstToDelete = *DI;
266 ++DI;
267 // Speculation barriers must not be deleted.
268 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
269 continue;
270 InstToDelete.eraseFromParent();
271 }
272 }
273 }
274
275 if (CantAnalyze) {
276 // We may not be able to analyze the block, but we could still have
277 // an unconditional branch as the last instruction in the block, which
278 // just branches to layout successor. If this is the case, then just
279 // remove it if we're allowed to make modifications.
280 if (AllowModify && !isPredicated(MBB.back()) &&
281 isUncondBranchOpcode(MBB.back().getOpcode()) &&
282 TBB && MBB.isLayoutSuccessor(TBB))
          // NOTE(review): line 283 (the branch removal statement) is missing
          // from this extraction.
284 return true;
285 }
286
287 if (I == MBB.instr_begin())
288 return false;
289
290 --I;
291 }
292
293 // We made it past the terminators without bailing out - we must have
294 // analyzed this branch successfully.
295 return false;
296}
297
// Remove up to two trailing branches (an unconditional branch and, before
// it, a conditional branch or t2LoopEnd) from MBB. Returns the number of
// instructions removed (0, 1 or 2).
// NOTE(review): doxygen line 298 (signature start) is missing from this
// extraction.
299 int *BytesRemoved) const {
300 assert(!BytesRemoved && "code size not handled");
301
302 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
303 if (I == MBB.end())
304 return 0;
305
306 if (!isUncondBranchOpcode(I->getOpcode()) &&
307 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
308 return 0;
309
310 // Remove the branch.
311 I->eraseFromParent();
312
313 I = MBB.end();
314
315 if (I == MBB.begin()) return 1;
316 --I;
317 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
318 return 1;
319
320 // Remove the branch.
321 I->eraseFromParent();
322 return 2;
323}
324
// Insert a branch sequence at the end of MBB and return the number of
// instructions inserted (1 or 2). Cond follows the format documented above
// analyzeBranch: empty = unconditional, 2 = conditional (cond code + CPSR),
// 3 = hardware loop end (opcode, test operand, pad).
// NOTE(review): doxygen lines 325-328 (signature: MBB, TBB, FBB, Cond) and
// 349 (the Thumb unconditional-branch BuildMI call) are missing from this
// extraction.
329 const DebugLoc &DL,
330 int *BytesAdded) const {
331 assert(!BytesAdded && "code size not handled");
332 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
    // Select the unconditional / conditional branch opcodes for the current
    // ISA (ARM, Thumb1 or Thumb2).
333 int BOpc = !AFI->isThumbFunction()
334 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
335 int BccOpc = !AFI->isThumbFunction()
336 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
337 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
338
339 // Shouldn't be a fall through.
340 assert(TBB && "insertBranch must not be told to insert a fallthrough");
341 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
342 "ARM branch conditions have two or three components!");
343
344 // For conditional branches, we use addOperand to preserve CPSR flags.
345
346 if (!FBB) {
347 if (Cond.empty()) { // Unconditional branch?
348 if (isThumb)
350 else
351 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
352 } else if (Cond.size() == 2) {
353 BuildMI(&MBB, DL, get(BccOpc))
354 .addMBB(TBB)
355 .addImm(Cond[0].getImm())
356 .add(Cond[1]);
357 } else
        // 3-element Cond: hardware loop end; Cond[0] holds the opcode.
358 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
359 return 1;
360 }
361
362 // Two-way conditional branch.
363 if (Cond.size() == 2)
364 BuildMI(&MBB, DL, get(BccOpc))
365 .addMBB(TBB)
366 .addImm(Cond[0].getImm())
367 .add(Cond[1]);
368 else if (Cond.size() == 3)
369 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
370 if (isThumb)
371 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
372 else
373 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
374 return 2;
375}
376
// Invert a 2-element branch condition in place (condition code + CPSR).
// Returns false on success; returns true (cannot reverse) for the
// 3-element hardware-loop-end form.
// NOTE(review): doxygen lines 377-378 (signature) are missing from this
// extraction.
379 if (Cond.size() == 2) {
380 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
381 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
382 return false;
383 }
384 return true;
385}
386
// Return true if MI (or, for a bundle, any instruction inside the bundle)
// carries a predicate operand other than ARMCC::AL.
// NOTE(review): doxygen lines 387 (signature) and 389 (bundle iterator
// initialization, presumably I = MI.getIterator() — confirm) are missing
// from this extraction.
388 if (MI.isBundle()) {
390 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
391 while (++I != E && I->isInsideBundle()) {
392 int PIdx = I->findFirstPredOperandIdx();
393 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
394 return true;
395 }
396 return false;
397 }
398
399 int PIdx = MI.findFirstPredOperandIdx();
400 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
401}
402
// Produce a MIR comment for an operand: either the generic comment supplied
// by the base implementation, or — for the first predicate operand — the
// spelled-out ARM condition code ("CC::<cond>").
// NOTE(review): doxygen lines 403 (signature start) and 409 (the call to the
// base-class createMIROperandComment — confirm) are missing from this dump.
404 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
405 const TargetRegisterInfo *TRI) const {
406
407 // First, let's see if there is a generic comment for this operand
408 std::string GenericComment =
410 if (!GenericComment.empty())
411 return GenericComment;
412
413 // If not, check if we have an immediate operand.
414 if (!Op.isImm())
415 return std::string();
416
417 // And print its corresponding condition code if the immediate is a
418 // predicate.
419 int FirstPredOp = MI.findFirstPredOperandIdx();
420 if (FirstPredOp != (int) OpIdx)
421 return std::string();
422
423 std::string CC = "CC::";
424 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
425 return CC;
426}
427
// Convert MI into a predicated instruction using the (cond-code, CPSR)
// pair in Pred. Returns true if MI was predicated.
// NOTE(review): doxygen lines 428-429 (signature) and 431-432 (the special
// case guarding the first branch, presumably handling unconditional-branch
// opcodes — confirm) are missing from this extraction.
430 unsigned Opc = MI.getOpcode();
433 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
434 .addImm(Pred[0].getImm())
435 .addReg(Pred[1].getReg());
436 return true;
437 }
438
439 int PIdx = MI.findFirstPredOperandIdx();
440 if (PIdx != -1) {
441 MachineOperand &PMO = MI.getOperand(PIdx);
442 PMO.setImm(Pred[0].getImm());
443 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
444
445 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
446 // IT block. This affects how they are printed.
447 const MCInstrDesc &MCID = MI.getDesc();
448 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
449 assert(MCID.operands()[1].isOptionalDef() &&
450 "CPSR def isn't expected operand");
451 assert((MI.getOperand(1).isDead() ||
452 MI.getOperand(1).getReg() != ARM::CPSR) &&
453 "if conversion tried to stop defining used CPSR");
454 MI.getOperand(1).setReg(ARM::NoRegister);
455 }
456
457 return true;
458 }
459 return false;
460}
461
// Return true if Pred1 subsumes Pred2, i.e. any instruction executed under
// Pred2 would also execute under Pred1 (e.g. HS subsumes HI, AL subsumes
// everything).
// NOTE(review): doxygen line 462 (signature start) is missing from this
// extraction.
463 ArrayRef<MachineOperand> Pred2) const {
464 if (Pred1.size() > 2 || Pred2.size() > 2)
465 return false;
466
467 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
468 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
469 if (CC1 == CC2)
470 return true;
471
472 switch (CC1) {
473 default:
474 return false;
475 case ARMCC::AL:
476 return true;
477 case ARMCC::HS:
478 return CC2 == ARMCC::HI;
479 case ARMCC::LS:
480 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
481 case ARMCC::GE:
482 return CC2 == ARMCC::GT;
483 case ARMCC::LE:
484 return CC2 == ARMCC::LT;
485 }
486}
487
// Collect into Pred every operand of MI that defines or clobbers CPSR.
// Returns true if any such operand was found. When SkipDead is set, dead
// CPSR defs on Thumb1 flag-setting arithmetic are ignored so those
// instructions can still be placed inside IT blocks.
// NOTE(review): doxygen line 488 (signature start) is missing from this
// extraction.
489 std::vector<MachineOperand> &Pred,
490 bool SkipDead) const {
491 bool Found = false;
492 for (const MachineOperand &MO : MI.operands()) {
493 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
494 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
495 if (ClobbersCPSR || IsCPSR) {
496
497 // Filter out T1 instructions that have a dead CPSR,
498 // allowing IT blocks to be generated containing T1 instructions
499 const MCInstrDesc &MCID = MI.getDesc();
500 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
501 SkipDead)
502 continue;
503
504 Pred.push_back(MO);
505 Found = true;
506 }
507 }
508
509 return Found;
510}
511
// Return true if MI has a live (non-dead) definition of CPSR.
// NOTE(review): doxygen line 512 (signature) is missing from this
// extraction.
513 for (const auto &MO : MI.operands())
514 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
515 return true;
516 return false;
517}
518
// Decide whether MI may be placed inside a Thumb2 IT block. Instructions
// not listed below are eligible unconditionally; the listed Thumb1
// flag-setting arithmetic forms are eligible only when their CPSR def is
// not live (see the missing line noted below).
// NOTE(review): doxygen lines 519 (signature) and 544 (the shared return for
// the listed cases, presumably returning !isCPSRDefined(*MI) — confirm) are
// missing from this extraction.
520 switch (MI->getOpcode()) {
521 default: return true;
522 case ARM::tADC: // ADC (register) T1
523 case ARM::tADDi3: // ADD (immediate) T1
524 case ARM::tADDi8: // ADD (immediate) T2
525 case ARM::tADDrr: // ADD (register) T1
526 case ARM::tAND: // AND (register) T1
527 case ARM::tASRri: // ASR (immediate) T1
528 case ARM::tASRrr: // ASR (register) T1
529 case ARM::tBIC: // BIC (register) T1
530 case ARM::tEOR: // EOR (register) T1
531 case ARM::tLSLri: // LSL (immediate) T1
532 case ARM::tLSLrr: // LSL (register) T1
533 case ARM::tLSRri: // LSR (immediate) T1
534 case ARM::tLSRrr: // LSR (register) T1
535 case ARM::tMUL: // MUL T1
536 case ARM::tMVN: // MVN (register) T1
537 case ARM::tORR: // ORR (register) T1
538 case ARM::tROR: // ROR (register) T1
539 case ARM::tRSB: // RSB (immediate) T1
540 case ARM::tSBC: // SBC (register) T1
541 case ARM::tSUBi3: // SUB (immediate) T1
542 case ARM::tSUBi8: // SUB (immediate) T2
543 case ARM::tSUBrr: // SUB (register) T1
545 }
546}
547
548/// isPredicable - Return true if the specified instruction can be predicated.
549/// By default, this returns true for every instruction with a
550/// PredicateOperand.
// NOTE(review): doxygen lines 551 (signature), 558 (an additional early-out
// check, presumably the IT-block eligibility or speculation-barrier test —
// confirm) and 563 (the AFI initializer tail, presumably
// MF->getInfo<ARMFunctionInfo>() — confirm) are missing from this dump.
552 if (!MI.isPredicable())
553 return false;
554
555 if (MI.isBundle())
556 return false;
557
559 return false;
560
561 const MachineFunction *MF = MI.getParent()->getParent();
562 const ARMFunctionInfo *AFI =
564
565 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
566 // In their ARM encoding, they can't be encoded in a conditional form.
567 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
568 return false;
569
570 // Make indirect control flow changes unpredicable when SLS mitigation is
571 // enabled.
572 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
573 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
574 return false;
575 if (ST.hardenSlsBlr() && isIndirectCall(MI))
576 return false;
577
578 if (AFI->isThumb2Function()) {
579 if (getSubtarget().restrictIT())
580 return isV8EligibleForIT(&MI);
581 }
582
583 return true;
584}
585
586namespace llvm {
587
588template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
589 for (const MachineOperand &MO : MI->operands()) {
590 if (!MO.isReg() || MO.isUndef() || MO.isUse())
591 continue;
592 if (MO.getReg() != ARM::CPSR)
593 continue;
594 if (!MO.isDead())
595 return false;
596 }
597 // all definitions of CPSR are dead
598 return true;
599}
600
601} // end namespace llvm
602
603/// GetInstSize - Return the size of the specified MachineInstr.
604///
// NOTE(review): doxygen lines 605 (signature), 623 (the condition for the
// COPY case deciding between 4 and 2 bytes — confirm against upstream) and
// 641 (the condition guarding the inline-asm alignment, presumably a Thumb
// function check — confirm) are missing from this extraction.
606 const MachineBasicBlock &MBB = *MI.getParent();
607 const MachineFunction *MF = MBB.getParent();
608 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
609
610 const MCInstrDesc &MCID = MI.getDesc();
611
612 switch (MI.getOpcode()) {
613 default:
614 // Return the size specified in .td file. If there's none, return 0, as we
615 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
616 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
617 // contrast to AArch64 instructions which have a default size of 4 bytes for
618 // example.
619 return MCID.getSize();
620 case TargetOpcode::BUNDLE:
621 return getInstBundleLength(MI);
622 case TargetOpcode::COPY:
624 return 4;
625 else
626 return 2;
627 case ARM::CONSTPOOL_ENTRY:
628 case ARM::JUMPTABLE_INSTS:
629 case ARM::JUMPTABLE_ADDRS:
630 case ARM::JUMPTABLE_TBB:
631 case ARM::JUMPTABLE_TBH:
632 // If this machine instr is a constant pool entry, its size is recorded as
633 // operand #2.
634 return MI.getOperand(2).getImm();
635 case ARM::SPACE:
636 return MI.getOperand(1).getImm();
637 case ARM::INLINEASM:
638 case ARM::INLINEASM_BR: {
639 // If this machine instr is an inline asm, measure it.
640 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
642 Size = alignTo(Size, 4);
643 return Size;
644 }
645 }
646}
647
// Sum the sizes of all instructions inside a bundle headed by MI.
// NOTE(review): doxygen lines 650 (iterator initialization, presumably
// I = MI.getIterator() — confirm) and 654 (the accumulation, presumably
// Size += getInstSizeInBytes(*I) — confirm) are missing from this dump.
648unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
649 unsigned Size = 0;
651 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
652 while (++I != E && I->isInsideBundle()) {
653 assert(!I->isBundle() && "No nested bundle!");
655 }
656 return Size;
657}
658
// Emit an MRS-style instruction copying CPSR/APSR into DestReg, selecting
// the correct opcode for ARM, Thumb A/R-class or Thumb M-class.
// NOTE(review): doxygen lines 659-660 (signature) and 667 (the start of the
// MIB initializer) are missing from this extraction.
661 MCRegister DestReg, bool KillSrc,
662 const ARMSubtarget &Subtarget) const {
663 unsigned Opc = Subtarget.isThumb()
664 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
665 : ARM::MRS;
666
668 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
669
670 // There is only 1 A/R class MRS instruction, and it always refers to
671 // APSR. However, there are lots of other possibilities on M-class cores.
672 if (Subtarget.isMClass())
673 MIB.addImm(0x800);
674
675 MIB.add(predOps(ARMCC::AL))
676 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
677}
678
// Emit an MSR-style instruction copying SrcReg into CPSR/APSR, selecting
// the correct opcode for ARM, Thumb A/R-class or Thumb M-class. The 0x800
// (M-class) / 8 (A/R-class) immediate selects the APSR_nzcvq mask fields.
// NOTE(review): doxygen lines 679-680 (signature) and 695-696 (the trailing
// predicate and implicit CPSR def operands — confirm) are missing from this
// extraction.
681 MCRegister SrcReg, bool KillSrc,
682 const ARMSubtarget &Subtarget) const {
683 unsigned Opc = Subtarget.isThumb()
684 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
685 : ARM::MSR;
686
687 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
688
689 if (Subtarget.isMClass())
690 MIB.addImm(0x800);
691 else
692 MIB.addImm(8);
693
694 MIB.addReg(SrcReg, getKillRegState(KillSrc))
697}
698
700 MIB.addImm(ARMVCC::None);
701 MIB.addReg(0);
702 MIB.addReg(0); // tp_reg
703}
704
710
712 MIB.addImm(Cond);
713 MIB.addReg(ARM::VPR, RegState::Implicit);
714 MIB.addReg(0); // tp_reg
715}
716
718 unsigned Cond, unsigned Inactive) {
720 MIB.addReg(Inactive);
721}
722
// Emit instructions copying SrcReg to DestReg, choosing single-instruction
// forms (MOVr/VMOVS/VMOVD/VORRq/...) where possible and falling back to
// per-subregister copy loops for register tuples, with special cases for
// CPSR, VPR and FPSCR_NZCV.
// NOTE(review): doxygen lines 723-724 (signature), 735, 828, 834, 840, 846
// (trailing predicate operands on several BuildMI chains), 852-853 (TRI and
// Mov declarations — confirm) and 878 (MVE predicated-operand call) are
// missing from this extraction.
725 const DebugLoc &DL, Register DestReg,
726 Register SrcReg, bool KillSrc,
727 bool RenamableDest,
728 bool RenamableSrc) const {
729 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
730 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
731
732 if (GPRDest && GPRSrc) {
733 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
734 .addReg(SrcReg, getKillRegState(KillSrc))
736 .add(condCodeOp());
737 return;
738 }
739
740 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
741 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
742
743 unsigned Opc = 0;
744 if (SPRDest && SPRSrc)
745 Opc = ARM::VMOVS;
746 else if (GPRDest && SPRSrc)
747 Opc = ARM::VMOVRS;
748 else if (SPRDest && GPRSrc)
749 Opc = ARM::VMOVSR;
750 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
751 Opc = ARM::VMOVD;
752 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
753 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
754
755 if (Opc) {
756 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
757 MIB.addReg(SrcReg, getKillRegState(KillSrc));
758 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
759 MIB.addReg(SrcReg, getKillRegState(KillSrc));
760 if (Opc == ARM::MVE_VORR)
761 addUnpredicatedMveVpredROp(MIB, DestReg);
762 else if (Opc != ARM::MQPRCopy)
763 MIB.add(predOps(ARMCC::AL));
764 return;
765 }
766
767 // Handle register classes that require multiple instructions.
768 unsigned BeginIdx = 0;
769 unsigned SubRegs = 0;
770 int Spacing = 1;
771
772 // Use VORRq when possible.
773 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
774 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
775 BeginIdx = ARM::qsub_0;
776 SubRegs = 2;
777 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
778 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
779 BeginIdx = ARM::qsub_0;
780 SubRegs = 4;
781 // Fall back to VMOVD.
782 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
783 Opc = ARM::VMOVD;
784 BeginIdx = ARM::dsub_0;
785 SubRegs = 2;
786 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
787 Opc = ARM::VMOVD;
788 BeginIdx = ARM::dsub_0;
789 SubRegs = 3;
790 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
791 Opc = ARM::VMOVD;
792 BeginIdx = ARM::dsub_0;
793 SubRegs = 4;
794 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
795 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
796 BeginIdx = ARM::gsub_0;
797 SubRegs = 2;
798 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
799 Opc = ARM::VMOVD;
800 BeginIdx = ARM::dsub_0;
801 SubRegs = 2;
802 Spacing = 2;
803 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
804 Opc = ARM::VMOVD;
805 BeginIdx = ARM::dsub_0;
806 SubRegs = 3;
807 Spacing = 2;
808 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
809 Opc = ARM::VMOVD;
810 BeginIdx = ARM::dsub_0;
811 SubRegs = 4;
812 Spacing = 2;
813 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
814 !Subtarget.hasFP64()) {
815 Opc = ARM::VMOVS;
816 BeginIdx = ARM::ssub_0;
817 SubRegs = 2;
818 } else if (SrcReg == ARM::CPSR) {
819 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
820 return;
821 } else if (DestReg == ARM::CPSR) {
822 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
823 return;
824 } else if (DestReg == ARM::VPR) {
825 assert(ARM::GPRRegClass.contains(SrcReg));
826 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
827 .addReg(SrcReg, getKillRegState(KillSrc))
829 return;
830 } else if (SrcReg == ARM::VPR) {
831 assert(ARM::GPRRegClass.contains(DestReg));
832 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
833 .addReg(SrcReg, getKillRegState(KillSrc))
835 return;
836 } else if (DestReg == ARM::FPSCR_NZCV) {
837 assert(ARM::GPRRegClass.contains(SrcReg));
838 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
839 .addReg(SrcReg, getKillRegState(KillSrc))
841 return;
842 } else if (SrcReg == ARM::FPSCR_NZCV) {
843 assert(ARM::GPRRegClass.contains(DestReg));
844 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
845 .addReg(SrcReg, getKillRegState(KillSrc))
847 return;
848 }
849
850 assert(Opc && "Impossible reg-to-reg copy");
851
854
855 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
856 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
857 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
858 Spacing = -Spacing;
859 }
860#ifndef NDEBUG
861 SmallSet<unsigned, 4> DstRegs;
862#endif
863 for (unsigned i = 0; i != SubRegs; ++i) {
864 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
865 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
866 assert(Dst && Src && "Bad sub-register");
867#ifndef NDEBUG
868 assert(!DstRegs.count(Src) && "destructive vector copy");
869 DstRegs.insert(Dst);
870#endif
871 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
872 // VORR (NEON or MVE) takes two source operands.
873 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
874 Mov.addReg(Src);
875 }
876 // MVE VORR takes predicate operands in place of an ordinary condition.
877 if (Opc == ARM::MVE_VORR)
879 else
880 Mov = Mov.add(predOps(ARMCC::AL));
881 // MOVr can set CC.
882 if (Opc == ARM::MOVr)
883 Mov = Mov.add(condCodeOp());
884 }
885 // Add implicit super-register defs and kills to the last instruction.
886 Mov->addRegisterDefined(DestReg, TRI);
887 if (KillSrc)
888 Mov->addRegisterKilled(SrcReg, TRI);
889}
890
891std::optional<DestSourcePair>
893 // VMOVRRD is also a copy instruction but it requires
894 // special way of handling. It is more complex copy version
895 // and since that we are not considering it. For recognition
896 // of such instruction isExtractSubregLike MI interface fuction
897 // could be used.
898 // VORRq is considered as a move only if two inputs are
899 // the same register.
900 if (!MI.isMoveReg() ||
901 (MI.getOpcode() == ARM::VORRq &&
902 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
903 return std::nullopt;
904 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
905}
906
// Describe the value loaded into Reg for call-site parameter debug info.
// Bails out (nullopt) when MI is a copy whose destination is not Reg, since
// narrower/wider forwarding registers are not handled (see comments below).
// NOTE(review): doxygen lines 908 (signature continuation) and 934 (the
// final return, presumably delegating to the base-class implementation —
// confirm) are missing from this extraction.
907std::optional<ParamLoadedValue>
909 Register Reg) const {
910 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
911 Register DstReg = DstSrcPair->Destination->getReg();
912
913 // TODO: We don't handle cases where the forwarding reg is narrower/wider
914 // than the copy registers. Consider for example:
915 //
916 // s16 = VMOVS s0
917 // s17 = VMOVS s1
918 // call @callee(d0)
919 //
920 // We'd like to describe the call site value of d0 as d8, but this requires
921 // gathering and merging the descriptions for the two VMOVS instructions.
922 //
923 // We also don't handle the reverse situation, where the forwarding reg is
924 // narrower than the copy destination:
925 //
926 // d8 = VMOVD d0
927 // call @callee(s1)
928 //
929 // We need to produce a fragment description (the call site value of s1 is
930 // /not/ just d8).
931 if (DstReg != Reg)
932 return std::nullopt;
933 }
935}
936
// Add Reg (optionally narrowed to sub-register SubIdx) to MIB with the given
// register state. For physical registers the sub-register is resolved
// eagerly; otherwise the sub-index is attached to the operand.
// NOTE(review): doxygen lines 937 (signature start) and 944 (the physical-
// register test guarding the eager getSubReg path — confirm) are missing
// from this extraction.
938 unsigned Reg,
939 unsigned SubIdx,
940 RegState State) const {
941 if (!SubIdx)
942 return MIB.addReg(Reg, State);
943
945 return MIB.addReg(getRegisterInfo().getSubReg(Reg, SubIdx), State);
946 return MIB.addReg(Reg, State, SubIdx);
947}
948
// Spill SrcReg to stack slot FI, dispatching on the register class spill
// size (2..64 bytes) to the appropriate store instruction, with aligned
// NEON forms when the stack can be realigned and MVE forms on MVE targets.
// NOTE(review): doxygen lines 949-950 (signature), 958, 960-961 (the
// MachineMemOperand construction), and the trailing predicate-operand lines
// on many BuildMI chains (972, 983, 990, 997, 1004, 1015, 1022, 1029, 1045,
// 1051, 1060, 1074, 1076, 1079, 1101, 1108, 1111, 1131) are missing from
// this extraction.
951 Register SrcReg, bool isKill, int FI,
952 const TargetRegisterClass *RC,
953 Register VReg,
954 MachineInstr::MIFlag Flags) const {
955 MachineFunction &MF = *MBB.getParent();
956 MachineFrameInfo &MFI = MF.getFrameInfo();
957 Align Alignment = MFI.getObjectAlign(FI);
959
962 MFI.getObjectSize(FI), Alignment);
963
964 switch (TRI.getSpillSize(*RC)) {
965 case 2:
966 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
967 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
968 .addReg(SrcReg, getKillRegState(isKill))
969 .addFrameIndex(FI)
970 .addImm(0)
971 .addMemOperand(MMO)
973 } else
974 llvm_unreachable("Unknown reg class!");
975 break;
976 case 4:
977 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
978 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
979 .addReg(SrcReg, getKillRegState(isKill))
980 .addFrameIndex(FI)
981 .addImm(0)
982 .addMemOperand(MMO)
984 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
985 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
986 .addReg(SrcReg, getKillRegState(isKill))
987 .addFrameIndex(FI)
988 .addImm(0)
989 .addMemOperand(MMO)
991 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
992 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
993 .addReg(SrcReg, getKillRegState(isKill))
994 .addFrameIndex(FI)
995 .addImm(0)
996 .addMemOperand(MMO)
998 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
999 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
1000 .addReg(SrcReg, getKillRegState(isKill))
1001 .addFrameIndex(FI)
1002 .addImm(0)
1003 .addMemOperand(MMO)
1005 } else
1006 llvm_unreachable("Unknown reg class!");
1007 break;
1008 case 8:
1009 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1010 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1011 .addReg(SrcReg, getKillRegState(isKill))
1012 .addFrameIndex(FI)
1013 .addImm(0)
1014 .addMemOperand(MMO)
1016 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1017 if (Subtarget.hasV5TEOps()) {
1018 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1019 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
1020 AddDReg(MIB, SrcReg, ARM::gsub_1, {});
1021 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1023 } else {
1024 // Fallback to STM instruction, which has existed since the dawn of
1025 // time.
1026 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1027 .addFrameIndex(FI)
1028 .addMemOperand(MMO)
1030 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
1031 AddDReg(MIB, SrcReg, ARM::gsub_1, {});
1032 }
1033 } else
1034 llvm_unreachable("Unknown reg class!");
1035 break;
1036 case 16:
1037 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1038 // Use aligned spills if the stack can be realigned.
1039 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1040 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1041 .addFrameIndex(FI)
1042 .addImm(16)
1043 .addReg(SrcReg, getKillRegState(isKill))
1044 .addMemOperand(MMO)
1046 } else {
1047 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1048 .addReg(SrcReg, getKillRegState(isKill))
1049 .addFrameIndex(FI)
1050 .addMemOperand(MMO)
1052 }
1053 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1054 Subtarget.hasMVEIntegerOps()) {
1055 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1056 MIB.addReg(SrcReg, getKillRegState(isKill))
1057 .addFrameIndex(FI)
1058 .addImm(0)
1059 .addMemOperand(MMO);
1061 } else
1062 llvm_unreachable("Unknown reg class!");
1063 break;
1064 case 24:
1065 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1066 // Use aligned spills if the stack can be realigned.
1067 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1068 Subtarget.hasNEON()) {
1069 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1070 .addFrameIndex(FI)
1071 .addImm(16)
1072 .addReg(SrcReg, getKillRegState(isKill))
1073 .addMemOperand(MMO)
1075 } else {
1077 get(ARM::VSTMDIA))
1078 .addFrameIndex(FI)
1080 .addMemOperand(MMO);
1081 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1082 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
1083 AddDReg(MIB, SrcReg, ARM::dsub_2, {});
1084 }
1085 } else
1086 llvm_unreachable("Unknown reg class!");
1087 break;
1088 case 32:
1089 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1090 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1091 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1092 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1093 Subtarget.hasNEON()) {
1094 // FIXME: It's possible to only store part of the QQ register if the
1095 // spilled def has a sub-register index.
1096 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1097 .addFrameIndex(FI)
1098 .addImm(16)
1099 .addReg(SrcReg, getKillRegState(isKill))
1100 .addMemOperand(MMO)
1102 } else if (Subtarget.hasMVEIntegerOps()) {
1103 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1104 .addReg(SrcReg, getKillRegState(isKill))
1105 .addFrameIndex(FI)
1106 .addMemOperand(MMO);
1107 } else {
1109 get(ARM::VSTMDIA))
1110 .addFrameIndex(FI)
1112 .addMemOperand(MMO);
1113 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1114 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
1115 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, {});
1116 AddDReg(MIB, SrcReg, ARM::dsub_3, {});
1117 }
1118 } else
1119 llvm_unreachable("Unknown reg class!");
1120 break;
1121 case 64:
1122 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1123 Subtarget.hasMVEIntegerOps()) {
1124 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1125 .addReg(SrcReg, getKillRegState(isKill))
1126 .addFrameIndex(FI)
1127 .addMemOperand(MMO);
1128 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1129 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1130 .addFrameIndex(FI)
1132 .addMemOperand(MMO);
1133 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1134 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
1135 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, {});
1136 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, {});
1137 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, {});
1138 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, {});
1139 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, {});
1140 AddDReg(MIB, SrcReg, ARM::dsub_7, {});
1141 } else
1142 llvm_unreachable("Unknown reg class!");
1143 break;
1144 default:
1145 llvm_unreachable("Unknown reg class!");
1146 }
1147}
1148
// ARMBaseInstrInfo::isStoreToStackSlot (continuation; the first signature
// line, original line 1149, was dropped by extraction -- confirm against
// upstream ARMBaseInstrInfo.cpp).
// Recognizes instructions that are plain, unmodified stores of a whole
// register to a stack slot.  On a match it writes the frame index into
// FrameIndex and returns the stored register; otherwise it returns 0.
1150 int &FrameIndex) const {
1151 switch (MI.getOpcode()) {
1152 default: break;
1153 case ARM::STRrs:
1154 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
// Register-offset form: only a match when the index register is absent
// (reg == 0) and the shift immediate is 0, i.e. the address is exactly FI.
1155 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1156 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1157 MI.getOperand(3).getImm() == 0) {
1158 FrameIndex = MI.getOperand(1).getIndex();
1159 return MI.getOperand(0).getReg();
1160 }
1161 break;
1162 case ARM::STRi12:
1163 case ARM::t2STRi12:
1164 case ARM::tSTRspi:
1165 case ARM::VSTRD:
1166 case ARM::VSTRS:
1167 case ARM::VSTRH:
1168 case ARM::VSTR_P0_off:
1169 case ARM::VSTR_FPSCR_NZCVQC_off:
1170 case ARM::MVE_VSTRWU32:
// Immediate-offset forms: only offset 0 counts as a direct stack-slot store.
1171 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1172 MI.getOperand(2).getImm() == 0) {
1173 FrameIndex = MI.getOperand(1).getIndex();
1174 return MI.getOperand(0).getReg();
1175 }
1176 break;
1177 case ARM::VST1q64:
1178 case ARM::VST1d64TPseudo:
1179 case ARM::VST1d64QPseudo:
// NEON stores put the address first; reject sub-register stores, since the
// caller expects the whole register to be spilled.
1180 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1181 FrameIndex = MI.getOperand(0).getIndex();
1182 return MI.getOperand(2).getReg();
1183 }
1184 break;
1185 case ARM::VSTMQIA:
1186 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1187 FrameIndex = MI.getOperand(1).getIndex();
1188 return MI.getOperand(0).getReg();
1189 }
1190 break;
1191 case ARM::MQQPRStore:
1192 case ARM::MQQQQPRStore:
1193 if (MI.getOperand(1).isFI()) {
1194 FrameIndex = MI.getOperand(1).getIndex();
1195 return MI.getOperand(0).getReg();
1196 }
1197 break;
1198 }
1199
// Not a recognized direct stack-slot store.
1200 return 0;
1201}
1202
// Presumably ARMBaseInstrInfo::isStoreToStackSlotPostFE -- the signature line
// and the declaration of `Accesses` (original line 1205, likely a
// SmallVector of MachineMemOperand*) were dropped by extraction; confirm
// against upstream.  Falls back to the memory operands: if the instruction
// may store and has exactly one stack-slot access, report its frame index.
1204 int &FrameIndex) const {
1206 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1207 Accesses.size() == 1) {
// The single access is known to be a fixed stack slot, so the cast is safe.
1208 FrameIndex =
1209 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1210 ->getFrameIndex();
1211 return true;
1212 }
1213 return false;
1214}
1215
// ARMBaseInstrInfo::loadRegFromStackSlot (continuation; the first signature
// lines, originals 1216-1217, plus the MMO construction line 1228 and the
// TRI reference line 1231 were dropped by extraction -- confirm against
// upstream).  Emits a reload of DestReg from frame index FI before iterator
// I, choosing the instruction by the register class's spill size and the
// slot's alignment.  NOTE(review): gaps in the embedded numbering after most
// .addMemOperand(MMO) lines (e.g. 1239, 1249, 1255) suggest predicate-
// operand lines (likely .add(predOps(ARMCC::AL))) were lost in extraction.
1218 Register DestReg, int FI,
1219 const TargetRegisterClass *RC,
1220 Register VReg, unsigned SubReg,
1221 MachineInstr::MIFlag Flags) const {
// Use the debug location of the instruction we insert before, if any.
1222 DebugLoc DL;
1223 if (I != MBB.end()) DL = I->getDebugLoc();
1224 MachineFunction &MF = *MBB.getParent();
1225 MachineFrameInfo &MFI = MF.getFrameInfo();
1226 const Align Alignment = MFI.getObjectAlign(FI);
1227 MachineMemOperand *MMO = MF.getMachineMemOperand(
1229 MFI.getObjectSize(FI), Alignment);
1230
// Dispatch on the spill size in bytes of the register class.
1232 switch (TRI.getSpillSize(*RC)) {
1233 case 2:
// Half-precision FP register.
1234 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1235 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1236 .addFrameIndex(FI)
1237 .addImm(0)
1238 .addMemOperand(MMO)
1240 } else
1241 llvm_unreachable("Unknown reg class!");
1242 break;
1243 case 4:
1244 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1245 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1246 .addFrameIndex(FI)
1247 .addImm(0)
1248 .addMemOperand(MMO)
1250 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1251 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1252 .addFrameIndex(FI)
1253 .addImm(0)
1254 .addMemOperand(MMO)
1256 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1257 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1258 .addFrameIndex(FI)
1259 .addImm(0)
1260 .addMemOperand(MMO)
1262 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1263 BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
1264 .addFrameIndex(FI)
1265 .addImm(0)
1266 .addMemOperand(MMO)
1268 } else
1269 llvm_unreachable("Unknown reg class!");
1270 break;
1271 case 8:
1272 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1273 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1274 .addFrameIndex(FI)
1275 .addImm(0)
1276 .addMemOperand(MMO)
1278 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
// NOTE(review): original line 1279 (presumably `MachineInstrBuilder MIB;`)
// was dropped by extraction.
1280
// LDRD (available from ARMv5TE) loads the pair in one instruction; each
// half is added with DefineNoRead since the load defines, not reads, it.
1281 if (Subtarget.hasV5TEOps()) {
1282 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1283 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1284 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1285 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1287 } else {
1288 // Fallback to LDM instruction, which has existed since the dawn of
1289 // time.
1290 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1291 .addFrameIndex(FI)
1292 .addMemOperand(MMO)
1294 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1295 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1296 }
1297
// For a physical pair register, also mark the super-register as defined so
// the verifier sees the whole pair as live.
1298 if (DestReg.isPhysical())
1299 MIB.addReg(DestReg, RegState::ImplicitDefine);
1300 } else
1301 llvm_unreachable("Unknown reg class!");
1302 break;
1303 case 16:
1304 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
// Use the aligned NEON load only when the slot is 16-byte aligned (or the
// stack can be realigned to make it so).
1305 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1306 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1307 .addFrameIndex(FI)
1308 .addImm(16)
1309 .addMemOperand(MMO)
1311 } else {
1312 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1313 .addFrameIndex(FI)
1314 .addMemOperand(MMO)
1316 }
1317 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1318 Subtarget.hasMVEIntegerOps()) {
1319 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1320 MIB.addFrameIndex(FI)
1321 .addImm(0)
1322 .addMemOperand(MMO);
1324 } else
1325 llvm_unreachable("Unknown reg class!");
1326 break;
1327 case 24:
1328 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1329 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1330 Subtarget.hasNEON()) {
1331 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1332 .addFrameIndex(FI)
1333 .addImm(16)
1334 .addMemOperand(MMO)
1336 } else {
// Without NEON/alignment, reload the triple one D-register at a time.
1337 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1338 .addFrameIndex(FI)
1339 .addMemOperand(MMO)
1341 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1342 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1343 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1344 if (DestReg.isPhysical())
1345 MIB.addReg(DestReg, RegState::ImplicitDefine);
1346 }
1347 } else
1348 llvm_unreachable("Unknown reg class!");
1349 break;
1350 case 32:
1351 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1352 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1353 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1354 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1355 Subtarget.hasNEON()) {
1356 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1357 .addFrameIndex(FI)
1358 .addImm(16)
1359 .addMemOperand(MMO)
1361 } else if (Subtarget.hasMVEIntegerOps()) {
1362 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1363 .addFrameIndex(FI)
1364 .addMemOperand(MMO);
1365 } else {
1366 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1367 .addFrameIndex(FI)
1369 .addMemOperand(MMO);
1370 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1371 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1372 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1373 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1374 if (DestReg.isPhysical())
1375 MIB.addReg(DestReg, RegState::ImplicitDefine);
1376 }
1377 } else
1378 llvm_unreachable("Unknown reg class!");
1379 break;
1380 case 64:
1381 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1382 Subtarget.hasMVEIntegerOps()) {
1383 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1384 .addFrameIndex(FI)
1385 .addMemOperand(MMO);
1386 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
// Eight D-registers reloaded via a single VLDMDIA.
1387 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1388 .addFrameIndex(FI)
1390 .addMemOperand(MMO);
1391 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1392 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1393 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1394 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1395 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead);
1396 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead);
1397 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead);
1398 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead);
1399 if (DestReg.isPhysical())
1400 MIB.addReg(DestReg, RegState::ImplicitDefine);
1401 } else
1402 llvm_unreachable("Unknown reg class!");
1403 break;
1404 default:
1405 llvm_unreachable("Unknown regclass!");
1406 }
1407}
1408
// ARMBaseInstrInfo::isLoadFromStackSlot (continuation; the first signature
// line, original 1409, was dropped by extraction -- confirm against
// upstream).  Mirror of isStoreToStackSlot: recognizes plain, unmodified
// whole-register loads from a stack slot, sets FrameIndex, and returns the
// loaded register; returns 0 otherwise.
1410 int &FrameIndex) const {
1411 switch (MI.getOpcode()) {
1412 default: break;
1413 case ARM::LDRrs:
1414 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
// Register-offset form: match only when the index register is absent and
// the shift immediate is 0.
1415 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1416 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1417 MI.getOperand(3).getImm() == 0) {
1418 FrameIndex = MI.getOperand(1).getIndex();
1419 return MI.getOperand(0).getReg();
1420 }
1421 break;
1422 case ARM::LDRi12:
1423 case ARM::t2LDRi12:
1424 case ARM::tLDRspi:
1425 case ARM::VLDRD:
1426 case ARM::VLDRS:
1427 case ARM::VLDRH:
1428 case ARM::VLDR_P0_off:
1429 case ARM::VLDR_FPSCR_NZCVQC_off:
1430 case ARM::MVE_VLDRWU32:
// Immediate-offset forms: only offset 0 is a direct stack-slot load.
1431 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1432 MI.getOperand(2).getImm() == 0) {
1433 FrameIndex = MI.getOperand(1).getIndex();
1434 return MI.getOperand(0).getReg();
1435 }
1436 break;
1437 case ARM::VLD1q64:
1438 case ARM::VLD1d8TPseudo:
1439 case ARM::VLD1d16TPseudo:
1440 case ARM::VLD1d32TPseudo:
1441 case ARM::VLD1d64TPseudo:
1442 case ARM::VLD1d8QPseudo:
1443 case ARM::VLD1d16QPseudo:
1444 case ARM::VLD1d32QPseudo:
1445 case ARM::VLD1d64QPseudo:
// Reject sub-register loads -- callers expect the whole register reloaded.
1446 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1447 FrameIndex = MI.getOperand(1).getIndex();
1448 return MI.getOperand(0).getReg();
1449 }
1450 break;
1451 case ARM::VLDMQIA:
1452 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1453 FrameIndex = MI.getOperand(1).getIndex();
1454 return MI.getOperand(0).getReg();
1455 }
1456 break;
1457 case ARM::MQQPRLoad:
1458 case ARM::MQQQQPRLoad:
1459 if (MI.getOperand(1).isFI()) {
1460 FrameIndex = MI.getOperand(1).getIndex();
1461 return MI.getOperand(0).getReg();
1462 }
1463 break;
1464 }
1465
// Not a recognized direct stack-slot load.
1466 return 0;
1467}
1468
// Presumably ARMBaseInstrInfo::isLoadFromStackSlotPostFE -- the signature
// line and the declaration of `Accesses` (original line 1471) were dropped
// by extraction; confirm against upstream.  Uses memory operands instead of
// opcode patterns: if the instruction may load and touches exactly one
// stack-slot access, report that slot's frame index.
1470 int &FrameIndex) const {
1472 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1473 Accesses.size() == 1) {
// The single access is a fixed stack slot, so the cast is safe.
1474 FrameIndex =
1475 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1476 ->getFrameIndex();
1477 return true;
1478 }
1479 return false;
1480}
1481
1482/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
1483/// depending on whether the result is used.
///
/// The MEMCPY pseudo's operands are: dst-writeback def (0), src-writeback
/// def (1), store base (2), load base (3), and scratch registers from
/// operand 5 onward (operand 4 is skipped by the drop_begin below).  The
/// writeback (_UPD) forms are used when the updated pointer is live-out.
1484 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1485 bool isThumb1 = Subtarget.isThumb1Only();
1486 bool isThumb2 = Subtarget.isThumb2();
1487 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1488
1489 DebugLoc dl = MI->getDebugLoc();
1490 MachineBasicBlock *BB = MI->getParent();
1491
1492 MachineInstrBuilder LDM, STM;
// Thumb1 only has the writeback form; otherwise pick it only when the
// updated load pointer (operand 1) is still used afterwards.
1493 if (isThumb1 || !MI->getOperand(1).isDead()) {
1494 MachineOperand LDWb(MI->getOperand(1));
1495 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1496 : isThumb1 ? ARM::tLDMIA_UPD
1497 : ARM::LDMIA_UPD))
1498 .add(LDWb);
1499 } else {
1500 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1501 }
1502
// Same choice for the store side, keyed on the store writeback (operand 0).
1503 if (isThumb1 || !MI->getOperand(0).isDead()) {
1504 MachineOperand STWb(MI->getOperand(0));
1505 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1506 : isThumb1 ? ARM::tSTMIA_UPD
1507 : ARM::STMIA_UPD))
1508 .add(STWb);
1509 } else {
1510 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1511 }
1512
1513 MachineOperand LDBase(MI->getOperand(3));
1514 LDM.add(LDBase).add(predOps(ARMCC::AL));
1515
1516 MachineOperand STBase(MI->getOperand(2));
1517 STM.add(STBase).add(predOps(ARMCC::AL));
1518
1519 // Sort the scratch registers into ascending order.
// LDM/STM register lists must be in ascending encoding order.
1520 const TargetRegisterInfo &TRI = getRegisterInfo();
1521 SmallVector<unsigned, 6> ScratchRegs;
1522 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1523 ScratchRegs.push_back(MO.getReg());
1524 llvm::sort(ScratchRegs,
1525 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1526 return TRI.getEncodingValue(Reg1) <
1527 TRI.getEncodingValue(Reg2);
1528 });
1529
1530 for (const auto &Reg : ScratchRegs) {
// NOTE(review): original lines 1531-1532 (the loop body, presumably adding
// Reg as a def on LDM and a kill-use on STM) were dropped by extraction --
// confirm against upstream ARMBaseInstrInfo.cpp.
1533 }
1534
// Remove the expanded MEMCPY pseudo.
1535 BB->erase(MI);
1536}
1537
// ARMBaseInstrInfo::expandPostRAPseudo (the signature line, original 1538,
// and the TRI declaration, original 1564, were dropped by extraction --
// confirm against upstream).  Expands post-RA pseudos (LOAD_STACK_GUARD,
// MEMCPY) and opportunistically widens an S-register COPY into a VMOVD.
// Returns true when the instruction was expanded/rewritten.
1539 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1540 expandLoadStackGuard(MI);
1541 MI.getParent()->erase(MI);
1542 return true;
1543 }
1544
1545 if (MI.getOpcode() == ARM::MEMCPY) {
1546 expandMEMCPY(MI);
1547 return true;
1548 }
1549
1550 // This hook gets to expand COPY instructions before they become
1551 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1552 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1553 // changed into a VORR that can go down the NEON pipeline.
1554 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1555 return false;
1556
1557 // Look for a copy between even S-registers. That is where we keep floats
1558 // when using NEON v2f32 instructions for f32 arithmetic.
1559 Register DstRegS = MI.getOperand(0).getReg();
1560 Register SrcRegS = MI.getOperand(1).getReg();
1561 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1562 return false;
1563
// Find the D-registers whose low half (ssub_0) is the S-register; only even
// S-registers have such a super-register.
1565 MCRegister DstRegD =
1566 TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, &ARM::DPRRegClass);
1567 MCRegister SrcRegD =
1568 TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, &ARM::DPRRegClass);
1569 if (!DstRegD || !SrcRegD)
1570 return false;
1571
1572 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1573 // legal if the COPY already defines the full DstRegD, and it isn't a
1574 // sub-register insertion.
1575 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1576 return false;
1577
1578 // A dead copy shouldn't show up here, but reject it just in case.
1579 if (MI.getOperand(0).isDead())
1580 return false;
1581
1582 // All clear, widen the COPY.
1583 LLVM_DEBUG(dbgs() << "widening: " << MI);
1584 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1585
1586 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1587 // or some other super-register.
1588 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1589 if (ImpDefIdx != -1)
1590 MI.removeOperand(ImpDefIdx);
1591
1592 // Change the opcode and operands.
1593 MI.setDesc(get(ARM::VMOVD));
1594 MI.getOperand(0).setReg(DstRegD);
1595 MI.getOperand(1).setReg(SrcRegD);
1596 MIB.add(predOps(ARMCC::AL));
1597
1598 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1599 // register scavenger and machine verifier, so we need to indicate that we
1600 // are reading an undefined value from SrcRegD, but a proper value from
1601 // SrcRegS.
1602 MI.getOperand(1).setIsUndef();
1603 MIB.addReg(SrcRegS, RegState::Implicit);
1604
1605 // SrcRegD may actually contain an unrelated value in the ssub_1
1606 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1607 if (MI.getOperand(1).isKill()) {
1608 MI.getOperand(1).setIsKill(false);
1609 MI.addRegisterKilled(SrcRegS, TRI, true);
1610 }
1611
1612 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1613 return true;
1614}
1615
1616/// Create a copy of a const pool value. Update CPI to the new index and return
1617/// the label UID.
///
/// NOTE(review): the declarations at original lines 1619-1620 (presumably
/// the MachineConstantPool* MCP and ARMFunctionInfo* AFI used below) and
/// fragments of the Create() calls (originals 1637, 1641-1642, 1645, 1647,
/// 1653) were dropped by extraction -- confirm against upstream.
1618 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1621
1622 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1623 assert(MCPE.isMachineConstantPoolEntry() &&
1624 "Expecting a machine constantpool entry!");
1625 ARMConstantPoolValue *ACPV =
1626 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1627
// Each clone needs its own PIC label so the two loads relocate separately.
1628 unsigned PCLabelId = AFI->createPICLabelUId();
1629 ARMConstantPoolValue *NewCPV = nullptr;
1630
1631 // FIXME: The below assumes PIC relocation model and that the function
1632 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1633 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1634 // instructions, so that's probably OK, but is PIC always correct when
1635 // we get here?
// Rebuild an equivalent constant-pool value of the same flavor, carrying
// the fresh PC label.
1636 if (ACPV->isGlobalValue())
1638 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1639 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1640 else if (ACPV->isExtSymbol())
1643 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1644 else if (ACPV->isBlockAddress())
1646 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1648 else if (ACPV->isLSDA())
1649 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1650 ARMCP::CPLSDA, 4);
1651 else if (ACPV->isMachineBasicBlock())
1652 NewCPV = ARMConstantPoolMBB::
1654 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1655 else
1656 llvm_unreachable("Unexpected ARM constantpool value type!!");
// Hand the new entry's index back to the caller through CPI.
1657 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1658 return PCLabelId;
1659}
1660
// ARMBaseInstrInfo::reMaterialize (continuation; the signature's first lines,
// originals 1661-1662, were dropped by extraction -- confirm against
// upstream).  Re-emits Orig before I, retargeted to DestReg/SubIdx.  PIC
// constant-pool loads cannot simply be cloned: each copy needs its own
// constant-pool entry and PC label, which duplicateCPV provides.
1663 Register DestReg, unsigned SubIdx,
1664 const MachineInstr &Orig) const {
1665 unsigned Opcode = Orig.getOpcode();
1666 switch (Opcode) {
1667 default: {
// Generic case: clone and retarget the def register.
1668 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1669 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1670 MBB.insert(I, MI);
1671 break;
1672 }
1673 case ARM::tLDRpci_pic:
1674 case ARM::t2LDRpci_pic: {
// PIC loads: duplicate the constant-pool entry first so the new load gets
// a distinct CPI and PC label.  NOTE(review): original line 1679
// (presumably .addConstantPoolIndex(CPI)) was dropped by extraction.
1675 MachineFunction &MF = *MBB.getParent();
1676 unsigned CPI = Orig.getOperand(1).getIndex();
1677 unsigned PCLabelId = duplicateCPV(MF, CPI);
1678 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1680 .addImm(PCLabelId)
1681 .cloneMemRefs(Orig);
1682 break;
1683 }
1684 }
1685}
1686
// ARMBaseInstrInfo::duplicate (continuation; the signature's first lines,
// originals 1687-1688, and the iterator declaration at original 1692 were
// dropped by extraction -- confirm against upstream).  Clones Orig via the
// base class, then walks the resulting (possibly bundled) instructions and
// gives every PIC constant-pool load its own fresh CP entry and PC label.
1689 MachineBasicBlock::iterator InsertBefore,
1690 const MachineInstr &Orig) const {
1691 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1693 for (;;) {
1694 switch (I->getOpcode()) {
1695 case ARM::tLDRpci_pic:
1696 case ARM::t2LDRpci_pic: {
// duplicateCPV updates CPI in place and returns the new PC label.
1697 MachineFunction &MF = *MBB.getParent();
1698 unsigned CPI = I->getOperand(1).getIndex();
1699 unsigned PCLabelId = duplicateCPV(MF, CPI);
1700 I->getOperand(1).setIndex(CPI);
1701 I->getOperand(2).setImm(PCLabelId);
1702 break;
1703 }
1704 }
// Continue through the rest of the bundle, if any.
1705 if (!I->isBundledWithSucc())
1706 break;
1707 ++I;
1708 }
1709 return Cloned;
1710}
1711
// ARMBaseInstrInfo::produceSameValue (continuation; the signature's first
// line, original 1712, was dropped by extraction).  Decides whether two
// instructions compute identical values, looking through constant-pool
// indices and PIC labels that differ textually but denote the same constant.
1713 const MachineInstr &MI1,
1714 const MachineRegisterInfo *MRI) const {
1715 unsigned Opcode = MI0.getOpcode();
// Constant-pool / global-address load pseudos: compare what is loaded, not
// the CP index or PC label, which differ between clones.
1716 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1717 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1718 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1719 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1720 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1721 Opcode == ARM::t2MOV_ga_pcrel) {
1722 if (MI1.getOpcode() != Opcode)
1723 return false;
1724 if (MI0.getNumOperands() != MI1.getNumOperands())
1725 return false;
1726
1727 const MachineOperand &MO0 = MI0.getOperand(1);
1728 const MachineOperand &MO1 = MI1.getOperand(1);
1729 if (MO0.getOffset() != MO1.getOffset())
1730 return false;
1731
1732 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1733 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1734 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1735 Opcode == ARM::t2MOV_ga_pcrel)
1736 // Ignore the PC labels.
1737 return MO0.getGlobal() == MO1.getGlobal();
1738
// Constant-pool case: two different indices can still hold the same value.
1739 const MachineFunction *MF = MI0.getParent()->getParent();
1740 const MachineConstantPool *MCP = MF->getConstantPool();
1741 int CPI0 = MO0.getIndex();
1742 int CPI1 = MO1.getIndex();
1743 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1744 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1745 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1746 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1747 if (isARMCP0 && isARMCP1) {
1748 ARMConstantPoolValue *ACPV0 =
1749 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1750 ARMConstantPoolValue *ACPV1 =
1751 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1752 return ACPV0->hasSameValue(ACPV1);
1753 } else if (!isARMCP0 && !isARMCP1) {
1754 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1755 }
1756 return false;
1757 } else if (Opcode == ARM::PICLDR) {
1758 if (MI1.getOpcode() != Opcode)
1759 return false;
1760 if (MI0.getNumOperands() != MI1.getNumOperands())
1761 return false;
1762
1763 Register Addr0 = MI0.getOperand(1).getReg();
1764 Register Addr1 = MI1.getOperand(1).getReg();
1765 if (Addr0 != Addr1) {
// Different address vregs can still hold the same value; recurse into
// their defining instructions (requires SSA-form MRI).
1766 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1767 return false;
1768
1769 // This assumes SSA form.
1770 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1771 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1772 // Check if the loaded value, e.g. a constantpool of a global address, are
1773 // the same.
1774 if (!produceSameValue(*Def0, *Def1, MRI))
1775 return false;
1776 }
1777
// Remaining operands (offset, predicate) must match exactly.
1778 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1779 // %12 = PICLDR %11, 0, 14, %noreg
1780 const MachineOperand &MO0 = MI0.getOperand(i);
1781 const MachineOperand &MO1 = MI1.getOperand(i);
1782 if (!MO0.isIdenticalTo(MO1))
1783 return false;
1784 }
1785 return true;
1786 }
1787
// NOTE(review): original line 1788 (presumably the default
// `return MI0.isIdenticalTo(MI1, ...)` fallthrough) was dropped by
// extraction -- confirm against upstream.
1789}
1790
1791/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1792/// determine if two loads are loading from the same base address. It should
1793/// only return true if the base pointers are the same and the only differences
1794/// between the two addresses is the offset. It also returns the offsets by
1795/// reference.
1796///
1797/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1798/// is permanently disabled.
// NOTE(review): the signature's first line (original 1799) was dropped by
// extraction -- confirm against upstream.
1800 int64_t &Offset1,
1801 int64_t &Offset2) const {
1802 // Don't worry about Thumb: just ARM and Thumb2.
1803 if (Subtarget.isThumb1Only()) return false;
1804
1805 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1806 return false;
1807
// Whitelist of immediate-offset load opcodes this analysis understands.
1808 auto IsLoadOpcode = [&](unsigned Opcode) {
1809 switch (Opcode) {
1810 default:
1811 return false;
1812 case ARM::LDRi12:
1813 case ARM::LDRBi12:
1814 case ARM::LDRD:
1815 case ARM::LDRH:
1816 case ARM::LDRSB:
1817 case ARM::LDRSH:
1818 case ARM::VLDRD:
1819 case ARM::VLDRS:
1820 case ARM::t2LDRi8:
1821 case ARM::t2LDRBi8:
1822 case ARM::t2LDRDi8:
1823 case ARM::t2LDRSHi8:
1824 case ARM::t2LDRi12:
1825 case ARM::t2LDRBi12:
1826 case ARM::t2LDRSHi12:
1827 return true;
1828 }
1829 };
1830
1831 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1832 !IsLoadOpcode(Load2->getMachineOpcode()))
1833 return false;
1834
1835 // Check if base addresses and chain operands match.
1836 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1837 Load1->getOperand(4) != Load2->getOperand(4))
1838 return false;
1839
1840 // Index should be Reg0.
1841 if (Load1->getOperand(3) != Load2->getOperand(3))
1842 return false;
1843
1844 // Determine the offsets.
// Only constant offsets qualify; report them to the caller by reference.
1845 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1846 isa<ConstantSDNode>(Load2->getOperand(1))) {
1847 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1848 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1849 return true;
1850 }
1851
1852 return false;
1853}
1854
1855/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
1856/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1857/// be scheduled togther. On some targets if two loads are loading from
1858/// addresses in the same cache line, it's better if they are scheduled
1859/// together. This function takes two integers that represent the load offsets
1860/// from the common base address. It returns true if it decides it's desirable
1861/// to schedule the two loads together. "NumLoads" is the number of loads that
1862/// have already been scheduled after Load1.
1863///
1864/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1865/// is permanently disabled.
// NOTE(review): the signature's first line (original 1866) was dropped by
// extraction -- confirm against upstream.
1867 int64_t Offset1, int64_t Offset2,
1868 unsigned NumLoads) const {
1869 // Don't worry about Thumb: just ARM and Thumb2.
1870 if (Subtarget.isThumb1Only()) return false;
1871
// Caller guarantees ordering (via areLoadsFromSameBasePtr).
1872 assert(Offset2 > Offset1);
1873
// Reject loads too far apart to plausibly share locality.
1874 if ((Offset2 - Offset1) / 8 > 64)
1875 return false;
1876
1877 // Check if the machine opcodes are different. If they are different
1878 // then we consider them to not be of the same base address,
1879 // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
1880 // In this case, they are considered to be the same because they are different
1881 // encoding forms of the same basic instruction.
1882 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1883 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1884 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1885 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1886 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1887 return false; // FIXME: overly conservative?
1888
1889 // Four loads in a row should be sufficient.
1890 if (NumLoads >= 3)
1891 return false;
1892
1893 return true;
1894}
1895
// ARMBaseInstrInfo::isSchedulingBoundary (continuation; the signature's first
// line, original 1896, and the iterator declaration at original 1925 were
// dropped by extraction -- confirm against upstream).  Returns true for
// instructions the scheduler must not move across.
1897 const MachineBasicBlock *MBB,
1898 const MachineFunction &MF) const {
1899 // Debug info is never a scheduling boundary. It's necessary to be explicit
1900 // due to the special treatment of IT instructions below, otherwise a
1901 // dbg_value followed by an IT will result in the IT instruction being
1902 // considered a scheduling hazard, which is wrong. It should be the actual
1903 // instruction preceding the dbg_value instruction(s), just like it is
1904 // when debug info is not present.
1905 if (MI.isDebugInstr())
1906 return false;
1907
1908 // Terminators and labels can't be scheduled around.
1909 if (MI.isTerminator() || MI.isPosition())
1910 return true;
1911
1912 // INLINEASM_BR can jump to another block
1913 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
1914 return true;
1915
1916 if (isSEHInstruction(MI))
1917 return true;
1918
1919 // Treat the start of the IT block as a scheduling boundary, but schedule
1920 // t2IT along with all instructions following it.
1921 // FIXME: This is a big hammer. But the alternative is to add all potential
1922 // true and anti dependencies to IT block instructions as implicit operands
1923 // to the t2IT instruction. The added compile time and complexity does not
1924 // seem worth it.
1926 // Make sure to skip any debug instructions
1927 while (++I != MBB->end() && I->isDebugInstr())
1928 ;
1929 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1930 return true;
1931
1932 // Don't attempt to schedule around any instruction that defines
1933 // a stack-oriented pointer, as it's unlikely to be profitable. This
1934 // saves compile time, because it doesn't require every single
1935 // stack slot reference to depend on the instruction that does the
1936 // modification.
1937 // Calls don't actually change the stack pointer, even if they have imp-defs.
1938 // No ARM calling conventions change the stack pointer. (X86 calling
1939 // conventions sometimes do).
1940 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
1941 return true;
1942
1943 return false;
1944}
1945
// ARMBaseInstrInfo::isProfitableToIfCvt, single-block (triangle) overload
// (continuation; the signature's first lines, originals 1946-1947, and the
// TRI declaration at original 1961 were dropped by extraction -- confirm
// against upstream).  Delegates to the two-block overload with an empty
// false side after a size-optimization early-out.
1948 unsigned NumCycles, unsigned ExtraPredCycles,
1949 BranchProbability Probability) const {
1950 if (!NumCycles)
1951 return false;
1952
1953 // If we are optimizing for size, see if the branch in the predecessor can be
1954 // lowered to cbn?z by the constant island lowering pass, and return false if
1955 // so. This results in a shorter instruction sequence.
1956 if (MBB.getParent()->getFunction().hasOptSize()) {
1957 MachineBasicBlock *Pred = *MBB.pred_begin();
1958 if (!Pred->empty()) {
1959 MachineInstr *LastMI = &*Pred->rbegin();
1960 if (LastMI->getOpcode() == ARM::t2Bcc) {
1962 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
1963 if (CmpMI)
1964 return false;
1965 }
1966 }
1967 }
// Model the triangle as a diamond with a zero-cost false block.
1968 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1969 MBB, 0, 0, Probability);
1970}
1971
// ARMBaseInstrInfo::isProfitableToIfCvt, two-block (diamond) overload
// (continuation; the signature's first lines, originals 1972-1973, were
// dropped by extraction -- confirm against upstream).  Compares the
// estimated cost of predicating both sides against keeping the branches,
// scaling by the branch probability.
1974 unsigned TCycles, unsigned TExtra,
1975 MachineBasicBlock &FBB,
1976 unsigned FCycles, unsigned FExtra,
1977 BranchProbability Probability) const {
1978 if (!TCycles)
1979 return false;
1980
1981 // In thumb code we often end up trading one branch for a IT block, and
1982 // if we are cloning the instruction can increase code size. Prevent
1983 // blocks with multiple predecesors from being ifcvted to prevent this
1984 // cloning.
1985 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
1986 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
1987 return false;
1988 }
1989
1990 // Attempt to estimate the relative costs of predication versus branching.
1991 // Here we scale up each component of UnpredCost to avoid precision issue when
1992 // scaling TCycles/FCycles by Probability.
1993 const unsigned ScalingUpFactor = 1024;
1994
1995 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1996 unsigned UnpredCost;
1997 if (!Subtarget.hasBranchPredictor()) {
1998 // When we don't have a branch predictor it's always cheaper to not take a
1999 // branch than take it, so we have to take that into account.
2000 unsigned NotTakenBranchCost = 1;
2001 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2002 unsigned TUnpredCycles, FUnpredCycles;
2003 if (!FCycles) {
2004 // Triangle: TBB is the fallthrough
2005 TUnpredCycles = TCycles + NotTakenBranchCost;
2006 FUnpredCycles = TakenBranchCost;
2007 } else {
2008 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2009 TUnpredCycles = TCycles + TakenBranchCost;
2010 FUnpredCycles = FCycles + NotTakenBranchCost;
2011 // The branch at the end of FBB will disappear when it's predicated, so
2012 // discount it from PredCost.
2013 PredCost -= 1 * ScalingUpFactor;
2014 }
2015 // The total cost is the cost of each path scaled by their probabilites
2016 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2017 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2018 UnpredCost = TUnpredCost + FUnpredCost;
2019 // When predicating assume that the first IT can be folded away but later
2020 // ones cost one cycle each
2021 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2022 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2023 }
2024 } else {
// With a branch predictor, charge the branch itself plus a probabilistic
// slice of the misprediction penalty.
2025 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2026 unsigned FUnpredCost =
2027 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2028 UnpredCost = TUnpredCost + FUnpredCost;
2029 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2030 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2031 }
2032
2033 return PredCost <= UnpredCost;
2034}
2035
/// Compute the extra code size (in bytes) incurred by predicating a run of
/// NumInsts instructions: the IT instructions Thumb2 needs to carry the
/// predicate. ARM-mode predication is free, so this returns 0 there.
// NOTE(review): the extraction dropped doxygen line 2037 here; upstream it is
// the declarator "ARMBaseInstrInfo::extraSizeToPredicateInstructions(
// MachineFunction &MF," -- confirm against upstream before building.
2036unsigned
 2038 unsigned NumInsts) const {
 2039 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
 2040 // ARM has a condition code field in every predicable instruction, using it
 2041 // doesn't change code size.
 2042 if (!Subtarget.isThumb2())
 2043 return 0;
 2044
 2045 // It's possible that the size of the IT is restricted to a single block.
 2046 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
// One 2-byte IT per group of up to MaxInsts predicated instructions.
 2047 return divideCeil(NumInsts, MaxInsts) * 2;
 2048}
2049
/// Estimate the code size (in bytes) that removing this branch during
/// if-conversion would save.
// NOTE(review): extraction dropped doxygen line 2051 (the declarator
// "ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {")
// and line 2056 (the second half of the t2Bcc condition; upstream it calls
// findCMPToFoldIntoCBZ) -- confirm against upstream.
2050unsigned
 2052 // If this branch is likely to be folded into the comparison to form a
 2053 // CB(N)Z, then removing it won't reduce code size at all, because that will
 2054 // just replace the CB(N)Z with a CMP.
 2055 if (MI.getOpcode() == ARM::t2Bcc &&
 2057 return 0;
 2058
 2059 unsigned Size = getInstSizeInBytes(MI);
 2060
 2061 // For Thumb2, all branches are 32-bit instructions during the if conversion
 2062 // pass, but may be replaced with 16-bit instructions during size reduction.
 2063 // Since the branches considered by if conversion tend to be forward branches
 2064 // over small basic blocks, they are very likely to be in range for the
 2065 // narrow instructions, so we assume the final code size will be half what it
 2066 // currently is.
 2067 if (Subtarget.isThumb2())
 2068 Size /= 2;
 2069
 2070 return Size;
 2071}
2072
/// Whether it is profitable to undo predication — a pure subtarget policy
/// query; the two blocks themselves are not inspected.
// NOTE(review): extraction dropped doxygen line 2074 (the declarator
// "ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,")
// -- confirm against upstream.
2073bool
 2075 MachineBasicBlock &FMBB) const {
 2076 // Reduce false anti-dependencies to let the target's out-of-order execution
 2077 // engine do its thing.
 2078 return Subtarget.isProfitableToUnpredicate();
 2079}
2080
 2081/// getInstrPredicate - If instruction is predicated, returns its predicate
 2082/// condition, otherwise returns AL. It also returns the condition code
 2083/// register by reference.
// NOTE(review): extraction dropped doxygen line 2084 (the declarator
// "ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,")
// -- confirm against upstream.
 2085 Register &PredReg) {
 2086 int PIdx = MI.findFirstPredOperandIdx();
// No predicate operand at all: report the always-true condition and a null
// predicate register.
 2087 if (PIdx == -1) {
 2088 PredReg = 0;
 2089 return ARMCC::AL;
 2090 }
 2091
// By convention the CC-register operand (CPSR or NoReg) immediately follows
// the condition-code immediate.
 2092 PredReg = MI.getOperand(PIdx+1).getReg();
 2093 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
 2094}
2095
2097 if (Opc == ARM::B)
2098 return ARM::Bcc;
2099 if (Opc == ARM::tB)
2100 return ARM::tBcc;
2101 if (Opc == ARM::t2B)
2102 return ARM::t2Bcc;
2103
2104 llvm_unreachable("Unknown unconditional branch opcode!");
2105}
2106
/// Commute the operands of MI. For MOVCCr/t2MOVCCr the two source values can
/// only be swapped if the predicate is simultaneously inverted; everything
/// else defers to the generic TargetInstrInfo implementation.
// NOTE(review): extraction dropped doxygen lines 2106-2107 (the
// "MachineInstr *" return type and
// "ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI," declarator)
// -- confirm against upstream.
 2108 bool NewMI,
 2109 unsigned OpIdx1,
 2110 unsigned OpIdx2) const {
 2111 switch (MI.getOpcode()) {
 2112 case ARM::MOVCCr:
 2113 case ARM::t2MOVCCr: {
 2114 // MOVCC can be commuted by inverting the condition.
 2115 Register PredReg;
 2116 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
 2117 // MOVCC AL can't be inverted. Shouldn't happen.
 2118 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
 2119 return nullptr;
// Let the generic implementation do the actual operand swap first; it may
// fail (returning null) or create a new instruction when NewMI is set.
 2120 MachineInstr *CommutedMI =
 2121 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
 2122 if (!CommutedMI)
 2123 return nullptr;
 2124 // After swapping the MOVCC operands, also invert the condition.
// NOTE(review): extraction dropped doxygen line 2126 here; upstream it is
// ".setImm(ARMCC::getOppositeCondition(CC));" -- confirm against upstream.
 2125 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
 2127 return CommutedMI;
 2128 }
 2129 }
 2130 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
 2131}
2132
 2133/// Identify instructions that can be folded into a MOVCC instruction, and
 2134/// return the defining instruction.
// NOTE(review): extraction dropped doxygen line 2135 (the "MachineInstr *"
// return-type line of this definition) -- confirm against upstream.
 2136ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
 2137 const TargetInstrInfo *TII) const {
// Only a virtual register with a single non-debug use can be folded: the
// fold erases the defining instruction, so no other user may remain.
 2138 if (!Reg.isVirtual())
 2139 return nullptr;
 2140 if (!MRI.hasOneNonDBGUse(Reg))
 2141 return nullptr;
 2142 MachineInstr *MI = MRI.getVRegDef(Reg);
 2143 if (!MI)
 2144 return nullptr;
 2145 // Check if MI can be predicated and folded into the MOVCC.
 2146 if (!isPredicable(*MI))
 2147 return nullptr;
 2148 // Check if MI has any non-dead defs or physreg uses. This also detects
 2149 // predicated instructions which will be reading CPSR.
// Operand 0 (the def being folded) is skipped; every remaining operand is
// screened below.
 2150 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
 2151 // Reject frame index operands, PEI can't handle the predicated pseudos.
 2152 if (MO.isFI() || MO.isCPI() || MO.isJTI())
 2153 return nullptr;
 2154 if (!MO.isReg())
 2155 continue;
 2156 // MI can't have any tied operands, that would conflict with predication.
 2157 if (MO.isTied())
 2158 return nullptr;
 2159 if (MO.getReg().isPhysical())
 2160 return nullptr;
 2161 if (MO.isDef() && !MO.isDead())
 2162 return nullptr;
 2163 }
// The fold moves MI down to the MOVCC, so it must be safe to move past any
// intervening stores.
 2164 bool DontMoveAcrossStores = true;
 2165 if (!MI->isSafeToMove(DontMoveAcrossStores))
 2166 return nullptr;
 2167 return MI;
 2168}
2169
/// Fold the instruction defining one of a MOVCC's inputs into a predicated
/// version of that instruction, replacing the MOVCC. Returns the new
/// (predicated) instruction, or null if the fold is not possible.
// NOTE(review): extraction dropped doxygen lines 2170-2172 here; upstream
// they are the "MachineInstr *" return type and the declarator
// "ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
// SmallPtrSetImpl<MachineInstr *> &SeenMIs," -- confirm against upstream.
 2173 bool PreferFalse) const {
 2174 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
 2175 "Unknown select instruction");
 2176 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
// Prefer folding the true-input (operand 2); fall back to the false-input
// (operand 1), in which case the condition must be inverted below.
 2177 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
 2178 bool Invert = !DefMI;
 2179 if (!DefMI)
 2180 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
 2181 if (!DefMI)
 2182 return nullptr;
 2183
 2184 // Find new register class to use.
 2185 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
 2186 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
 2187 Register DestReg = MI.getOperand(0).getReg();
 2188 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
 2189 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
// The destination must be constrainable to both input classes, since it
// will be tied to FalseReg and written by the predicated DefMI.
 2190 if (!MRI.constrainRegClass(DestReg, FalseClass))
 2191 return nullptr;
 2192 if (!MRI.constrainRegClass(DestReg, TrueClass))
 2193 return nullptr;
 2194
 2195 // Create a new predicated version of DefMI.
 2196 // Rfalse is the first use.
 2197 MachineInstrBuilder NewMI =
 2198 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
 2199
 2200 // Copy all the DefMI operands, excluding its (null) predicate.
 2201 const MCInstrDesc &DefDesc = DefMI->getDesc();
 2202 for (unsigned i = 1, e = DefDesc.getNumOperands();
 2203 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
 2204 NewMI.add(DefMI->getOperand(i));
 2205
// Append the MOVCC's predicate (inverted when we folded the false-input).
// NOTE(review): extraction dropped doxygen line 2208 here; upstream it is
// "NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));"
// -- confirm against upstream.
 2206 unsigned CondCode = MI.getOperand(3).getImm();
 2207 if (Invert)
 2209 else
 2210 NewMI.addImm(CondCode);
 2211 NewMI.add(MI.getOperand(4));
 2212
 2213 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
 2214 if (NewMI->hasOptionalDef())
 2215 NewMI.add(condCodeOp());
 2216
 2217 // The output register value when the predicate is false is an implicit
 2218 // register operand tied to the first def.
 2219 // The tie makes the register allocator ensure the FalseReg is allocated the
 2220 // same register as operand 0.
 2221 FalseReg.setImplicit();
 2222 NewMI.add(FalseReg);
 2223 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
 2224
 2225 // Update SeenMIs set: register newly created MI and erase removed DefMI.
 2226 SeenMIs.insert(NewMI);
 2227 SeenMIs.erase(DefMI);
 2228
 2229 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
 2230 // DefMI would be invalid when tranferred inside the loop. Checking for a
 2231 // loop is expensive, but at least remove kill flags if they are in different
 2232 // BBs.
 2233 if (DefMI->getParent() != MI.getParent())
 2234 NewMI->clearKillInfo();
 2235
 2236 // The caller will erase MI, but not DefMI.
 2237 DefMI->eraseFromParent();
 2238 return NewMI;
 2239}
2240
 2241/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
 2242/// instruction is encoded with an 'S' bit is determined by the optional CPSR
 2243/// def operand.
 2244///
 2245/// This will go away once we can teach tblgen how to set the optional CPSR def
 2246/// operand itself.
// NOTE(review): extraction dropped doxygen lines 2247-2250 and 2252 here;
// upstream they are the definition of "struct AddSubFlagsOpcodePair
// { uint16_t PseudoOpc; uint16_t MachineOpc; };" and the array declarator
// "static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {"
// -- confirm against upstream. Each entry below pairs a flag-setting
// pseudo with the real opcode it lowers to.
 2251
// ARM-mode add/sub/rsb pseudos.
 2253 {ARM::ADDSri, ARM::ADDri},
 2254 {ARM::ADDSrr, ARM::ADDrr},
 2255 {ARM::ADDSrsi, ARM::ADDrsi},
 2256 {ARM::ADDSrsr, ARM::ADDrsr},
 2257
 2258 {ARM::SUBSri, ARM::SUBri},
 2259 {ARM::SUBSrr, ARM::SUBrr},
 2260 {ARM::SUBSrsi, ARM::SUBrsi},
 2261 {ARM::SUBSrsr, ARM::SUBrsr},
 2262
 2263 {ARM::RSBSri, ARM::RSBri},
 2264 {ARM::RSBSrsi, ARM::RSBrsi},
 2265 {ARM::RSBSrsr, ARM::RSBrsr},
 2266
// Thumb1 pseudos.
 2267 {ARM::tADDSi3, ARM::tADDi3},
 2268 {ARM::tADDSi8, ARM::tADDi8},
 2269 {ARM::tADDSrr, ARM::tADDrr},
 2270 {ARM::tADCS, ARM::tADC},
 2271
 2272 {ARM::tSUBSi3, ARM::tSUBi3},
 2273 {ARM::tSUBSi8, ARM::tSUBi8},
 2274 {ARM::tSUBSrr, ARM::tSUBrr},
 2275 {ARM::tSBCS, ARM::tSBC},
 2276 {ARM::tRSBS, ARM::tRSB},
 2277 {ARM::tLSLSri, ARM::tLSLri},
 2278
// Thumb2 pseudos.
 2279 {ARM::t2ADDSri, ARM::t2ADDri},
 2280 {ARM::t2ADDSrr, ARM::t2ADDrr},
 2281 {ARM::t2ADDSrs, ARM::t2ADDrs},
 2282
 2283 {ARM::t2SUBSri, ARM::t2SUBri},
 2284 {ARM::t2SUBSrr, ARM::t2SUBrr},
 2285 {ARM::t2SUBSrs, ARM::t2SUBrs},
 2286
 2287 {ARM::t2RSBSri, ARM::t2RSBri},
 2288 {ARM::t2RSBSrs, ARM::t2RSBrs},
 2289};
2290
2291unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2292 for (const auto &Entry : AddSubFlagsOpcodeMap)
2293 if (OldOpc == Entry.PseudoOpc)
2294 return Entry.MachineOpc;
2295 return 0;
2296}
2297
2300 const DebugLoc &dl, Register DestReg,
2301 Register BaseReg, int NumBytes,
2302 ARMCC::CondCodes Pred, Register PredReg,
2303 const ARMBaseInstrInfo &TII,
2304 unsigned MIFlags) {
2305 if (NumBytes == 0 && DestReg != BaseReg) {
2306 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2307 .addReg(BaseReg, RegState::Kill)
2308 .add(predOps(Pred, PredReg))
2309 .add(condCodeOp())
2310 .setMIFlags(MIFlags);
2311 return;
2312 }
2313
2314 bool isSub = NumBytes < 0;
2315 if (isSub) NumBytes = -NumBytes;
2316
2317 while (NumBytes) {
2318 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2319 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2320 assert(ThisVal && "Didn't extract field correctly");
2321
2322 // We will handle these bits from offset, clear them.
2323 NumBytes &= ~ThisVal;
2324
2325 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2326
2327 // Build the new ADD / SUB.
2328 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2329 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2330 .addReg(BaseReg, RegState::Kill)
2331 .addImm(ThisVal)
2332 .add(predOps(Pred, PredReg))
2333 .add(condCodeOp())
2334 .setMIFlags(MIFlags);
2335 BaseReg = DestReg;
2336 }
2337}
2338
/// Try to absorb an SP adjustment of NumBytes into a preceding push or a
/// following pop by widening its register list with scratch (push) or dead
/// (pop) registers. Returns true and rewrites MI in place on success.
// NOTE(review): extraction dropped doxygen lines 2339-2340 here; upstream
// they are "bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget
// &Subtarget, MachineFunction &MF, MachineInstr *MI," -- confirm upstream.
 2341 unsigned NumBytes) {
 2342 // This optimisation potentially adds lots of load and store
 2343 // micro-operations, it's only really a great benefit to code-size.
 2344 if (!Subtarget.hasMinSize())
 2345 return false;
 2346
 2347 // If only one register is pushed/popped, LLVM can use an LDR/STR
 2348 // instead. We can't modify those so make sure we're dealing with an
 2349 // instruction we understand.
 2350 bool IsPop = isPopOpcode(MI->getOpcode());
 2351 bool IsPush = isPushOpcode(MI->getOpcode());
 2352 if (!IsPush && !IsPop)
 2353 return false;
 2354
 2355 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
 2356 MI->getOpcode() == ARM::VLDMDIA_UPD;
 2357 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
 2358 MI->getOpcode() == ARM::tPOP ||
 2359 MI->getOpcode() == ARM::tPOP_RET;
 2360
 2361 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
 2362 MI->getOperand(1).getReg() == ARM::SP)) &&
 2363 "trying to fold sp update into non-sp-updating push/pop");
 2364
 2365 // The VFP push & pop act on D-registers, so we can only fold an adjustment
 2366 // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
 2367 // if this is violated.
 2368 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
 2369 return false;
 2370
 2371 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
 2372 // pred) so the list starts at 4. Thumb1 starts after the predicate.
 2373 int RegListIdx = IsT1PushPop ? 2 : 4;
 2374
 2375 // Calculate the space we'll need in terms of registers.
 2376 unsigned RegsNeeded;
 2377 const TargetRegisterClass *RegClass;
 2378 if (IsVFPPushPop) {
 2379 RegsNeeded = NumBytes / 8;
 2380 RegClass = &ARM::DPRRegClass;
 2381 } else {
 2382 RegsNeeded = NumBytes / 4;
 2383 RegClass = &ARM::GPRRegClass;
 2384 }
 2385
 2386 // We're going to have to strip all list operands off before
 2387 // re-adding them since the order matters, so save the existing ones
 2388 // for later.
// NOTE(review): extraction dropped a line here; upstream it declares
// "SmallVector<MachineOperand, 4> RegList;" -- confirm against upstream.
 2390
 2391 // We're also going to need the first register transferred by this
 2392 // instruction, which won't necessarily be the first register in the list.
// FirstRegEnc starts at -1 (wraps to UINT_MAX) so any real encoding wins
// the min-comparison below.
 2393 unsigned FirstRegEnc = -1;
// NOTE(review): extraction dropped a line here; upstream it declares
// "const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();"
// -- confirm against upstream.
 2395
 2396 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
 2397 MachineOperand &MO = MI->getOperand(i);
 2398 RegList.push_back(MO);
 2399
 2400 if (MO.isReg() && !MO.isImplicit() &&
 2401 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
 2402 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
 2403 }
 2404
 2405 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 2406
 2407 // Now try to find enough space in the reglist to allocate NumBytes.
 2408 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
 2409 --CurRegEnc) {
 2410 MCRegister CurReg = RegClass->getRegister(CurRegEnc);
// Thumb1 push/pop can only encode low registers (r0-r7).
 2411 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
 2412 continue;
 2413 if (!IsPop) {
 2414 // Pushing any register is completely harmless, mark the register involved
 2415 // as undef since we don't care about its value and must not restore it
 2416 // during stack unwinding.
 2417 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
 2418 false, false, true));
 2419 --RegsNeeded;
 2420 continue;
 2421 }
 2422
 2423 // However, we can only pop an extra register if it's not live. For
 2424 // registers live within the function we might clobber a return value
 2425 // register; the other way a register can be live here is if it's
 2426 // callee-saved.
// NOTE(review): extraction dropped a line closing this condition; upstream
// it is "MachineBasicBlock::LQR_Dead) {" -- confirm against upstream.
 2427 if (isCalleeSavedRegister(CurReg, CSRegs) ||
 2428 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
 2430 // VFP pops don't allow holes in the register list, so any skip is fatal
 2431 // for our transformation. GPR pops do, so we should just keep looking.
 2432 if (IsVFPPushPop)
 2433 return false;
 2434 else
 2435 continue;
 2436 }
 2437
 2438 // Mark the unimportant registers as <def,dead> in the POP.
 2439 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
 2440 true));
 2441 --RegsNeeded;
 2442 }
 2443
 2444 if (RegsNeeded > 0)
 2445 return false;
 2446
 2447 // Finally we know we can profitably perform the optimisation so go
 2448 // ahead: strip all existing registers off and add them back again
 2449 // in the right order.
 2450 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
 2451 MI->removeOperand(i);
 2452
 2453 // Add the complete list back in.
// RegList was collected back-to-front, so re-add it reversed to restore the
// original (encoding) order plus the new scratch/dead registers.
 2454 MachineInstrBuilder MIB(MF, &*MI);
 2455 for (const MachineOperand &MO : llvm::reverse(RegList))
 2456 MIB.add(MO);
 2457
 2458 return true;
 2459}
2460
/// Replace the frame-index operand of MI (at FrameRegIdx) with FrameReg plus
/// as much of Offset as the instruction's addressing mode can encode.
/// On return, Offset holds whatever part could NOT be folded; the function
/// returns true iff the entire offset was absorbed.
// NOTE(review): this block has several lines dropped by the extraction
// (visible as gaps in the residual line numbers); each gap is marked below.
 2461bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 2462 Register FrameReg, int &Offset,
 2463 const ARMBaseInstrInfo &TII) {
 2464 unsigned Opcode = MI.getOpcode();
 2465 const MCInstrDesc &Desc = MI.getDesc();
 2466 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
 2467 bool isSub = false;
 2468
 2469 // Memory operands in inline assembly always use AddrMode2.
// NOTE(review): dropped line here; upstream it is
// "AddrMode = ARMII::AddrMode2;" -- confirm against upstream.
 2470 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
 2472
 2473 if (Opcode == ARM::ADDri) {
 2474 Offset += MI.getOperand(FrameRegIdx+1).getImm();
 2475 if (Offset == 0) {
 2476 // Turn it into a move.
 2477 MI.setDesc(TII.get(ARM::MOVr));
 2478 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
 2479 MI.removeOperand(FrameRegIdx+1);
 2480 Offset = 0;
 2481 return true;
 2482 } else if (Offset < 0) {
 2483 Offset = -Offset;
 2484 isSub = true;
 2485 MI.setDesc(TII.get(ARM::SUBri));
 2486 }
 2487
 2488 // Common case: small offset, fits into instruction.
 2489 if (ARM_AM::getSOImmVal(Offset) != -1) {
 2490 // Replace the FrameIndex with sp / fp
 2491 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
 2492 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
 2493 Offset = 0;
 2494 return true;
 2495 }
 2496
 2497 // Otherwise, pull as much of the immedidate into this ADDri/SUBri
 2498 // as possible.
 2499 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
 2500 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
 2501
 2502 // We will handle these bits from offset, clear them.
 2503 Offset &= ~ThisImmVal;
 2504
 2505 // Get the properly encoded SOImmVal field.
 2506 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
 2507 "Bit extraction didn't work?");
 2508 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
 2509 } else {
// Memory access: decode the addressing mode to find where the immediate
// lives (ImmIdx), its current value (InstrOffs), how many bits it has
// (NumBits) and its scale factor.
 2510 unsigned ImmIdx = 0;
 2511 int InstrOffs = 0;
 2512 unsigned NumBits = 0;
 2513 unsigned Scale = 1;
// NOTE(review): dropped case label after the switch; upstream it is
// "case ARMII::AddrMode_i12:" -- confirm against upstream.
 2514 switch (AddrMode) {
 2516 ImmIdx = FrameRegIdx + 1;
 2517 InstrOffs = MI.getOperand(ImmIdx).getImm();
 2518 NumBits = 12;
 2519 break;
 2520 case ARMII::AddrMode2:
 2521 ImmIdx = FrameRegIdx+2;
 2522 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
 2523 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
 2524 InstrOffs *= -1;
 2525 NumBits = 12;
 2526 break;
 2527 case ARMII::AddrMode3:
 2528 ImmIdx = FrameRegIdx+2;
 2529 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
 2530 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
 2531 InstrOffs *= -1;
 2532 NumBits = 8;
 2533 break;
 2534 case ARMII::AddrMode4:
 2535 case ARMII::AddrMode6:
 2536 // Can't fold any offset even if it's zero.
 2537 return false;
 2538 case ARMII::AddrMode5:
 2539 ImmIdx = FrameRegIdx+1;
 2540 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
 2541 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
 2542 InstrOffs *= -1;
 2543 NumBits = 8;
 2544 Scale = 4;
 2545 break;
// NOTE(review): dropped case label here; upstream it is
// "case ARMII::AddrMode5FP16:" -- confirm against upstream.
 2547 ImmIdx = FrameRegIdx+1;
 2548 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
 2549 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
 2550 InstrOffs *= -1;
 2551 NumBits = 8;
 2552 Scale = 2;
 2553 break;
// NOTE(review): dropped case labels here; upstream they are
// "case ARMII::AddrModeT2_i7:", "case ARMII::AddrModeT2_i7s2:" and
// "case ARMII::AddrModeT2_i7s4:" -- confirm against upstream.
 2557 ImmIdx = FrameRegIdx+1;
 2558 InstrOffs = MI.getOperand(ImmIdx).getImm();
 2559 NumBits = 7;
 2560 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
 2561 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
 2562 break;
 2563 default:
 2564 llvm_unreachable("Unsupported addressing mode!");
 2565 }
 2566
 2567 Offset += InstrOffs * Scale;
 2568 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
 2569 if (Offset < 0) {
 2570 Offset = -Offset;
 2571 isSub = true;
 2572 }
 2573
 2574 // Attempt to fold address comp. if opcode has offset bits
 2575 if (NumBits > 0) {
 2576 // Common case: small offset, fits into instruction.
 2577 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
 2578 int ImmedOffset = Offset / Scale;
 2579 unsigned Mask = (1 << NumBits) - 1;
 2580 if ((unsigned)Offset <= Mask * Scale) {
 2581 // Replace the FrameIndex with sp
 2582 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
 2583 // FIXME: When addrmode2 goes away, this will simplify (like the
 2584 // T2 version), as the LDR.i12 versions don't need the encoding
 2585 // tricks for the offset value.
// NOTE(review): dropped line inside this if; upstream it is
// "if (AddrMode == ARMII::AddrMode_i12)" -- confirm against upstream.
 2586 if (isSub) {
 2588 ImmedOffset = -ImmedOffset;
 2589 else
 2590 ImmedOffset |= 1 << NumBits;
 2591 }
 2592 ImmOp.ChangeToImmediate(ImmedOffset);
 2593 Offset = 0;
 2594 return true;
 2595 }
 2596
 2597 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
 2598 ImmedOffset = ImmedOffset & Mask;
// NOTE(review): dropped line inside this if; upstream it is
// "if (AddrMode == ARMII::AddrMode_i12)" -- confirm against upstream.
 2599 if (isSub) {
 2601 ImmedOffset = -ImmedOffset;
 2602 else
 2603 ImmedOffset |= 1 << NumBits;
 2604 }
 2605 ImmOp.ChangeToImmediate(ImmedOffset);
 2606 Offset &= ~(Mask*Scale);
 2607 }
 2608 }
 2609
// Report the still-unfolded remainder (restoring its sign) to the caller.
 2610 Offset = (isSub) ? -Offset : Offset;
 2611 return Offset == 0;
 2612}
2613
 2614/// analyzeCompare - For a comparison instruction, return the source registers
 2615/// in SrcReg and SrcReg2 if having two register operands, and the value it
 2616/// compares against in CmpValue. Return true if the comparison instruction
 2617/// can be analyzed.
// NOTE(review): extraction dropped doxygen line 2618 (the declarator
// "bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI,
// Register &SrcReg,") -- confirm against upstream.
 2619 Register &SrcReg2, int64_t &CmpMask,
 2620 int64_t &CmpValue) const {
 2621 switch (MI.getOpcode()) {
 2622 default: break;
// Register-vs-immediate compares: SrcReg2 = 0, full mask, value from imm.
 2623 case ARM::CMPri:
 2624 case ARM::t2CMPri:
 2625 case ARM::tCMPi8:
 2626 SrcReg = MI.getOperand(0).getReg();
 2627 SrcReg2 = 0;
 2628 CmpMask = ~0;
 2629 CmpValue = MI.getOperand(1).getImm();
 2630 return true;
// Register-vs-register compares: both registers, full mask, value 0.
 2631 case ARM::CMPrr:
 2632 case ARM::t2CMPrr:
 2633 case ARM::tCMPr:
 2634 SrcReg = MI.getOperand(0).getReg();
 2635 SrcReg2 = MI.getOperand(1).getReg();
 2636 CmpMask = ~0;
 2637 CmpValue = 0;
 2638 return true;
// Bit-test compares: the immediate is reported as a mask, not a value.
 2639 case ARM::TSTri:
 2640 case ARM::t2TSTri:
 2641 SrcReg = MI.getOperand(0).getReg();
 2642 SrcReg2 = 0;
 2643 CmpMask = MI.getOperand(1).getImm();
 2644 CmpValue = 0;
 2645 return true;
 2646 }
 2647
 2648 return false;
 2649}
2650
 2651/// isSuitableForMask - Identify a suitable 'and' instruction that
 2652/// operates on the given source register and applies the same mask
 2653/// as a 'tst' instruction. Provide a limited look-through for copies.
 2654/// When successful, MI will hold the found instruction.
// NOTE(review): extraction dropped doxygen line 2655 (the declarator
// "static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,")
// -- confirm against upstream.
 2656 int CmpMask, bool CommonUse) {
 2657 switch (MI->getOpcode()) {
 2658 case ARM::ANDri:
 2659 case ARM::t2ANDri:
// The AND must apply exactly the TST's mask ...
 2660 if (CmpMask != MI->getOperand(2).getImm())
 2661 return false;
// ... and must involve SrcReg: as its input when the register is shared
// (CommonUse), otherwise as its result.
 2662 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
 2663 return true;
 2664 break;
 2665 }
 2666
 2667 return false;
 2668}
2669
 2670/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
 2671/// the condition code if we modify the instructions such that flags are
 2672/// set by ADD(a,b,X).
// NOTE(review): extraction dropped doxygen line 2673 (the declarator
// "inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes
// CC) {") -- confirm against upstream.
 2674 switch (CC) {
// Any other condition cannot be translated; AL signals that to the caller.
 2675 default: return ARMCC::AL;
// The carry flag has opposite polarity after an ADD vs. a CMP (ARM sets
// C = NOT borrow on subtraction), so the unsigned conditions swap.
 2676 case ARMCC::HS: return ARMCC::LO;
 2677 case ARMCC::LO: return ARMCC::HS;
// Overflow-based conditions are unchanged by the translation.
 2678 case ARMCC::VS: return ARMCC::VS;
 2679 case ARMCC::VC: return ARMCC::VC;
 2680 }
 2681}
2682
2683/// isRedundantFlagInstr - check whether the first instruction, whose only
2684/// purpose is to update flags, can be made redundant.
2685/// CMPrr can be made redundant by SUBrr if the operands are the same.
2686/// CMPri can be made redundant by SUBri if the operands are the same.
2687/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2688/// This function can be extended later on.
2689inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2690 Register SrcReg, Register SrcReg2,
2691 int64_t ImmValue,
2692 const MachineInstr *OI,
2693 bool &IsThumb1) {
2694 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2695 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2696 ((OI->getOperand(1).getReg() == SrcReg &&
2697 OI->getOperand(2).getReg() == SrcReg2) ||
2698 (OI->getOperand(1).getReg() == SrcReg2 &&
2699 OI->getOperand(2).getReg() == SrcReg))) {
2700 IsThumb1 = false;
2701 return true;
2702 }
2703
2704 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2705 ((OI->getOperand(2).getReg() == SrcReg &&
2706 OI->getOperand(3).getReg() == SrcReg2) ||
2707 (OI->getOperand(2).getReg() == SrcReg2 &&
2708 OI->getOperand(3).getReg() == SrcReg))) {
2709 IsThumb1 = true;
2710 return true;
2711 }
2712
2713 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2714 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2715 OI->getOperand(1).getReg() == SrcReg &&
2716 OI->getOperand(2).getImm() == ImmValue) {
2717 IsThumb1 = false;
2718 return true;
2719 }
2720
2721 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2722 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2723 OI->getOperand(2).getReg() == SrcReg &&
2724 OI->getOperand(3).getImm() == ImmValue) {
2725 IsThumb1 = true;
2726 return true;
2727 }
2728
2729 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2730 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2731 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2732 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2733 OI->getOperand(0).getReg() == SrcReg &&
2734 OI->getOperand(1).getReg() == SrcReg2) {
2735 IsThumb1 = false;
2736 return true;
2737 }
2738
2739 if (CmpI->getOpcode() == ARM::tCMPr &&
2740 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2741 OI->getOpcode() == ARM::tADDrr) &&
2742 OI->getOperand(0).getReg() == SrcReg &&
2743 OI->getOperand(2).getReg() == SrcReg2) {
2744 IsThumb1 = true;
2745 return true;
2746 }
2747
2748 return false;
2749}
2750
/// Return true if MI is an instruction whose S-variant can set the flags in
/// place of a following compare. On a match, IsThumb1 reports whether MI is
/// a Thumb1 (16-bit) instruction; Thumb1 cases deliberately fall through
/// into the shared "return true" of the ARM/Thumb2 cases.
 2751static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
 2752 switch (MI->getOpcode()) {
 2753 default: return false;
// Thumb1 candidates: set IsThumb1 before falling through.
 2754 case ARM::tLSLri:
 2755 case ARM::tLSRri:
 2756 case ARM::tLSLrr:
 2757 case ARM::tLSRrr:
 2758 case ARM::tSUBrr:
 2759 case ARM::tADDrr:
 2760 case ARM::tADDi3:
 2761 case ARM::tADDi8:
 2762 case ARM::tSUBi3:
 2763 case ARM::tSUBi8:
 2764 case ARM::tMUL:
 2765 case ARM::tADC:
 2766 case ARM::tSBC:
 2767 case ARM::tRSB:
 2768 case ARM::tAND:
 2769 case ARM::tORR:
 2770 case ARM::tEOR:
 2771 case ARM::tBIC:
 2772 case ARM::tMVN:
 2773 case ARM::tASRri:
 2774 case ARM::tASRrr:
 2775 case ARM::tROR:
 2776 IsThumb1 = true;
 2777 [[fallthrough]];
// ARM and Thumb2 candidates: IsThumb1 is left at the caller's initial value.
 2778 case ARM::RSBrr:
 2779 case ARM::RSBri:
 2780 case ARM::RSCrr:
 2781 case ARM::RSCri:
 2782 case ARM::ADDrr:
 2783 case ARM::ADDri:
 2784 case ARM::ADCrr:
 2785 case ARM::ADCri:
 2786 case ARM::SUBrr:
 2787 case ARM::SUBri:
 2788 case ARM::SBCrr:
 2789 case ARM::SBCri:
 2790 case ARM::t2RSBri:
 2791 case ARM::t2ADDrr:
 2792 case ARM::t2ADDri:
 2793 case ARM::t2ADCrr:
 2794 case ARM::t2ADCri:
 2795 case ARM::t2SUBrr:
 2796 case ARM::t2SUBri:
 2797 case ARM::t2SBCrr:
 2798 case ARM::t2SBCri:
 2799 case ARM::ANDrr:
 2800 case ARM::ANDri:
 2801 case ARM::ANDrsr:
 2802 case ARM::ANDrsi:
 2803 case ARM::t2ANDrr:
 2804 case ARM::t2ANDri:
 2805 case ARM::t2ANDrs:
 2806 case ARM::ORRrr:
 2807 case ARM::ORRri:
 2808 case ARM::ORRrsr:
 2809 case ARM::ORRrsi:
 2810 case ARM::t2ORRrr:
 2811 case ARM::t2ORRri:
 2812 case ARM::t2ORRrs:
 2813 case ARM::EORrr:
 2814 case ARM::EORri:
 2815 case ARM::EORrsr:
 2816 case ARM::EORrsi:
 2817 case ARM::t2EORrr:
 2818 case ARM::t2EORri:
 2819 case ARM::t2EORrs:
 2820 case ARM::BICri:
 2821 case ARM::BICrr:
 2822 case ARM::BICrsi:
 2823 case ARM::BICrsr:
 2824 case ARM::t2BICri:
 2825 case ARM::t2BICrr:
 2826 case ARM::t2BICrs:
 2827 case ARM::t2LSRri:
 2828 case ARM::t2LSRrr:
 2829 case ARM::t2LSLri:
 2830 case ARM::t2LSLrr:
 2831 case ARM::MOVsr:
 2832 case ARM::MOVsi:
 2833 return true;
 2834 }
 2835}
2836
2837/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2838/// comparison into one that sets the zero bit in the flags register;
2839/// Remove a redundant Compare instruction if an earlier instruction can set the
2840/// flags in the same way as Compare.
2841/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2842/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2843/// condition code of instructions which use the flags.
2845 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
2846 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
2847 // Get the unique definition of SrcReg.
2848 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2849 if (!MI) return false;
2850
2851 // Masked compares sometimes use the same register as the corresponding 'and'.
2852 if (CmpMask != ~0) {
2853 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2854 MI = nullptr;
2856 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2857 UI != UE; ++UI) {
2858 if (UI->getParent() != CmpInstr.getParent())
2859 continue;
2860 MachineInstr *PotentialAND = &*UI;
2861 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2862 isPredicated(*PotentialAND))
2863 continue;
2864 MI = PotentialAND;
2865 break;
2866 }
2867 if (!MI) return false;
2868 }
2869 }
2870
2871 // Get ready to iterate backward from CmpInstr.
2872 MachineBasicBlock::iterator I = CmpInstr, E = MI,
2873 B = CmpInstr.getParent()->begin();
2874
2875 // Early exit if CmpInstr is at the beginning of the BB.
2876 if (I == B) return false;
2877
2878 // There are two possible candidates which can be changed to set CPSR:
2879 // One is MI, the other is a SUB or ADD instruction.
2880 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2881 // ADDr[ri](r1, r2, X).
2882 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2883 MachineInstr *SubAdd = nullptr;
2884 if (SrcReg2 != 0)
2885 // MI is not a candidate for CMPrr.
2886 MI = nullptr;
2887 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2888 // Conservatively refuse to convert an instruction which isn't in the same
2889 // BB as the comparison.
2890 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2891 // Thus we cannot return here.
2892 if (CmpInstr.getOpcode() == ARM::CMPri ||
2893 CmpInstr.getOpcode() == ARM::t2CMPri ||
2894 CmpInstr.getOpcode() == ARM::tCMPi8)
2895 MI = nullptr;
2896 else
2897 return false;
2898 }
2899
2900 bool IsThumb1 = false;
2901 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2902 return false;
2903
2904 // We also want to do this peephole for cases like this: if (a*b == 0),
2905 // and optimise away the CMP instruction from the generated code sequence:
2906 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2907 // resulting from the select instruction, but these MOVS instructions for
2908 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2909 // However, if we only have MOVS instructions in between the CMP and the
2910 // other instruction (the MULS in this example), then the CPSR is dead so we
2911 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2912 // reordering and then continue the analysis hoping we can eliminate the
2913 // CMP. This peephole works on the vregs, so is still in SSA form. As a
2914 // consequence, the movs won't redefine/kill the MUL operands which would
2915 // make this reordering illegal.
2917 if (MI && IsThumb1) {
2918 --I;
2919 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
2920 bool CanReorder = true;
2921 for (; I != E; --I) {
2922 if (I->getOpcode() != ARM::tMOVi8) {
2923 CanReorder = false;
2924 break;
2925 }
2926 }
2927 if (CanReorder) {
2928 MI = MI->removeFromParent();
2929 E = CmpInstr;
2930 CmpInstr.getParent()->insert(E, MI);
2931 }
2932 }
2933 I = CmpInstr;
2934 E = MI;
2935 }
2936
2937 // Check that CPSR isn't set between the comparison instruction and the one we
2938 // want to change. At the same time, search for SubAdd.
2939 bool SubAddIsThumb1 = false;
2940 do {
2941 const MachineInstr &Instr = *--I;
2942
2943 // Check whether CmpInstr can be made redundant by the current instruction.
2944 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
2945 SubAddIsThumb1)) {
2946 SubAdd = &*I;
2947 break;
2948 }
2949
2950 // Allow E (which was initially MI) to be SubAdd but do not search before E.
2951 if (I == E)
2952 break;
2953
2954 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2955 Instr.readsRegister(ARM::CPSR, TRI))
2956 // This instruction modifies or uses CPSR after the one we want to
2957 // change. We can't do this transformation.
2958 return false;
2959
2960 if (I == B) {
2961 // In some cases, we scan the use-list of an instruction for an AND;
2962 // that AND is in the same BB, but may not be scheduled before the
2963 // corresponding TST. In that case, bail out.
2964 //
2965 // FIXME: We could try to reschedule the AND.
2966 return false;
2967 }
2968 } while (true);
2969
2970 // Return false if no candidates exist.
2971 if (!MI && !SubAdd)
2972 return false;
2973
2974 // If we found a SubAdd, use it as it will be closer to the CMP
2975 if (SubAdd) {
2976 MI = SubAdd;
2977 IsThumb1 = SubAddIsThumb1;
2978 }
2979
2980 // We can't use a predicated instruction - it doesn't always write the flags.
2981 if (isPredicated(*MI))
2982 return false;
2983
2984 // Scan forward for the use of CPSR
2985 // When checking against MI: if it's a conditional code that requires
2986 // checking of the V bit or C bit, then this is not safe to do.
2987 // It is safe to remove CmpInstr if CPSR is redefined or killed.
2988 // If we are done with the basic block, we need to check whether CPSR is
2989 // live-out.
2991 OperandsToUpdate;
2992 bool isSafe = false;
2993 I = CmpInstr;
2994 E = CmpInstr.getParent()->end();
2995 while (!isSafe && ++I != E) {
2996 const MachineInstr &Instr = *I;
2997 for (unsigned IO = 0, EO = Instr.getNumOperands();
2998 !isSafe && IO != EO; ++IO) {
2999 const MachineOperand &MO = Instr.getOperand(IO);
3000 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3001 isSafe = true;
3002 break;
3003 }
3004 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3005 continue;
3006 if (MO.isDef()) {
3007 isSafe = true;
3008 break;
3009 }
3010 // Condition code is after the operand before CPSR except for VSELs.
3012 bool IsInstrVSel = true;
3013 switch (Instr.getOpcode()) {
3014 default:
3015 IsInstrVSel = false;
3016 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3017 break;
3018 case ARM::VSELEQD:
3019 case ARM::VSELEQS:
3020 case ARM::VSELEQH:
3021 CC = ARMCC::EQ;
3022 break;
3023 case ARM::VSELGTD:
3024 case ARM::VSELGTS:
3025 case ARM::VSELGTH:
3026 CC = ARMCC::GT;
3027 break;
3028 case ARM::VSELGED:
3029 case ARM::VSELGES:
3030 case ARM::VSELGEH:
3031 CC = ARMCC::GE;
3032 break;
3033 case ARM::VSELVSD:
3034 case ARM::VSELVSS:
3035 case ARM::VSELVSH:
3036 CC = ARMCC::VS;
3037 break;
3038 }
3039
3040 if (SubAdd) {
3041 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3042 // on CMP needs to be updated to be based on SUB.
3043 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3044 // needs to be modified.
3045 // Push the condition code operands to OperandsToUpdate.
3046 // If it is safe to remove CmpInstr, the condition code of these
3047 // operands will be modified.
3048 unsigned Opc = SubAdd->getOpcode();
3049 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3050 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3051 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3052 Opc == ARM::tSUBi8;
3053 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3054 if (!IsSub ||
3055 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3056 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3057 // VSel doesn't support condition code update.
3058 if (IsInstrVSel)
3059 return false;
3060 // Ensure we can swap the condition.
3061 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3062 if (NewCC == ARMCC::AL)
3063 return false;
3064 OperandsToUpdate.push_back(
3065 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3066 }
3067 } else {
3068 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3069 switch (CC) {
3070 case ARMCC::EQ: // Z
3071 case ARMCC::NE: // Z
3072 case ARMCC::MI: // N
3073 case ARMCC::PL: // N
3074 case ARMCC::AL: // none
3075 // CPSR can be used multiple times, we should continue.
3076 break;
3077 case ARMCC::HS: // C
3078 case ARMCC::LO: // C
3079 case ARMCC::VS: // V
3080 case ARMCC::VC: // V
3081 case ARMCC::HI: // C Z
3082 case ARMCC::LS: // C Z
3083 case ARMCC::GE: // N V
3084 case ARMCC::LT: // N V
3085 case ARMCC::GT: // Z N V
3086 case ARMCC::LE: // Z N V
3087 // The instruction uses the V bit or C bit which is not safe.
3088 return false;
3089 }
3090 }
3091 }
3092 }
3093
3094 // If CPSR is not killed nor re-defined, we should check whether it is
3095 // live-out. If it is live-out, do not optimize.
3096 if (!isSafe) {
3097 MachineBasicBlock *MBB = CmpInstr.getParent();
3098 for (MachineBasicBlock *Succ : MBB->successors())
3099 if (Succ->isLiveIn(ARM::CPSR))
3100 return false;
3101 }
3102
3103 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3104 // set CPSR so this is represented as an explicit output)
3105 if (!IsThumb1) {
3106 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3107 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3108 MI->getOperand(CPSRRegNum).setIsDef(true);
3109 }
3110 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3111 CmpInstr.eraseFromParent();
3112
3113 // Modify the condition code of operands in OperandsToUpdate.
3114 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3115 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3116 for (auto &[MO, Cond] : OperandsToUpdate)
3117 MO->setImm(Cond);
3118
3119 MI->clearRegisterDeads(ARM::CPSR);
3120
3121 return true;
3122}
3123
  // Do not sink MI if it might be used to optimize a redundant compare.
  // We heuristically only look at the instruction immediately following MI to
  // avoid potentially searching the entire basic block.
  // A predicated instruction never participates in the compare-elimination
  // rewrite (optimizeCompareInstr bails on predicated defs), so sinking it is
  // always safe.
  if (isPredicated(MI))
    return true;
  // NOTE(review): the iterator advanced here is initialized from MI on a line
  // not visible in this excerpt — confirm against the full source.
  ++Next;
  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  bool IsThumb1;
  // If the very next instruction is a compare that MI would make redundant,
  // keep MI in place so optimizeCompareInstr gets the chance to remove the
  // compare; returning false blocks the sink.
  if (Next != MI.getParent()->end() &&
      analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
      isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
    return false;
  return true;
}
3141
                                     Register Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
  // Only a MOVi32imm-style producer is considered: its 32-bit immediate may
  // be splittable into two shifter-operand immediates, letting the single use
  // absorb it and the def be deleted.
  unsigned DefOpc = DefMI.getOpcode();
  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
      DefOpc != ARM::tMOVi32imm)
    return false;
  if (!DefMI.getOperand(1).isImm())
    // Could be t2MOVi32imm @xx
    return false;

  // Folding duplicates the immediate into the use, so require exactly one
  // non-debug use of the defined register.
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  const MCInstrDesc &DefMCID = DefMI.getDesc();
  if (DefMCID.hasOptionalDef()) {
    unsigned NumOps = DefMCID.getNumOperands();
    const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
    if (MO.getReg() == ARM::CPSR && !MO.isDead())
      // If DefMI defines CPSR and it is not dead, it's obviously not safe
      // to delete DefMI.
      return false;
  }

  const MCInstrDesc &UseMCID = UseMI.getDesc();
  if (UseMCID.hasOptionalDef()) {
    unsigned NumOps = UseMCID.getNumOperands();
    if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
      // If the instruction sets the flag, do not attempt this optimization
      // since it may change the semantics of the code.
      return false;
  }

  unsigned UseOpc = UseMI.getOpcode();
  unsigned NewUseOpc = 0;
  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
  // The two halves of the split immediate; the first is emitted as a fresh
  // instruction, the second replaces the use's register operand.
  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
  bool Commute = false;
  switch (UseOpc) {
  default: return false;
  case ARM::SUBrr:
  case ARM::ADDrr:
  case ARM::ORRrr:
  case ARM::EORrr:
  case ARM::t2SUBrr:
  case ARM::t2ADDrr:
  case ARM::t2ORRrr:
  case ARM::t2EORrr: {
    // Commute when the immediate feeds operand 1 rather than operand 2.
    Commute = UseMI.getOperand(2).getReg() != Reg;
    switch (UseOpc) {
    default: break;
    case ARM::ADDrr:
    case ARM::SUBrr:
      // SUB is not commutative: imm - reg cannot be rewritten this way.
      if (UseOpc == ARM::SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      if (ARM_AM::isSOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
      else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
        // Negate the immediate and flip ADD<->SUB instead.
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      break;
    case ARM::ORRrr:
    case ARM::EORrr:
      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
      }
      break;
    case ARM::t2ADDrr:
    case ARM::t2SUBrr: {
      if (UseOpc == ARM::t2SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      // Writes to SP must use the dedicated sp-immediate opcodes.
      const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
      const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
      const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
      if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
      else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      break;
    }
    case ARM::t2ORRrr:
    case ARM::t2EORrr:
      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
      }
      break;
    }
  }
  }

  // Emit "NewReg = <op>ri Reg1, SOImmValV1" ahead of the use, then rewrite the
  // use in place as "<op>ri NewReg, SOImmValV2".
  unsigned OpIdx = Commute ? 2 : 1;
  Register Reg1 = UseMI.getOperand(OpIdx).getReg();
  bool isKill = UseMI.getOperand(OpIdx).isKill();
  const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
  Register NewReg = MRI->createVirtualRegister(TRC);
  // NOTE(review): the predicate operands for the new instruction appear to be
  // added on a line not visible in this excerpt — confirm against full source.
  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
          NewReg)
      .addReg(Reg1, getKillRegState(isKill))
      .addImm(SOImmValV1)
      .add(condCodeOp());
  UseMI.setDesc(get(NewUseOpc));
  UseMI.getOperand(1).setReg(NewReg);
  UseMI.getOperand(1).setIsKill();
  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
  DefMI.eraseFromParent();
  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP.
  // Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm].
  // Then the below code will not be needed, as the input/output register
  // classes will be rgpr or gprSP.
  // For now, we fix the UseMI operand explicitly here:
  switch(NewUseOpc){
    case ARM::t2ADDspImm:
    case ARM::t2SUBspImm:
    case ARM::t2ADDri:
    case ARM::t2SUBri:
      MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
  }
  return true;
}
3290
3291static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3292 const MachineInstr &MI) {
3293 switch (MI.getOpcode()) {
3294 default: {
3295 const MCInstrDesc &Desc = MI.getDesc();
3296 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3297 assert(UOps >= 0 && "bad # UOps");
3298 return UOps;
3299 }
3300
3301 case ARM::LDRrs:
3302 case ARM::LDRBrs:
3303 case ARM::STRrs:
3304 case ARM::STRBrs: {
3305 unsigned ShOpVal = MI.getOperand(3).getImm();
3306 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3307 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3308 if (!isSub &&
3309 (ShImm == 0 ||
3310 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3311 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3312 return 1;
3313 return 2;
3314 }
3315
3316 case ARM::LDRH:
3317 case ARM::STRH: {
3318 if (!MI.getOperand(2).getReg())
3319 return 1;
3320
3321 unsigned ShOpVal = MI.getOperand(3).getImm();
3322 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3323 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3324 if (!isSub &&
3325 (ShImm == 0 ||
3326 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3327 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3328 return 1;
3329 return 2;
3330 }
3331
3332 case ARM::LDRSB:
3333 case ARM::LDRSH:
3334 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3335
3336 case ARM::LDRSB_POST:
3337 case ARM::LDRSH_POST: {
3338 Register Rt = MI.getOperand(0).getReg();
3339 Register Rm = MI.getOperand(3).getReg();
3340 return (Rt == Rm) ? 4 : 3;
3341 }
3342
3343 case ARM::LDR_PRE_REG:
3344 case ARM::LDRB_PRE_REG: {
3345 Register Rt = MI.getOperand(0).getReg();
3346 Register Rm = MI.getOperand(3).getReg();
3347 if (Rt == Rm)
3348 return 3;
3349 unsigned ShOpVal = MI.getOperand(4).getImm();
3350 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3351 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3352 if (!isSub &&
3353 (ShImm == 0 ||
3354 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3355 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3356 return 2;
3357 return 3;
3358 }
3359
3360 case ARM::STR_PRE_REG:
3361 case ARM::STRB_PRE_REG: {
3362 unsigned ShOpVal = MI.getOperand(4).getImm();
3363 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3364 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3365 if (!isSub &&
3366 (ShImm == 0 ||
3367 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3368 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3369 return 2;
3370 return 3;
3371 }
3372
3373 case ARM::LDRH_PRE:
3374 case ARM::STRH_PRE: {
3375 Register Rt = MI.getOperand(0).getReg();
3376 Register Rm = MI.getOperand(3).getReg();
3377 if (!Rm)
3378 return 2;
3379 if (Rt == Rm)
3380 return 3;
3381 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3382 }
3383
3384 case ARM::LDR_POST_REG:
3385 case ARM::LDRB_POST_REG:
3386 case ARM::LDRH_POST: {
3387 Register Rt = MI.getOperand(0).getReg();
3388 Register Rm = MI.getOperand(3).getReg();
3389 return (Rt == Rm) ? 3 : 2;
3390 }
3391
3392 case ARM::LDR_PRE_IMM:
3393 case ARM::LDRB_PRE_IMM:
3394 case ARM::LDR_POST_IMM:
3395 case ARM::LDRB_POST_IMM:
3396 case ARM::STRB_POST_IMM:
3397 case ARM::STRB_POST_REG:
3398 case ARM::STRB_PRE_IMM:
3399 case ARM::STRH_POST:
3400 case ARM::STR_POST_IMM:
3401 case ARM::STR_POST_REG:
3402 case ARM::STR_PRE_IMM:
3403 return 2;
3404
3405 case ARM::LDRSB_PRE:
3406 case ARM::LDRSH_PRE: {
3407 Register Rm = MI.getOperand(3).getReg();
3408 if (Rm == 0)
3409 return 3;
3410 Register Rt = MI.getOperand(0).getReg();
3411 if (Rt == Rm)
3412 return 4;
3413 unsigned ShOpVal = MI.getOperand(4).getImm();
3414 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3415 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3416 if (!isSub &&
3417 (ShImm == 0 ||
3418 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3419 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3420 return 3;
3421 return 4;
3422 }
3423
3424 case ARM::LDRD: {
3425 Register Rt = MI.getOperand(0).getReg();
3426 Register Rn = MI.getOperand(2).getReg();
3427 Register Rm = MI.getOperand(3).getReg();
3428 if (Rm)
3429 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3430 : 3;
3431 return (Rt == Rn) ? 3 : 2;
3432 }
3433
3434 case ARM::STRD: {
3435 Register Rm = MI.getOperand(3).getReg();
3436 if (Rm)
3437 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3438 : 3;
3439 return 2;
3440 }
3441
3442 case ARM::LDRD_POST:
3443 case ARM::t2LDRD_POST:
3444 return 3;
3445
3446 case ARM::STRD_POST:
3447 case ARM::t2STRD_POST:
3448 return 4;
3449
3450 case ARM::LDRD_PRE: {
3451 Register Rt = MI.getOperand(0).getReg();
3452 Register Rn = MI.getOperand(3).getReg();
3453 Register Rm = MI.getOperand(4).getReg();
3454 if (Rm)
3455 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3456 : 4;
3457 return (Rt == Rn) ? 4 : 3;
3458 }
3459
3460 case ARM::t2LDRD_PRE: {
3461 Register Rt = MI.getOperand(0).getReg();
3462 Register Rn = MI.getOperand(3).getReg();
3463 return (Rt == Rn) ? 4 : 3;
3464 }
3465
3466 case ARM::STRD_PRE: {
3467 Register Rm = MI.getOperand(4).getReg();
3468 if (Rm)
3469 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3470 : 4;
3471 return 3;
3472 }
3473
3474 case ARM::t2STRD_PRE:
3475 return 3;
3476
3477 case ARM::t2LDR_POST:
3478 case ARM::t2LDRB_POST:
3479 case ARM::t2LDRB_PRE:
3480 case ARM::t2LDRSBi12:
3481 case ARM::t2LDRSBi8:
3482 case ARM::t2LDRSBpci:
3483 case ARM::t2LDRSBs:
3484 case ARM::t2LDRH_POST:
3485 case ARM::t2LDRH_PRE:
3486 case ARM::t2LDRSBT:
3487 case ARM::t2LDRSB_POST:
3488 case ARM::t2LDRSB_PRE:
3489 case ARM::t2LDRSH_POST:
3490 case ARM::t2LDRSH_PRE:
3491 case ARM::t2LDRSHi12:
3492 case ARM::t2LDRSHi8:
3493 case ARM::t2LDRSHpci:
3494 case ARM::t2LDRSHs:
3495 return 2;
3496
3497 case ARM::t2LDRDi8: {
3498 Register Rt = MI.getOperand(0).getReg();
3499 Register Rn = MI.getOperand(2).getReg();
3500 return (Rt == Rn) ? 3 : 2;
3501 }
3502
3503 case ARM::t2STRB_POST:
3504 case ARM::t2STRB_PRE:
3505 case ARM::t2STRBs:
3506 case ARM::t2STRDi8:
3507 case ARM::t2STRH_POST:
3508 case ARM::t2STRH_PRE:
3509 case ARM::t2STRHs:
3510 case ARM::t2STR_POST:
3511 case ARM::t2STR_PRE:
3512 case ARM::t2STRs:
3513 return 2;
3514 }
3515}
3516
3517// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3518// can't be easily determined return 0 (missing MachineMemOperand).
3519//
3520// FIXME: The current MachineInstr design does not support relying on machine
3521// mem operands to determine the width of a memory access. Instead, we expect
3522// the target to provide this information based on the instruction opcode and
3523// operands. However, using MachineMemOperand is the best solution now for
3524// two reasons:
3525//
3526// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3527// operands. This is much more dangerous than using the MachineMemOperand
3528// sizes because CodeGen passes can insert/remove optional machine operands. In
3529// fact, it's totally incorrect for preRA passes and appears to be wrong for
3530// postRA passes as well.
3531//
3532// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3533// machine model that calls this should handle the unknown (zero size) case.
3534//
3535// Long term, we should require a target hook that verifies MachineMemOperand
3536// sizes during MC lowering. That target hook should be local to MC lowering
3537// because we can't ensure that it is aware of other MI forms. Doing this will
3538// ensure that MachineMemOperands are correctly propagated through all passes.
  // Sum the byte sizes of all attached memory operands, then convert to the
  // number of 32-bit words transferred.
  unsigned Size = 0;
  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
                                  E = MI.memoperands_end();
       I != E; ++I) {
    Size += (*I)->getSize().getValue();
  }
  // FIXME: The scheduler currently can't handle values larger than 16. But
  // the values can actually go up to 32 for floating-point load/store
  // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
  // operations isn't right; we could end up with "extra" memory operands for
  // various reasons, like tail merge merging two memory operations.
  return std::min(Size / 4, 16U);
}
3553
                                                    unsigned NumRegs) {
  // Base cost: one uop per transferred register plus one for the address.
  unsigned UOps = 1 + NumRegs; // 1 for address computation.
  switch (Opc) {
  default:
    break;
  // Writeback variants update the base register: one extra uop.
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    ++UOps; // One for base register writeback.
    break;
  // Return variants additionally write the PC.
  case ARM::LDMIA_RET:
  case ARM::tPOP_RET:
  case ARM::t2LDMIA_RET:
    UOps += 2; // One for base reg wb, one for write to pc.
    break;
  }
  return UOps;
}
3592
                                          const MachineInstr &MI) const {
  // Without itinerary data every instruction is modelled as a single uop.
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI.getDesc();
  unsigned Class = Desc.getSchedClass();
  int ItinUOps = ItinData->getNumMicroOps(Class);
  if (ItinUOps >= 0) {
    // Swift models its load/store micro-ops itself.
    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
      return getNumMicroOpsSwiftLdSt(ItinData, MI);

    return ItinUOps;
  }

  // A negative itinerary count means variable uops: compute from the operand
  // list below.
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple are determined by the number
  // registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    // Variadic register list: everything beyond the fixed operands.
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
    // Dispatch on the subtarget's load/store-multiple timing model.
    // NOTE(review): the `case ARMSubtarget::...` labels of this inner switch
    // were lost in this excerpt's extraction — confirm against full source.
    switch (Subtarget.getLdStMultipleTiming()) {
      // Assume the worst.
      return NumRegs;
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      unsigned UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++UOps;
      return UOps;
    }
      unsigned UOps = (NumRegs / 2);
      // If there are odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
          (*MI.memoperands_begin())->getAlign() < Align(8))
        ++UOps;
      return UOps;
    }
    }
  }
  }
  llvm_unreachable("Didn't find the number of microops");
}
3705
3706std::optional<unsigned>
3707ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3708 const MCInstrDesc &DefMCID, unsigned DefClass,
3709 unsigned DefIdx, unsigned DefAlign) const {
3710 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3711 if (RegNo <= 0)
3712 // Def is the address writeback.
3713 return ItinData->getOperandCycle(DefClass, DefIdx);
3714
3715 unsigned DefCycle;
3716 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3717 // (regno / 2) + (regno % 2) + 1
3718 DefCycle = RegNo / 2 + 1;
3719 if (RegNo % 2)
3720 ++DefCycle;
3721 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3722 DefCycle = RegNo;
3723 bool isSLoad = false;
3724
3725 switch (DefMCID.getOpcode()) {
3726 default: break;
3727 case ARM::VLDMSIA:
3728 case ARM::VLDMSIA_UPD:
3729 case ARM::VLDMSDB_UPD:
3730 isSLoad = true;
3731 break;
3732 }
3733
3734 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3735 // then it takes an extra cycle.
3736 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3737 ++DefCycle;
3738 } else {
3739 // Assume the worst.
3740 DefCycle = RegNo + 2;
3741 }
3742
3743 return DefCycle;
3744}
3745
3746std::optional<unsigned>
3747ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3748 const MCInstrDesc &DefMCID, unsigned DefClass,
3749 unsigned DefIdx, unsigned DefAlign) const {
3750 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3751 if (RegNo <= 0)
3752 // Def is the address writeback.
3753 return ItinData->getOperandCycle(DefClass, DefIdx);
3754
3755 unsigned DefCycle;
3756 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3757 // 4 registers would be issued: 1, 2, 1.
3758 // 5 registers would be issued: 1, 2, 2.
3759 DefCycle = RegNo / 2;
3760 if (DefCycle < 1)
3761 DefCycle = 1;
3762 // Result latency is issue cycle + 2: E2.
3763 DefCycle += 2;
3764 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3765 DefCycle = (RegNo / 2);
3766 // If there are odd number of registers or if it's not 64-bit aligned,
3767 // then it takes an extra AGU (Address Generation Unit) cycle.
3768 if ((RegNo % 2) || DefAlign < 8)
3769 ++DefCycle;
3770 // Result latency is AGU cycles + 2.
3771 DefCycle += 2;
3772 } else {
3773 // Assume the worst.
3774 DefCycle = RegNo + 2;
3775 }
3776
3777 return DefCycle;
3778}
3779
3780std::optional<unsigned>
3781ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3782 const MCInstrDesc &UseMCID, unsigned UseClass,
3783 unsigned UseIdx, unsigned UseAlign) const {
3784 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3785 if (RegNo <= 0)
3786 return ItinData->getOperandCycle(UseClass, UseIdx);
3787
3788 unsigned UseCycle;
3789 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3790 // (regno / 2) + (regno % 2) + 1
3791 UseCycle = RegNo / 2 + 1;
3792 if (RegNo % 2)
3793 ++UseCycle;
3794 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3795 UseCycle = RegNo;
3796 bool isSStore = false;
3797
3798 switch (UseMCID.getOpcode()) {
3799 default: break;
3800 case ARM::VSTMSIA:
3801 case ARM::VSTMSIA_UPD:
3802 case ARM::VSTMSDB_UPD:
3803 isSStore = true;
3804 break;
3805 }
3806
3807 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3808 // then it takes an extra cycle.
3809 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3810 ++UseCycle;
3811 } else {
3812 // Assume the worst.
3813 UseCycle = RegNo + 2;
3814 }
3815
3816 return UseCycle;
3817}
3818
3819std::optional<unsigned>
3820ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3821 const MCInstrDesc &UseMCID, unsigned UseClass,
3822 unsigned UseIdx, unsigned UseAlign) const {
3823 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3824 if (RegNo <= 0)
3825 return ItinData->getOperandCycle(UseClass, UseIdx);
3826
3827 unsigned UseCycle;
3828 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3829 UseCycle = RegNo / 2;
3830 if (UseCycle < 2)
3831 UseCycle = 2;
3832 // Read in E3.
3833 UseCycle += 2;
3834 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3835 UseCycle = (RegNo / 2);
3836 // If there are odd number of registers or if it's not 64-bit aligned,
3837 // then it takes an extra AGU (Address Generation Unit) cycle.
3838 if ((RegNo % 2) || UseAlign < 8)
3839 ++UseCycle;
3840 } else {
3841 // Assume the worst.
3842 UseCycle = 1;
3843 }
3844 return UseCycle;
3845}
3846
// Compute the latency between DefMCID's result operand DefIdx and UseMCID's
// source operand UseIdx, consulting the scheduling itinerary and handling
// variadic load/store-multiple opcodes specially. Returns std::nullopt when
// the use reads the value before the def produces it.
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
    const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
    unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefMCID.getSchedClass();
  unsigned UseClass = UseMCID.getSchedClass();

  // Fast path: both operand indices are within the fixed operand lists, so the
  // itinerary can answer directly.
  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction, the operand
  // latency might be determinable dynamically. Let the target try to
  // figure it out.
  std::optional<unsigned> DefCycle;
  bool LdmBypass = false;
  switch (DefMCID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;

  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
    // Integer LDM results may be forwarded (bypassed); remember this so the
    // pipeline-forwarding check below uses the first def operand.
    LdmBypass = true;
    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;
  }

  if (!DefCycle)
    // We can't seem to determine the result latency of the def, assume it's 2.
    DefCycle = 2;

  std::optional<unsigned> UseCycle;
  switch (UseMCID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;

  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;

  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;
  }

  if (!UseCycle)
    // Assume it's read in the first stage.
    UseCycle = 1;

  // The use reads the operand more than one stage after the def writes it:
  // no defined latency.
  if (UseCycle > *DefCycle + 1)
    return std::nullopt;

  // Latency = write stage - read stage + 1 (guaranteed non-negative by the
  // check above).
  UseCycle = *DefCycle - *UseCycle + 1;
  if (UseCycle > 0u) {
    if (LdmBypass) {
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
      // first def operand.
      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
                                          UseClass, UseIdx))
        UseCycle = *UseCycle - 1;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx)) {
      // Forwarding path exists: one cycle less.
      UseCycle = *UseCycle - 1;
    }
  }

  return UseCycle;
}
3959
                                           const MachineInstr *MI, unsigned Reg,
                                           unsigned &DefIdx, unsigned &Dist) {
  Dist = 0;

  // Walk backwards through the bundle containing MI looking for the
  // instruction that defines Reg, counting the distance in instructions.
  // NOTE(review): the iterator `I` used below is initialized from MI on a line
  // not visible in this excerpt — confirm against the full source.
  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
  assert(II->isInsideBundle() && "Empty bundle?");

  int Idx = -1;
  while (II->isInsideBundle()) {
    // Look for a def of Reg (including partial defs via overlap check).
    Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
    if (Idx != -1)
      break;
    --II;
    ++Dist;
  }

  assert(Idx != -1 && "Cannot find bundled definition!");
  DefIdx = Idx;
  return &*II;
}
3982
                                           const MachineInstr &MI, unsigned Reg,
                                           unsigned &UseIdx, unsigned &Dist) {
  Dist = 0;

  // Walk forwards through the bundle containing MI looking for the first
  // instruction that uses Reg, counting the distance in instructions.
  // NOTE(review): the iterator `II` is initialized from MI on a line not
  // visible in this excerpt — confirm against the full source.
  assert(II->isInsideBundle() && "Empty bundle?");
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();

  // FIXME: This doesn't properly handle multiple uses.
  int Idx = -1;
  while (II != E && II->isInsideBundle()) {
    Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
    if (Idx != -1)
      break;
    // IT instructions don't occupy an issue slot, so don't count them.
    if (II->getOpcode() != ARM::t2IT)
      ++Dist;
    ++II;
  }

  // No use found within the bundle.
  if (Idx == -1) {
    Dist = 0;
    return nullptr;
  }

  UseIdx = Idx;
  return &*II;
}
4011
4012/// Return the number of cycles to add to (or subtract from) the static
4013/// itinerary based on the def opcode and alignment. The caller will ensure that
4014/// adjusted latency is at least one cycle.
// Compute a per-opcode latency delta for DefMI relative to its static
// itinerary entry. Negative deltas model shifted-load address variants that
// are cheaper on A7/A8/A9-like and Swift cores; the positive delta models the
// one-cycle penalty for NEON VLDn loads whose address is known to be aligned
// to less than 8 bytes on cores that check VLDn access alignment.
4015static int adjustDefLatency(const ARMSubtarget &Subtarget,
4016                            const MachineInstr &DefMI,
4017                            const MCInstrDesc &DefMCID, unsigned DefAlign) {
4018  int Adjust = 0;
4019  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4020    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4021    // variants are one cycle cheaper.
4022    switch (DefMCID.getOpcode()) {
4023    default: break;
4024    case ARM::LDRrs:
4025    case ARM::LDRBrs: {
      // Operand 3 holds the AM2 shifter-operand immediate for these loads.
4026      unsigned ShOpVal = DefMI.getOperand(3).getImm();
4027      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4028      if (ShImm == 0 ||
4029          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4030        --Adjust;
4031      break;
4032    }
4033    case ARM::t2LDRs:
4034    case ARM::t2LDRBs:
4035    case ARM::t2LDRHs:
4036    case ARM::t2LDRSHs: {
4037      // Thumb2 mode: lsl only.
4038      unsigned ShAmt = DefMI.getOperand(3).getImm();
4039      if (ShAmt == 0 || ShAmt == 2)
4040        --Adjust;
4041      break;
4042    }
4043    }
4044  } else if (Subtarget.isSwift()) {
4045    // FIXME: Properly handle all of the latency adjustments for address
4046    // writeback.
4047    switch (DefMCID.getOpcode()) {
4048    default: break;
4049    case ARM::LDRrs:
4050    case ARM::LDRBrs: {
4051      unsigned ShOpVal = DefMI.getOperand(3).getImm();
4052      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4053      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      // Swift: add-mode lsl #0-3 is two cycles cheaper, lsr #1 one cycle.
4054      if (!isSub &&
4055          (ShImm == 0 ||
4056           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4057            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4058        Adjust -= 2;
4059      else if (!isSub &&
4060               ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4061        --Adjust;
4062      break;
4063    }
4064    case ARM::t2LDRs:
4065    case ARM::t2LDRBs:
4066    case ARM::t2LDRHs:
4067    case ARM::t2LDRSHs: {
4068      // Thumb2 mode: lsl only.
4069      unsigned ShAmt = DefMI.getOperand(3).getImm();
4070      if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4071        Adjust -= 2;
4072      break;
4073    }
4074    }
4075  }
4076
  // NEON VLDn family: an address aligned below 8 bytes costs one extra cycle
  // on subtargets that check VLDn access alignment.
4077  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4078    switch (DefMCID.getOpcode()) {
4079    default: break;
4080    case ARM::VLD1q8:
4081    case ARM::VLD1q16:
4082    case ARM::VLD1q32:
4083    case ARM::VLD1q64:
4084    case ARM::VLD1q8wb_fixed:
4085    case ARM::VLD1q16wb_fixed:
4086    case ARM::VLD1q32wb_fixed:
4087    case ARM::VLD1q64wb_fixed:
4088    case ARM::VLD1q8wb_register:
4089    case ARM::VLD1q16wb_register:
4090    case ARM::VLD1q32wb_register:
4091    case ARM::VLD1q64wb_register:
4092    case ARM::VLD2d8:
4093    case ARM::VLD2d16:
4094    case ARM::VLD2d32:
4095    case ARM::VLD2q8:
4096    case ARM::VLD2q16:
4097    case ARM::VLD2q32:
4098    case ARM::VLD2d8wb_fixed:
4099    case ARM::VLD2d16wb_fixed:
4100    case ARM::VLD2d32wb_fixed:
4101    case ARM::VLD2q8wb_fixed:
4102    case ARM::VLD2q16wb_fixed:
4103    case ARM::VLD2q32wb_fixed:
4104    case ARM::VLD2d8wb_register:
4105    case ARM::VLD2d16wb_register:
4106    case ARM::VLD2d32wb_register:
4107    case ARM::VLD2q8wb_register:
4108    case ARM::VLD2q16wb_register:
4109    case ARM::VLD2q32wb_register:
4110    case ARM::VLD3d8:
4111    case ARM::VLD3d16:
4112    case ARM::VLD3d32:
4113    case ARM::VLD1d64T:
4114    case ARM::VLD3d8_UPD:
4115    case ARM::VLD3d16_UPD:
4116    case ARM::VLD3d32_UPD:
4117    case ARM::VLD1d64Twb_fixed:
4118    case ARM::VLD1d64Twb_register:
4119    case ARM::VLD3q8_UPD:
4120    case ARM::VLD3q16_UPD:
4121    case ARM::VLD3q32_UPD:
4122    case ARM::VLD4d8:
4123    case ARM::VLD4d16:
4124    case ARM::VLD4d32:
4125    case ARM::VLD1d64Q:
4126    case ARM::VLD4d8_UPD:
4127    case ARM::VLD4d16_UPD:
4128    case ARM::VLD4d32_UPD:
4129    case ARM::VLD1d64Qwb_fixed:
4130    case ARM::VLD1d64Qwb_register:
4131    case ARM::VLD4q8_UPD:
4132    case ARM::VLD4q16_UPD:
4133    case ARM::VLD4q32_UPD:
4134    case ARM::VLD1DUPq8:
4135    case ARM::VLD1DUPq16:
4136    case ARM::VLD1DUPq32:
4137    case ARM::VLD1DUPq8wb_fixed:
4138    case ARM::VLD1DUPq16wb_fixed:
4139    case ARM::VLD1DUPq32wb_fixed:
4140    case ARM::VLD1DUPq8wb_register:
4141    case ARM::VLD1DUPq16wb_register:
4142    case ARM::VLD1DUPq32wb_register:
4143    case ARM::VLD2DUPd8:
4144    case ARM::VLD2DUPd16:
4145    case ARM::VLD2DUPd32:
4146    case ARM::VLD2DUPd8wb_fixed:
4147    case ARM::VLD2DUPd16wb_fixed:
4148    case ARM::VLD2DUPd32wb_fixed:
4149    case ARM::VLD2DUPd8wb_register:
4150    case ARM::VLD2DUPd16wb_register:
4151    case ARM::VLD2DUPd32wb_register:
4152    case ARM::VLD4DUPd8:
4153    case ARM::VLD4DUPd16:
4154    case ARM::VLD4DUPd32:
4155    case ARM::VLD4DUPd8_UPD:
4156    case ARM::VLD4DUPd16_UPD:
4157    case ARM::VLD4DUPd32_UPD:
4158    case ARM::VLD1LNd8:
4159    case ARM::VLD1LNd16:
4160    case ARM::VLD1LNd32:
4161    case ARM::VLD1LNd8_UPD:
4162    case ARM::VLD1LNd16_UPD:
4163    case ARM::VLD1LNd32_UPD:
4164    case ARM::VLD2LNd8:
4165    case ARM::VLD2LNd16:
4166    case ARM::VLD2LNd32:
4167    case ARM::VLD2LNq16:
4168    case ARM::VLD2LNq32:
4169    case ARM::VLD2LNd8_UPD:
4170    case ARM::VLD2LNd16_UPD:
4171    case ARM::VLD2LNd32_UPD:
4172    case ARM::VLD2LNq16_UPD:
4173    case ARM::VLD2LNq32_UPD:
4174    case ARM::VLD4LNd8:
4175    case ARM::VLD4LNd16:
4176    case ARM::VLD4LNd32:
4177    case ARM::VLD4LNq16:
4178    case ARM::VLD4LNq32:
4179    case ARM::VLD4LNd8_UPD:
4180    case ARM::VLD4LNd16_UPD:
4181    case ARM::VLD4LNd32_UPD:
4182    case ARM::VLD4LNq16_UPD:
4183    case ARM::VLD4LNq32_UPD:
4184      // If the address is not 64-bit aligned, the latencies of these
4185      // instructions increases by one.
4186      ++Adjust;
4187      break;
4188    }
4189  }
4190  return Adjust;
4191}
4192
// getOperandLatency (MachineInstr form) - Resolve bundles on both the def
// and use side, short-circuit trivial copy-like defs to a latency of 1, and
// delegate to getOperandLatencyImpl with the resolved instructions plus the
// in-bundle distances (DefAdj/UseAdj). Returns std::nullopt when no
// itinerary-based answer exists; the caller may fall back to
// getInstrLatency.
// NOTE(review): the first signature line is not visible in this extract
// (doxygen dropped it) — confirm against upstream.
4194    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4195    unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4196  // No operand latency. The caller may fall back to getInstrLatency.
4197  if (!ItinData || ItinData->isEmpty())
4198    return std::nullopt;
4199
4200  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4201  Register Reg = DefMO.getReg();
4202
  // If the def is a bundle header, find the real defining instruction
  // inside the bundle; DefIdx/DefAdj are updated to match it.
4203  const MachineInstr *ResolvedDefMI = &DefMI;
4204  unsigned DefAdj = 0;
4205  if (DefMI.isBundle())
4206    ResolvedDefMI =
4207        getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4208  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4209      ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4210    return 1;
4211  }
4212
  // Likewise resolve the use side; a bundle with no use of Reg yields no
  // latency at all.
4213  const MachineInstr *ResolvedUseMI = &UseMI;
4214  unsigned UseAdj = 0;
4215  if (UseMI.isBundle()) {
4216    ResolvedUseMI =
4217        getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4218    if (!ResolvedUseMI)
4219      return std::nullopt;
4220  }
4221
4222  return getOperandLatencyImpl(
4223      ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4224      Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4225}
4226
4227std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4228 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4229 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4230 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4231 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4232 if (Reg == ARM::CPSR) {
4233 if (DefMI.getOpcode() == ARM::FMSTAT) {
4234 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4235 return Subtarget.isLikeA9() ? 1 : 20;
4236 }
4237
4238 // CPSR set and branch can be paired in the same cycle.
4239 if (UseMI.isBranch())
4240 return 0;
4241
4242 // Otherwise it takes the instruction latency (generally one).
4243 unsigned Latency = getInstrLatency(ItinData, DefMI);
4244
4245 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4246 // its uses. Instructions which are otherwise scheduled between them may
4247 // incur a code size penalty (not able to use the CPSR setting 16-bit
4248 // instructions).
4249 if (Latency > 0 && Subtarget.isThumb2()) {
4250 const MachineFunction *MF = DefMI.getParent()->getParent();
4251 if (MF->getFunction().hasOptSize())
4252 --Latency;
4253 }
4254 return Latency;
4255 }
4256
4257 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4258 return std::nullopt;
4259
4260 unsigned DefAlign = DefMI.hasOneMemOperand()
4261 ? (*DefMI.memoperands_begin())->getAlign().value()
4262 : 0;
4263 unsigned UseAlign = UseMI.hasOneMemOperand()
4264 ? (*UseMI.memoperands_begin())->getAlign().value()
4265 : 0;
4266
4267 // Get the itinerary's latency if possible, and handle variable_ops.
4268 std::optional<unsigned> Latency = getOperandLatency(
4269 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4270 // Unable to find operand latency. The caller may resort to getInstrLatency.
4271 if (!Latency)
4272 return std::nullopt;
4273
4274 // Adjust for IT block position.
4275 int Adj = DefAdj + UseAdj;
4276
4277 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4278 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4279 if (Adj >= 0 || (int)*Latency > -Adj) {
4280 return *Latency + Adj;
4281 }
4282 // Return the itinerary latency, which may be zero but not less than zero.
4283 return Latency;
4284}
4285
// getOperandLatency (SDNode form) - Pre-RA scheduling variant operating on
// SelectionDAG nodes. Falls back to fixed defaults (1, or 3 for loads) when
// no itinerary is available, then applies the same per-core shifted-load and
// unaligned-VLDn adjustments as the MachineInstr path, but keyed off the
// pre-selection pseudo opcodes.
// NOTE(review): the second signature line (4287) is not visible in this
// extract (doxygen dropped it) — confirm against upstream.
4286std::optional<unsigned>
4288    SDNode *DefNode, unsigned DefIdx,
4289    SDNode *UseNode, unsigned UseIdx) const {
4290  if (!DefNode->isMachineOpcode())
4291    return 1;
4292
4293  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4294
  // Zero-cost pseudos (e.g. copies eliminated by regalloc) have no latency.
4295  if (isZeroCost(DefMCID.Opcode))
4296    return 0;
4297
4298  if (!ItinData || ItinData->isEmpty())
4299    return DefMCID.mayLoad() ? 3 : 1;
4300
  // Non-machine use node: clamp the def's operand cycle by a per-subtarget
  // pre-ISel adjustment so small latencies collapse to 1.
4301  if (!UseNode->isMachineOpcode()) {
4302    std::optional<unsigned> Latency =
4303        ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4304    int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4305    int Threshold = 1 + Adj;
4306    return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4307  }
4308
4309  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4310  auto *DefMN = cast<MachineSDNode>(DefNode);
4311  unsigned DefAlign = !DefMN->memoperands_empty()
4312                          ? (*DefMN->memoperands_begin())->getAlign().value()
4313                          : 0;
4314  auto *UseMN = cast<MachineSDNode>(UseNode);
4315  unsigned UseAlign = !UseMN->memoperands_empty()
4316                          ? (*UseMN->memoperands_begin())->getAlign().value()
4317                          : 0;
4318  std::optional<unsigned> Latency = getOperandLatency(
4319      ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4320  if (!Latency)
4321    return std::nullopt;
4322
4323  if (Latency > 1U &&
4324      (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4325       Subtarget.isCortexA7())) {
4326    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4327    // variants are one cycle cheaper.
4328    switch (DefMCID.getOpcode()) {
4329    default: break;
4330    case ARM::LDRrs:
4331    case ARM::LDRBrs: {
4332      unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4333      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4334      if (ShImm == 0 ||
4335          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4336        Latency = *Latency - 1;
4337      break;
4338    }
4339    case ARM::t2LDRs:
4340    case ARM::t2LDRBs:
4341    case ARM::t2LDRHs:
4342    case ARM::t2LDRSHs: {
4343      // Thumb2 mode: lsl only.
4344      unsigned ShAmt = DefNode->getConstantOperandVal(2);
4345      if (ShAmt == 0 || ShAmt == 2)
4346        Latency = *Latency - 1;
4347      break;
4348    }
4349    }
4350  } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4351    // FIXME: Properly handle all of the latency adjustments for address
4352    // writeback.
    // NOTE(review): line 4361 (part of the condition below) is not visible
    // in this extract — confirm the full expression against upstream.
4353    switch (DefMCID.getOpcode()) {
4354    default: break;
4355    case ARM::LDRrs:
4356    case ARM::LDRBrs: {
4357      unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4358      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4359      if (ShImm == 0 ||
4360          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4362        Latency = *Latency - 2;
4363      else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4364        Latency = *Latency - 1;
4365      break;
4366    }
4367    case ARM::t2LDRs:
4368    case ARM::t2LDRBs:
4369    case ARM::t2LDRHs:
4370    case ARM::t2LDRSHs:
4371      // Thumb2 mode: lsl 0-3 only.
4372      Latency = *Latency - 2;
4373      break;
4374    }
4375  }
4376
  // Unaligned (<8 byte) VLDn pseudos cost one extra cycle on cores that
  // check VLDn access alignment.
4377  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4378    switch (DefMCID.getOpcode()) {
4379    default: break;
4380    case ARM::VLD1q8:
4381    case ARM::VLD1q16:
4382    case ARM::VLD1q32:
4383    case ARM::VLD1q64:
4384    case ARM::VLD1q8wb_register:
4385    case ARM::VLD1q16wb_register:
4386    case ARM::VLD1q32wb_register:
4387    case ARM::VLD1q64wb_register:
4388    case ARM::VLD1q8wb_fixed:
4389    case ARM::VLD1q16wb_fixed:
4390    case ARM::VLD1q32wb_fixed:
4391    case ARM::VLD1q64wb_fixed:
4392    case ARM::VLD2d8:
4393    case ARM::VLD2d16:
4394    case ARM::VLD2d32:
4395    case ARM::VLD2q8Pseudo:
4396    case ARM::VLD2q16Pseudo:
4397    case ARM::VLD2q32Pseudo:
4398    case ARM::VLD2d8wb_fixed:
4399    case ARM::VLD2d16wb_fixed:
4400    case ARM::VLD2d32wb_fixed:
4401    case ARM::VLD2q8PseudoWB_fixed:
4402    case ARM::VLD2q16PseudoWB_fixed:
4403    case ARM::VLD2q32PseudoWB_fixed:
4404    case ARM::VLD2d8wb_register:
4405    case ARM::VLD2d16wb_register:
4406    case ARM::VLD2d32wb_register:
4407    case ARM::VLD2q8PseudoWB_register:
4408    case ARM::VLD2q16PseudoWB_register:
4409    case ARM::VLD2q32PseudoWB_register:
4410    case ARM::VLD3d8Pseudo:
4411    case ARM::VLD3d16Pseudo:
4412    case ARM::VLD3d32Pseudo:
4413    case ARM::VLD1d8TPseudo:
4414    case ARM::VLD1d16TPseudo:
4415    case ARM::VLD1d32TPseudo:
4416    case ARM::VLD1d64TPseudo:
4417    case ARM::VLD1d64TPseudoWB_fixed:
4418    case ARM::VLD1d64TPseudoWB_register:
4419    case ARM::VLD3d8Pseudo_UPD:
4420    case ARM::VLD3d16Pseudo_UPD:
4421    case ARM::VLD3d32Pseudo_UPD:
4422    case ARM::VLD3q8Pseudo_UPD:
4423    case ARM::VLD3q16Pseudo_UPD:
4424    case ARM::VLD3q32Pseudo_UPD:
4425    case ARM::VLD3q8oddPseudo:
4426    case ARM::VLD3q16oddPseudo:
4427    case ARM::VLD3q32oddPseudo:
4428    case ARM::VLD3q8oddPseudo_UPD:
4429    case ARM::VLD3q16oddPseudo_UPD:
4430    case ARM::VLD3q32oddPseudo_UPD:
4431    case ARM::VLD4d8Pseudo:
4432    case ARM::VLD4d16Pseudo:
4433    case ARM::VLD4d32Pseudo:
4434    case ARM::VLD1d8QPseudo:
4435    case ARM::VLD1d16QPseudo:
4436    case ARM::VLD1d32QPseudo:
4437    case ARM::VLD1d64QPseudo:
4438    case ARM::VLD1d64QPseudoWB_fixed:
4439    case ARM::VLD1d64QPseudoWB_register:
4440    case ARM::VLD1q8HighQPseudo:
4441    case ARM::VLD1q8LowQPseudo_UPD:
4442    case ARM::VLD1q8HighTPseudo:
4443    case ARM::VLD1q8LowTPseudo_UPD:
4444    case ARM::VLD1q16HighQPseudo:
4445    case ARM::VLD1q16LowQPseudo_UPD:
4446    case ARM::VLD1q16HighTPseudo:
4447    case ARM::VLD1q16LowTPseudo_UPD:
4448    case ARM::VLD1q32HighQPseudo:
4449    case ARM::VLD1q32LowQPseudo_UPD:
4450    case ARM::VLD1q32HighTPseudo:
4451    case ARM::VLD1q32LowTPseudo_UPD:
4452    case ARM::VLD1q64HighQPseudo:
4453    case ARM::VLD1q64LowQPseudo_UPD:
4454    case ARM::VLD1q64HighTPseudo:
4455    case ARM::VLD1q64LowTPseudo_UPD:
4456    case ARM::VLD4d8Pseudo_UPD:
4457    case ARM::VLD4d16Pseudo_UPD:
4458    case ARM::VLD4d32Pseudo_UPD:
4459    case ARM::VLD4q8Pseudo_UPD:
4460    case ARM::VLD4q16Pseudo_UPD:
4461    case ARM::VLD4q32Pseudo_UPD:
4462    case ARM::VLD4q8oddPseudo:
4463    case ARM::VLD4q16oddPseudo:
4464    case ARM::VLD4q32oddPseudo:
4465    case ARM::VLD4q8oddPseudo_UPD:
4466    case ARM::VLD4q16oddPseudo_UPD:
4467    case ARM::VLD4q32oddPseudo_UPD:
4468    case ARM::VLD1DUPq8:
4469    case ARM::VLD1DUPq16:
4470    case ARM::VLD1DUPq32:
4471    case ARM::VLD1DUPq8wb_fixed:
4472    case ARM::VLD1DUPq16wb_fixed:
4473    case ARM::VLD1DUPq32wb_fixed:
4474    case ARM::VLD1DUPq8wb_register:
4475    case ARM::VLD1DUPq16wb_register:
4476    case ARM::VLD1DUPq32wb_register:
4477    case ARM::VLD2DUPd8:
4478    case ARM::VLD2DUPd16:
4479    case ARM::VLD2DUPd32:
4480    case ARM::VLD2DUPd8wb_fixed:
4481    case ARM::VLD2DUPd16wb_fixed:
4482    case ARM::VLD2DUPd32wb_fixed:
4483    case ARM::VLD2DUPd8wb_register:
4484    case ARM::VLD2DUPd16wb_register:
4485    case ARM::VLD2DUPd32wb_register:
4486    case ARM::VLD2DUPq8EvenPseudo:
4487    case ARM::VLD2DUPq8OddPseudo:
4488    case ARM::VLD2DUPq16EvenPseudo:
4489    case ARM::VLD2DUPq16OddPseudo:
4490    case ARM::VLD2DUPq32EvenPseudo:
4491    case ARM::VLD2DUPq32OddPseudo:
4492    case ARM::VLD3DUPq8EvenPseudo:
4493    case ARM::VLD3DUPq8OddPseudo:
4494    case ARM::VLD3DUPq16EvenPseudo:
4495    case ARM::VLD3DUPq16OddPseudo:
4496    case ARM::VLD3DUPq32EvenPseudo:
4497    case ARM::VLD3DUPq32OddPseudo:
4498    case ARM::VLD4DUPd8Pseudo:
4499    case ARM::VLD4DUPd16Pseudo:
4500    case ARM::VLD4DUPd32Pseudo:
4501    case ARM::VLD4DUPd8Pseudo_UPD:
4502    case ARM::VLD4DUPd16Pseudo_UPD:
4503    case ARM::VLD4DUPd32Pseudo_UPD:
4504    case ARM::VLD4DUPq8EvenPseudo:
4505    case ARM::VLD4DUPq8OddPseudo:
4506    case ARM::VLD4DUPq16EvenPseudo:
4507    case ARM::VLD4DUPq16OddPseudo:
4508    case ARM::VLD4DUPq32EvenPseudo:
4509    case ARM::VLD4DUPq32OddPseudo:
4510    case ARM::VLD1LNq8Pseudo:
4511    case ARM::VLD1LNq16Pseudo:
4512    case ARM::VLD1LNq32Pseudo:
4513    case ARM::VLD1LNq8Pseudo_UPD:
4514    case ARM::VLD1LNq16Pseudo_UPD:
4515    case ARM::VLD1LNq32Pseudo_UPD:
4516    case ARM::VLD2LNd8Pseudo:
4517    case ARM::VLD2LNd16Pseudo:
4518    case ARM::VLD2LNd32Pseudo:
4519    case ARM::VLD2LNq16Pseudo:
4520    case ARM::VLD2LNq32Pseudo:
4521    case ARM::VLD2LNd8Pseudo_UPD:
4522    case ARM::VLD2LNd16Pseudo_UPD:
4523    case ARM::VLD2LNd32Pseudo_UPD:
4524    case ARM::VLD2LNq16Pseudo_UPD:
4525    case ARM::VLD2LNq32Pseudo_UPD:
4526    case ARM::VLD4LNd8Pseudo:
4527    case ARM::VLD4LNd16Pseudo:
4528    case ARM::VLD4LNd32Pseudo:
4529    case ARM::VLD4LNq16Pseudo:
4530    case ARM::VLD4LNq32Pseudo:
4531    case ARM::VLD4LNd8Pseudo_UPD:
4532    case ARM::VLD4LNd16Pseudo_UPD:
4533    case ARM::VLD4LNd32Pseudo_UPD:
4534    case ARM::VLD4LNq16Pseudo_UPD:
4535    case ARM::VLD4LNq32Pseudo_UPD:
4536      // If the address is not 64-bit aligned, the latencies of these
4537      // instructions increases by one.
4538      Latency = *Latency + 1;
4539      break;
4540    }
4541
4542  return Latency;
4543}
4544
4545unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4546 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4547 MI.isImplicitDef())
4548 return 0;
4549
4550 if (MI.isBundle())
4551 return 0;
4552
4553 const MCInstrDesc &MCID = MI.getDesc();
4554
4555 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4556 !Subtarget.cheapPredicableCPSRDef())) {
4557 // When predicated, CPSR is an additional source operand for CPSR updating
4558 // instructions, this apparently increases their latencies.
4559 return 1;
4560 }
4561 return 0;
4562}
4563
// getInstrLatency (MachineInstr form) - Whole-instruction latency. Copy-like
// pseudos cost 1; bundles sum the latencies of their members (skipping IT);
// otherwise the itinerary's stage latency is used, adjusted for dynamic
// def-side opcode variants, with PredCost reporting the extra predication
// cost for calls / costly CPSR defs.
4564unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4565                                           const MachineInstr &MI,
4566                                           unsigned *PredCost) const {
4567  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4568      MI.isImplicitDef())
4569    return 1;
4570
4571  // An instruction scheduler typically runs on unbundled instructions, however
4572  // other passes may query the latency of a bundled instruction.
  // NOTE(review): the init of iterator I (line 4575) is not visible in this
  // extract (doxygen dropped it) — confirm against upstream.
4573  if (MI.isBundle()) {
4574    unsigned Latency = 0;
4576    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4577    while (++I != E && I->isInsideBundle()) {
4578      if (I->getOpcode() != ARM::t2IT)
4579        Latency += getInstrLatency(ItinData, *I, PredCost);
4580    }
4581    return Latency;
4582  }
4583
4584  const MCInstrDesc &MCID = MI.getDesc();
4585  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4586                                     !Subtarget.cheapPredicableCPSRDef()))) {
4587    // When predicated, CPSR is an additional source operand for CPSR updating
4588    // instructions, this apparently increases their latencies.
4589    *PredCost = 1;
4590  }
4591  // Be sure to call getStageLatency for an empty itinerary in case it has a
4592  // valid MinLatency property.
4593  if (!ItinData)
4594    return MI.mayLoad() ? 3 : 1;
4595
4596  unsigned Class = MCID.getSchedClass();
4597
4598  // For instructions with variable uops, use uops as latency.
4599  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4600    return getNumMicroOps(ItinData, MI);
4601
4602  // For the common case, fall back on the itinerary's latency.
4603  unsigned Latency = ItinData->getStageLatency(Class);
4604
4605  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4606  unsigned DefAlign =
4607      MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4608  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
  // Only apply a negative adjustment if it cannot drive the latency below 0.
4609  if (Adj >= 0 || (int)Latency > -Adj) {
4610    return Latency + Adj;
4611  }
4612  return Latency;
4613}
4614
4615unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4616 SDNode *Node) const {
4617 if (!Node->isMachineOpcode())
4618 return 1;
4619
4620 if (!ItinData || ItinData->isEmpty())
4621 return 1;
4622
4623 unsigned Opcode = Node->getMachineOpcode();
4624 switch (Opcode) {
4625 default:
4626 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4627 case ARM::VLDMQIA:
4628 case ARM::VSTMQIA:
4629 return 2;
4630 }
4631}
4632
4633bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4634 const MachineRegisterInfo *MRI,
4635 const MachineInstr &DefMI,
4636 unsigned DefIdx,
4637 const MachineInstr &UseMI,
4638 unsigned UseIdx) const {
4639 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4640 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4641 if (Subtarget.nonpipelinedVFP() &&
4642 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4643 return true;
4644
4645 // Hoist VFP / NEON instructions with 4 or higher latency.
4646 unsigned Latency =
4647 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4648 if (Latency <= 3)
4649 return false;
4650 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4651 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4652}
4653
4654bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4655 const MachineInstr &DefMI,
4656 unsigned DefIdx) const {
4657 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4658 if (!ItinData || ItinData->isEmpty())
4659 return false;
4660
4661 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4662 if (DDomain == ARMII::DomainGeneral) {
4663 unsigned DefClass = DefMI.getDesc().getSchedClass();
4664 std::optional<unsigned> DefCycle =
4665 ItinData->getOperandCycle(DefClass, DefIdx);
4666 return DefCycle && DefCycle <= 2U;
4667 }
4668 return false;
4669}
4670
// verifyInstruction - Machine verifier hook. Rejects pseudo flag-setting
// opcodes that must not survive Selection DAG, unsupported Thumb1 lo-lo
// moves on pre-v6 cores, illegal registers in Thumb1 push/pop, malformed
// MVE_VMOV_q_rr lane indices, and immediates that do not fit the
// instruction's addressing mode. Sets ErrInfo and returns false on the
// first violation.
4671bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4672                                         StringRef &ErrInfo) const {
4673  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4674    ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4675    return false;
4676  }
4677  if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4678    // Make sure we don't generate a lo-lo mov that isn't supported.
4679    if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4680        !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4681      ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4682      return false;
4683    }
4684  }
4685  if (MI.getOpcode() == ARM::tPUSH ||
4686      MI.getOpcode() == ARM::tPOP ||
4687      MI.getOpcode() == ARM::tPOP_RET) {
    // Skip the predicate operands; registers must be lo (R0-R7), except LR
    // on tPUSH and PC on tPOP_RET.
4688    for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4689      if (MO.isImplicit() || !MO.isReg())
4690        continue;
4691      Register Reg = MO.getReg();
4692      if (Reg < ARM::R0 || Reg > ARM::R7) {
4693        if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4694            !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4695          ErrInfo = "Unsupported register in Thumb1 push/pop";
4696          return false;
4697        }
4698      }
4699    }
4700  }
4701  if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4702    assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4703    if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4704        MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4705      ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4706      return false;
4707    }
4708  }
4709
4710  // Check the address model by taking the first Imm operand and checking it is
4711  // legal for that addressing mode.
  // NOTE(review): the declaration of AddrMode (line 4712) and several case
  // labels (lines 4717-4723) are not visible in this extract (doxygen
  // dropped them) — confirm against upstream.
4713      (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4714  switch (AddrMode) {
4715  default:
4716    break;
4724  case ARMII::AddrModeT2_i12: {
4725    uint32_t Imm = 0;
4726    for (auto Op : MI.operands()) {
4727      if (Op.isImm()) {
4728        Imm = Op.getImm();
4729        break;
4730      }
4731    }
4732    if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4733      ErrInfo = "Incorrect AddrMode Imm for instruction";
4734      return false;
4735    }
4736    break;
4737  }
4738  }
4739  return true;
4740}
4741
// expandLoadStackGuardBase - Expand the LOAD_STACK_GUARD pseudo at MI into
// real instructions: either a TLS-register (MRC/t2MRC) read plus an offset
// load, or a global-address load (with optional GOT indirection), followed
// by the final guard-value load via LoadOpc. The tMOVi32imm path
// additionally saves/restores APSR around the load because Thumb-1
// execute-only materialization clobbers flags.
// NOTE(review): the first signature line and several builder lines
// (e.g. the MIB declaration at 4751 and the predOps/add calls at 4764,
// 4776, 4803, 4809, 4823, 4831) are not visible in this extract (doxygen
// dropped them) — confirm against upstream.
4743                                                  unsigned LoadImmOpc,
4744                                                  unsigned LoadOpc) const {
4745  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4746         "ROPI/RWPI not currently supported with stack guard");
4747
4748  MachineBasicBlock &MBB = *MI->getParent();
4749  DebugLoc DL = MI->getDebugLoc();
4750  Register Reg = MI->getOperand(0).getReg();
4752  unsigned int Offset = 0;
4753
4754  if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4755    assert(!Subtarget.isReadTPSoft() &&
4756           "TLS stack protector requires hardware TLS register");
4757
    // Read the thread pointer: MRC p15, #0, Reg, c13, c0, #3 (TPIDRURO).
4758    BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4759        .addImm(15)
4760        .addImm(0)
4761        .addImm(13)
4762        .addImm(0)
4763        .addImm(3)
4765
4766    Module &M = *MBB.getParent()->getFunction().getParent();
4767    Offset = M.getStackProtectorGuardOffset();
4768    if (Offset & ~0xfffU) {
4769      // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4770      // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4771      // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4772      unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4773      BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4774          .addReg(Reg, RegState::Kill)
4775          .addImm(Offset & ~0xfffU)
4777          .addReg(0);
4778      Offset &= 0xfffU;
4779    }
4780  } else {
4781    const GlobalValue *GV =
4782        cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4783    bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4784
    // Pick the symbol flags the load needs for this object format.
4785    unsigned TargetFlags = ARMII::MO_NO_FLAG;
4786    if (Subtarget.isTargetMachO()) {
4787      TargetFlags |= ARMII::MO_NONLAZY;
4788    } else if (Subtarget.isTargetCOFF()) {
4789      if (GV->hasDLLImportStorageClass())
4790        TargetFlags |= ARMII::MO_DLLIMPORT;
4791      else if (IsIndirect)
4792        TargetFlags |= ARMII::MO_COFFSTUB;
4793    } else if (IsIndirect) {
4794      TargetFlags |= ARMII::MO_GOT;
4795    }
4796
4797    if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4798      Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4799      auto APSREncoding =
4800          ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4801      BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4802          .addImm(APSREncoding)
4804      BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4805          .addGlobalAddress(GV, 0, TargetFlags);
4806      BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4807          .addImm(APSREncoding)
4808          .addReg(CPSRSaveReg, RegState::Kill)
4810    } else {
4811      BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4812          .addGlobalAddress(GV, 0, TargetFlags);
4813    }
4814
4815    if (IsIndirect) {
      // Load the real address of the guard through the GOT entry.
4816      MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4817      MIB.addReg(Reg, RegState::Kill).addImm(0);
4818      auto Flags = MachineMemOperand::MOLoad |
4821      MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4822          MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4824    }
4825  }
4826
  // Final load of the guard value itself.
4827  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4828  MIB.addReg(Reg, RegState::Kill)
4829      .addImm(Offset)
4830      .cloneMemRefs(*MI)
4832}
4833
4834bool
4835ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4836 unsigned &AddSubOpc,
4837 bool &NegAcc, bool &HasLane) const {
4838 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4839 if (I == MLxEntryMap.end())
4840 return false;
4841
4842 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4843 MulOpc = Entry.MulOpc;
4844 AddSubOpc = Entry.AddSubOpc;
4845 NegAcc = Entry.NegAcc;
4846 HasLane = Entry.HasLane;
4847 return true;
4848}
4849
4850//===----------------------------------------------------------------------===//
4851// Execution domains.
4852//===----------------------------------------------------------------------===//
4853//
4854// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4855// and some can go down both. The vmov instructions go down the VFP pipeline,
4856// but they can be changed to vorr equivalents that are executed by the NEON
4857// pipeline.
4858//
4859// We use the following execution domain numbering:
4860//
4866
4867//
4868// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4869//
// getExecutionDomain - Return (current domain, mask of possible domains) for
// MI. Unpredicated VMOVD (and, on cores that prefer NEON for FP moves,
// VMOVRS/VMOVSR/VMOVS) report both VFP and NEON as possible so the domain
// fixer may swizzle them; everything else reports its fixed domain.
// NOTE(review): the signature line (4871) and the DomainNEON/DomainVFP
// condition lines (4890, 4898) are not visible in this extract (doxygen
// dropped them) — confirm against upstream.
4870std::pair<uint16_t, uint16_t>
4872  // If we don't have access to NEON instructions then we won't be able
4873  // to swizzle anything to the NEON domain. Check to make sure.
4874  if (Subtarget.hasNEON()) {
4875    // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4876    // if they are not predicated.
4877    if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4878      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4879
4880    // CortexA9 is particularly picky about mixing the two and wants these
4881    // converted.
4882    if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4883        (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4884         MI.getOpcode() == ARM::VMOVS))
4885      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4886  }
4887  // No other instructions can be swizzled, so just determine their domain.
4888  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4889
4891    return std::make_pair(ExeNEON, 0);
4892
4893  // Certain instructions can go either way on Cortex-A8.
4894  // Treat them as NEON instructions.
4895  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4896    return std::make_pair(ExeNEON, 0);
4897
4899    return std::make_pair(ExeVFP, 0);
4900
4901  return std::make_pair(ExeGeneric, 0);
4902}
4903
// getCorrespondingDRegAndLane - Map an S-register to the D-register that
// contains it and the lane (0 or 1) it occupies. Tries ssub_0 first, then
// ssub_1; asserts if the S-register has no D super-register.
// NOTE(review): the signature line (4904) is not visible in this extract
// (doxygen dropped it); parameters are (TRI, SReg, Lane [out]).
4905                                            unsigned SReg, unsigned &Lane) {
4906  MCRegister DReg =
4907      TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4908  Lane = 0;
4909
4910  if (DReg)
4911    return DReg;
4912
4913  Lane = 1;
4914  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4915
4916  assert(DReg && "S-register with no D super-register?");
4917  return DReg;
4918}
4919
4920/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4921/// set ImplicitSReg to a register number that must be marked as implicit-use or
4922/// zero if no register needs to be defined as implicit-use.
4923///
4924/// If the function cannot determine if an SPR should be marked implicit use or
4925/// not, it returns false.
4926///
4927/// This function handles cases where an instruction is being modified from taking
4928/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4929/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4930/// lane of the DPR).
4931///
4932/// If the other SPR is defined, an implicit-use of it should be added. Else,
4933/// (including the case where the DPR itself is defined), it should not.
4934///
// See the doc comment above: determine whether the "other" lane's SPR of
// DReg must be added as an implicit use when MI is rewritten to read
// DPR[Lane]. Returns false only when liveness cannot be determined.
// NOTE(review): the signature line (4935) and the declaration of LQR
// (line 4949) are not visible in this extract (doxygen dropped them) —
// confirm against upstream.
4936                                       MachineInstr &MI, MCRegister DReg,
4937                                       unsigned Lane,
4938                                       MCRegister &ImplicitSReg) {
4939  // If the DPR is defined or used already, the other SPR lane will be chained
4940  // correctly, so there is nothing to be done.
4941  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4942    ImplicitSReg = MCRegister();
4943    return true;
4944  }
4945
4946  // Otherwise we need to go searching to see if the SPR is set explicitly.
4947  ImplicitSReg = TRI->getSubReg(DReg,
4948                                (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4950      MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4951
4952  if (LQR == MachineBasicBlock::LQR_Live)
4953    return true;
4954  else if (LQR == MachineBasicBlock::LQR_Unknown)
4955    return false;
4956
4957  // If the register is known not to be live, there is no need to add an
4958  // implicit-use.
4959  ImplicitSReg = MCRegister();
4960  return true;
4961}
4962
 4964                                          unsigned Domain) const {
 4965  unsigned DstReg, SrcReg;
 4966  MCRegister DReg;
 4967  unsigned Lane;
 4968  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
 // Each case below rewrites a VFP move in place into an equivalent NEON
 // instruction when the NEON execution domain is requested.
 4970  switch (MI.getOpcode()) {
 4971    default:
 4972      llvm_unreachable("cannot handle opcode!");
 4973      break;
 4974    case ARM::VMOVD:
 4975      if (Domain != ExeNEON)
 4976        break;
 4977
 4978      // Zap the predicate operands.
 4979      assert(!isPredicated(MI) && "Cannot predicate a VORRd");
 4980
 4981      // Make sure we've got NEON instructions.
 4982      assert(Subtarget.hasNEON() && "VORRd requires NEON");
 4983
 4984      // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
 4985      DstReg = MI.getOperand(0).getReg();
 4986      SrcReg = MI.getOperand(1).getReg();
 4987
 4988      for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
 4989        MI.removeOperand(i - 1);
 4990
 4991      // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
 4992      MI.setDesc(get(ARM::VORRd));
 4993      MIB.addReg(DstReg, RegState::Define)
 4994          .addReg(SrcReg)
 4995          .addReg(SrcReg)
 4997      break;
 4998    case ARM::VMOVRS:
 4999      if (Domain != ExeNEON)
 5000        break;
 5001      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
 5002
 5003      // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
 5004      DstReg = MI.getOperand(0).getReg();
 5005      SrcReg = MI.getOperand(1).getReg();
 5006
 5007      for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
 5008        MI.removeOperand(i - 1);
 5009
 5010      DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
 5011
 5012      // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
 5013      // Note that DSrc has been widened and the other lane may be undef, which
 5014      // contaminates the entire register.
 5015      MI.setDesc(get(ARM::VGETLNi32));
 5016      MIB.addReg(DstReg, RegState::Define)
 5017          .addReg(DReg, RegState::Undef)
 5018          .addImm(Lane)
 5020
 5021      // The old source should be an implicit use, otherwise we might think it
 5022      // was dead before here.
 5023      MIB.addReg(SrcReg, RegState::Implicit);
 5024      break;
 5025    case ARM::VMOVSR: {
 5026      if (Domain != ExeNEON)
 5027        break;
 5028      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
 5029
 5030      // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
 5031      DstReg = MI.getOperand(0).getReg();
 5032      SrcReg = MI.getOperand(1).getReg();
 5033
 5034      DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
 5035
 // If we cannot decide whether the sibling S-lane needs an implicit use,
 // leave the instruction in the VFP domain untouched.
 5036      MCRegister ImplicitSReg;
 5037      if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
 5038        break;
 5039
 5040      for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
 5041        MI.removeOperand(i - 1);
 5042
 5043      // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
 5044      // Again DDst may be undefined at the beginning of this instruction.
 5045      MI.setDesc(get(ARM::VSETLNi32));
 5046      MIB.addReg(DReg, RegState::Define)
 5047          .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
 5048          .addReg(SrcReg)
 5049          .addImm(Lane)
 5051
 5052      // The narrower destination must be marked as set to keep previous chains
 5053      // in place.
 5055      if (ImplicitSReg)
 5056        MIB.addReg(ImplicitSReg, RegState::Implicit);
 5057      break;
 5058    }
 5059    case ARM::VMOVS: {
 5060      if (Domain != ExeNEON)
 5061        break;
 5062
 5063      // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
 5064      DstReg = MI.getOperand(0).getReg();
 5065      SrcReg = MI.getOperand(1).getReg();
 5066
 5067      unsigned DstLane = 0, SrcLane = 0;
 5068      MCRegister DDst, DSrc;
 5069      DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
 5070      DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
 5071
 5072      MCRegister ImplicitSReg;
 5073      if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
 5074        break;
 5075
 5076      for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
 5077        MI.removeOperand(i - 1);
 5078
 5079      if (DSrc == DDst) {
 5080        // Destination can be:
 5081        //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
 5082        MI.setDesc(get(ARM::VDUPLN32d));
 5083        MIB.addReg(DDst, RegState::Define)
 5084            .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
 5085            .addImm(SrcLane)
 5087
 5088        // Neither the source or the destination are naturally represented any
 5089        // more, so add them in manually.
 5091        MIB.addReg(SrcReg, RegState::Implicit);
 5092        if (ImplicitSReg)
 5093          MIB.addReg(ImplicitSReg, RegState::Implicit);
 5094        break;
 5095      }
 5096
 5097      // In general there's no single instruction that can perform an S <-> S
 5098      // move in NEON space, but a pair of VEXT instructions *can* do the
 5099      // job. It turns out that the VEXTs needed will only use DSrc once, with
 5100      // the position based purely on the combination of lane-0 and lane-1
 5101      // involved. For example
 5102      //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
 5103      //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
 5104      //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
 5105      //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
 5106      //
 5107      // Pattern of the MachineInstrs is:
 5108      //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
 5109      MachineInstrBuilder NewMIB;
 5110      NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
 5111                       DDst);
 5112
 5113      // On the first instruction, both DSrc and DDst may be undef if present.
 5114      // Specifically when the original instruction didn't have them as an
 5115      // <imp-use>.
 5116      MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
 5117      bool CurUndef = !MI.readsRegister(CurReg, TRI);
 5118      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
 5119
 5120      CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
 5121      CurUndef = !MI.readsRegister(CurReg, TRI);
 5122      NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
 5123          .addImm(1)
 5125
 5126      if (SrcLane == DstLane)
 5127        NewMIB.addReg(SrcReg, RegState::Implicit);
 5128
 // Reuse MI itself as the second VEXTd32 of the pair.
 5129      MI.setDesc(get(ARM::VEXTd32));
 5130      MIB.addReg(DDst, RegState::Define);
 5131
 5132      // On the second instruction, DDst has definitely been defined above, so
 5133      // it is not undef. DSrc, if present, can be undef as above.
 5134      CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
 5135      CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
 5136      MIB.addReg(CurReg, getUndefRegState(CurUndef));
 5137
 5138      CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
 5139      CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
 5140      MIB.addReg(CurReg, getUndefRegState(CurUndef))
 5141          .addImm(1)
 5143
 5144      if (SrcLane != DstLane)
 5145        MIB.addReg(SrcReg, RegState::Implicit);
 5146
 5147      // As before, the original destination is no longer represented, add it
 5148      // implicitly.
 5150      if (ImplicitSReg != 0)
 5151        MIB.addReg(ImplicitSReg, RegState::Implicit);
 5152      break;
 5153    }
 5154  }
 5155}
5156
5157//===----------------------------------------------------------------------===//
5158// Partial register updates
5159//===----------------------------------------------------------------------===//
5160//
5161// Swift renames NEON registers with 64-bit granularity. That means any
5162// instruction writing an S-reg implicitly reads the containing D-reg. The
5163// problem is mostly avoided by translating f32 operations to v2f32 operations
5164// on D-registers, but f32 loads are still a problem.
5165//
5166// These instructions can load an f32 into a NEON register:
5167//
5168// VLDRS - Only writes S, partial D update.
5169// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5170// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5171//
5172// FCONSTD can be used as a dependency-breaking instruction.
 5174    const MachineInstr &MI, unsigned OpNum,
 5175    const TargetRegisterInfo *TRI) const {
 // A clearance of 0 means this subtarget does not suffer partial-update stalls.
 5176  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
 5177  if (!PartialUpdateClearance)
 5178    return 0;
 5179
 5180  assert(TRI && "Need TRI instance");
 5181
 5182  const MachineOperand &MO = MI.getOperand(OpNum);
 5183  if (MO.readsReg())
 5184    return 0;
 5185  Register Reg = MO.getReg();
 5186  int UseOp = -1;
 5187
 5188  switch (MI.getOpcode()) {
 5189  // Normal instructions writing only an S-register.
 5190  case ARM::VLDRS:
 5191  case ARM::FCONSTS:
 5192  case ARM::VMOVSR:
 5193  case ARM::VMOVv8i8:
 5194  case ARM::VMOVv4i16:
 5195  case ARM::VMOVv2i32:
 5196  case ARM::VMOVv2f32:
 5197  case ARM::VMOVv1i64:
 5198    UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
 5199    break;
 5200
 5201  // Explicitly reads the dependency.
 5202  case ARM::VLD1LNd32:
 5203    UseOp = 3;
 5204    break;
 5205  default:
 5206    return 0;
 5207  }
 5208
 5209  // If this instruction actually reads a value from Reg, there is no unwanted
 5210  // dependency.
 5211  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
 5212    return 0;
 5213
 5214  // We must be able to clobber the whole D-reg.
 5215  if (Reg.isVirtual()) {
 5216    // Virtual register must be a def undef foo:ssub_0 operand.
 5217    if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
 5218      return 0;
 5219  } else if (ARM::SPRRegClass.contains(Reg)) {
 5220    // Physical register: MI must define the full D-reg.
 5221    MCRegister DReg =
 5222        TRI->getMatchingSuperReg(Reg, ARM::ssub_0, &ARM::DPRRegClass);
 5223    if (!DReg || !MI.definesRegister(DReg, TRI))
 5224      return 0;
 5225  }
 5226
 5227  // MI has an unwanted D-register dependency.
 5228  // Avoid defs in the previous N instructions.
 5229  return PartialUpdateClearance;
 5230}
5231
 5232// Break a partial register dependency after getPartialRegUpdateClearance
 5233// returned non-zero.
 5235    MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
 5236  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
 5237  assert(TRI && "Need TRI instance");
 5238
 5239  const MachineOperand &MO = MI.getOperand(OpNum);
 5240  Register Reg = MO.getReg();
 5241  assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
 5242  unsigned DReg = Reg;
 5243
 5244  // If MI defines an S-reg, find the corresponding D super-register.
 5245  if (ARM::SPRRegClass.contains(Reg)) {
 5246    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
 5247    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
 5248  }
 5249
 5250  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
 5251  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
 5252
 5253  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
 5254  // the full D-register by loading the same value to both lanes.  The
 5255  // instruction is micro-coded with 2 uops, so don't do this until we can
 5256  // properly schedule micro-coded instructions.  The dispatcher stalls cause
 5257  // too big regressions.
 5258
 5259  // Insert the dependency-breaking FCONSTD before MI.
 5260  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
 5261  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
 5262      .addImm(96)
 // Mark DReg as killed by MI so liveness reflects the new FCONSTD def above.
 5264  MI.addRegisterKilled(DReg, TRI, true);
 5265}
5266
 // Requires the ARMv6K feature set (where the hint instructions were added).
 5268  return Subtarget.hasFeature(ARM::HasV6KOps);
 5269}
5270
 // Instructions without a shift operand (fewer than 4 operands) are treated
 // as trivially fast.
 5272  if (MI->getNumOperands() < 4)
 5273    return true;
 5274  unsigned ShOpVal = MI->getOperand(3).getImm();
 5275  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
 5276  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
 5277  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
 5278      ((ShImm == 1 || ShImm == 2) &&
 5279       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
 5280    return true;
 5281
 5282  return false;
 5283}
5284
 5286    const MachineInstr &MI, unsigned DefIdx,
 5287    SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
 5288  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
 5289  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
 5290
 5291  switch (MI.getOpcode()) {
 5292  case ARM::VMOVDRR:
 5293    // dX = VMOVDRR rY, rZ
 5294    // is the same as:
 5295    // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
 5296    // Populate the InputRegs accordingly.
 5297    // rY
 5298    const MachineOperand *MOReg = &MI.getOperand(1);
 // Undef operands carry no value, so they are not recorded as inputs.
 5299    if (!MOReg->isUndef())
 5300      InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
 5301                                              MOReg->getSubReg(), ARM::ssub_0));
 5302    // rZ
 5303    MOReg = &MI.getOperand(2);
 5304    if (!MOReg->isUndef())
 5305      InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
 5306                                              MOReg->getSubReg(), ARM::ssub_1));
 5307    return true;
 5308  }
 5309  llvm_unreachable("Target dependent opcode missing");
 5310}
5311
 5313    const MachineInstr &MI, unsigned DefIdx,
 5314    RegSubRegPairAndIdx &InputReg) const {
 5315  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
 5316  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
 5317
 5318  switch (MI.getOpcode()) {
 5319  case ARM::VMOVRRD:
 5320    // rX, rY = VMOVRRD dZ
 5321    // is the same as:
 5322    // rX = EXTRACT_SUBREG dZ, ssub_0
 5323    // rY = EXTRACT_SUBREG dZ, ssub_1
 5324    const MachineOperand &MOReg = MI.getOperand(2);
 5325    if (MOReg.isUndef())
 5326      return false;
 5327    InputReg.Reg = MOReg.getReg();
 5328    InputReg.SubReg = MOReg.getSubReg();
 // DefIdx selects which half of the D-register this def extracts.
 5329    InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
 5330    return true;
 5331  }
 5332  llvm_unreachable("Target dependent opcode missing");
 5333}
5334
 5336    const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
 5337    RegSubRegPairAndIdx &InsertedReg) const {
 5338  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
 5339  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
 5340
 5341  switch (MI.getOpcode()) {
 5342  case ARM::VSETLNi32:
 5343  case ARM::MVE_VMOV_to_lane_32:
 5344    // dX = VSETLNi32 dY, rZ, imm
 5345    // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
 5346    const MachineOperand &MOBaseReg = MI.getOperand(1);
 5347    const MachineOperand &MOInsertedReg = MI.getOperand(2);
 5348    if (MOInsertedReg.isUndef())
 5349      return false;
 5350    const MachineOperand &MOIndex = MI.getOperand(3);
 5351    BaseReg.Reg = MOBaseReg.getReg();
 5352    BaseReg.SubReg = MOBaseReg.getSubReg();
 5353
 5354    InsertedReg.Reg = MOInsertedReg.getReg();
 5355    InsertedReg.SubReg = MOInsertedReg.getSubReg();
 // The lane immediate selects which ssub_N index is being overwritten.
 5356    InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
 5357    return true;
 5358  }
 5359  llvm_unreachable("Target dependent opcode missing");
 5360}
5361
 5362std::pair<unsigned, unsigned>
 // The low (option) bits form the direct flag; the remainder is the bitmask part.
 5364  const unsigned Mask = ARMII::MO_OPTION_MASK;
 5365  return std::make_pair(TF & Mask, TF & ~Mask);
 5366}
5367
 5370  using namespace ARMII;
 5371
 // Static table mapping each direct flag to its MIR serialization name.
 5372  static const std::pair<unsigned, const char *> TargetFlags[] = {
 5373      {MO_LO16, "arm-lo16"},       {MO_HI16, "arm-hi16"},
 5374      {MO_LO_0_7, "arm-lo-0-7"},   {MO_HI_0_7, "arm-hi-0-7"},
 5375      {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
 5376  };
 5377  return ArrayRef(TargetFlags);
 5378}
5379
 5382  using namespace ARMII;
 5383
 // Static table mapping each bitmask flag to its MIR serialization name.
 5384  static const std::pair<unsigned, const char *> TargetFlags[] = {
 5385      {MO_COFFSTUB, "arm-coffstub"},
 5386      {MO_GOT, "arm-got"},
 5387      {MO_SBREL, "arm-sbrel"},
 5388      {MO_DLLIMPORT, "arm-dllimport"},
 5389      {MO_SECREL, "arm-secrel"},
 5390      {MO_NONLAZY, "arm-nonlazy"}};
 5391  return ArrayRef(TargetFlags);
 5392}
5393
 5394std::optional<RegImmPair>
 5396  int Sign = 1;
 5397  unsigned Opcode = MI.getOpcode();
 5398  int64_t Offset = 0;
 5399
 5400  // TODO: Handle cases where Reg is a super- or sub-register of the
 5401  // destination register.
 5402  const MachineOperand &Op0 = MI.getOperand(0);
 5403  if (!Op0.isReg() || Reg != Op0.getReg())
 5404    return std::nullopt;
 5405
 5406  // We describe SUBri or ADDri instructions.
 5407  if (Opcode == ARM::SUBri)
 5408    Sign = -1;
 5409  else if (Opcode != ARM::ADDri)
 5410    return std::nullopt;
 5411
 5412  // TODO: Third operand can be global address (usually some string). Since
 5413  //       strings can be relocated we cannot calculate their offsets for
 5414  //       now.
 5415  if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
 5416    return std::nullopt;
 5417
 // The immediate is negated for SUBri so the result always means Reg + Offset.
 5418  Offset = MI.getOperand(2).getImm() * Sign;
 5419  return RegImmPair{MI.getOperand(1).getReg(), Offset};
 5420}
5421
 5425                                    const TargetRegisterInfo *TRI) {
 // Linear scan of the half-open range [From, To) for a clobber of Reg.
 5426  for (auto I = From; I != To; ++I)
 5427    if (I->modifiesRegister(Reg, TRI))
 5428      return true;
 5429  return false;
 5430}
5431
 5433                                  const TargetRegisterInfo *TRI) {
 5434  // Search backwards to the instruction that defines CPSR. This may or may not
 5435  // be a CMP, we check that after this loop. If we find another instruction
 5436  // that reads cpsr, we return nullptr.
 5437  MachineBasicBlock::iterator CmpMI = Br;
 5438  while (CmpMI != Br->getParent()->begin()) {
 5439    --CmpMI;
 5440    if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
 5441      break;
 5442    if (CmpMI->readsRegister(ARM::CPSR, TRI))
 5443      break;
 5444  }
 5445
 5446  // Check that this inst is a CMP r[0-7], #0 and that the register
 5447  // is not redefined between the cmp and the br.
 5448  if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
 5449    return nullptr;
 5450  Register Reg = CmpMI->getOperand(0).getReg();
 // The compare must be unpredicated and against zero to fold into CBZ/CBNZ.
 5451  Register PredReg;
 5452  ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
 5453  if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
 5454    return nullptr;
 5455  if (!isARMLowRegister(Reg))
 5456    return nullptr;
 5457  if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
 5458    return nullptr;
 5459
 5460  return &*CmpMI;
 5461}
5462
 5464                                             const ARMSubtarget *Subtarget,
 5465                                             bool ForCodesize) {
 // Returned cost is in bytes when ForCodesize, otherwise instruction count.
 5466  if (Subtarget->isThumb()) {
 5467    if (Val <= 255) // MOV
 5468      return ForCodesize ? 2 : 1;
 5469    if (Subtarget->hasV6T2Ops() && (Val <= 0xffff ||                    // MOV
 5470                                    ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
 5471                                    ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
 5472      return ForCodesize ? 4 : 1;
 5473    if (Val <= 510) // MOV + ADDi8
 5474      return ForCodesize ? 4 : 2;
 5475    if (~Val <= 255) // MOV + MVN
 5476      return ForCodesize ? 4 : 2;
 5477    if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
 5478      return ForCodesize ? 4 : 2;
 5479  } else {
 5480    if (ARM_AM::getSOImmVal(Val) != -1) // MOV
 5481      return ForCodesize ? 4 : 1;
 5482    if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
 5483      return ForCodesize ? 4 : 1;
 5484    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
 5485      return ForCodesize ? 4 : 1;
 5486    if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
 5487      return ForCodesize ? 8 : 2;
 5488    if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
 5489      return ForCodesize ? 8 : 2;
 5490  }
 5491  if (Subtarget->useMovt()) // MOVW + MOVT
 5492    return ForCodesize ? 8 : 2;
 5493  return ForCodesize ? 8 : 3; // Literal pool load
 5494}
5495
5496bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5497 const ARMSubtarget *Subtarget,
5498 bool ForCodesize) {
5499 // Check with ForCodesize
5500 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5501 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5502 if (Cost1 < Cost2)
5503 return true;
5504 if (Cost1 > Cost2)
5505 return false;
5506
5507 // If they are equal, try with !ForCodesize
5508 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5509 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5510}
5511
5512/// Constants defining how certain sequences should be outlined.
5513/// This encompasses how an outlined function should be called, and what kind of
5514/// frame should be emitted for that outlined function.
5515///
5516/// \p MachineOutlinerTailCall implies that the function is being created from
5517/// a sequence of instructions ending in a return.
5518///
5519/// That is,
5520///
5521/// I1 OUTLINED_FUNCTION:
5522/// I2 --> B OUTLINED_FUNCTION I1
5523/// BX LR I2
5524/// BX LR
5525///
5526/// +-------------------------+--------+-----+
5527/// | | Thumb2 | ARM |
5528/// +-------------------------+--------+-----+
5529/// | Call overhead in Bytes | 4 | 4 |
5530/// | Frame overhead in Bytes | 0 | 0 |
5531/// | Stack fixup required | No | No |
5532/// +-------------------------+--------+-----+
5533///
5534/// \p MachineOutlinerThunk implies that the function is being created from
5535/// a sequence of instructions ending in a call. The outlined function is
5536/// called with a BL instruction, and the outlined function tail-calls the
5537/// original call destination.
5538///
5539/// That is,
5540///
5541/// I1 OUTLINED_FUNCTION:
5542/// I2 --> BL OUTLINED_FUNCTION I1
5543/// BL f I2
5544/// B f
5545///
5546/// +-------------------------+--------+-----+
5547/// | | Thumb2 | ARM |
5548/// +-------------------------+--------+-----+
5549/// | Call overhead in Bytes | 4 | 4 |
5550/// | Frame overhead in Bytes | 0 | 0 |
5551/// | Stack fixup required | No | No |
5552/// +-------------------------+--------+-----+
5553///
5554/// \p MachineOutlinerNoLRSave implies that the function should be called using
5555/// a BL instruction, but doesn't require LR to be saved and restored. This
5556/// happens when LR is known to be dead.
5557///
5558/// That is,
5559///
5560/// I1 OUTLINED_FUNCTION:
5561/// I2 --> BL OUTLINED_FUNCTION I1
5562/// I3 I2
5563/// I3
5564/// BX LR
5565///
5566/// +-------------------------+--------+-----+
5567/// | | Thumb2 | ARM |
5568/// +-------------------------+--------+-----+
5569/// | Call overhead in Bytes | 4 | 4 |
5570/// | Frame overhead in Bytes | 2 | 4 |
5571/// | Stack fixup required | No | No |
5572/// +-------------------------+--------+-----+
5573///
5574/// \p MachineOutlinerRegSave implies that the function should be called with a
5575/// save and restore of LR to an available register. This allows us to avoid
5576/// stack fixups. Note that this outlining variant is compatible with the
5577/// NoLRSave case.
5578///
5579/// That is,
5580///
5581/// I1 Save LR OUTLINED_FUNCTION:
5582/// I2 --> BL OUTLINED_FUNCTION I1
5583/// I3 Restore LR I2
5584/// I3
5585/// BX LR
5586///
5587/// +-------------------------+--------+-----+
5588/// | | Thumb2 | ARM |
5589/// +-------------------------+--------+-----+
5590/// | Call overhead in Bytes | 8 | 12 |
5591/// | Frame overhead in Bytes | 2 | 4 |
5592/// | Stack fixup required | No | No |
5593/// +-------------------------+--------+-----+
5594///
5595/// \p MachineOutlinerDefault implies that the function should be called with
5596/// a save and restore of LR to the stack.
5597///
5598/// That is,
5599///
5600/// I1 Save LR OUTLINED_FUNCTION:
5601/// I2 --> BL OUTLINED_FUNCTION I1
5602/// I3 Restore LR I2
5603/// I3
5604/// BX LR
5605///
5606/// +-------------------------+--------+-----+
5607/// | | Thumb2 | ARM |
5608/// +-------------------------+--------+-----+
5609/// | Call overhead in Bytes | 8 | 12 |
5610/// | Frame overhead in Bytes | 2 | 4 |
5611/// | Stack fixup required | Yes | Yes |
5612/// +-------------------------+--------+-----+
5613
 5621
 5627
 5640
 // Costs in bytes for each outlining strategy, chosen per Thumb/ARM encoding
 // (see the call/frame overhead tables in the comment above).
 5642      : CallTailCall(target.isThumb() ? 4 : 4),
 5643        FrameTailCall(target.isThumb() ? 0 : 0),
 5644        CallThunk(target.isThumb() ? 4 : 4),
 5645        FrameThunk(target.isThumb() ? 0 : 0),
 5646        CallNoLRSave(target.isThumb() ? 4 : 4),
 5647        FrameNoLRSave(target.isThumb() ? 2 : 4),
 5648        CallRegSave(target.isThumb() ? 8 : 12),
 5649        FrameRegSave(target.isThumb() ? 2 : 4),
 5650        CallDefault(target.isThumb() ? 8 : 12),
 5651        FrameDefault(target.isThumb() ? 2 : 4),
 5652        SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
 5653};
5654
 5656ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
 5657  MachineFunction *MF = C.getMF();
 5658  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
 5659  const ARMBaseRegisterInfo *ARI =
 5660      static_cast<const ARMBaseRegisterInfo *>(&TRI);
 5661
 5662  BitVector regsReserved = ARI->getReservedRegs(*MF);
 5663  // Check if there is an available register across the sequence that we can
 5664  // use.
 5665  for (Register Reg : ARM::rGPRRegClass) {
 5666    if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
 5667        Reg != ARM::LR &&  // LR is not reserved, but don't use it.
 5668        Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
 5669        C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
 5670        C.isAvailableInsideSeq(Reg, TRI))
 5671      return Reg;
 5672  }
 // Invalid Register: no free register; caller must use another save strategy.
 5673  return Register();
 5674}
5675
 5676// Compute liveness of LR at the point after the interval [I, E), which
 5677// denotes a *backward* iteration through instructions. Used only for return
 5678// basic blocks, which do not end with a tail call.
 5682  // At the end of the function LR is dead.
 5683  bool Live = false;
 5684  for (; I != E; ++I) {
 5685    const MachineInstr &MI = *I;
 5686
 5687    // Check defs of LR.
 5688    if (MI.modifiesRegister(ARM::LR, &TRI))
 5689      Live = false;
 5690
 5691    // Check uses of LR.
 5692    unsigned Opcode = MI.getOpcode();
 5693    if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
 5694        Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
 5695        Opcode == ARM::tBXNS_RET) {
 5696      // These instructions use LR, but it's not an (explicit or implicit)
 5697      // operand.
 5698      Live = true;
 5699      continue;
 5700    }
 5701    if (MI.readsRegister(ARM::LR, &TRI))
 5702      Live = true;
 5703  }
 // LR is "available" at the queried point exactly when it is not live.
 5704  return !Live;
 5705}
5706
 5707std::optional<std::unique_ptr<outliner::OutlinedFunction>>
 5709    const MachineModuleInfo &MMI,
 5710    std::vector<outliner::Candidate> &RepeatedSequenceLocs,
 5711    unsigned MinRepeats) const {
 // Total size in bytes of one instance of the candidate sequence.
 5712  unsigned SequenceSize = 0;
 5713  for (auto &MI : RepeatedSequenceLocs[0])
 5714    SequenceSize += getInstSizeInBytes(MI);
 5715
 5716  // Properties about candidate MBBs that hold for all of them.
 5717  unsigned FlagsSetInAll = 0xF;
 5718
 5719  // Compute liveness information for each candidate, and set FlagsSetInAll.
 5721  for (outliner::Candidate &C : RepeatedSequenceLocs)
 5722    FlagsSetInAll &= C.Flags;
 5723
 5724  // According to the ARM Procedure Call Standard, the following are
 5725  // undefined on entry/exit from a function call:
 5726  //
 5727  // * Register R12(IP),
 5728  // * Condition codes (and thus the CPSR register)
 5729  //
 5730  // Since we control the instructions which are part of the outlined regions
 5731  // we don't need to be fully compliant with the AAPCS, but we have to
 5732  // guarantee that if a veneer is inserted at link time the code is still
 5733  // correct. Because of this, we can't outline any sequence of instructions
 5734  // where one of these registers is live into/across it. Thus, we need to
 5735  // delete those candidates.
 5736  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
 5737    // If the unsafe registers in this block are all dead, then we don't need
 5738    // to compute liveness here.
 5739    if (C.Flags & UnsafeRegsDead)
 5740      return false;
 5741    return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
 5742  };
 5743
 5744  // Are there any candidates where those registers are live?
 5745  if (!(FlagsSetInAll & UnsafeRegsDead)) {
 5746    // Erase every candidate that violates the restrictions above. (It could be
 5747    // true that we have viable candidates, so it's not worth bailing out in
 5748    // the case that, say, 1 out of 20 candidates violate the restrictions.)
 5749    llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
 5750
 5751    // If the sequence doesn't have enough candidates left, then we're done.
 5752    if (RepeatedSequenceLocs.size() < MinRepeats)
 5753      return std::nullopt;
 5754  }
 5755
 5756  // We expect the majority of the outlining candidates to be in consensus with
 5757  // regard to return address sign and authentication, and branch target
 5758  // enforcement, in other words, partitioning according to all the four
 5759  // possible combinations of PAC-RET and BTI is going to yield one big subset
 5760  // and three small (likely empty) subsets. That allows us to cull incompatible
 5761  // candidates separately for PAC-RET and BTI.
 5762
 5763  // Partition the candidates in two sets: one with BTI enabled and one with BTI
 5764  // disabled. Remove the candidates from the smaller set. If they are the same
 5765  // number prefer the non-BTI ones for outlining, since they have less
 5766  // overhead.
 5767  auto NoBTI =
 5768      llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
 5769        const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
 5770        return AFI.branchTargetEnforcement();
 5771      });
 5772  if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
 5773      std::distance(NoBTI, RepeatedSequenceLocs.end()))
 5774    RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
 5775  else
 5776    RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
 5777
 5778  if (RepeatedSequenceLocs.size() < MinRepeats)
 5779    return std::nullopt;
 5780
 5781  // Likewise, partition the candidates according to PAC-RET enablement.
 5782  auto NoPAC =
 5783      llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
 5784        const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
 5785        // If the function happens to not spill the LR, do not disqualify it
 5786        // from the outlining.
 5787        return AFI.shouldSignReturnAddress(true);
 5788      });
 5789  if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
 5790      std::distance(NoPAC, RepeatedSequenceLocs.end()))
 5791    RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
 5792  else
 5793    RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
 5794
 5795  if (RepeatedSequenceLocs.size() < MinRepeats)
 5796    return std::nullopt;
 5797
 5798  // At this point, we have only "safe" candidates to outline. Figure out
 5799  // frame + call instruction information.
 5800
 5801  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
 5802
 5803  // Helper lambda which sets call information for every candidate.
 5804  auto SetCandidateCallInfo =
 5805      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
 5806        for (outliner::Candidate &C : RepeatedSequenceLocs)
 5807          C.setCallInfo(CallID, NumBytesForCall);
 5808      };
 5809
 5810  OutlinerCosts Costs(Subtarget);
 5811
 5812  const auto &SomeMFI =
 5813      *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
 5814  // Adjust costs to account for the BTI instructions.
 5815  if (SomeMFI.branchTargetEnforcement()) {
 5816    Costs.FrameDefault += 4;
 5817    Costs.FrameNoLRSave += 4;
 5818    Costs.FrameRegSave += 4;
 5819    Costs.FrameTailCall += 4;
 5820    Costs.FrameThunk += 4;
 5821  }
 5822
 5823  // Adjust costs to account for sign and authentication instructions.
 5824  if (SomeMFI.shouldSignReturnAddress(true)) {
 5825    Costs.CallDefault += 8;          // +PAC instr, +AUT instr
 5826    Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
 5827  }
 5828
 5829  unsigned FrameID = MachineOutlinerDefault;
 5830  unsigned NumBytesToCreateFrame = Costs.FrameDefault;
 5831
 5832  // If the last instruction in any candidate is a terminator, then we should
 5833  // tail call all of the candidates.
 5834  if (RepeatedSequenceLocs[0].back().isTerminator()) {
 5835    FrameID = MachineOutlinerTailCall;
 5836    NumBytesToCreateFrame = Costs.FrameTailCall;
 5837    SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
 5838  } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
 5839             LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
 5840             LastInstrOpcode == ARM::tBLXr ||
 5841             LastInstrOpcode == ARM::tBLXr_noip ||
 5842             LastInstrOpcode == ARM::tBLXi) {
 5843    FrameID = MachineOutlinerThunk;
 5844    NumBytesToCreateFrame = Costs.FrameThunk;
 5845    SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
 5846  } else {
 5847    // We need to decide how to emit calls + frames. We can always emit the same
 5848    // frame if we don't need to save to the stack. If we have to save to the
 5849    // stack, then we need a different frame.
 5850    unsigned NumBytesNoStackCalls = 0;
 5851    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
 5852
 5853    for (outliner::Candidate &C : RepeatedSequenceLocs) {
 5854      // LR liveness is overestimated in return blocks, unless they end with a
 5855      // tail call.
 5856      const auto Last = C.getMBB()->rbegin();
 5857      const bool LRIsAvailable =
 5858          C.getMBB()->isReturnBlock() && !Last->isCall()
 5861              : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
 5862      if (LRIsAvailable) {
 5863        FrameID = MachineOutlinerNoLRSave;
 5864        NumBytesNoStackCalls += Costs.CallNoLRSave;
 5865        C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
 5866        CandidatesWithoutStackFixups.push_back(C);
 5867      }
 5868
 5869      // Is an unused register available? If so, we won't modify the stack, so
 5870      // we can outline with the same frame type as those that don't save LR.
 5871      else if (findRegisterToSaveLRTo(C)) {
 5872        FrameID = MachineOutlinerRegSave;
 5873        NumBytesNoStackCalls += Costs.CallRegSave;
 5874        C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
 5875        CandidatesWithoutStackFixups.push_back(C);
 5876      }
 5877
 5878      // Is SP used in the sequence at all? If not, we don't have to modify
 5879      // the stack, so we are guaranteed to get the same frame.
 5880      else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
 5881        NumBytesNoStackCalls += Costs.CallDefault;
 5882        C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
 5883        CandidatesWithoutStackFixups.push_back(C);
 5884      }
 5885
 5886      // If we outline this, we need to modify the stack. Pretend we don't
 5887      // outline this by saving all of its bytes.
 5888      else
 5889        NumBytesNoStackCalls += SequenceSize;
 5890    }
 5891
 5892    // If there are no places where we have to save LR, then note that we don't
 5893    // have to update the stack. Otherwise, give every candidate the default
 5894    // call type
 5895    if (NumBytesNoStackCalls <=
 5896        RepeatedSequenceLocs.size() * Costs.CallDefault) {
 5897      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
 5898      FrameID = MachineOutlinerNoLRSave;
 5899      if (RepeatedSequenceLocs.size() < MinRepeats)
 5900        return std::nullopt;
 5901    } else
 5902      SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
 5903  }
 5904
 5905  // Does every candidate's MBB contain a call? If so, then we might have a
 5906  // call in the range.
 5907  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
 5908    // check if the range contains a call. These require a save + restore of
 5909    // the link register.
 5910    outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
 5911    if (any_of(drop_end(FirstCand),
 5912               [](const MachineInstr &MI) { return MI.isCall(); }))
 5913      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
 5914
 5915    // Handle the last instruction separately. If it is tail call, then the
 5916    // last instruction is a call, we don't want to save + restore in this
 5917    // case. However, it could be possible that the last instruction is a
 5918    // call without it being valid to tail call this sequence. We should
 5919    // consider this as well.
 5920    else if (FrameID != MachineOutlinerThunk &&
 5921             FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
 5922      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
 5923  }
 5924
 5925  return std::make_unique<outliner::OutlinedFunction>(
 5926      RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
 5927}
5928
// Decide whether MI's SP-relative immediate offset can absorb an additional
// stack adjustment of `Fixup` bytes; when `Updt` is true, rewrite the
// immediate operand in place. Returns true when MI needs no change or the
// fixed-up offset is still encodable, false when MI cannot be adjusted.
// NOTE(review): several `case ARMII::...` labels in the switch below were
// elided by the listing extractor (visible as gaps in the embedded line
// numbers); confirm label/statement pairing against the upstream source.
5929bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
5930 int64_t Fixup,
5931 bool Updt) const {
5932 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
5933 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
5934 if (SPIdx < 0)
5935 // No SP operand
5936 return true;
5937 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
5938 // If SP is not the base register we can't do much
5939 return false;
5940
5941 // Stack might be involved but addressing mode doesn't handle any offset.
5942 // Rq: AddrModeT1_[1|2|4] don't operate on SP
5943 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
5944 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
5945 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
5946 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as based register
5947 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
5948 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
5949 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
5950 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
5951 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
5953 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
5954 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
5955 return false;
5956
// The immediate offset is conventionally the third-from-last operand for
// these load/store encodings.
5957 unsigned NumOps = MI->getDesc().getNumOperands();
5958 unsigned ImmIdx = NumOps - 3;
5959
5960 const MachineOperand &Offset = MI->getOperand(ImmIdx);
5961 assert(Offset.isImm() && "Is not an immediate");
5962 int64_t OffVal = Offset.getImm();
5963
5964 if (OffVal < 0)
5965 // Don't override data if they are below SP.
5966 return false;
5967
// NumBits/Scale describe how the immediate is encoded for each addressing
// mode: the encodable range is [0, (2^NumBits - 1) * Scale].
5968 unsigned NumBits = 0;
5969 unsigned Scale = 1;
5970
5971 switch (AddrMode) {
5972 case ARMII::AddrMode3:
5973 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
5974 return false;
5975 OffVal = ARM_AM::getAM3Offset(OffVal);
5976 NumBits = 8;
5977 break;
5978 case ARMII::AddrMode5:
5979 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
5980 return false;
5981 OffVal = ARM_AM::getAM5Offset(OffVal);
5982 NumBits = 8;
5983 Scale = 4;
5984 break;
// NOTE(review): case label elided here (presumably AddrMode5FP16, given the
// getAM5FP16Op/Offset accessors below) — confirm upstream.
5986 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
5987 return false;
5988 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
5989 NumBits = 8;
5990 Scale = 2;
5991 break;
// NOTE(review): case label elided here.
5993 NumBits = 8;
5994 break;
// NOTE(review): case label elided here.
5996 // FIXME: Values are already scaled in this addressing mode.
5997 assert((Fixup & 3) == 0 && "Can't encode this offset!");
5998 NumBits = 10;
5999 break;
// NOTE(review): case label elided here.
6001 NumBits = 8;
6002 Scale = 4;
6003 break;
// NOTE(review): case label(s) elided here.
6006 NumBits = 12;
6007 break;
6008 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6009 NumBits = 8;
6010 Scale = 4;
6011 break;
6012 default:
6013 llvm_unreachable("Unsupported addressing mode!");
6014 }
6015 // Make sure the offset is encodable for instructions that scale the
6016 // immediate.
6017 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6018 "Can't encode this offset!");
6019 OffVal += Fixup / Scale;
6020
6021 unsigned Mask = (1 << NumBits) - 1;
6022
6023 if (OffVal <= Mask) {
6024 if (Updt)
// Commit the fixed-up immediate back onto the instruction.
6025 MI->getOperand(ImmIdx).setImm(OffVal);
6026 return true;
6027 }
6028
6029 return false;
6030}
6031
// Propagate security-relevant function attributes from a representative
// candidate onto the newly created outlined function F, then defer to the
// generated base implementation.
// NOTE(review): the opening signature line
// (`void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(`) was elided
// by the listing extractor.
6033 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6034 outliner::Candidate &C = Candidates.front();
6035 // branch-target-enforcement is guaranteed to be consistent between all
6036 // candidates, so we only need to look at one.
6037 const Function &CFn = C.getMF()->getFunction();
6038 if (CFn.hasFnAttribute("branch-target-enforcement"))
6039 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"))

6041 if (CFn.hasFnAttribute("sign-return-address"))
6042 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
6043
6044 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6045}
6046
// Whole-function legality filter for the machine outliner: returns false for
// linkonce_odr functions (unless explicitly allowed), functions with explicit
// section assignments, and (per the FIXME below) a case whose guarding
// condition line was elided by the listing extractor.
// NOTE(review): the opening signature line
// (`bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(`) was elided.
6048 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6049 const Function &F = MF.getFunction();
6050
6051 // Can F be deduplicated by the linker? If it can, don't outline from it.
6052 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6053 return false;
6054
6055 // Don't outline from functions with section markings; the program could
6056 // expect that all the code is in the named section.
6057 // FIXME: Allow outlining from multiple functions with the same section
6058 // marking.
6059 if (F.hasSection())
6060 return false;
6061
6062 // FIXME: Thumb1 outlining is not handled
// NOTE(review): the condition line for this early-out (line 6063, presumably
// a Thumb1 subtarget check) was elided by the extractor.
6064 return false;
6065
6066 // It's safe to outline from MF.
6067 return true;
6068}
6069
// Per-basic-block legality check for the outliner. Uses liveness to decide
// whether R12/CPSR/LR are safely available, and records block-level facts
// (calls present, LR availability) in `Flags` for later candidate costing.
// NOTE(review): the signature line, the live-reg-units declaration/accumulate
// loop header, and the `Flags |= ...` statements were elided by the listing
// extractor (gaps at lines 6070, 6077, 6079, 6089, 6104, 6113).
6071 unsigned &Flags) const {
6072 // Check if LR is available through all of the MBB. If it's not, then set
6073 // a flag.
6074 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6075 "Suitable Machine Function for outlining must track liveness");
6076

// Accumulate liveness over the block's instructions (accumulation loop
// header elided above this line).
6080 LRU.accumulate(MI);
6081
6082 // Check if each of the unsafe registers are available...
6083 bool R12AvailableInBlock = LRU.available(ARM::R12);
6084 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6085
6086 // If all of these are dead (and not live out), we know we don't have to check
6087 // them later.
6088 if (R12AvailableInBlock && CPSRAvailableInBlock)

6091 // Now, add the live outs to the set.
6092 LRU.addLiveOuts(MBB);
6093
6094 // If any of these registers is available in the MBB, but also a live out of
6095 // the block, then we know outlining is unsafe.
6096 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6097 return false;
6098 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6099 return false;
6100
6101 // Check if there's a call inside this MachineBasicBlock. If there is, then
6102 // set a flag.
6103 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))

6106 // LR liveness is overestimated in return blocks.
6107
6108 bool LRIsAvailable =
6109 MBB.isReturnBlock() && !MBB.back().isCall()
6110 ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
6111 : LRU.available(ARM::LR);
6112 if (!LRIsAvailable)

6115 return true;
6116}
6117
// Classify a single instruction for the outliner: Legal / Illegal /
// LegalTerminator / Invisible. Rejects PIC-label instructions, MVE and
// low-overhead-loop pseudos, LR/PC readers, unknown calls, IT-block state,
// and CFI; allows stack instructions only when their offsets are provably
// fixable.
// NOTE(review): the signature line and the various
// `return outliner::InstrType::...;` result lines were elided by the listing
// extractor (gaps in the embedded numbering); each truncated `if` below
// originally ended in one of those returns.
6121 unsigned Flags) const {
6122 MachineInstr &MI = *MIT;

6125 // PIC instructions contain labels, outlining them would break offset
6126 // computing.
6127 unsigned Opc = MI.getOpcode();
6128 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6129 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6130 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6131 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6132 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6133 Opc == ARM::t2MOV_ga_pcrel)

6136 // Be conservative with ARMv8.1 MVE instructions.
6137 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6138 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6139 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6140 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6141 Opc == ARM::t2LoopEndDec)

6144 const MCInstrDesc &MCID = MI.getDesc();
6145 uint64_t MIFlags = MCID.TSFlags;
6146 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)

6149 // Is this a terminator for a basic block?
6150 if (MI.isTerminator())
6151 // TargetInstrInfo::getOutliningType has already filtered out anything
6152 // that would break this, so we can allow it here.

6155 // Don't outline if link register or program counter value are used.
6156 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))

6159 if (MI.isCall()) {
6160 // Get the function associated with the call. Look at each operand and find
6161 // the one that represents the callee and get its name.
6162 const Function *Callee = nullptr;
6163 for (const MachineOperand &MOP : MI.operands()) {
6164 if (MOP.isGlobal()) {
6165 Callee = dyn_cast<Function>(MOP.getGlobal());
6166 break;
6167 }
6168 }
6169
6170 // Don't outline calls to "mcount" like functions, in particular Linux
6171 // kernel function tracing relies on it.
6172 if (Callee &&
6173 (Callee->getName() == "\01__gnu_mcount_nc" ||
6174 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))

6177 // If we don't know anything about the callee, assume it depends on the
6178 // stack layout of the caller. In that case, it's only legal to outline
6179 // as a tail-call. Explicitly list the call instructions we know about so
6180 // we don't get unexpected results with call pseudo-instructions.
6181 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6182 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6183 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6184 Opc == ARM::tBLXi)
6185 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6186
6187 if (!Callee)
6188 return UnknownCallOutlineType;
6189
6190 // We have a function we have information about. Check if it's something we
6191 // can safely outline.
6192 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
6193
6194 // We don't know what's going on with the callee at all. Don't touch it.
6195 if (!CalleeMF)
6196 return UnknownCallOutlineType;
6197
6198 // Check if we know anything about the callee saves on the function. If we
6199 // don't, then don't touch it, since that implies that we haven't computed
6200 // anything about its stack frame yet.
6201 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6202 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6203 MFI.getNumObjects() > 0)
6204 return UnknownCallOutlineType;
6205
6206 // At this point, we can say that CalleeMF ought to not pass anything on the
6207 // stack. Therefore, we can outline it.
6209 }
6210
6211 // Since calls are handled, don't touch LR or PC
6212 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))

6215 // Does this use the stack?
6216 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6217 // True if there is no chance that any outlined candidate from this range
6218 // could require stack fixups. That is, both
6219 // * LR is available in the range (No save/restore around call)
6220 // * The range doesn't include calls (No save/restore in outlined frame)
6221 // are true.
6222 // These conditions also ensure correctness of the return address
6223 // authentication - we insert sign and authentication instructions only if
6224 // we save/restore LR on stack, but then this condition ensures that the
6225 // outlined range does not modify the SP, therefore the SP value used for
6226 // signing is the same as the one used for authentication.
6227 // FIXME: This is very restrictive; the flags check the whole block,
6228 // not just the bit we will try to outline.
6229 bool MightNeedStackFixUp =

6233 if (!MightNeedStackFixUp)

6236 // Any modification of SP will break our code to save/restore LR.
6237 // FIXME: We could handle some instructions which add a constant offset to
6238 // SP, with a bit more work.
6239 if (MI.modifiesRegister(ARM::SP, TRI))

6242 // At this point, we have a stack instruction that we might need to fix
6243 // up. We'll handle it if it's a load or store.
6244 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6245 false))

6248 // We can't fix it up, so don't outline it.
6250 }
6251
6252 // Be conservative with IT blocks.
6253 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6254 MI.modifiesRegister(ARM::ITSTATE, TRI))

6257 // Don't outline CFI instructions.
6258 if (MI.isCFIInstruction())

6262}
6263
6264void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6265 for (MachineInstr &MI : MBB) {
6266 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6267 }
6268}
6269
// Emit the LR spill for an outlined frame: a pre-indexed store of LR (and,
// when Auth is set, the PAC computed into R12 via t2PAC) below SP, plus the
// matching CFI records when CFI is requested.
// NOTE(review): predicate-operand lines of the BuildMI chains (lines 6286 and
// 6294, presumably `.add(predOps(ARMCC::AL))`) were elided by the extractor.
6270void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6271 MachineBasicBlock::iterator It, bool CFI,
6272 bool Auth) const {
// Stack must stay at least 8-byte aligned so the (possibly paired) store of
// R12+LR is legal.
6273 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6274 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6275 assert(Align >= 8 && Align <= 256);
6276 if (Auth) {
6277 assert(Subtarget.isThumb2());
6278 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6279 // sequence.
6280 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6281 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6282 .addReg(ARM::R12, RegState::Kill)
6283 .addReg(ARM::LR, RegState::Kill)
6284 .addReg(ARM::SP)
6285 .addImm(-Align)
6287 .setMIFlags(MIFlags);
6288 } else {
6289 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6290 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6291 .addReg(ARM::LR, RegState::Kill)
6292 .addReg(ARM::SP)
6293 .addImm(-Align)
6295 .setMIFlags(MIFlags);
6296 }
6297
6298 if (!CFI)
6299 return;
6300
6301 // Add a CFI, saying CFA is offset by Align bytes from SP.
6302 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
6303 CFIBuilder.buildDefCFAOffset(Align);
6304
6305 // Add a CFI saying that the LR that we want to find is now higher than
6306 // before.
6307 int LROffset = Auth ? Align - 4 : Align;
6308 CFIBuilder.buildOffset(ARM::LR, -LROffset);
6309 if (Auth) {
6310 // Add a CFI for the location of the return address PAC.
6311 CFIBuilder.buildOffset(ARM::RA_AUTH_CODE, -Align);
6312 }
6313}
6314
// Mirror of saveLROnStack: reload LR (and the PAC into R12 when Auth) with a
// post-indexed load, emit the frame-destroy CFI records, and finally emit
// t2AUT to authenticate the restored return address.
// NOTE(review): the second signature line (line 6316, the iterator
// parameter) and the predicate-operand lines of the BuildMI chains (6329,
// 6340) were elided by the extractor.
6315void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6317 bool CFI, bool Auth) const {
6318 int Align = Subtarget.getStackAlignment().value();
6319 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6320 if (Auth) {
6321 assert(Subtarget.isThumb2());
6322 // Restore return address PAC and LR.
6323 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6324 .addReg(ARM::R12, RegState::Define)
6325 .addReg(ARM::LR, RegState::Define)
6326 .addReg(ARM::SP, RegState::Define)
6327 .addReg(ARM::SP)
6328 .addImm(Align)
6330 .setMIFlags(MIFlags);
6331 // LR authentication is after the CFI instructions, below.
6332 } else {
6333 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6334 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6335 .addReg(ARM::SP, RegState::Define)
6336 .addReg(ARM::SP);
// ARM-mode LDR_POST_IMM carries an extra (unused) register operand that the
// Thumb encoding does not.
6337 if (!Subtarget.isThumb())
6338 MIB.addReg(0);
6339 MIB.addImm(Subtarget.getStackAlignment().value())
6341 .setMIFlags(MIFlags);
6342 }
6343
6344 if (CFI) {
6345 // Now stack has moved back up and we have restored LR.
6346 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameDestroy);
6347 CFIBuilder.buildDefCFAOffset(0);
6348 CFIBuilder.buildRestore(ARM::LR);
6349 if (Auth)
6350 CFIBuilder.buildUndefined(ARM::RA_AUTH_CODE);
6351 }
6352
6353 if (Auth)
6354 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6355}
6356
// Build the prologue/epilogue of an outlined function: rewrite thunk calls
// into tail calls, insert LR save/restore around interior calls, append a
// return for non-tail-call frames, and fix up stack offsets when the frame
// moved SP.
// NOTE(review): the opening signature lines
// (`void ARMBaseInstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB, ...`),
// the declaration of iterator `Et` (line 6384), and a predicate-operand line
// after the return BuildMI (line 6418) were elided by the extractor.
6359 const outliner::OutlinedFunction &OF) const {
6360 // For thunk outlining, rewrite the last instruction from a call to a
6361 // tail-call.
6362 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6363 MachineInstr *Call = &*--MBB.instr_end();
6364 bool isThumb = Subtarget.isThumb();
// On Thumb the callee operand of the call sits at index 2 (after the
// predicate operands); on ARM it is operand 0.
6365 unsigned FuncOp = isThumb ? 2 : 0;
6366 unsigned Opc = Call->getOperand(FuncOp).isReg()
6367 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6368 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6369 : ARM::tTAILJMPdND
6370 : ARM::TAILJMPd;
6371 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6372 .add(Call->getOperand(FuncOp));
6373 if (isThumb && !Call->getOperand(FuncOp).isReg())
6374 MIB.add(predOps(ARMCC::AL));
6375 Call->eraseFromParent();
6376 }
6377
6378 // Is there a call in the outlined range?
6379 auto IsNonTailCall = [](MachineInstr &MI) {
6380 return MI.isCall() && !MI.isReturn();
6381 };
6382 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6383 MachineBasicBlock::iterator It = MBB.begin();

6386 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6387 OF.FrameConstructionID == MachineOutlinerThunk)
6388 Et = std::prev(MBB.end());
6389
6390 // We have to save and restore LR, we need to add it to the liveins if it
6391 // is not already part of the set. This is sufficient since outlined
6392 // functions only have one block.
6393 if (!MBB.isLiveIn(ARM::LR))
6394 MBB.addLiveIn(ARM::LR);
6395
6396 // Insert a save before the outlined region
6397 bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true);
6398 saveLROnStack(MBB, It, true, Auth);
6399
6400 // Fix up the instructions in the range, since we're going to modify the
6401 // stack.
6402 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6403 "Can only fix up stack references once");
6404 fixupPostOutline(MBB);
6405
6406 // Insert a restore before the terminator for the function. Restore LR.
6407 restoreLRFromStack(MBB, Et, true, Auth);
6408 }
6409
6410 // If this is a tail call outlined function, then there's already a return.
6411 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6412 OF.FrameConstructionID == MachineOutlinerThunk)
6413 return;
6414
6415 // Here we have to insert the return ourselves. Get the correct opcode from
6416 // current feature set.
6417 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))

6420 // Did we have to modify the stack by saving the link register?
6421 if (OF.FrameConstructionID != MachineOutlinerDefault &&
6422 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6423 return;
6424
6425 // We modified the stack.
6426 // Walk over the basic block and fix up all the stack accesses.
6427 fixupPostOutline(MBB);
6428}
6429
// Insert the call to an outlined function at the candidate's call site,
// choosing tail-jump / plain BL / BL with LR saved in a register / BL with
// LR saved on the stack according to the candidate's CallConstructionID.
// Returns an iterator to the inserted call.
// NOTE(review): the opening signature lines of insertOutlinedCall (through
// line 6434, including the declarations of `It`, `CallPt`, `M`, `MF`, `MBB`,
// `C` and `MIB`) and the CFI-builder lines guarded by `!AFI.isLRSpilled()`
// (6475, 6480) were elided by the extractor.
6435 unsigned Opc;
6436 bool isThumb = Subtarget.isThumb();
6437
6438 // Are we tail calling?
6439 if (C.CallConstructionID == MachineOutlinerTailCall) {
6440 // If yes, then we can just branch to the label.
6441 Opc = isThumb
6442 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6443 : ARM::TAILJMPd;
6444 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6445 .addGlobalAddress(M.getNamedValue(MF.getName()));
6446 if (isThumb)
6447 MIB.add(predOps(ARMCC::AL));
6448 It = MBB.insert(It, MIB);
6449 return It;
6450 }
6451
6452 // Create the call instruction.
6453 Opc = isThumb ? ARM::tBL : ARM::BL;
6454 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6455 if (isThumb)
6456 CallMIB.add(predOps(ARMCC::AL));
6457 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6458
6459 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6460 C.CallConstructionID == MachineOutlinerThunk) {
6461 // No, so just insert the call.
6462 It = MBB.insert(It, CallMIB);
6463 return It;
6464 }
6465
6466 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6467 // Can we save to a register?
6468 if (C.CallConstructionID == MachineOutlinerRegSave) {
6469 Register Reg = findRegisterToSaveLRTo(C);
6470 assert(Reg != 0 && "No callee-saved register available?");
6471
6472 // Save and restore LR from that register.
6473 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6474 if (!AFI.isLRSpilled())
6476 .buildRegister(ARM::LR, Reg);
6477 CallPt = MBB.insert(It, CallMIB);
6478 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6479 if (!AFI.isLRSpilled())
// Step back so the returned iterator points at the inserted call.
6481 It--;
6482 return CallPt;
6483 }
6484 // We have the default case. Save and restore from SP.
6485 if (!MBB.isLiveIn(ARM::LR))
6486 MBB.addLiveIn(ARM::LR);
6487 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6488 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6489 CallPt = MBB.insert(It, CallMIB);
6490 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6491 It--;
6492 return CallPt;
6493}
6494
// Outline by default only on M-class cores when the function is marked
// minsize — the targets where the outliner's size savings are worth it.
// NOTE(review): the opening signature line
// (`bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(`) was elided
// by the extractor.
6496 MachineFunction &MF) const {
6497 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6498}
6499
6500bool ARMBaseInstrInfo::isReMaterializableImpl(
6501 const MachineInstr &MI) const {
6502 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6503 // the tail predication conversion. This means that the element count
6504 // register has to be live for longer, but that has to be better than
6505 // spill/restore and VPT predication.
// NOTE(review): the right-hand side of this disjunction (line 6507,
// presumably the TargetInstrInfo base-class query) was elided by the
// extractor.
6506 return (isVCTP(&MI) && !isPredicated(MI)) ||
6508}
6509
// Select the BLX opcode, using the IP-avoiding variant when SLS-BLR
// hardening is enabled for this function's subtarget.
// NOTE(review): the signature line (line 6510, the `llvm::getBLXOpcode`
// declaration) was elided by the extractor.
6511 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6512 : ARM::BLX;
6513}
6514
// Thumb counterpart of getBLXOpcode: pick tBLXr or its SLS-hardened
// no-IP variant.
// NOTE(review): the signature line (line 6515) was elided by the extractor.
6516 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6517 : ARM::tBLXr;
6518}
6519
// Predicated-BLX variant of the same selection: BLX_pred or its SLS-hardened
// no-IP form.
// NOTE(review): the signature line (line 6520) was elided by the extractor.
6521 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6522 : ARM::BLX_pred;
6523}
6524
6525namespace {
// Software-pipeliner hooks for ARM loops. Recognizes two loop shapes
// (t2Bcc with a CPSR setter, and t2LoopEnd/t2LoopDec low-overhead loops),
// excludes the loop-control instructions from scheduling, and rejects
// proposed schedules whose cross-iteration values would exceed register
// pressure limits.
6526class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6527 MachineInstr *EndLoop, *LoopCount;
6528 MachineFunction *MF;
6529 const TargetInstrInfo *TII;
6530
6531 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6532 // [LAST_IS_USE] : last reference to register in schedule is a use
6533 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6534 static int constexpr MAX_STAGES = 30;
6535 static int constexpr LAST_IS_USE = MAX_STAGES;
6536 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6537 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6538 typedef std::map<Register, IterNeed> IterNeeds;
6539
6540 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6541 const IterNeeds &CIN);
6542 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6543
6544 // Meanings of the various stuff with loop types:
6545 // t2Bcc:
6546 // EndLoop = branch at end of original BB that will become a kernel
6547 // LoopCount = CC setter live into branch
6548 // t2LoopEnd:
6549 // EndLoop = branch at end of original BB
6550 // LoopCount = t2LoopDec
6551public:
6552 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6553 : EndLoop(EndLoop), LoopCount(LoopCount),
6554 MF(EndLoop->getParent()->getParent()),
6555 TII(MF->getSubtarget().getInstrInfo()) {}
6556
6557 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6558 // Only ignore the terminator.
6559 return MI == EndLoop || MI == LoopCount;
6560 }
6561
6562 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
// Veto the schedule only when the cross-iteration register-pressure model
// says it would exceed the target's pressure-set limits.
6563 if (tooMuchRegisterPressure(SSD, SMS))
6564 return false;
6565
6566 return true;
6567 }
6568
// Emit (when needed) a compare whose branch condition tests whether the
// trip count exceeds TC, appending the condition operands to Cond.
// NOTE(review): a few statement lines inside this method (e.g. 6577, 6592,
// 6594 — condition-reversal and flag operands) were elided by the extractor.
6569 std::optional<bool> createTripCountGreaterCondition(
6570 int TC, MachineBasicBlock &MBB,
6571 SmallVectorImpl<MachineOperand> &Cond) override {
6572
6573 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6574 Cond.push_back(EndLoop->getOperand(1));
6575 Cond.push_back(EndLoop->getOperand(2));
6576 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6578 }
6579 return {};
6580 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6581 // General case just lets the unrolled t2LoopDec do the subtraction and
6582 // therefore just needs to check if zero has been reached.
6583 MachineInstr *LoopDec = nullptr;
6584 for (auto &I : MBB.instrs())
6585 if (I.getOpcode() == ARM::t2LoopDec)
6586 LoopDec = &I;
6587 assert(LoopDec && "Unable to find copied LoopDec");
6588 // Check if we're done with the loop.
6589 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6590 .addReg(LoopDec->getOperand(0).getReg())
6591 .addImm(0)
6593 .addReg(ARM::NoRegister);
6595 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6596 return {};
6597 } else
6598 llvm_unreachable("Unknown EndLoop");
6599 }
6600
6601 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6602
6603 void adjustTripCount(int TripCountAdjust) override {}
6604};
6605
// Bias the pressure tracker by the number of cross-iteration copies each
// register needs (minus the one the tracker already counts when the value is
// SEEN_AS_LIVE), so MaxSetPressure reflects the pipelined kernel's true
// demand.
// NOTE(review): the increase/decrease calls on RPT (lines 6612-6613 and
// 6619-6620) were elided by the extractor; only the loop shells remain.
6606void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6607 const IterNeeds &CIN) {
6608 // Increase pressure by the amounts in CrossIterationNeeds
6609 for (const auto &N : CIN) {
6610 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6611 for (int I = 0; I < Cnt; ++I)
6614 }
6615 // Decrease pressure by the amounts in CrossIterationNeeds
6616 for (const auto &N : CIN) {
6617 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6618 for (int I = 0; I < Cnt; ++I)
6621 }
6622}
6623
// Estimate the register pressure of the proposed pipelined schedule:
// determine which virtual registers are live across iterations (and for how
// many stages), replay the schedule in reverse through a RegPressureTracker
// while biasing for those cross-iteration values, and return true when any
// pressure set would exceed its limit.
6624bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6625 SMSchedule &SMS) {
6626 IterNeeds CrossIterationNeeds;
6627
6628 // Determine which values will be loop-carried after the schedule is
6629 // applied
6630
6631 for (auto &SU : SSD.SUnits) {
6632 const MachineInstr *MI = SU.getInstr();
6633 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6634 for (auto &S : SU.Succs)
// An anti-dependence into a PHI marks a value carried into the next
// iteration; bit 0 records that single-iteration need.
6635 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6636 Register Reg = S.getReg();
6637 if (Reg.isVirtual())
6638 CrossIterationNeeds[Reg.id()].set(0);
6639 } else if (S.isAssignedRegDep()) {
6640 int OStg = SMS.stageScheduled(S.getSUnit());
6641 if (OStg >= 0 && OStg != Stg) {
6642 Register Reg = S.getReg();
6643 if (Reg.isVirtual())
// Mark one bit per stage the value must survive between def and use.
6644 CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
6645 }
6646 }
6647 }
6648
6649 // Determine more-or-less what the proposed schedule (reversed) is going to
6650 // be; it might not be quite the same because the within-cycle ordering
6651 // created by SMSchedule depends upon changes to help with address offsets and
6652 // the like.
6653 std::vector<SUnit *> ProposedSchedule;
6654 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6655 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6656 ++Stage) {
6657 std::deque<SUnit *> Instrs =
6658 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6659 std::sort(Instrs.begin(), Instrs.end(),
6660 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6661 llvm::append_range(ProposedSchedule, Instrs);
6662 }
6663
6664 // Learn whether the last use/def of each cross-iteration register is a use or
6665 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6666 // and we do not have to add the pressure.
6667 for (auto *SU : ProposedSchedule)
6668 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6669 ++OperI) {
6670 auto MO = *OperI;
6671 if (!MO.isReg() || !MO.getReg())
6672 continue;
6673 Register Reg = MO.getReg();
6674 auto CIter = CrossIterationNeeds.find(Reg.id());
6675 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6676 CIter->second[SEEN_AS_LIVE])
6677 continue;
6678 if (MO.isDef() && !MO.isDead())
6679 CIter->second.set(SEEN_AS_LIVE);
6680 else if (MO.isUse())
6681 CIter->second.set(LAST_IS_USE);
6682 }
// LAST_IS_USE was only needed to stop the scan above; clear it before the
// pressure replay.
6683 for (auto &CI : CrossIterationNeeds)
6684 CI.second.reset(LAST_IS_USE);
6685
6686 RegionPressure RecRegPressure;
6687 RegPressureTracker RPTracker(RecRegPressure);
6688 RegisterClassInfo RegClassInfo;
6689 RegClassInfo.runOnMachineFunction(*MF);
6690 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6691 EndLoop->getParent()->end(), false, false);
6692
6693 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6694
// Replay the proposed schedule bottom-up, keeping the cross-iteration bias
// in sync as defs retire needs and uses (re)introduce them.
6695 for (auto *SU : ProposedSchedule) {
6696 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6697 RPTracker.setPos(std::next(CurInstI));
6698 RPTracker.recede();
6699
6700 // Track what cross-iteration registers would be seen as live
6701 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6702 auto MO = *OperI;
6703 if (!MO.isReg() || !MO.getReg())
6704 continue;
6705 Register Reg = MO.getReg();
6706 if (MO.isDef() && !MO.isDead()) {
6707 auto CIter = CrossIterationNeeds.find(Reg.id());
6708 if (CIter != CrossIterationNeeds.end()) {
6709 CIter->second.reset(0);
6710 CIter->second.reset(SEEN_AS_LIVE);
6711 }
6712 }
6713 }
6714 for (auto &S : SU->Preds) {
6715 auto Stg = SMS.stageScheduled(SU);
6716 if (S.isAssignedRegDep()) {
6717 Register Reg = S.getReg();
6718 auto CIter = CrossIterationNeeds.find(Reg.id());
6719 if (CIter != CrossIterationNeeds.end()) {
6720 auto Stg2 = SMS.stageScheduled(S.getSUnit());
6721 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6722 if (Stg - Stg2 < MAX_STAGES)
6723 CIter->second.set(Stg - Stg2);
6724 CIter->second.set(SEEN_AS_LIVE);
6725 }
6726 }
6727 }
6728
6729 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6730 }
6731
6732 auto &P = RPTracker.getPressure().MaxSetPressure;
6733 for (unsigned I = 0, E = P.size(); I < E; ++I) {
6734 // Exclude some Neon register classes.
6735 if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
6736 I == ARM::DTriple_with_qsub_0_in_QPR)
6737 continue;
6738
6739 if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
6740 return true;
6741 }
6742 }
6743 return false;
6744}
6745
6746} // namespace
6747
6747
// Recognize loops the ARM software pipeliner can handle and return the
// corresponding ARMPipelinerLoopInfo, or nullptr when the loop shape is not
// supported (calls inside, VCTP present, missing CC setter / LoopDec /
// DoLoopStart).
// NOTE(review): the signature line (`ARMBaseInstrInfo::
// analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {`, line 6749)
// and the declaration of terminator iterator `I` (line 6750) were elided by
// the extractor.
6748std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6751 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6752 if (Preheader == LoopBB)
6753 Preheader = *std::next(LoopBB->pred_begin());
6754
6755 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6756 // If the branch is a Bcc, then the CPSR should be set somewhere within the
6757 // block. We need to determine the reaching definition of CPSR so that
6758 // it can be marked as non-pipelineable, allowing the pipeliner to force
6759 // it into stage 0 or give up if it cannot or will not do so.
6760 MachineInstr *CCSetter = nullptr;
6761 for (auto &L : LoopBB->instrs()) {
6762 if (L.isCall())
6763 return nullptr;
6764 if (isCPSRDefined(L))
6765 CCSetter = &L;
6766 }
6767 if (CCSetter)
6768 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
6769 else
6770 return nullptr; // Unable to find the CC setter, so unable to guarantee
6771 // that pipeline will work
6772 }
6773
6774 // Recognize:
6775 // preheader:
6776 // %1 = t2DoLoopStart %0
6777 // loop:
6778 // %2 = phi %1, <not loop>, %..., %loop
6779 // %3 = t2LoopDec %2, <imm>
6780 // t2LoopEnd %3, %loop
6781
6782 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
6783 for (auto &L : LoopBB->instrs())
6784 if (L.isCall())
6785 return nullptr;
6786 else if (isVCTP(&L))
6787 return nullptr;
6788 Register LoopDecResult = I->getOperand(0).getReg();
// NOTE(review): the MachineRegisterInfo declaration feeding `MRI` (line
// 6789) was elided by the extractor.
6790 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
6791 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
6792 return nullptr;
6793 MachineInstr *LoopStart = nullptr;
6794 for (auto &J : Preheader->instrs())
6795 if (J.getOpcode() == ARM::t2DoLoopStart)
6796 LoopStart = &J;
6797 if (!LoopStart)
6798 return nullptr;
6799 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
6800 }
6801 return nullptr;
6802}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isThumb(const MCSubtargetInfo &STI)
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, MachineInstr &MI, MCRegister DReg, unsigned Lane, MCRegister &ImplicitSReg)
getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, set ImplicitSReg to a register n...
static const MachineInstr * getBundledUseMI(const TargetRegisterInfo *TRI, const MachineInstr &MI, unsigned Reg, unsigned &UseIdx, unsigned &Dist)
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI)
Create a copy of a const pool value.
static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, int CmpMask, bool CommonUse)
isSuitableForMask - Identify a suitable 'and' instruction that operates on the given source register ...
static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr &DefMI, const MCInstrDesc &DefMCID, unsigned DefAlign)
Return the number of cycles to add to (or subtract from) the static itinerary based on the def opcode...
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, const MachineInstr &MI)
static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, unsigned SReg, unsigned &Lane)
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[]
static bool isEligibleForITBlock(const MachineInstr *MI)
static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC)
getCmpToAddCondition - assume the flags are set by CMP(a,b), return the condition code if we modify t...
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1)
static bool isLRAvailable(const TargetRegisterInfo &TRI, MachineBasicBlock::reverse_iterator I, MachineBasicBlock::reverse_iterator E)
static const ARM_MLxEntry ARM_MLxTable[]
static bool isRedundantFlagInstr(const MachineInstr *CmpI, Register SrcReg, Register SrcReg2, int64_t ImmValue, const MachineInstr *OI, bool &IsThumb1)
isRedundantFlagInstr - check whether the first instruction, whose only purpose is to update flags,...
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, unsigned NumRegs)
static const MachineInstr * getBundledDefMI(const TargetRegisterInfo *TRI, const MachineInstr *MI, unsigned Reg, unsigned &DefIdx, unsigned &Dist)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define P(N)
PowerPC TLS Dynamic Call Fixup
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static bool isCPSRDefined(const MachineInstr &MI)
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction to set the zero flag so that we can remove a "comparis...
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
foldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immedia...
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister DestReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getPartialRegUpdateClearance(const MachineInstr &, unsigned, const TargetRegisterInfo *) const override
unsigned getNumLDMAddresses(const MachineInstr &MI) const
Get the number of addresses by LDM or VLDM or zero for unknown.
MachineInstr * optimizeSelect(MachineInstr &MI, SmallPtrSetImpl< MachineInstr * > &SeenMIs, bool) const override
bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI) const override
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Returns the size of the specified MachineInstr.
void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister SrcReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
const MachineInstrBuilder & AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, RegState State) const
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
ARM supports the MachineOutliner.
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
Enable outlining by default at -Oz.
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const override
bool isPredicated(const MachineInstr &MI) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, unsigned LoadImmOpc, unsigned LoadOpc) const
bool isPredicable(const MachineInstr &MI) const override
isPredicable - Return true if the specified instruction can be predicated.
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
Specialization of TargetInstrInfo::describeLoadedValue, used to enhance debug entry value description...
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, unsigned NumInsts) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
const ARMBaseRegisterInfo & getRegisterInfo() const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to determine if two loads are lo...
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool getRegSequenceLikeInputs(const MachineInstr &MI, unsigned DefIdx, SmallVectorImpl< RegSubRegPairAndIdx > &InputRegs) const override
Build the equivalent inputs of a REG_SEQUENCE for the given MI and DefIdx.
unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override
bool getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override
Build the equivalent inputs of a INSERT_SUBREG for the given MI and DefIdx.
bool expandPostRAPseudo(MachineInstr &MI) const override
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to determine (in conjunction w...
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
VFP/NEON execution domains.
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
bool isSwiftFastImmShift(const MachineInstr *MI) const
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
ARMBaseInstrInfo(const ARMSubtarget &STI, const ARMBaseRegisterInfo &TRI)
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void breakPartialRegDependency(MachineInstr &, unsigned, const TargetRegisterInfo *TRI) const override
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
const ARMSubtarget & getSubtarget() const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPairAndIdx &InputReg) const override
Build the equivalent inputs of a EXTRACT_SUBREG for the given MI and DefIdx.
bool shouldSink(const MachineInstr &MI) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMCP::ARMCPModifier getModifier() const
virtual bool hasSameValue(ARMConstantPoolValue *ACPV)
hasSameValue - Return true if this ARM constpool value can share the same constantpool entry as anoth...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isCortexA7() const
bool isSwift() const
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb1Only() const
bool isThumb2() const
bool isLikeA9() const
Align getStackAlignment() const
getStackAlignment - Returns the minimum alignment known to hold of the stack frame on entry to the fu...
bool enableMachinePipeliner() const override
Returns true if machine pipeliner should be enabled.
bool hasMinSize() const
bool isCortexA8() const
@ DoubleIssueCheckUnalignedAccess
Can load/store 2 registers/cycle, but needs an extra cycle if the access is not 64-bit aligned.
@ SingleIssue
Can load/store 1 register/cycle.
@ DoubleIssue
Can load/store 2 registers/cycle.
@ SingleIssuePlusExtras
Can load/store 1 register/cycle, but needs an extra cycle for address computation and potentially als...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type size() const
size - Returns the number of bits in this bitvector.
Definition BitVector.h:178
LLVM_ABI uint64_t scale(uint64_t Num) const
Scale a large integer.
BranchProbability getCompl() const
Helper class for creating CFI instructions and inserting them into MIR.
void buildRegister(MCRegister Reg1, MCRegister Reg2) const
void buildRestore(MCRegister Reg) const
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A debug info location.
Definition DebugLoc.h:123
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
bool hasDLLImportStorageClass() const
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
Itinerary data supplied by a subtarget to be used by a target.
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
unsigned getStageLatency(unsigned ItinClassIndx) const
Return the total stage latency of the given class.
std::optional< unsigned > getOperandLatency(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Compute and return the use operand latency of a given itinerary class and operand index if the value ...
bool hasPipelineForwarding(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Return true if there is a pipeline forwarding between instructions of itinerary classes DefClass and ...
bool isEmpty() const
Returns true if there are no itineraries.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
Describe properties that are true of each instruction in the target description file.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayLoad() const
Return true if this instruction could possibly read memory.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
bool isCall() const
Return true if the instruction is a call.
unsigned getOpcode() const
Return the opcode number for this descriptor.
LLVM_ABI bool hasImplicitDefOfPhysReg(MCRegister Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool isValid() const
isValid - Returns true until all the operands have been visited.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
@ LQR_Live
Register is known to be (at least partially) live.
@ LQR_Unknown
Register liveness not decidable from local neighborhood.
This class is a data container for one entry in a MachineConstantPool.
union llvm::MachineConstantPoolEntry::@004270020304201266316354007027341142157160323045 Val
The constant itself.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
MachineConstantPoolValue * MachineCPVal
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
ArrayRef< MachineMemOperand * >::iterator mmo_iterator
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isCall(QueryType Type=AnyInBundle) const
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool isRegSequence() const
bool isInsertSubreg() const
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI void clearKillInfo()
Clears kill flags on all operands.
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
const TargetRegisterInfo * getTargetRegisterInfo() const
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void AddHazardRecognizer(std::unique_ptr< ScheduleHazardRecognizer > &&)
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void increaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
LLVM_ABI void decreaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition Register.h:60
constexpr unsigned id() const
Definition Register.h:100
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
This class represents the scheduled code.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Scheduling unit. This is a node in the scheduling DAG.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
std::vector< SUnit > SUnits
The scheduling units.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const InstrItineraryData * getInstrItineraries() const
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Wrapper class representing a virtual register or register unit.
Definition Register.h:181
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static CondCodes getOppositeCondition(CondCodes CC)
Definition ARMBaseInfo.h:48
ARMII - This namespace holds all of the target specific flags that instruction info tracks.
@ ThumbArithFlagSetting
@ MO_OPTION_MASK
MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects just that part of the flag set.
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
AddrMode
ARM Addressing Modes.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned char getAM5FP16Offset(unsigned AM5Opc)
unsigned getSORegOffset(unsigned Op)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getAM2Offset(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
ShiftOpc getSORegShOp(unsigned Op)
AddrOpc getAM5Op(unsigned AM5Opc)
bool isSOImmTwoPartValNeg(unsigned V)
isSOImmTwoPartValNeg - Return true if the specified value can be obtained by two SOImmVal,...
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
bool isSOImmTwoPartVal(unsigned V)
isSOImmTwoPartVal - Return true if the specified value can be obtained by or'ing together two SOImmVa...
AddrOpc getAM5FP16Op(unsigned AM5Opc)
unsigned getT2SOImmTwoPartSecond(unsigned Imm)
unsigned getT2SOImmTwoPartFirst(unsigned Imm)
bool isT2SOImmTwoPartVal(unsigned Imm)
unsigned char getAM5Offset(unsigned AM5Opc)
unsigned getSOImmTwoPartFirst(unsigned V)
getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, return the first chunk of it...
AddrOpc getAM2Op(unsigned AM2Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
Define some predicates that are used for node matching.
Definition ARMEHABI.h:25
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
InstrType
Represents how an instruction should be mapped by the outliner.
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
constexpr T rotr(T V, int R)
Definition bit.h:382
static bool isIndirectCall(const MachineInstr &MI)
MachineInstr * findCMPToFoldIntoCBZ(MachineInstr *Br, const TargetRegisterInfo *TRI)
Search backwards from a tBcc to find a tCMPi8 against 0, meaning we can convert them to a tCBZ or tCB...
static bool isCondBranchOpcode(int Opc)
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
static bool isPushOpcode(int Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond)
static bool isVCTP(const MachineInstr *MI)
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
bool IsCPSRDead< MachineInstr >(const MachineInstr *MI)
constexpr RegState getKillRegState(bool B)
unsigned getBLXpredOpcode(const MachineFunction &MF)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
static bool isIndirectBranchOpcode(int Opc)
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI)
Return true if Reg is defined between From and To.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Op::Description Desc
static bool isSEHInstruction(const MachineInstr &MI)
static bool isCalleeSavedRegister(MCRegister Reg, const MCPhysReg *CSRegs)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
static bool isJumpTableBranchOpcode(int Opc)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static bool isPopOpcode(int Opc)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, unsigned Inactive)
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Register FrameReg, int &Offset, const ARMBaseInstrInfo &TII)
rewriteARMFrameIndex / rewriteT2FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
static bool isIndirectControlFlowNotComingBack(const MachineInstr &MI)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
unsigned getMatchingCondBranchOpcode(unsigned Opc)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
auto partition(R &&Range, UnaryPredicate P)
Provide wrappers to std::partition which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:2033
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
static const char * ARMCondCodeToString(ARMCC::CondCodes CC)
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned gettBLXrOpcode(const MachineFunction &MF)
static bool isSpeculationBarrierEndBBOpcode(int Opc)
unsigned getBLXOpcode(const MachineFunction &MF)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
bool isV8EligibleForIT(const InstrType *Instr)
Definition ARMFeatures.h:24
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
constexpr RegState getUndefRegState(bool B)
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
#define N
ARM_MLxEntry - Record information about MLA / MLS instructions.
Map pseudo instructions that imply an 'S' bit onto real opcodes.
OutlinerCosts(const ARMSubtarget &target)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr LaneBitmask getAll()
Definition LaneBitmask.h:82
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.