// Source: LLVM (23.0.0git) — ARMBaseInstrInfo.cpp.
// Recovered from the doxygen source listing of this file; the embedded
// listing line numbers and dropped lines have been repaired below.
//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
12
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMSubtarget.h"
#include "MVETailPredUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <new>
#include <optional>
#include <string>
#include <utility>
#include <vector>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "arm-instrinfo"
74
75#define GET_INSTRINFO_CTOR_DTOR
76#include "ARMGenInstrInfo.inc"
77
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;    // MLA / MLS opcode
  uint16_t MulOpc;    // Expanded multiplication opcode
  uint16_t AddSubOpc; // Expanded add / sub opcode
  bool NegAcc;        // True if the acc is negated before the add / sub.
  bool HasLane;       // True if instruction has an extra "lane" operand.
};
86
87static const ARM_MLxEntry ARM_MLxTable[] = {
88 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
89 // fp scalar ops
90 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98
99 // fp SIMD ops
100 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108};
109
112 : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
113 Subtarget(STI) {
114 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
115 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
116 llvm_unreachable("Duplicated entries?");
117 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
118 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
119 }
120}
121
122// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
123// currently defaults to no prepass hazard recognizer.
126 const ScheduleDAG *DAG) const {
127 if (usePreRAHazardRecognizer()) {
128 const InstrItineraryData *II =
129 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
130 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
131 }
133}
134
135// Called during:
136// - pre-RA scheduling
137// - post-RA scheduling when FeatureUseMISched is set
139 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
141
142 // We would like to restrict this hazard recognizer to only
143 // post-RA scheduling; we can tell that we're post-RA because we don't
144 // track VRegLiveness.
145 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
146 // banks banked on bit 2. Assume that TCMs are in use.
147 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
149 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
150
151 // Not inserting ARMHazardRecognizerFPMLx because that would change
152 // legacy behavior
153
155 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
156 return MHR;
157}
158
159// Called during post-RA scheduling when FeatureUseMISched is not set
162 const ScheduleDAG *DAG) const {
164
165 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
166 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
167
169 if (BHR)
170 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
171 return MHR;
172}
173
174// Branch analysis.
175// Cond vector output format:
176// 0 elements indicates an unconditional branch
177// 2 elements indicates a conditional branch; the elements are
178// the condition to check and the CPSR.
179// 3 elements indicates a hardware loop end; the elements
180// are the opcode, the operand value to test, and a dummy
181// operand used to pad out to 3 operands.
184 MachineBasicBlock *&FBB,
186 bool AllowModify) const {
187 TBB = nullptr;
188 FBB = nullptr;
189
191 if (I == MBB.instr_begin())
192 return false; // Empty blocks are easy.
193 --I;
194
195 // Walk backwards from the end of the basic block until the branch is
196 // analyzed or we give up.
197 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
198 // Flag to be raised on unanalyzeable instructions. This is useful in cases
199 // where we want to clean up on the end of the basic block before we bail
200 // out.
201 bool CantAnalyze = false;
202
203 // Skip over DEBUG values, predicated nonterminators and speculation
204 // barrier terminators.
205 while (I->isDebugInstr() || !I->isTerminator() ||
206 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
207 I->getOpcode() == ARM::t2DoLoopStartTP){
208 if (I == MBB.instr_begin())
209 return false;
210 --I;
211 }
212
213 if (isIndirectBranchOpcode(I->getOpcode()) ||
214 isJumpTableBranchOpcode(I->getOpcode())) {
215 // Indirect branches and jump tables can't be analyzed, but we still want
216 // to clean up any instructions at the tail of the basic block.
217 CantAnalyze = true;
218 } else if (isUncondBranchOpcode(I->getOpcode())) {
219 TBB = I->getOperand(0).getMBB();
220 } else if (isCondBranchOpcode(I->getOpcode())) {
221 // Bail out if we encounter multiple conditional branches.
222 if (!Cond.empty())
223 return true;
224
225 assert(!FBB && "FBB should have been null.");
226 FBB = TBB;
227 TBB = I->getOperand(0).getMBB();
228 Cond.push_back(I->getOperand(1));
229 Cond.push_back(I->getOperand(2));
230 } else if (I->isReturn()) {
231 // Returns can't be analyzed, but we should run cleanup.
232 CantAnalyze = true;
233 } else if (I->getOpcode() == ARM::t2LoopEnd &&
234 MBB.getParent()
235 ->getSubtarget<ARMSubtarget>()
237 if (!Cond.empty())
238 return true;
239 FBB = TBB;
240 TBB = I->getOperand(1).getMBB();
241 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
242 Cond.push_back(I->getOperand(0));
243 Cond.push_back(MachineOperand::CreateImm(0));
244 } else {
245 // We encountered other unrecognized terminator. Bail out immediately.
246 return true;
247 }
248
249 // Cleanup code - to be run for unpredicated unconditional branches and
250 // returns.
251 if (!isPredicated(*I) &&
252 (isUncondBranchOpcode(I->getOpcode()) ||
253 isIndirectBranchOpcode(I->getOpcode()) ||
254 isJumpTableBranchOpcode(I->getOpcode()) ||
255 I->isReturn())) {
256 // Forget any previous condition branch information - it no longer applies.
257 Cond.clear();
258 FBB = nullptr;
259
260 // If we can modify the function, delete everything below this
261 // unconditional branch.
262 if (AllowModify) {
263 MachineBasicBlock::iterator DI = std::next(I);
264 while (DI != MBB.instr_end()) {
265 MachineInstr &InstToDelete = *DI;
266 ++DI;
267 // Speculation barriers must not be deleted.
268 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
269 continue;
270 InstToDelete.eraseFromParent();
271 }
272 }
273 }
274
275 if (CantAnalyze) {
276 // We may not be able to analyze the block, but we could still have
277 // an unconditional branch as the last instruction in the block, which
278 // just branches to layout successor. If this is the case, then just
279 // remove it if we're allowed to make modifications.
280 if (AllowModify && !isPredicated(MBB.back()) &&
281 isUncondBranchOpcode(MBB.back().getOpcode()) &&
282 TBB && MBB.isLayoutSuccessor(TBB))
284 return true;
285 }
286
287 if (I == MBB.instr_begin())
288 return false;
289
290 --I;
291 }
292
293 // We made it past the terminators without bailing out - we must have
294 // analyzed this branch successfully.
295 return false;
296}
297
299 int *BytesRemoved) const {
300 assert(!BytesRemoved && "code size not handled");
301
302 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
303 if (I == MBB.end())
304 return 0;
305
306 if (!isUncondBranchOpcode(I->getOpcode()) &&
307 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
308 return 0;
309
310 // Remove the branch.
311 I->eraseFromParent();
312
313 I = MBB.end();
314
315 if (I == MBB.begin()) return 1;
316 --I;
317 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
318 return 1;
319
320 // Remove the branch.
321 I->eraseFromParent();
322 return 2;
323}
324
329 const DebugLoc &DL,
330 int *BytesAdded) const {
331 assert(!BytesAdded && "code size not handled");
332 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
333 int BOpc = !AFI->isThumbFunction()
334 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
335 int BccOpc = !AFI->isThumbFunction()
336 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
337 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
338
339 // Shouldn't be a fall through.
340 assert(TBB && "insertBranch must not be told to insert a fallthrough");
341 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
342 "ARM branch conditions have two or three components!");
343
344 // For conditional branches, we use addOperand to preserve CPSR flags.
345
346 if (!FBB) {
347 if (Cond.empty()) { // Unconditional branch?
348 if (isThumb)
350 else
351 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
352 } else if (Cond.size() == 2) {
353 BuildMI(&MBB, DL, get(BccOpc))
354 .addMBB(TBB)
355 .addImm(Cond[0].getImm())
356 .add(Cond[1]);
357 } else
358 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
359 return 1;
360 }
361
362 // Two-way conditional branch.
363 if (Cond.size() == 2)
364 BuildMI(&MBB, DL, get(BccOpc))
365 .addMBB(TBB)
366 .addImm(Cond[0].getImm())
367 .add(Cond[1]);
368 else if (Cond.size() == 3)
369 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
370 if (isThumb)
371 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
372 else
373 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
374 return 2;
375}
376
379 if (Cond.size() == 2) {
380 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
381 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
382 return false;
383 }
384 return true;
385}
386
388 if (MI.isBundle()) {
390 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
391 while (++I != E && I->isInsideBundle()) {
392 int PIdx = I->findFirstPredOperandIdx();
393 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
394 return true;
395 }
396 return false;
397 }
398
399 int PIdx = MI.findFirstPredOperandIdx();
400 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
401}
402
404 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
405 const TargetRegisterInfo *TRI) const {
406
407 // First, let's see if there is a generic comment for this operand
408 std::string GenericComment =
410 if (!GenericComment.empty())
411 return GenericComment;
412
413 // If not, check if we have an immediate operand.
414 if (!Op.isImm())
415 return std::string();
416
417 // And print its corresponding condition code if the immediate is a
418 // predicate.
419 int FirstPredOp = MI.findFirstPredOperandIdx();
420 if (FirstPredOp != (int) OpIdx)
421 return std::string();
422
423 std::string CC = "CC::";
424 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
425 return CC;
426}
427
430 unsigned Opc = MI.getOpcode();
433 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
434 .addImm(Pred[0].getImm())
435 .addReg(Pred[1].getReg());
436 return true;
437 }
438
439 int PIdx = MI.findFirstPredOperandIdx();
440 if (PIdx != -1) {
441 MachineOperand &PMO = MI.getOperand(PIdx);
442 PMO.setImm(Pred[0].getImm());
443 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
444
445 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
446 // IT block. This affects how they are printed.
447 const MCInstrDesc &MCID = MI.getDesc();
448 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
449 assert(MCID.operands()[1].isOptionalDef() &&
450 "CPSR def isn't expected operand");
451 assert((MI.getOperand(1).isDead() ||
452 MI.getOperand(1).getReg() != ARM::CPSR) &&
453 "if conversion tried to stop defining used CPSR");
454 MI.getOperand(1).setReg(ARM::NoRegister);
455 }
456
457 return true;
458 }
459 return false;
460}
461
463 ArrayRef<MachineOperand> Pred2) const {
464 if (Pred1.size() > 2 || Pred2.size() > 2)
465 return false;
466
467 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
468 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
469 if (CC1 == CC2)
470 return true;
471
472 switch (CC1) {
473 default:
474 return false;
475 case ARMCC::AL:
476 return true;
477 case ARMCC::HS:
478 return CC2 == ARMCC::HI;
479 case ARMCC::LS:
480 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
481 case ARMCC::GE:
482 return CC2 == ARMCC::GT;
483 case ARMCC::LE:
484 return CC2 == ARMCC::LT;
485 }
486}
487
489 std::vector<MachineOperand> &Pred,
490 bool SkipDead) const {
491 bool Found = false;
492 for (const MachineOperand &MO : MI.operands()) {
493 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
494 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
495 if (ClobbersCPSR || IsCPSR) {
496
497 // Filter out T1 instructions that have a dead CPSR,
498 // allowing IT blocks to be generated containing T1 instructions
499 const MCInstrDesc &MCID = MI.getDesc();
500 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
501 SkipDead)
502 continue;
503
504 Pred.push_back(MO);
505 Found = true;
506 }
507 }
508
509 return Found;
510}
511
513 for (const auto &MO : MI.operands())
514 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
515 return true;
516 return false;
517}
518
520 switch (MI->getOpcode()) {
521 default: return true;
522 case ARM::tADC: // ADC (register) T1
523 case ARM::tADDi3: // ADD (immediate) T1
524 case ARM::tADDi8: // ADD (immediate) T2
525 case ARM::tADDrr: // ADD (register) T1
526 case ARM::tAND: // AND (register) T1
527 case ARM::tASRri: // ASR (immediate) T1
528 case ARM::tASRrr: // ASR (register) T1
529 case ARM::tBIC: // BIC (register) T1
530 case ARM::tEOR: // EOR (register) T1
531 case ARM::tLSLri: // LSL (immediate) T1
532 case ARM::tLSLrr: // LSL (register) T1
533 case ARM::tLSRri: // LSR (immediate) T1
534 case ARM::tLSRrr: // LSR (register) T1
535 case ARM::tMUL: // MUL T1
536 case ARM::tMVN: // MVN (register) T1
537 case ARM::tORR: // ORR (register) T1
538 case ARM::tROR: // ROR (register) T1
539 case ARM::tRSB: // RSB (immediate) T1
540 case ARM::tSBC: // SBC (register) T1
541 case ARM::tSUBi3: // SUB (immediate) T1
542 case ARM::tSUBi8: // SUB (immediate) T2
543 case ARM::tSUBrr: // SUB (register) T1
545 }
546}
547
548/// isPredicable - Return true if the specified instruction can be predicated.
549/// By default, this returns true for every instruction with a
550/// PredicateOperand.
552 if (!MI.isPredicable())
553 return false;
554
555 if (MI.isBundle())
556 return false;
557
559 return false;
560
561 const MachineFunction *MF = MI.getParent()->getParent();
562 const ARMFunctionInfo *AFI =
564
565 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
566 // In their ARM encoding, they can't be encoded in a conditional form.
567 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
568 return false;
569
570 // Make indirect control flow changes unpredictable when SLS mitigation is
571 // enabled.
572 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
573 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
574 return false;
575 if (ST.hardenSlsBlr() && isIndirectCall(MI))
576 return false;
577
578 if (AFI->isThumb2Function()) {
579 if (getSubtarget().restrictIT())
580 return isV8EligibleForIT(&MI);
581 }
582
583 return true;
584}
585
586namespace llvm {
587
588template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
589 for (const MachineOperand &MO : MI->operands()) {
590 if (!MO.isReg() || MO.isUndef() || MO.isUse())
591 continue;
592 if (MO.getReg() != ARM::CPSR)
593 continue;
594 if (!MO.isDead())
595 return false;
596 }
597 // all definitions of CPSR are dead
598 return true;
599}
600
601} // end namespace llvm
602
603/// GetInstSize - Return the size of the specified MachineInstr.
604///
606 const MachineBasicBlock &MBB = *MI.getParent();
607 const MachineFunction *MF = MBB.getParent();
608 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
609
610 const MCInstrDesc &MCID = MI.getDesc();
611
612 switch (MI.getOpcode()) {
613 default:
614 // Return the size specified in .td file. If there's none, return 0, as we
615 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
616 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
617 // contrast to AArch64 instructions which have a default size of 4 bytes for
618 // example.
619 return MCID.getSize();
620 case TargetOpcode::BUNDLE:
621 return getInstBundleLength(MI);
622 case TargetOpcode::COPY:
624 return 4;
625 else
626 return 2;
627 case ARM::CONSTPOOL_ENTRY:
628 case ARM::JUMPTABLE_INSTS:
629 case ARM::JUMPTABLE_ADDRS:
630 case ARM::JUMPTABLE_TBB:
631 case ARM::JUMPTABLE_TBH:
632 // If this machine instr is a constant pool entry, its size is recorded as
633 // operand #2.
634 return MI.getOperand(2).getImm();
635 case ARM::SPACE:
636 return MI.getOperand(1).getImm();
637 case ARM::INLINEASM:
638 case ARM::INLINEASM_BR: {
639 // If this machine instr is an inline asm, measure it.
640 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
642 Size = alignTo(Size, 4);
643 return Size;
644 }
645 }
646}
647
648unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
649 unsigned Size = 0;
651 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
652 while (++I != E && I->isInsideBundle()) {
653 assert(!I->isBundle() && "No nested bundle!");
655 }
656 return Size;
657}
658
661 MCRegister DestReg, bool KillSrc,
662 const ARMSubtarget &Subtarget) const {
663 unsigned Opc = Subtarget.isThumb()
664 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
665 : ARM::MRS;
666
668 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
669
670 // There is only 1 A/R class MRS instruction, and it always refers to
671 // APSR. However, there are lots of other possibilities on M-class cores.
672 if (Subtarget.isMClass())
673 MIB.addImm(0x800);
674
675 MIB.add(predOps(ARMCC::AL))
676 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
677}
678
681 MCRegister SrcReg, bool KillSrc,
682 const ARMSubtarget &Subtarget) const {
683 unsigned Opc = Subtarget.isThumb()
684 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
685 : ARM::MSR;
686
687 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
688
689 if (Subtarget.isMClass())
690 MIB.addImm(0x800);
691 else
692 MIB.addImm(8);
693
694 MIB.addReg(SrcReg, getKillRegState(KillSrc))
697}
698
700 MIB.addImm(ARMVCC::None);
701 MIB.addReg(0);
702 MIB.addReg(0); // tp_reg
703}
704
710
712 MIB.addImm(Cond);
713 MIB.addReg(ARM::VPR, RegState::Implicit);
714 MIB.addReg(0); // tp_reg
715}
716
718 unsigned Cond, unsigned Inactive) {
720 MIB.addReg(Inactive);
721}
722
725 const DebugLoc &DL, Register DestReg,
726 Register SrcReg, bool KillSrc,
727 bool RenamableDest,
728 bool RenamableSrc) const {
729 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
730 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
731
732 if (GPRDest && GPRSrc) {
733 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
734 .addReg(SrcReg, getKillRegState(KillSrc))
736 .add(condCodeOp());
737 return;
738 }
739
740 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
741 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
742
743 unsigned Opc = 0;
744 if (SPRDest && SPRSrc)
745 Opc = ARM::VMOVS;
746 else if (GPRDest && SPRSrc)
747 Opc = ARM::VMOVRS;
748 else if (SPRDest && GPRSrc)
749 Opc = ARM::VMOVSR;
750 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
751 Opc = ARM::VMOVD;
752 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
753 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
754
755 if (Opc) {
756 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
757 MIB.addReg(SrcReg, getKillRegState(KillSrc));
758 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
759 MIB.addReg(SrcReg, getKillRegState(KillSrc));
760 if (Opc == ARM::MVE_VORR)
761 addUnpredicatedMveVpredROp(MIB, DestReg);
762 else if (Opc != ARM::MQPRCopy)
763 MIB.add(predOps(ARMCC::AL));
764 return;
765 }
766
767 // Handle register classes that require multiple instructions.
768 unsigned BeginIdx = 0;
769 unsigned SubRegs = 0;
770 int Spacing = 1;
771
772 // Use VORRq when possible.
773 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
774 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
775 BeginIdx = ARM::qsub_0;
776 SubRegs = 2;
777 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
778 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
779 BeginIdx = ARM::qsub_0;
780 SubRegs = 4;
781 // Fall back to VMOVD.
782 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
783 Opc = ARM::VMOVD;
784 BeginIdx = ARM::dsub_0;
785 SubRegs = 2;
786 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
787 Opc = ARM::VMOVD;
788 BeginIdx = ARM::dsub_0;
789 SubRegs = 3;
790 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
791 Opc = ARM::VMOVD;
792 BeginIdx = ARM::dsub_0;
793 SubRegs = 4;
794 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
795 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
796 BeginIdx = ARM::gsub_0;
797 SubRegs = 2;
798 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
799 Opc = ARM::VMOVD;
800 BeginIdx = ARM::dsub_0;
801 SubRegs = 2;
802 Spacing = 2;
803 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
804 Opc = ARM::VMOVD;
805 BeginIdx = ARM::dsub_0;
806 SubRegs = 3;
807 Spacing = 2;
808 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
809 Opc = ARM::VMOVD;
810 BeginIdx = ARM::dsub_0;
811 SubRegs = 4;
812 Spacing = 2;
813 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
814 !Subtarget.hasFP64()) {
815 Opc = ARM::VMOVS;
816 BeginIdx = ARM::ssub_0;
817 SubRegs = 2;
818 } else if (SrcReg == ARM::CPSR) {
819 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
820 return;
821 } else if (DestReg == ARM::CPSR) {
822 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
823 return;
824 } else if (DestReg == ARM::VPR) {
825 assert(ARM::GPRRegClass.contains(SrcReg));
826 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
827 .addReg(SrcReg, getKillRegState(KillSrc))
829 return;
830 } else if (SrcReg == ARM::VPR) {
831 assert(ARM::GPRRegClass.contains(DestReg));
832 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
833 .addReg(SrcReg, getKillRegState(KillSrc))
835 return;
836 } else if (DestReg == ARM::FPSCR_NZCV) {
837 assert(ARM::GPRRegClass.contains(SrcReg));
838 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
839 .addReg(SrcReg, getKillRegState(KillSrc))
841 return;
842 } else if (SrcReg == ARM::FPSCR_NZCV) {
843 assert(ARM::GPRRegClass.contains(DestReg));
844 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
845 .addReg(SrcReg, getKillRegState(KillSrc))
847 return;
848 }
849
850 assert(Opc && "Impossible reg-to-reg copy");
851
854
855 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
856 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
857 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
858 Spacing = -Spacing;
859 }
860#ifndef NDEBUG
861 SmallSet<unsigned, 4> DstRegs;
862#endif
863 for (unsigned i = 0; i != SubRegs; ++i) {
864 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
865 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
866 assert(Dst && Src && "Bad sub-register");
867#ifndef NDEBUG
868 assert(!DstRegs.count(Src) && "destructive vector copy");
869 DstRegs.insert(Dst);
870#endif
871 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
872 // VORR (NEON or MVE) takes two source operands.
873 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
874 Mov.addReg(Src);
875 }
876 // MVE VORR takes predicate operands in place of an ordinary condition.
877 if (Opc == ARM::MVE_VORR)
879 else
880 Mov = Mov.add(predOps(ARMCC::AL));
881 // MOVr can set CC.
882 if (Opc == ARM::MOVr)
883 Mov = Mov.add(condCodeOp());
884 }
885 // Add implicit super-register defs and kills to the last instruction.
886 Mov->addRegisterDefined(DestReg, TRI);
887 if (KillSrc)
888 Mov->addRegisterKilled(SrcReg, TRI);
889}
890
891std::optional<DestSourcePair>
893 // VMOVRRD is also a copy instruction but it requires
894 // special way of handling. It is more complex copy version
895 // and since that we are not considering it. For recognition
896 // of such instruction isExtractSubregLike MI interface function
897 // could be used.
898 // VORRq is considered as a move only if two inputs are
899 // the same register.
900 if (!MI.isMoveReg() ||
901 (MI.getOpcode() == ARM::VORRq &&
902 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
903 return std::nullopt;
904 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
905}
906
907std::optional<ParamLoadedValue>
909 Register Reg) const {
910 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
911 Register DstReg = DstSrcPair->Destination->getReg();
912
913 // TODO: We don't handle cases where the forwarding reg is narrower/wider
914 // than the copy registers. Consider for example:
915 //
916 // s16 = VMOVS s0
917 // s17 = VMOVS s1
918 // call @callee(d0)
919 //
920 // We'd like to describe the call site value of d0 as d8, but this requires
921 // gathering and merging the descriptions for the two VMOVS instructions.
922 //
923 // We also don't handle the reverse situation, where the forwarding reg is
924 // narrower than the copy destination:
925 //
926 // d8 = VMOVD d0
927 // call @callee(s1)
928 //
929 // We need to produce a fragment description (the call site value of s1 is
930 // /not/ just d8).
931 if (DstReg != Reg)
932 return std::nullopt;
933 }
935}
936
938 unsigned Reg,
939 unsigned SubIdx,
940 RegState State) const {
941 if (!SubIdx)
942 return MIB.addReg(Reg, State);
943
945 return MIB.addReg(getRegisterInfo().getSubReg(Reg, SubIdx), State);
946 return MIB.addReg(Reg, State, SubIdx);
947}
948
951 Register SrcReg, bool isKill, int FI,
952 const TargetRegisterClass *RC,
953 Register VReg,
954 MachineInstr::MIFlag Flags) const {
955 MachineFunction &MF = *MBB.getParent();
956 MachineFrameInfo &MFI = MF.getFrameInfo();
957 Align Alignment = MFI.getObjectAlign(FI);
959
962 MFI.getObjectSize(FI), Alignment);
963
964 switch (TRI.getSpillSize(*RC)) {
965 case 2:
966 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
967 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
968 .addReg(SrcReg, getKillRegState(isKill))
969 .addFrameIndex(FI)
970 .addImm(0)
971 .addMemOperand(MMO)
973 } else
974 llvm_unreachable("Unknown reg class!");
975 break;
976 case 4:
977 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
978 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
979 .addReg(SrcReg, getKillRegState(isKill))
980 .addFrameIndex(FI)
981 .addImm(0)
982 .addMemOperand(MMO)
984 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
985 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
986 .addReg(SrcReg, getKillRegState(isKill))
987 .addFrameIndex(FI)
988 .addImm(0)
989 .addMemOperand(MMO)
991 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
992 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
993 .addReg(SrcReg, getKillRegState(isKill))
994 .addFrameIndex(FI)
995 .addImm(0)
996 .addMemOperand(MMO)
998 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
999 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
1000 .addReg(SrcReg, getKillRegState(isKill))
1001 .addFrameIndex(FI)
1002 .addImm(0)
1003 .addMemOperand(MMO)
1005 } else
1006 llvm_unreachable("Unknown reg class!");
1007 break;
1008 case 8:
1009 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1010 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1011 .addReg(SrcReg, getKillRegState(isKill))
1012 .addFrameIndex(FI)
1013 .addImm(0)
1014 .addMemOperand(MMO)
1016 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1017 if (Subtarget.hasV5TEOps()) {
1018 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1019 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
1020 AddDReg(MIB, SrcReg, ARM::gsub_1, {});
1021 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1023 } else {
1024 // Fallback to STM instruction, which has existed since the dawn of
1025 // time.
1026 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1027 .addFrameIndex(FI)
1028 .addMemOperand(MMO)
1030 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
1031 AddDReg(MIB, SrcReg, ARM::gsub_1, {});
1032 }
1033 } else
1034 llvm_unreachable("Unknown reg class!");
1035 break;
1036 case 16:
1037 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1038 // Use aligned spills if the stack can be realigned.
1039 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1040 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1041 .addFrameIndex(FI)
1042 .addImm(16)
1043 .addReg(SrcReg, getKillRegState(isKill))
1044 .addMemOperand(MMO)
1046 } else {
1047 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1048 .addReg(SrcReg, getKillRegState(isKill))
1049 .addFrameIndex(FI)
1050 .addMemOperand(MMO)
1052 }
1053 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1054 Subtarget.hasMVEIntegerOps()) {
1055 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1056 MIB.addReg(SrcReg, getKillRegState(isKill))
1057 .addFrameIndex(FI)
1058 .addImm(0)
1059 .addMemOperand(MMO);
1061 } else
1062 llvm_unreachable("Unknown reg class!");
1063 break;
1064 case 24:
1065 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1066 // Use aligned spills if the stack can be realigned.
1067 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1068 Subtarget.hasNEON()) {
1069 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1070 .addFrameIndex(FI)
1071 .addImm(16)
1072 .addReg(SrcReg, getKillRegState(isKill))
1073 .addMemOperand(MMO)
1075 } else {
1077 get(ARM::VSTMDIA))
1078 .addFrameIndex(FI)
1080 .addMemOperand(MMO);
1081 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1082 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
1083 AddDReg(MIB, SrcReg, ARM::dsub_2, {});
1084 }
1085 } else
1086 llvm_unreachable("Unknown reg class!");
1087 break;
1088 case 32:
1089 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1090 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1091 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1092 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1093 Subtarget.hasNEON()) {
1094 // FIXME: It's possible to only store part of the QQ register if the
1095 // spilled def has a sub-register index.
1096 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1097 .addFrameIndex(FI)
1098 .addImm(16)
1099 .addReg(SrcReg, getKillRegState(isKill))
1100 .addMemOperand(MMO)
1102 } else if (Subtarget.hasMVEIntegerOps()) {
1103 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1104 .addReg(SrcReg, getKillRegState(isKill))
1105 .addFrameIndex(FI)
1106 .addMemOperand(MMO);
1107 } else {
1109 get(ARM::VSTMDIA))
1110 .addFrameIndex(FI)
1112 .addMemOperand(MMO);
1113 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1114 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
1115 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, {});
1116 AddDReg(MIB, SrcReg, ARM::dsub_3, {});
1117 }
1118 } else
1119 llvm_unreachable("Unknown reg class!");
1120 break;
1121 case 64:
1122 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1123 Subtarget.hasMVEIntegerOps()) {
1124 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1125 .addReg(SrcReg, getKillRegState(isKill))
1126 .addFrameIndex(FI)
1127 .addMemOperand(MMO);
1128 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1129 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1130 .addFrameIndex(FI)
1132 .addMemOperand(MMO);
1133 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1134 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
1135 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, {});
1136 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, {});
1137 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, {});
1138 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, {});
1139 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, {});
1140 AddDReg(MIB, SrcReg, ARM::dsub_7, {});
1141 } else
1142 llvm_unreachable("Unknown reg class!");
1143 break;
1144 default:
1145 llvm_unreachable("Unknown reg class!");
1146 }
1147}
1148
// NOTE(review): this listing is a numbered doxygen extraction; the opening
// signature line (presumably ARMBaseInstrInfo::isStoreToStackSlot(const
// MachineInstr &MI, ...)) was dropped by the extraction -- confirm upstream.
// From the visible body: if MI is a direct, zero-offset store of a register
// to a frame index, set FrameIndex to that slot and return the stored
// register; otherwise return 0 ("not a simple stack-slot store").
1150 int &FrameIndex) const {
1151 switch (MI.getOpcode()) {
1152 default: break;
1153 case ARM::STRrs:
1154 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
// Register-offset form: only a stack-slot store when the index register is
// 0 (no register) and the shift immediate is 0.
1155 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1156 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1157 MI.getOperand(3).getImm() == 0) {
1158 FrameIndex = MI.getOperand(1).getIndex();
1159 return MI.getOperand(0).getReg();
1160 }
1161 break;
// Immediate-offset forms: require the immediate offset to be exactly 0.
1162 case ARM::STRi12:
1163 case ARM::t2STRi12:
1164 case ARM::tSTRspi:
1165 case ARM::VSTRD:
1166 case ARM::VSTRS:
1167 case ARM::VSTRH:
1168 case ARM::VSTR_P0_off:
1169 case ARM::VSTR_FPSCR_NZCVQC_off:
1170 case ARM::MVE_VSTRWU32:
1171 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1172 MI.getOperand(2).getImm() == 0) {
1173 FrameIndex = MI.getOperand(1).getIndex();
1174 return MI.getOperand(0).getReg();
1175 }
1176 break;
// NEON store pseudos: frame index is operand 0, value is operand 2; a
// sub-register store is not a full-slot store, so require SubReg == 0.
1177 case ARM::VST1q64:
1178 case ARM::VST1d64TPseudo:
1179 case ARM::VST1d64QPseudo:
1180 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1181 FrameIndex = MI.getOperand(0).getIndex();
1182 return MI.getOperand(2).getReg();
1183 }
1184 break;
1185 case ARM::VSTMQIA:
1186 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1187 FrameIndex = MI.getOperand(1).getIndex();
1188 return MI.getOperand(0).getReg();
1189 }
1190 break;
// MVE multi-Q store pseudos have no offset operand to check.
1191 case ARM::MQQPRStore:
1192 case ARM::MQQQQPRStore:
1193 if (MI.getOperand(1).isFI()) {
1194 FrameIndex = MI.getOperand(1).getIndex();
1195 return MI.getOperand(0).getReg();
1196 }
1197 break;
1198 }
1199
1200 return 0;
1201}
1202
// NOTE(review): signature line and the local accesses-vector declaration
// (likely a SmallVector of MachineMemOperand pointers) were dropped by the
// extraction -- confirm against upstream.
// Post-frame-elimination variant: recognize a stack-slot store via the
// instruction's memory operands rather than its opcode shape. Succeeds only
// when exactly one stack access is found; reports its frame index.
1204 int &FrameIndex) const {
1206 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1207 Accesses.size() == 1) {
1208 FrameIndex =
1209 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1210 ->getFrameIndex();
1211 return true;
1212 }
1213 return false;
1214}
1215
// NOTE(review): this extraction dropped the opening signature line
// (loadRegFromStackSlot), the MachineMemOperand creation preceding line
// 1229, the TRI declaration, and the trailing predicate-operand lines
// (likely .add(predOps(ARMCC::AL))) after several .addMemOperand(MMO)
// calls -- the original line numbering visibly skips there. Confirm
// against upstream before relying on the exact builder chains.
// Purpose: emit the reload of DestReg from stack slot FI, selecting the
// load opcode by the register class's spill size (2/4/8/16/24/32/64 bytes)
// and by subtarget features (NEON, MVE, v5TE LDRD), preferring aligned
// VLD1 forms when the stack can be realigned to 16 bytes.
1218 Register DestReg, int FI,
1219 const TargetRegisterClass *RC,
1220 Register VReg, unsigned SubReg,
1221 MachineInstr::MIFlag Flags) const {
1222 DebugLoc DL;
1223 if (I != MBB.end()) DL = I->getDebugLoc();
1224 MachineFunction &MF = *MBB.getParent();
1225 MachineFrameInfo &MFI = MF.getFrameInfo();
1226 const Align Alignment = MFI.getObjectAlign(FI);
1229 MFI.getObjectSize(FI), Alignment);
1230
1232 switch (TRI.getSpillSize(*RC)) {
1233 case 2:
// Half-precision FP register: 16-bit VLDRH.
1234 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1235 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1236 .addFrameIndex(FI)
1237 .addImm(0)
1238 .addMemOperand(MMO)
1240 } else
1241 llvm_unreachable("Unknown reg class!");
1242 break;
1243 case 4:
// 32-bit classes: integer GPR, FP single, MVE predicate, FPSCR_NZCV.
1244 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1245 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1246 .addFrameIndex(FI)
1247 .addImm(0)
1248 .addMemOperand(MMO)
1250 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1251 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1252 .addFrameIndex(FI)
1253 .addImm(0)
1254 .addMemOperand(MMO)
1256 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1257 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1258 .addFrameIndex(FI)
1259 .addImm(0)
1260 .addMemOperand(MMO)
1262 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1263 BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
1264 .addFrameIndex(FI)
1265 .addImm(0)
1266 .addMemOperand(MMO)
1268 } else
1269 llvm_unreachable("Unknown reg class!");
1270 break;
1271 case 8:
1272 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1273 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1274 .addFrameIndex(FI)
1275 .addImm(0)
1276 .addMemOperand(MMO)
1278 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1280
// GPR pair: use LDRD when the subtarget has v5TE, defining each half of
// the pair (gsub_0/gsub_1) with DefineNoRead.
1281 if (Subtarget.hasV5TEOps()) {
1282 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1283 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1284 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1285 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1287 } else {
1288 // Fallback to LDM instruction, which has existed since the dawn of
1289 // time.
1290 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1291 .addFrameIndex(FI)
1292 .addMemOperand(MMO)
1294 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1295 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1296 }
1297
// For a physical pair register, also mark the super-register itself as
// implicitly defined so liveness of the full pair is tracked.
1298 if (DestReg.isPhysical())
1299 MIB.addReg(DestReg, RegState::ImplicitDefine);
1300 } else
1301 llvm_unreachable("Unknown reg class!");
1302 break;
1303 case 16:
1304 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
// Prefer the 16-byte-aligned VLD1 form when stack realignment allows it.
1305 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1306 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1307 .addFrameIndex(FI)
1308 .addImm(16)
1309 .addMemOperand(MMO)
1311 } else {
1312 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1313 .addFrameIndex(FI)
1314 .addMemOperand(MMO)
1316 }
1317 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1318 Subtarget.hasMVEIntegerOps()) {
1319 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1320 MIB.addFrameIndex(FI)
1321 .addImm(0)
1322 .addMemOperand(MMO);
1324 } else
1325 llvm_unreachable("Unknown reg class!");
1326 break;
1327 case 24:
1328 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1329 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1330 Subtarget.hasNEON()) {
1331 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1332 .addFrameIndex(FI)
1333 .addImm(16)
1334 .addMemOperand(MMO)
1336 } else {
// Unaligned / non-NEON path: VLDM of the three D sub-registers.
1337 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1338 .addFrameIndex(FI)
1339 .addMemOperand(MMO)
1341 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1342 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1343 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1344 if (DestReg.isPhysical())
1345 MIB.addReg(DestReg, RegState::ImplicitDefine);
1346 }
1347 } else
1348 llvm_unreachable("Unknown reg class!");
1349 break;
1350 case 32:
1351 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1352 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1353 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1354 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1355 Subtarget.hasNEON()) {
1356 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1357 .addFrameIndex(FI)
1358 .addImm(16)
1359 .addMemOperand(MMO)
1361 } else if (Subtarget.hasMVEIntegerOps()) {
1362 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1363 .addFrameIndex(FI)
1364 .addMemOperand(MMO);
1365 } else {
// Fallback: VLDM of four D sub-registers.
1366 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1367 .addFrameIndex(FI)
1369 .addMemOperand(MMO);
1370 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1371 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1372 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1373 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1374 if (DestReg.isPhysical())
1375 MIB.addReg(DestReg, RegState::ImplicitDefine);
1376 }
1377 } else
1378 llvm_unreachable("Unknown reg class!");
1379 break;
1380 case 64:
1381 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1382 Subtarget.hasMVEIntegerOps()) {
1383 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1384 .addFrameIndex(FI)
1385 .addMemOperand(MMO);
1386 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
// Eight D-register reload via a single VLDM.
1387 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1388 .addFrameIndex(FI)
1390 .addMemOperand(MMO);
1391 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1392 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1393 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1394 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1395 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead);
1396 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead);
1397 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead);
1398 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead);
1399 if (DestReg.isPhysical())
1400 MIB.addReg(DestReg, RegState::ImplicitDefine);
1401 } else
1402 llvm_unreachable("Unknown reg class!");
1403 break;
1404 default:
1405 llvm_unreachable("Unknown regclass!");
1406 }
1407}
1408
// NOTE(review): the opening signature line (presumably
// ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, ...)) was
// dropped by the extraction -- confirm against upstream.
// Mirror of isStoreToStackSlot: if MI is a direct, zero-offset load of a
// register from a frame index, set FrameIndex and return the loaded
// register; otherwise return 0.
1410 int &FrameIndex) const {
1411 switch (MI.getOpcode()) {
1412 default: break;
1413 case ARM::LDRrs:
1414 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
// Register-offset form: only a stack-slot load when the index register is
// 0 and the shift immediate is 0.
1415 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1416 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1417 MI.getOperand(3).getImm() == 0) {
1418 FrameIndex = MI.getOperand(1).getIndex();
1419 return MI.getOperand(0).getReg();
1420 }
1421 break;
// Immediate-offset forms: require offset == 0.
1422 case ARM::LDRi12:
1423 case ARM::t2LDRi12:
1424 case ARM::tLDRspi:
1425 case ARM::VLDRD:
1426 case ARM::VLDRS:
1427 case ARM::VLDRH:
1428 case ARM::VLDR_P0_off:
1429 case ARM::VLDR_FPSCR_NZCVQC_off:
1430 case ARM::MVE_VLDRWU32:
1431 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1432 MI.getOperand(2).getImm() == 0) {
1433 FrameIndex = MI.getOperand(1).getIndex();
1434 return MI.getOperand(0).getReg();
1435 }
1436 break;
// NEON load pseudos: reject sub-register defs (not a full-slot reload).
1437 case ARM::VLD1q64:
1438 case ARM::VLD1d8TPseudo:
1439 case ARM::VLD1d16TPseudo:
1440 case ARM::VLD1d32TPseudo:
1441 case ARM::VLD1d64TPseudo:
1442 case ARM::VLD1d8QPseudo:
1443 case ARM::VLD1d16QPseudo:
1444 case ARM::VLD1d32QPseudo:
1445 case ARM::VLD1d64QPseudo:
1446 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1447 FrameIndex = MI.getOperand(1).getIndex();
1448 return MI.getOperand(0).getReg();
1449 }
1450 break;
1451 case ARM::VLDMQIA:
1452 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1453 FrameIndex = MI.getOperand(1).getIndex();
1454 return MI.getOperand(0).getReg();
1455 }
1456 break;
// MVE multi-Q load pseudos have no offset operand to check.
1457 case ARM::MQQPRLoad:
1458 case ARM::MQQQQPRLoad:
1459 if (MI.getOperand(1).isFI()) {
1460 FrameIndex = MI.getOperand(1).getIndex();
1461 return MI.getOperand(0).getReg();
1462 }
1463 break;
1464 }
1465
1466 return 0;
1467}
1468
// NOTE(review): signature line and the local accesses-vector declaration
// were dropped by the extraction -- confirm against upstream.
// Post-frame-elimination variant: recognize a stack-slot reload via the
// instruction's memory operands. Succeeds only when exactly one stack
// access is found; reports its frame index.
1470 int &FrameIndex) const {
1472 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1473 Accesses.size() == 1) {
1474 FrameIndex =
1475 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1476 ->getFrameIndex();
1477 return true;
1478 }
1479 return false;
1480}
1481
1482/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
1483/// depending on whether the result is used.
// Operand layout read below: op0 = store writeback def, op1 = load
// writeback def, op2 = store base, op3 = load base, op4.. skipped,
// op5.. = scratch registers.
// NOTE(review): the loop body at original lines 1531-1532 (presumably
// adding each scratch Reg to LDM as a def and to STM as a use) was dropped
// by the extraction -- confirm against upstream.
1484void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1485 bool isThumb1 = Subtarget.isThumb1Only();
1486 bool isThumb2 = Subtarget.isThumb2();
1487 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1488
1489 DebugLoc dl = MI->getDebugLoc();
1490 MachineBasicBlock *BB = MI->getParent();
1491
1492 MachineInstrBuilder LDM, STM;
// Use the writeback (_UPD) load form when the updated pointer is live, or
// always on Thumb1 (which only has the writeback form).
1493 if (isThumb1 || !MI->getOperand(1).isDead()) {
1494 MachineOperand LDWb(MI->getOperand(1));
1495 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1496 : isThumb1 ? ARM::tLDMIA_UPD
1497 : ARM::LDMIA_UPD))
1498 .add(LDWb);
1499 } else {
1500 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1501 }
1502
// Same choice for the store side, keyed on operand 0's liveness.
1503 if (isThumb1 || !MI->getOperand(0).isDead()) {
1504 MachineOperand STWb(MI->getOperand(0));
1505 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1506 : isThumb1 ? ARM::tSTMIA_UPD
1507 : ARM::STMIA_UPD))
1508 .add(STWb);
1509 } else {
1510 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1511 }
1512
1513 MachineOperand LDBase(MI->getOperand(3));
1514 LDM.add(LDBase).add(predOps(ARMCC::AL));
1515
1516 MachineOperand STBase(MI->getOperand(2));
1517 STM.add(STBase).add(predOps(ARMCC::AL));
1518
1519 // Sort the scratch registers into ascending order.
// LDM/STM register lists must be in ascending encoding order.
1520 const TargetRegisterInfo &TRI = getRegisterInfo();
1521 SmallVector<unsigned, 6> ScratchRegs;
1522 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1523 ScratchRegs.push_back(MO.getReg());
1524 llvm::sort(ScratchRegs,
1525 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1526 return TRI.getEncodingValue(Reg1) <
1527 TRI.getEncodingValue(Reg2);
1528 });
1529
1530 for (const auto &Reg : ScratchRegs) {
1533 }
1534
// The MEMCPY pseudo has been fully replaced; remove it.
1535 BB->erase(MI);
1536}
1537
// NOTE(review): the opening signature line (presumably
// bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {) was
// dropped by the extraction -- confirm against upstream.
// Expands post-RA pseudos: LOAD_STACK_GUARD and MEMCPY, then opportunistic
// widening of S-register COPYs to D-register VMOVD (see comments below).
// Returns true if MI was expanded/changed, false otherwise.
1539 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1540 expandLoadStackGuard(MI);
1541 MI.getParent()->erase(MI);
1542 return true;
1543 }
1544
1545 if (MI.getOpcode() == ARM::MEMCPY) {
1546 expandMEMCPY(MI);
1547 return true;
1548 }
1549
1550 // This hook gets to expand COPY instructions before they become
1551 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1552 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1553 // changed into a VORR that can go down the NEON pipeline.
1554 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1555 return false;
1556
1557 // Look for a copy between even S-registers. That is where we keep floats
1558 // when using NEON v2f32 instructions for f32 arithmetic.
1559 Register DstRegS = MI.getOperand(0).getReg();
1560 Register SrcRegS = MI.getOperand(1).getReg();
1561 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1562 return false;
1563
// Map each S register to the D register whose ssub_0 it occupies; fails
// (returns 0) for odd S registers, which have no such D super-register.
1565 MCRegister DstRegD =
1566 TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, &ARM::DPRRegClass);
1567 MCRegister SrcRegD =
1568 TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, &ARM::DPRRegClass);
1569 if (!DstRegD || !SrcRegD)
1570 return false;
1571
1572 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1573 // legal if the COPY already defines the full DstRegD, and it isn't a
1574 // sub-register insertion.
1575 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1576 return false;
1577
1578 // A dead copy shouldn't show up here, but reject it just in case.
1579 if (MI.getOperand(0).isDead())
1580 return false;
1581
1582 // All clear, widen the COPY.
1583 LLVM_DEBUG(dbgs() << "widening: " << MI);
1584 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1585
1586 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1587 // or some other super-register.
1588 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1589 if (ImpDefIdx != -1)
1590 MI.removeOperand(ImpDefIdx);
1591
1592 // Change the opcode and operands.
1593 MI.setDesc(get(ARM::VMOVD));
1594 MI.getOperand(0).setReg(DstRegD);
1595 MI.getOperand(1).setReg(SrcRegD);
1596 MIB.add(predOps(ARMCC::AL));
1597
1598 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1599 // register scavenger and machine verifier, so we need to indicate that we
1600 // are reading an undefined value from SrcRegD, but a proper value from
1601 // SrcRegS.
1602 MI.getOperand(1).setIsUndef();
1603 MIB.addReg(SrcRegS, RegState::Implicit);
1604
1605 // SrcRegD may actually contain an unrelated value in the ssub_1
1606 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1607 if (MI.getOperand(1).isKill()) {
1608 MI.getOperand(1).setIsKill(false);
1609 MI.addRegisterKilled(SrcRegS, TRI, true);
1610 }
1611
1612 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1613 return true;
1614}
1615
1616/// Create a copy of a const pool value. Update CPI to the new index and return
1617/// the label UID.
// NOTE(review): the local declarations at original lines 1619-1620
// (presumably the MachineConstantPool *MCP and ARMFunctionInfo *AFI used
// below) were dropped by the extraction, as were pieces of several
// ARMConstantPool*::Create call chains -- confirm against upstream.
1618static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1621
1622 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1623 assert(MCPE.isMachineConstantPoolEntry() &&
1624 "Expecting a machine constantpool entry!");
1625 ARMConstantPoolValue *ACPV =
1626 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1627
// Fresh PC label so the duplicated entry gets its own PIC label.
1628 unsigned PCLabelId = AFI->createPICLabelUId();
1629 ARMConstantPoolValue *NewCPV = nullptr;
1630
1631 // FIXME: The below assumes PIC relocation model and that the function
1632 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1633 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1634 // instructions, so that's probably OK, but is PIC always correct when
1635 // we get here?
// Re-create the constant-pool value with the new PC label, dispatching on
// the value's kind (global, extern symbol, blockaddress, LSDA, MBB).
1636 if (ACPV->isGlobalValue())
1638 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1639 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1640 else if (ACPV->isExtSymbol())
1643 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1644 else if (ACPV->isBlockAddress())
1646 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1648 else if (ACPV->isLSDA())
1649 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1650 ARMCP::CPLSDA, 4);
1651 else if (ACPV->isMachineBasicBlock())
1652 NewCPV = ARMConstantPoolMBB::
1654 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1655 else
1656 llvm_unreachable("Unexpected ARM constantpool value type!!");
// Out-parameter: CPI now refers to the duplicated entry.
1657 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1658 return PCLabelId;
1659}
1660
// NOTE(review): the opening signature lines (presumably
// ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, ...)) were
// dropped by the extraction -- confirm against upstream.
// Rematerializes Orig before I in MBB. The default case clones the
// instruction and substitutes the destination register; PIC constant-pool
// loads additionally duplicate their constant-pool entry so the new copy
// gets its own PC label.
1663 Register DestReg, unsigned SubIdx,
1664 const MachineInstr &Orig,
1665 LaneBitmask UsedLanes) const {
1666 unsigned Opcode = Orig.getOpcode();
1667 switch (Opcode) {
1668 default: {
1669 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1670 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1671 MBB.insert(I, MI);
1672 break;
1673 }
1674 case ARM::tLDRpci_pic:
1675 case ARM::t2LDRpci_pic: {
1676 MachineFunction &MF = *MBB.getParent();
1677 unsigned CPI = Orig.getOperand(1).getIndex();
// duplicateCPV updates CPI in place to the cloned pool entry's index.
1678 unsigned PCLabelId = duplicateCPV(MF, CPI);
1679 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1681 .addImm(PCLabelId)
1682 .cloneMemRefs(Orig);
1683 break;
1684 }
1685 }
1686}
1687
// NOTE(review): the opening signature line (presumably
// MachineInstr &ARMBaseInstrInfo::duplicate(...)) and the declaration of
// the bundle iterator I at original line 1693 were dropped by the
// extraction -- confirm against upstream.
// Duplicates Orig (including any instructions bundled with it) and, for
// each PIC constant-pool load in the bundle, duplicates its constant-pool
// entry so the clone references a fresh PC label.
1690 MachineBasicBlock::iterator InsertBefore,
1691 const MachineInstr &Orig) const {
1692 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1694 for (;;) {
1695 switch (I->getOpcode()) {
1696 case ARM::tLDRpci_pic:
1697 case ARM::t2LDRpci_pic: {
1698 MachineFunction &MF = *MBB.getParent();
1699 unsigned CPI = I->getOperand(1).getIndex();
1700 unsigned PCLabelId = duplicateCPV(MF, CPI);
// duplicateCPV updated CPI; rewrite the clone's operands to match.
1701 I->getOperand(1).setIndex(CPI);
1702 I->getOperand(2).setImm(PCLabelId);
1703 break;
1704 }
1705 }
// Walk through the rest of the bundle, if any.
1706 if (!I->isBundledWithSucc())
1707 break;
1708 ++I;
1709 }
1710 return Cloned;
1711}
1712
// NOTE(review): the opening signature line (presumably
// bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, ...))
// and the final fallback return at original line 1789 were dropped by the
// extraction -- confirm against upstream.
// Returns true when MI0 and MI1 are known to produce the same value:
// handles PC-relative constant-pool / global-address loads (comparing the
// underlying pool entries or globals, ignoring PC labels) and PICLDR
// (comparing address defs recursively plus remaining operands).
1714 const MachineInstr &MI1,
1715 const MachineRegisterInfo *MRI) const {
1716 unsigned Opcode = MI0.getOpcode();
1717 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1718 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1719 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1720 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1721 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1722 Opcode == ARM::t2MOV_ga_pcrel) {
1723 if (MI1.getOpcode() != Opcode)
1724 return false;
1725 if (MI0.getNumOperands() != MI1.getNumOperands())
1726 return false;
1727
1728 const MachineOperand &MO0 = MI0.getOperand(1);
1729 const MachineOperand &MO1 = MI1.getOperand(1);
1730 if (MO0.getOffset() != MO1.getOffset())
1731 return false;
1732
// Global-address flavors: equality of the referenced global suffices.
1733 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1734 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1735 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1736 Opcode == ARM::t2MOV_ga_pcrel)
1737 // Ignore the PC labels.
1738 return MO0.getGlobal() == MO1.getGlobal();
1739
// Constant-pool flavors: compare the pool entries themselves. ARM-specific
// entries compare via hasSameValue (which ignores the PC label); plain
// entries compare by identical Constant pointer.
1740 const MachineFunction *MF = MI0.getParent()->getParent();
1741 const MachineConstantPool *MCP = MF->getConstantPool();
1742 int CPI0 = MO0.getIndex();
1743 int CPI1 = MO1.getIndex();
1744 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1745 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1746 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1747 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1748 if (isARMCP0 && isARMCP1) {
1749 ARMConstantPoolValue *ACPV0 =
1750 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1751 ARMConstantPoolValue *ACPV1 =
1752 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1753 return ACPV0->hasSameValue(ACPV1);
1754 } else if (!isARMCP0 && !isARMCP1) {
1755 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1756 }
1757 return false;
1758 } else if (Opcode == ARM::PICLDR) {
1759 if (MI1.getOpcode() != Opcode)
1760 return false;
1761 if (MI0.getNumOperands() != MI1.getNumOperands())
1762 return false;
1763
1764 Register Addr0 = MI0.getOperand(1).getReg();
1765 Register Addr1 = MI1.getOperand(1).getReg();
1766 if (Addr0 != Addr1) {
1767 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1768 return false;
1769
1770 // This assumes SSA form.
1771 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1772 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1773 // Check if the loaded value, e.g. a constantpool of a global address, are
1774 // the same.
1775 if (!produceSameValue(*Def0, *Def1, MRI))
1776 return false;
1777 }
1778
// Remaining operands (offset, predicate) must match exactly; operand 2
// is the PC label and is intentionally skipped.
1779 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1780 // %12 = PICLDR %11, 0, 14, %noreg
1781 const MachineOperand &MO0 = MI0.getOperand(i);
1782 const MachineOperand &MO1 = MI1.getOperand(i);
1783 if (!MO0.isIdenticalTo(MO1))
1784 return false;
1785 }
1786 return true;
1787 }
1788
1790}
1791
1792/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1793/// determine if two loads are loading from the same base address. It should
1794/// only return true if the base pointers are the same and the only differences
1795/// between the two addresses is the offset. It also returns the offsets by
1796/// reference.
1797///
1798/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1799/// is permanently disabled.
// NOTE(review): the opening signature line (presumably
// bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode
// *Load2, ...)) was dropped by the extraction -- confirm against upstream.
1801 int64_t &Offset1,
1802 int64_t &Offset2) const {
1803 // Don't worry about Thumb: just ARM and Thumb2.
1804 if (Subtarget.isThumb1Only()) return false;
1805
1806 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1807 return false;
1808
// Whitelist of simple base+offset load opcodes this analysis understands.
1809 auto IsLoadOpcode = [&](unsigned Opcode) {
1810 switch (Opcode) {
1811 default:
1812 return false;
1813 case ARM::LDRi12:
1814 case ARM::LDRBi12:
1815 case ARM::LDRD:
1816 case ARM::LDRH:
1817 case ARM::LDRSB:
1818 case ARM::LDRSH:
1819 case ARM::VLDRD:
1820 case ARM::VLDRS:
1821 case ARM::t2LDRi8:
1822 case ARM::t2LDRBi8:
1823 case ARM::t2LDRDi8:
1824 case ARM::t2LDRSHi8:
1825 case ARM::t2LDRi12:
1826 case ARM::t2LDRBi12:
1827 case ARM::t2LDRSHi12:
1828 return true;
1829 }
1830 };
1831
1832 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1833 !IsLoadOpcode(Load2->getMachineOpcode()))
1834 return false;
1835
1836 // Check if base addresses and chain operands match.
1837 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1838 Load1->getOperand(4) != Load2->getOperand(4))
1839 return false;
1840
1841 // Index should be Reg0.
1842 if (Load1->getOperand(3) != Load2->getOperand(3))
1843 return false;
1844
1845 // Determine the offsets.
// Only constant offsets qualify; report them by reference on success.
1846 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1847 isa<ConstantSDNode>(Load2->getOperand(1))) {
1848 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1849 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1850 return true;
1851 }
1852
1853 return false;
1854}
1855
1856/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
1857/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1858/// be scheduled together. On some targets if two loads are loading from
1859/// addresses in the same cache line, it's better if they are scheduled
1860/// together. This function takes two integers that represent the load offsets
1861/// from the common base address. It returns true if it decides it's desirable
1862/// to schedule the two loads together. "NumLoads" is the number of loads that
1863/// have already been scheduled after Load1.
1864///
1865/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1866/// is permanently disabled.
// NOTE(review): the opening signature line (presumably
// bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode
// *Load2, ...)) was dropped by the extraction -- confirm against upstream.
1868 int64_t Offset1, int64_t Offset2,
1869 unsigned NumLoads) const {
1870 // Don't worry about Thumb: just ARM and Thumb2.
1871 if (Subtarget.isThumb1Only()) return false;
1872
1873 assert(Offset2 > Offset1);
1874
// Reject loads too far apart to plausibly share locality.
1875 if ((Offset2 - Offset1) / 8 > 64)
1876 return false;
1877
1878 // Check if the machine opcodes are different. If they are different
1879 // then we consider them to not be of the same base address,
1880 // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
1881 // In this case, they are considered to be the same because they are different
1882 // encoding forms of the same basic instruction.
1883 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1884 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1885 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1886 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1887 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1888 return false; // FIXME: overly conservative?
1889
1890 // Four loads in a row should be sufficient.
1891 if (NumLoads >= 3)
1892 return false;
1893
1894 return true;
1895}
1896
// NOTE(review): the opening signature line (presumably
// bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, ...))
// and the iterator declaration at original line 1926 were dropped by the
// extraction -- confirm against upstream.
// Returns true when MI must not be scheduled across: terminators, labels,
// INLINEASM_BR, SEH pseudos, the instruction immediately before a t2IT,
// and non-call SP definitions.
1898 const MachineBasicBlock *MBB,
1899 const MachineFunction &MF) const {
1900 // Debug info is never a scheduling boundary. It's necessary to be explicit
1901 // due to the special treatment of IT instructions below, otherwise a
1902 // dbg_value followed by an IT will result in the IT instruction being
1903 // considered a scheduling hazard, which is wrong. It should be the actual
1904 // instruction preceding the dbg_value instruction(s), just like it is
1905 // when debug info is not present.
1906 if (MI.isDebugInstr())
1907 return false;
1908
1909 // Terminators and labels can't be scheduled around.
1910 if (MI.isTerminator() || MI.isPosition())
1911 return true;
1912
1913 // INLINEASM_BR can jump to another block
1914 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
1915 return true;
1916
1917 if (isSEHInstruction(MI))
1918 return true;
1919
1920 // Treat the start of the IT block as a scheduling boundary, but schedule
1921 // t2IT along with all instructions following it.
1922 // FIXME: This is a big hammer. But the alternative is to add all potential
1923 // true and anti dependencies to IT block instructions as implicit operands
1924 // to the t2IT instruction. The added compile time and complexity does not
1925 // seem worth it.
1927 // Make sure to skip any debug instructions
1928 while (++I != MBB->end() && I->isDebugInstr())
1929 ;
1930 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1931 return true;
1932
1933 // Don't attempt to schedule around any instruction that defines
1934 // a stack-oriented pointer, as it's unlikely to be profitable. This
1935 // saves compile time, because it doesn't require every single
1936 // stack slot reference to depend on the instruction that does the
1937 // modification.
1938 // Calls don't actually change the stack pointer, even if they have imp-defs.
1939 // No ARM calling conventions change the stack pointer. (X86 calling
1940 // conventions sometimes do).
1941 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
1942 return true;
1943
1944 return false;
1945}
1946
// NOTE(review): the opening signature line (presumably
// bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, ...))
// was dropped by the extraction -- confirm against upstream.
// Single-block (triangle) profitability check: rejects empty blocks and,
// when optimizing for size, blocks whose predecessor branch could become a
// CB(N)Z; otherwise delegates to the diamond-form overload with an empty
// false side.
1949 unsigned NumCycles, unsigned ExtraPredCycles,
1950 BranchProbability Probability) const {
1951 if (!NumCycles)
1952 return false;
1953
1954 // If we are optimizing for size, see if the branch in the predecessor can be
1955 // lowered to cbn?z by the constant island lowering pass, and return false if
1956 // so. This results in a shorter instruction sequence.
1957 if (MBB.getParent()->getFunction().hasOptSize()) {
1958 MachineBasicBlock *Pred = *MBB.pred_begin();
1959 if (!Pred->empty()) {
1960 MachineInstr *LastMI = &*Pred->rbegin();
1961 if (LastMI->getOpcode() == ARM::t2Bcc) {
1963 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
1964 if (CmpMI)
1965 return false;
1966 }
1967 }
1968 }
1969 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1970 MBB, 0, 0, Probability);
1971}
1972
// NOTE(review): the opening signature line (presumably
// bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TBB, ...))
// was dropped by the extraction -- confirm against upstream.
// Diamond-form profitability: compares the fixed-point-scaled cost of the
// fully predicated sequence against the probability-weighted cost of
// keeping the branch, modelling branch-predictor absence and Thumb2 IT
// overhead.
1975 unsigned TCycles, unsigned TExtra,
1976 MachineBasicBlock &FBB,
1977 unsigned FCycles, unsigned FExtra,
1978 BranchProbability Probability) const {
1979 if (!TCycles)
1980 return false;
1981
1982 // In thumb code we often end up trading one branch for a IT block, and
1983 // if we are cloning the instruction can increase code size. Prevent
1984 // blocks with multiple predecessors from being ifcvted to prevent this
1985 // cloning.
1986 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
1987 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
1988 return false;
1989 }
1990
1991 // Attempt to estimate the relative costs of predication versus branching.
1992 // Here we scale up each component of UnpredCost to avoid precision issue when
1993 // scaling TCycles/FCycles by Probability.
1994 const unsigned ScalingUpFactor = 1024;
1995
1996 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1997 unsigned UnpredCost;
1998 if (!Subtarget.hasBranchPredictor()) {
1999 // When we don't have a branch predictor it's always cheaper to not take a
2000 // branch than take it, so we have to take that into account.
2001 unsigned NotTakenBranchCost = 1;
2002 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2003 unsigned TUnpredCycles, FUnpredCycles;
2004 if (!FCycles) {
2005 // Triangle: TBB is the fallthrough
2006 TUnpredCycles = TCycles + NotTakenBranchCost;
2007 FUnpredCycles = TakenBranchCost;
2008 } else {
2009 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2010 TUnpredCycles = TCycles + TakenBranchCost;
2011 FUnpredCycles = FCycles + NotTakenBranchCost;
2012 // The branch at the end of FBB will disappear when it's predicated, so
2013 // discount it from PredCost.
2014 PredCost -= 1 * ScalingUpFactor;
2015 }
2016 // The total cost is the cost of each path scaled by their probabilities
2017 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2018 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2019 UnpredCost = TUnpredCost + FUnpredCost;
2020 // When predicating assume that the first IT can be folded away but later
2021 // ones cost one cycle each
2022 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2023 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2024 }
2025 } else {
2026 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2027 unsigned FUnpredCost =
2028 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2029 UnpredCost = TUnpredCost + FUnpredCost;
2030 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2031 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2032 }
2033
2034 return PredCost <= UnpredCost;
2035}
2036
2037unsigned
2039 unsigned NumInsts) const {
2040 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2041 // ARM has a condition code field in every predicable instruction, using it
2042 // doesn't change code size.
2043 if (!Subtarget.isThumb2())
2044 return 0;
2045
2046 // It's possible that the size of the IT is restricted to a single block.
2047 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2048 return divideCeil(NumInsts, MaxInsts) * 2;
2049}
2050
2051unsigned
2053 // If this branch is likely to be folded into the comparison to form a
2054 // CB(N)Z, then removing it won't reduce code size at all, because that will
2055 // just replace the CB(N)Z with a CMP.
2056 if (MI.getOpcode() == ARM::t2Bcc &&
2058 return 0;
2059
2060 unsigned Size = getInstSizeInBytes(MI);
2061
2062 // For Thumb2, all branches are 32-bit instructions during the if conversion
2063 // pass, but may be replaced with 16-bit instructions during size reduction.
2064 // Since the branches considered by if conversion tend to be forward branches
2065 // over small basic blocks, they are very likely to be in range for the
2066 // narrow instructions, so we assume the final code size will be half what it
2067 // currently is.
2068 if (Subtarget.isThumb2())
2069 Size /= 2;
2070
2071 return Size;
2072}
2073
2074bool
2076 MachineBasicBlock &FMBB) const {
2077 // Reduce false anti-dependencies to let the target's out-of-order execution
2078 // engine do its thing.
2079 return Subtarget.isProfitableToUnpredicate();
2080}
2081
2082/// getInstrPredicate - If instruction is predicated, returns its predicate
2083/// condition, otherwise returns AL. It also returns the condition code
2084/// register by reference.
2086 Register &PredReg) {
2087 int PIdx = MI.findFirstPredOperandIdx();
2088 if (PIdx == -1) {
2089 PredReg = 0;
2090 return ARMCC::AL;
2091 }
2092
2093 PredReg = MI.getOperand(PIdx+1).getReg();
2094 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2095}
2096
2098 if (Opc == ARM::B)
2099 return ARM::Bcc;
2100 if (Opc == ARM::tB)
2101 return ARM::tBcc;
2102 if (Opc == ARM::t2B)
2103 return ARM::t2Bcc;
2104
2105 llvm_unreachable("Unknown unconditional branch opcode!");
2106}
2107
2109 bool NewMI,
2110 unsigned OpIdx1,
2111 unsigned OpIdx2) const {
2112 switch (MI.getOpcode()) {
2113 case ARM::MOVCCr:
2114 case ARM::t2MOVCCr: {
2115 // MOVCC can be commuted by inverting the condition.
2116 Register PredReg;
2117 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2118 // MOVCC AL can't be inverted. Shouldn't happen.
2119 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2120 return nullptr;
2121 MachineInstr *CommutedMI =
2122 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2123 if (!CommutedMI)
2124 return nullptr;
2125 // After swapping the MOVCC operands, also invert the condition.
2126 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2128 return CommutedMI;
2129 }
2130 }
2131 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2132}
2133
2134/// Identify instructions that can be folded into a MOVCC instruction, and
2135/// return the defining instruction.
2137ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2138 const TargetInstrInfo *TII) const {
2139 if (!Reg.isVirtual())
2140 return nullptr;
2141 if (!MRI.hasOneNonDBGUse(Reg))
2142 return nullptr;
2143 MachineInstr *MI = MRI.getVRegDef(Reg);
2144 if (!MI)
2145 return nullptr;
2146 // Check if MI can be predicated and folded into the MOVCC.
2147 if (!isPredicable(*MI))
2148 return nullptr;
2149 // Check if MI has any non-dead defs or physreg uses. This also detects
2150 // predicated instructions which will be reading CPSR.
2151 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2152 // Reject frame index operands, PEI can't handle the predicated pseudos.
2153 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2154 return nullptr;
2155 if (!MO.isReg())
2156 continue;
2157 // MI can't have any tied operands, that would conflict with predication.
2158 if (MO.isTied())
2159 return nullptr;
2160 if (MO.getReg().isPhysical())
2161 return nullptr;
2162 if (MO.isDef() && !MO.isDead())
2163 return nullptr;
2164 }
2165 bool DontMoveAcrossStores = true;
2166 if (!MI->isSafeToMove(DontMoveAcrossStores))
2167 return nullptr;
2168 return MI;
2169}
2170
2174 bool PreferFalse) const {
2175 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2176 "Unknown select instruction");
2177 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2178 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2179 bool Invert = !DefMI;
2180 if (!DefMI)
2181 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2182 if (!DefMI)
2183 return nullptr;
2184
2185 // Find new register class to use.
2186 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2187 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2188 Register DestReg = MI.getOperand(0).getReg();
2189 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2190 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2191 if (!MRI.constrainRegClass(DestReg, FalseClass))
2192 return nullptr;
2193 if (!MRI.constrainRegClass(DestReg, TrueClass))
2194 return nullptr;
2195
2196 // Create a new predicated version of DefMI.
2197 // Rfalse is the first use.
2198 MachineInstrBuilder NewMI =
2199 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2200
2201 // Copy all the DefMI operands, excluding its (null) predicate.
2202 const MCInstrDesc &DefDesc = DefMI->getDesc();
2203 for (unsigned i = 1, e = DefDesc.getNumOperands();
2204 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2205 NewMI.add(DefMI->getOperand(i));
2206
2207 unsigned CondCode = MI.getOperand(3).getImm();
2208 if (Invert)
2210 else
2211 NewMI.addImm(CondCode);
2212 NewMI.add(MI.getOperand(4));
2213
2214 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2215 if (NewMI->hasOptionalDef())
2216 NewMI.add(condCodeOp());
2217
2218 // The output register value when the predicate is false is an implicit
2219 // register operand tied to the first def.
2220 // The tie makes the register allocator ensure the FalseReg is allocated the
2221 // same register as operand 0.
2222 FalseReg.setImplicit();
2223 NewMI.add(FalseReg);
2224 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2225
2226 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2227 SeenMIs.insert(NewMI);
2228 SeenMIs.erase(DefMI);
2229
2230 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2231 // DefMI would be invalid when transferred inside the loop. Checking for a
2232 // loop is expensive, but at least remove kill flags if they are in different
2233 // BBs.
2234 if (DefMI->getParent() != MI.getParent())
2235 NewMI->clearKillInfo();
2236
2237 // The caller will erase MI, but not DefMI.
2238 DefMI->eraseFromParent();
2239 return NewMI;
2240}
2241
2242/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2243/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2244/// def operand.
2245///
2246/// This will go away once we can teach tblgen how to set the optional CPSR def
2247/// operand itself.
2252
2254 {ARM::ADDSri, ARM::ADDri},
2255 {ARM::ADDSrr, ARM::ADDrr},
2256 {ARM::ADDSrsi, ARM::ADDrsi},
2257 {ARM::ADDSrsr, ARM::ADDrsr},
2258
2259 {ARM::SUBSri, ARM::SUBri},
2260 {ARM::SUBSrr, ARM::SUBrr},
2261 {ARM::SUBSrsi, ARM::SUBrsi},
2262 {ARM::SUBSrsr, ARM::SUBrsr},
2263
2264 {ARM::RSBSri, ARM::RSBri},
2265 {ARM::RSBSrsi, ARM::RSBrsi},
2266 {ARM::RSBSrsr, ARM::RSBrsr},
2267
2268 {ARM::tADDSi3, ARM::tADDi3},
2269 {ARM::tADDSi8, ARM::tADDi8},
2270 {ARM::tADDSrr, ARM::tADDrr},
2271 {ARM::tADCS, ARM::tADC},
2272
2273 {ARM::tSUBSi3, ARM::tSUBi3},
2274 {ARM::tSUBSi8, ARM::tSUBi8},
2275 {ARM::tSUBSrr, ARM::tSUBrr},
2276 {ARM::tSBCS, ARM::tSBC},
2277 {ARM::tRSBS, ARM::tRSB},
2278 {ARM::tLSLSri, ARM::tLSLri},
2279
2280 {ARM::t2ADDSri, ARM::t2ADDri},
2281 {ARM::t2ADDSrr, ARM::t2ADDrr},
2282 {ARM::t2ADDSrs, ARM::t2ADDrs},
2283
2284 {ARM::t2SUBSri, ARM::t2SUBri},
2285 {ARM::t2SUBSrr, ARM::t2SUBrr},
2286 {ARM::t2SUBSrs, ARM::t2SUBrs},
2287
2288 {ARM::t2RSBSri, ARM::t2RSBri},
2289 {ARM::t2RSBSrs, ARM::t2RSBrs},
2290};
2291
2292unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2293 for (const auto &Entry : AddSubFlagsOpcodeMap)
2294 if (OldOpc == Entry.PseudoOpc)
2295 return Entry.MachineOpc;
2296 return 0;
2297}
2298
2301 const DebugLoc &dl, Register DestReg,
2302 Register BaseReg, int NumBytes,
2303 ARMCC::CondCodes Pred, Register PredReg,
2304 const ARMBaseInstrInfo &TII,
2305 unsigned MIFlags) {
2306 if (NumBytes == 0 && DestReg != BaseReg) {
2307 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2308 .addReg(BaseReg, RegState::Kill)
2309 .add(predOps(Pred, PredReg))
2310 .add(condCodeOp())
2311 .setMIFlags(MIFlags);
2312 return;
2313 }
2314
2315 bool isSub = NumBytes < 0;
2316 if (isSub) NumBytes = -NumBytes;
2317
2318 while (NumBytes) {
2319 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2320 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2321 assert(ThisVal && "Didn't extract field correctly");
2322
2323 // We will handle these bits from offset, clear them.
2324 NumBytes &= ~ThisVal;
2325
2326 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2327
2328 // Build the new ADD / SUB.
2329 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2330 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2331 .addReg(BaseReg, RegState::Kill)
2332 .addImm(ThisVal)
2333 .add(predOps(Pred, PredReg))
2334 .add(condCodeOp())
2335 .setMIFlags(MIFlags);
2336 BaseReg = DestReg;
2337 }
2338}
2339
2342 unsigned NumBytes) {
2343 // This optimisation potentially adds lots of load and store
2344 // micro-operations, it's only really a great benefit to code-size.
2345 if (!Subtarget.hasMinSize())
2346 return false;
2347
2348 // If only one register is pushed/popped, LLVM can use an LDR/STR
2349 // instead. We can't modify those so make sure we're dealing with an
2350 // instruction we understand.
2351 bool IsPop = isPopOpcode(MI->getOpcode());
2352 bool IsPush = isPushOpcode(MI->getOpcode());
2353 if (!IsPush && !IsPop)
2354 return false;
2355
2356 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2357 MI->getOpcode() == ARM::VLDMDIA_UPD;
2358 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2359 MI->getOpcode() == ARM::tPOP ||
2360 MI->getOpcode() == ARM::tPOP_RET;
2361
2362 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2363 MI->getOperand(1).getReg() == ARM::SP)) &&
2364 "trying to fold sp update into non-sp-updating push/pop");
2365
2366 // The VFP push & pop act on D-registers, so we can only fold an adjustment
2367 // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
2368 // if this is violated.
2369 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2370 return false;
2371
2372 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2373 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2374 int RegListIdx = IsT1PushPop ? 2 : 4;
2375
2376 // Calculate the space we'll need in terms of registers.
2377 unsigned RegsNeeded;
2378 const TargetRegisterClass *RegClass;
2379 if (IsVFPPushPop) {
2380 RegsNeeded = NumBytes / 8;
2381 RegClass = &ARM::DPRRegClass;
2382 } else {
2383 RegsNeeded = NumBytes / 4;
2384 RegClass = &ARM::GPRRegClass;
2385 }
2386
2387 // We're going to have to strip all list operands off before
2388 // re-adding them since the order matters, so save the existing ones
2389 // for later.
2391
2392 // We're also going to need the first register transferred by this
2393 // instruction, which won't necessarily be the first register in the list.
2394 unsigned FirstRegEnc = -1;
2395
2397 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2398 MachineOperand &MO = MI->getOperand(i);
2399 RegList.push_back(MO);
2400
2401 if (MO.isReg() && !MO.isImplicit() &&
2402 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2403 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2404 }
2405
2406 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2407
2408 // Now try to find enough space in the reglist to allocate NumBytes.
2409 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2410 --CurRegEnc) {
2411 MCRegister CurReg = RegClass->getRegister(CurRegEnc);
2412 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2413 continue;
2414 if (!IsPop) {
2415 // Pushing any register is completely harmless, mark the register involved
2416 // as undef since we don't care about its value and must not restore it
2417 // during stack unwinding.
2418 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2419 false, false, true));
2420 --RegsNeeded;
2421 continue;
2422 }
2423
2424 // However, we can only pop an extra register if it's not live. For
2425 // registers live within the function we might clobber a return value
2426 // register; the other way a register can be live here is if it's
2427 // callee-saved.
2428 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2429 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2431 // VFP pops don't allow holes in the register list, so any skip is fatal
2432 // for our transformation. GPR pops do, so we should just keep looking.
2433 if (IsVFPPushPop)
2434 return false;
2435 else
2436 continue;
2437 }
2438
2439 // Mark the unimportant registers as <def,dead> in the POP.
2440 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2441 true));
2442 --RegsNeeded;
2443 }
2444
2445 if (RegsNeeded > 0)
2446 return false;
2447
2448 // Finally we know we can profitably perform the optimisation so go
2449 // ahead: strip all existing registers off and add them back again
2450 // in the right order.
2451 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2452 MI->removeOperand(i);
2453
2454 // Add the complete list back in.
2455 MachineInstrBuilder MIB(MF, &*MI);
2456 for (const MachineOperand &MO : llvm::reverse(RegList))
2457 MIB.add(MO);
2458
2459 return true;
2460}
2461
2462bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2463 Register FrameReg, int &Offset,
2464 const ARMBaseInstrInfo &TII) {
2465 unsigned Opcode = MI.getOpcode();
2466 const MCInstrDesc &Desc = MI.getDesc();
2467 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2468 bool isSub = false;
2469
2470 // Memory operands in inline assembly always use AddrMode2.
2471 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2473
2474 if (Opcode == ARM::ADDri) {
2475 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2476 if (Offset == 0) {
2477 // Turn it into a move.
2478 MI.setDesc(TII.get(ARM::MOVr));
2479 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2480 MI.removeOperand(FrameRegIdx+1);
2481 Offset = 0;
2482 return true;
2483 } else if (Offset < 0) {
2484 Offset = -Offset;
2485 isSub = true;
2486 MI.setDesc(TII.get(ARM::SUBri));
2487 }
2488
2489 // Common case: small offset, fits into instruction.
2490 if (ARM_AM::getSOImmVal(Offset) != -1) {
2491 // Replace the FrameIndex with sp / fp
2492 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2493 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2494 Offset = 0;
2495 return true;
2496 }
2497
2498 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2499 // as possible.
2500 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2501 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2502
2503 // We will handle these bits from offset, clear them.
2504 Offset &= ~ThisImmVal;
2505
2506 // Get the properly encoded SOImmVal field.
2507 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2508 "Bit extraction didn't work?");
2509 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2510 } else {
2511 unsigned ImmIdx = 0;
2512 int InstrOffs = 0;
2513 unsigned NumBits = 0;
2514 unsigned Scale = 1;
2515 switch (AddrMode) {
2517 ImmIdx = FrameRegIdx + 1;
2518 InstrOffs = MI.getOperand(ImmIdx).getImm();
2519 NumBits = 12;
2520 break;
2521 case ARMII::AddrMode2:
2522 ImmIdx = FrameRegIdx+2;
2523 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2524 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2525 InstrOffs *= -1;
2526 NumBits = 12;
2527 break;
2528 case ARMII::AddrMode3:
2529 ImmIdx = FrameRegIdx+2;
2530 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2531 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2532 InstrOffs *= -1;
2533 NumBits = 8;
2534 break;
2535 case ARMII::AddrMode4:
2536 case ARMII::AddrMode6:
2537 // Can't fold any offset even if it's zero.
2538 return false;
2539 case ARMII::AddrMode5:
2540 ImmIdx = FrameRegIdx+1;
2541 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2542 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2543 InstrOffs *= -1;
2544 NumBits = 8;
2545 Scale = 4;
2546 break;
2548 ImmIdx = FrameRegIdx+1;
2549 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2550 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2551 InstrOffs *= -1;
2552 NumBits = 8;
2553 Scale = 2;
2554 break;
2558 ImmIdx = FrameRegIdx+1;
2559 InstrOffs = MI.getOperand(ImmIdx).getImm();
2560 NumBits = 7;
2561 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2562 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2563 break;
2564 default:
2565 llvm_unreachable("Unsupported addressing mode!");
2566 }
2567
2568 Offset += InstrOffs * Scale;
2569 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2570 if (Offset < 0) {
2571 Offset = -Offset;
2572 isSub = true;
2573 }
2574
2575 // Attempt to fold address comp. if opcode has offset bits
2576 if (NumBits > 0) {
2577 // Common case: small offset, fits into instruction.
2578 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2579 int ImmedOffset = Offset / Scale;
2580 unsigned Mask = (1 << NumBits) - 1;
2581 if ((unsigned)Offset <= Mask * Scale) {
2582 // Replace the FrameIndex with sp
2583 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2584 // FIXME: When addrmode2 goes away, this will simplify (like the
2585 // T2 version), as the LDR.i12 versions don't need the encoding
2586 // tricks for the offset value.
2587 if (isSub) {
2589 ImmedOffset = -ImmedOffset;
2590 else
2591 ImmedOffset |= 1 << NumBits;
2592 }
2593 ImmOp.ChangeToImmediate(ImmedOffset);
2594 Offset = 0;
2595 return true;
2596 }
2597
2598 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2599 ImmedOffset = ImmedOffset & Mask;
2600 if (isSub) {
2602 ImmedOffset = -ImmedOffset;
2603 else
2604 ImmedOffset |= 1 << NumBits;
2605 }
2606 ImmOp.ChangeToImmediate(ImmedOffset);
2607 Offset &= ~(Mask*Scale);
2608 }
2609 }
2610
2611 Offset = (isSub) ? -Offset : Offset;
2612 return Offset == 0;
2613}
2614
2615/// analyzeCompare - For a comparison instruction, return the source registers
2616/// in SrcReg and SrcReg2 if having two register operands, and the value it
2617/// compares against in CmpValue. Return true if the comparison instruction
2618/// can be analyzed.
2620 Register &SrcReg2, int64_t &CmpMask,
2621 int64_t &CmpValue) const {
2622 switch (MI.getOpcode()) {
2623 default: break;
2624 case ARM::CMPri:
2625 case ARM::t2CMPri:
2626 case ARM::tCMPi8:
2627 SrcReg = MI.getOperand(0).getReg();
2628 SrcReg2 = 0;
2629 CmpMask = ~0;
2630 CmpValue = MI.getOperand(1).getImm();
2631 return true;
2632 case ARM::CMPrr:
2633 case ARM::t2CMPrr:
2634 case ARM::tCMPr:
2635 SrcReg = MI.getOperand(0).getReg();
2636 SrcReg2 = MI.getOperand(1).getReg();
2637 CmpMask = ~0;
2638 CmpValue = 0;
2639 return true;
2640 case ARM::TSTri:
2641 case ARM::t2TSTri:
2642 SrcReg = MI.getOperand(0).getReg();
2643 SrcReg2 = 0;
2644 CmpMask = MI.getOperand(1).getImm();
2645 CmpValue = 0;
2646 return true;
2647 }
2648
2649 return false;
2650}
2651
2652/// isSuitableForMask - Identify a suitable 'and' instruction that
2653/// operates on the given source register and applies the same mask
2654/// as a 'tst' instruction. Provide a limited look-through for copies.
2655/// When successful, MI will hold the found instruction.
2657 int CmpMask, bool CommonUse) {
2658 switch (MI->getOpcode()) {
2659 case ARM::ANDri:
2660 case ARM::t2ANDri:
2661 if (CmpMask != MI->getOperand(2).getImm())
2662 return false;
2663 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2664 return true;
2665 break;
2666 }
2667
2668 return false;
2669}
2670
2671/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2672/// the condition code if we modify the instructions such that flags are
2673/// set by ADD(a,b,X).
2675 switch (CC) {
2676 default: return ARMCC::AL;
2677 case ARMCC::HS: return ARMCC::LO;
2678 case ARMCC::LO: return ARMCC::HS;
2679 case ARMCC::VS: return ARMCC::VS;
2680 case ARMCC::VC: return ARMCC::VC;
2681 }
2682}
2683
2684/// isRedundantFlagInstr - check whether the first instruction, whose only
2685/// purpose is to update flags, can be made redundant.
2686/// CMPrr can be made redundant by SUBrr if the operands are the same.
2687/// CMPri can be made redundant by SUBri if the operands are the same.
2688/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2689/// This function can be extended later on.
2690inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2691 Register SrcReg, Register SrcReg2,
2692 int64_t ImmValue,
2693 const MachineInstr *OI,
2694 bool &IsThumb1) {
2695 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2696 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2697 ((OI->getOperand(1).getReg() == SrcReg &&
2698 OI->getOperand(2).getReg() == SrcReg2) ||
2699 (OI->getOperand(1).getReg() == SrcReg2 &&
2700 OI->getOperand(2).getReg() == SrcReg))) {
2701 IsThumb1 = false;
2702 return true;
2703 }
2704
2705 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2706 ((OI->getOperand(2).getReg() == SrcReg &&
2707 OI->getOperand(3).getReg() == SrcReg2) ||
2708 (OI->getOperand(2).getReg() == SrcReg2 &&
2709 OI->getOperand(3).getReg() == SrcReg))) {
2710 IsThumb1 = true;
2711 return true;
2712 }
2713
2714 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2715 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2716 OI->getOperand(1).getReg() == SrcReg &&
2717 OI->getOperand(2).getImm() == ImmValue) {
2718 IsThumb1 = false;
2719 return true;
2720 }
2721
2722 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2723 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2724 OI->getOperand(2).getReg() == SrcReg &&
2725 OI->getOperand(3).getImm() == ImmValue) {
2726 IsThumb1 = true;
2727 return true;
2728 }
2729
2730 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2731 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2732 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2733 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2734 OI->getOperand(0).getReg() == SrcReg &&
2735 OI->getOperand(1).getReg() == SrcReg2) {
2736 IsThumb1 = false;
2737 return true;
2738 }
2739
2740 if (CmpI->getOpcode() == ARM::tCMPr &&
2741 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2742 OI->getOpcode() == ARM::tADDrr) &&
2743 OI->getOperand(0).getReg() == SrcReg &&
2744 OI->getOperand(2).getReg() == SrcReg2) {
2745 IsThumb1 = true;
2746 return true;
2747 }
2748
2749 return false;
2750}
2751
/// Return true if MI is an opcode that optimizeCompareInstr can convert into
/// a flag-setting (CPSR-defining) form. IsThumb1 is set to true only when MI
/// is one of the Thumb1 opcodes listed first; it is left untouched otherwise.
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
  switch (MI->getOpcode()) {
  default: return false;
  // Thumb1 candidates. These record IsThumb1 and then fall through to the
  // shared 'return true' below.
  case ARM::tLSLri:
  case ARM::tLSRri:
  case ARM::tLSLrr:
  case ARM::tLSRrr:
  case ARM::tSUBrr:
  case ARM::tADDrr:
  case ARM::tADDi3:
  case ARM::tADDi8:
  case ARM::tSUBi3:
  case ARM::tSUBi8:
  case ARM::tMUL:
  case ARM::tADC:
  case ARM::tSBC:
  case ARM::tRSB:
  case ARM::tAND:
  case ARM::tORR:
  case ARM::tEOR:
  case ARM::tBIC:
  case ARM::tMVN:
  case ARM::tASRri:
  case ARM::tASRrr:
  case ARM::tROR:
    IsThumb1 = true;
    [[fallthrough]];
  // ARM and Thumb2 candidates.
  case ARM::RSBrr:
  case ARM::RSBri:
  case ARM::RSCrr:
  case ARM::RSCri:
  case ARM::ADDrr:
  case ARM::ADDri:
  case ARM::ADCrr:
  case ARM::ADCri:
  case ARM::SUBrr:
  case ARM::SUBri:
  case ARM::SBCrr:
  case ARM::SBCri:
  case ARM::t2RSBri:
  case ARM::t2ADDrr:
  case ARM::t2ADDri:
  case ARM::t2ADCrr:
  case ARM::t2ADCri:
  case ARM::t2SUBrr:
  case ARM::t2SUBri:
  case ARM::t2SBCrr:
  case ARM::t2SBCri:
  case ARM::ANDrr:
  case ARM::ANDri:
  case ARM::ANDrsr:
  case ARM::ANDrsi:
  case ARM::t2ANDrr:
  case ARM::t2ANDri:
  case ARM::t2ANDrs:
  case ARM::ORRrr:
  case ARM::ORRri:
  case ARM::ORRrsr:
  case ARM::ORRrsi:
  case ARM::t2ORRrr:
  case ARM::t2ORRri:
  case ARM::t2ORRrs:
  case ARM::EORrr:
  case ARM::EORri:
  case ARM::EORrsr:
  case ARM::EORrsi:
  case ARM::t2EORrr:
  case ARM::t2EORri:
  case ARM::t2EORrs:
  case ARM::BICri:
  case ARM::BICrr:
  case ARM::BICrsi:
  case ARM::BICrsr:
  case ARM::t2BICri:
  case ARM::t2BICrr:
  case ARM::t2BICrs:
  case ARM::t2LSRri:
  case ARM::t2LSRrr:
  case ARM::t2LSLri:
  case ARM::t2LSLrr:
  case ARM::MOVsr:
  case ARM::MOVsi:
    return true;
  }
}
2837
2838/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2839/// comparison into one that sets the zero bit in the flags register;
2840/// Remove a redundant Compare instruction if an earlier instruction can set the
2841/// flags in the same way as Compare.
2842/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2843/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2844/// condition code of instructions which use the flags.
2846 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
2847 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
2848 // Get the unique definition of SrcReg.
2849 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2850 if (!MI) return false;
2851
2852 // Masked compares sometimes use the same register as the corresponding 'and'.
2853 if (CmpMask != ~0) {
2854 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2855 MI = nullptr;
2857 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2858 UI != UE; ++UI) {
2859 if (UI->getParent() != CmpInstr.getParent())
2860 continue;
2861 MachineInstr *PotentialAND = &*UI;
2862 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2863 isPredicated(*PotentialAND))
2864 continue;
2865 MI = PotentialAND;
2866 break;
2867 }
2868 if (!MI) return false;
2869 }
2870 }
2871
2872 // Get ready to iterate backward from CmpInstr.
2873 MachineBasicBlock::iterator I = CmpInstr, E = MI,
2874 B = CmpInstr.getParent()->begin();
2875
2876 // Early exit if CmpInstr is at the beginning of the BB.
2877 if (I == B) return false;
2878
2879 // There are two possible candidates which can be changed to set CPSR:
2880 // One is MI, the other is a SUB or ADD instruction.
2881 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2882 // ADDr[ri](r1, r2, X).
2883 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2884 MachineInstr *SubAdd = nullptr;
2885 if (SrcReg2 != 0)
2886 // MI is not a candidate for CMPrr.
2887 MI = nullptr;
2888 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2889 // Conservatively refuse to convert an instruction which isn't in the same
2890 // BB as the comparison.
2891 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2892 // Thus we cannot return here.
2893 if (CmpInstr.getOpcode() == ARM::CMPri ||
2894 CmpInstr.getOpcode() == ARM::t2CMPri ||
2895 CmpInstr.getOpcode() == ARM::tCMPi8)
2896 MI = nullptr;
2897 else
2898 return false;
2899 }
2900
2901 bool IsThumb1 = false;
2902 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2903 return false;
2904
2905 // We also want to do this peephole for cases like this: if (a*b == 0),
2906 // and optimise away the CMP instruction from the generated code sequence:
2907 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2908 // resulting from the select instruction, but these MOVS instructions for
2909 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2910 // However, if we only have MOVS instructions in between the CMP and the
2911 // other instruction (the MULS in this example), then the CPSR is dead so we
2912 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2913 // reordering and then continue the analysis hoping we can eliminate the
2914 // CMP. This peephole works on the vregs, so is still in SSA form. As a
2915 // consequence, the movs won't redefine/kill the MUL operands which would
2916 // make this reordering illegal.
2918 if (MI && IsThumb1) {
2919 --I;
2920 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
2921 bool CanReorder = true;
2922 for (; I != E; --I) {
2923 if (I->getOpcode() != ARM::tMOVi8) {
2924 CanReorder = false;
2925 break;
2926 }
2927 }
2928 if (CanReorder) {
2929 MI = MI->removeFromParent();
2930 E = CmpInstr;
2931 CmpInstr.getParent()->insert(E, MI);
2932 }
2933 }
2934 I = CmpInstr;
2935 E = MI;
2936 }
2937
2938 // Check that CPSR isn't set between the comparison instruction and the one we
2939 // want to change. At the same time, search for SubAdd.
2940 bool SubAddIsThumb1 = false;
2941 do {
2942 const MachineInstr &Instr = *--I;
2943
2944 // Check whether CmpInstr can be made redundant by the current instruction.
2945 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
2946 SubAddIsThumb1)) {
2947 SubAdd = &*I;
2948 break;
2949 }
2950
2951 // Allow E (which was initially MI) to be SubAdd but do not search before E.
2952 if (I == E)
2953 break;
2954
2955 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2956 Instr.readsRegister(ARM::CPSR, TRI))
2957 // This instruction modifies or uses CPSR after the one we want to
2958 // change. We can't do this transformation.
2959 return false;
2960
2961 if (I == B) {
2962 // In some cases, we scan the use-list of an instruction for an AND;
2963 // that AND is in the same BB, but may not be scheduled before the
2964 // corresponding TST. In that case, bail out.
2965 //
2966 // FIXME: We could try to reschedule the AND.
2967 return false;
2968 }
2969 } while (true);
2970
2971 // Return false if no candidates exist.
2972 if (!MI && !SubAdd)
2973 return false;
2974
2975 // If we found a SubAdd, use it as it will be closer to the CMP
2976 if (SubAdd) {
2977 MI = SubAdd;
2978 IsThumb1 = SubAddIsThumb1;
2979 }
2980
2981 // We can't use a predicated instruction - it doesn't always write the flags.
2982 if (isPredicated(*MI))
2983 return false;
2984
2985 // Scan forward for the use of CPSR
2986 // When checking against MI: if it's a conditional code that requires
2987 // checking of the V bit or C bit, then this is not safe to do.
2988 // It is safe to remove CmpInstr if CPSR is redefined or killed.
2989 // If we are done with the basic block, we need to check whether CPSR is
2990 // live-out.
2992 OperandsToUpdate;
2993 bool isSafe = false;
2994 I = CmpInstr;
2995 E = CmpInstr.getParent()->end();
2996 while (!isSafe && ++I != E) {
2997 const MachineInstr &Instr = *I;
2998 for (unsigned IO = 0, EO = Instr.getNumOperands();
2999 !isSafe && IO != EO; ++IO) {
3000 const MachineOperand &MO = Instr.getOperand(IO);
3001 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3002 isSafe = true;
3003 break;
3004 }
3005 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3006 continue;
3007 if (MO.isDef()) {
3008 isSafe = true;
3009 break;
3010 }
3011 // Condition code is after the operand before CPSR except for VSELs.
3013 bool IsInstrVSel = true;
3014 switch (Instr.getOpcode()) {
3015 default:
3016 IsInstrVSel = false;
3017 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3018 break;
3019 case ARM::VSELEQD:
3020 case ARM::VSELEQS:
3021 case ARM::VSELEQH:
3022 CC = ARMCC::EQ;
3023 break;
3024 case ARM::VSELGTD:
3025 case ARM::VSELGTS:
3026 case ARM::VSELGTH:
3027 CC = ARMCC::GT;
3028 break;
3029 case ARM::VSELGED:
3030 case ARM::VSELGES:
3031 case ARM::VSELGEH:
3032 CC = ARMCC::GE;
3033 break;
3034 case ARM::VSELVSD:
3035 case ARM::VSELVSS:
3036 case ARM::VSELVSH:
3037 CC = ARMCC::VS;
3038 break;
3039 }
3040
3041 if (SubAdd) {
3042 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3043 // on CMP needs to be updated to be based on SUB.
3044 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3045 // needs to be modified.
3046 // Push the condition code operands to OperandsToUpdate.
3047 // If it is safe to remove CmpInstr, the condition code of these
3048 // operands will be modified.
3049 unsigned Opc = SubAdd->getOpcode();
3050 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3051 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3052 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3053 Opc == ARM::tSUBi8;
3054 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3055 if (!IsSub ||
3056 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3057 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3058 // VSel doesn't support condition code update.
3059 if (IsInstrVSel)
3060 return false;
3061 // Ensure we can swap the condition.
3062 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3063 if (NewCC == ARMCC::AL)
3064 return false;
3065 OperandsToUpdate.push_back(
3066 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3067 }
3068 } else {
3069 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3070 switch (CC) {
3071 case ARMCC::EQ: // Z
3072 case ARMCC::NE: // Z
3073 case ARMCC::MI: // N
3074 case ARMCC::PL: // N
3075 case ARMCC::AL: // none
3076 // CPSR can be used multiple times, we should continue.
3077 break;
3078 case ARMCC::HS: // C
3079 case ARMCC::LO: // C
3080 case ARMCC::VS: // V
3081 case ARMCC::VC: // V
3082 case ARMCC::HI: // C Z
3083 case ARMCC::LS: // C Z
3084 case ARMCC::GE: // N V
3085 case ARMCC::LT: // N V
3086 case ARMCC::GT: // Z N V
3087 case ARMCC::LE: // Z N V
3088 // The instruction uses the V bit or C bit which is not safe.
3089 return false;
3090 }
3091 }
3092 }
3093 }
3094
3095 // If CPSR is not killed nor re-defined, we should check whether it is
3096 // live-out. If it is live-out, do not optimize.
3097 if (!isSafe) {
3098 MachineBasicBlock *MBB = CmpInstr.getParent();
3099 for (MachineBasicBlock *Succ : MBB->successors())
3100 if (Succ->isLiveIn(ARM::CPSR))
3101 return false;
3102 }
3103
3104 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3105 // set CPSR so this is represented as an explicit output)
3106 if (!IsThumb1) {
3107 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3108 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3109 MI->getOperand(CPSRRegNum).setIsDef(true);
3110 }
3111 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3112 CmpInstr.eraseFromParent();
3113
3114 // Modify the condition code of operands in OperandsToUpdate.
3115 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3116 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3117 for (auto &[MO, Cond] : OperandsToUpdate)
3118 MO->setImm(Cond);
3119
3120 MI->clearRegisterDeads(ARM::CPSR);
3121
3122 return true;
3123}
3124
3126 // Do not sink MI if it might be used to optimize a redundant compare.
3127 // We heuristically only look at the instruction immediately following MI to
3128 // avoid potentially searching the entire basic block.
3129 if (isPredicated(MI))
3130 return true;
3132 ++Next;
3133 Register SrcReg, SrcReg2;
3134 int64_t CmpMask, CmpValue;
3135 bool IsThumb1;
3136 if (Next != MI.getParent()->end() &&
3137 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3138 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3139 return false;
3140 return true;
3141}
3142
3144 Register Reg,
3145 MachineRegisterInfo *MRI) const {
3146 // Fold large immediates into add, sub, or, xor.
3147 unsigned DefOpc = DefMI.getOpcode();
3148 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3149 DefOpc != ARM::tMOVi32imm)
3150 return false;
3151 if (!DefMI.getOperand(1).isImm())
3152 // Could be t2MOVi32imm @xx
3153 return false;
3154
3155 if (!MRI->hasOneNonDBGUse(Reg))
3156 return false;
3157
3158 const MCInstrDesc &DefMCID = DefMI.getDesc();
3159 if (DefMCID.hasOptionalDef()) {
3160 unsigned NumOps = DefMCID.getNumOperands();
3161 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3162 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3163 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3164 // to delete DefMI.
3165 return false;
3166 }
3167
3168 const MCInstrDesc &UseMCID = UseMI.getDesc();
3169 if (UseMCID.hasOptionalDef()) {
3170 unsigned NumOps = UseMCID.getNumOperands();
3171 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3172 // If the instruction sets the flag, do not attempt this optimization
3173 // since it may change the semantics of the code.
3174 return false;
3175 }
3176
3177 unsigned UseOpc = UseMI.getOpcode();
3178 unsigned NewUseOpc = 0;
3179 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3180 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3181 bool Commute = false;
3182 switch (UseOpc) {
3183 default: return false;
3184 case ARM::SUBrr:
3185 case ARM::ADDrr:
3186 case ARM::ORRrr:
3187 case ARM::EORrr:
3188 case ARM::t2SUBrr:
3189 case ARM::t2ADDrr:
3190 case ARM::t2ORRrr:
3191 case ARM::t2EORrr: {
3192 Commute = UseMI.getOperand(2).getReg() != Reg;
3193 switch (UseOpc) {
3194 default: break;
3195 case ARM::ADDrr:
3196 case ARM::SUBrr:
3197 if (UseOpc == ARM::SUBrr && Commute)
3198 return false;
3199
3200 // ADD/SUB are special because they're essentially the same operation, so
3201 // we can handle a larger range of immediates.
3202 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3203 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3204 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3205 ImmVal = -ImmVal;
3206 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3207 } else
3208 return false;
3209 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3210 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3211 break;
3212 case ARM::ORRrr:
3213 case ARM::EORrr:
3214 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3215 return false;
3216 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3217 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3218 switch (UseOpc) {
3219 default: break;
3220 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3221 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3222 }
3223 break;
3224 case ARM::t2ADDrr:
3225 case ARM::t2SUBrr: {
3226 if (UseOpc == ARM::t2SUBrr && Commute)
3227 return false;
3228
3229 // ADD/SUB are special because they're essentially the same operation, so
3230 // we can handle a larger range of immediates.
3231 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3232 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3233 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3234 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3235 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3236 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3237 ImmVal = -ImmVal;
3238 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3239 } else
3240 return false;
3241 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3242 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3243 break;
3244 }
3245 case ARM::t2ORRrr:
3246 case ARM::t2EORrr:
3247 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3248 return false;
3249 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3250 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3251 switch (UseOpc) {
3252 default: break;
3253 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3254 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3255 }
3256 break;
3257 }
3258 }
3259 }
3260
3261 unsigned OpIdx = Commute ? 2 : 1;
3262 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3263 bool isKill = UseMI.getOperand(OpIdx).isKill();
3264 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3265 Register NewReg = MRI->createVirtualRegister(TRC);
3266 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3267 NewReg)
3268 .addReg(Reg1, getKillRegState(isKill))
3269 .addImm(SOImmValV1)
3271 .add(condCodeOp());
3272 UseMI.setDesc(get(NewUseOpc));
3273 UseMI.getOperand(1).setReg(NewReg);
3274 UseMI.getOperand(1).setIsKill();
3275 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3276 DefMI.eraseFromParent();
3277 // FIXME: t2ADDrr should be split, as different rulles apply when writing to SP.
3278 // Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm].
3279 // Then the below code will not be needed, as the input/output register
3280 // classes will be rgpr or gprSP.
3281 // For now, we fix the UseMI operand explicitly here:
3282 switch(NewUseOpc){
3283 case ARM::t2ADDspImm:
3284 case ARM::t2SUBspImm:
3285 case ARM::t2ADDri:
3286 case ARM::t2SUBri:
3287 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3288 }
3289 return true;
3290}
3291
3292static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3293 const MachineInstr &MI) {
3294 switch (MI.getOpcode()) {
3295 default: {
3296 const MCInstrDesc &Desc = MI.getDesc();
3297 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3298 assert(UOps >= 0 && "bad # UOps");
3299 return UOps;
3300 }
3301
3302 case ARM::LDRrs:
3303 case ARM::LDRBrs:
3304 case ARM::STRrs:
3305 case ARM::STRBrs: {
3306 unsigned ShOpVal = MI.getOperand(3).getImm();
3307 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3308 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3309 if (!isSub &&
3310 (ShImm == 0 ||
3311 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3312 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3313 return 1;
3314 return 2;
3315 }
3316
3317 case ARM::LDRH:
3318 case ARM::STRH: {
3319 if (!MI.getOperand(2).getReg())
3320 return 1;
3321
3322 unsigned ShOpVal = MI.getOperand(3).getImm();
3323 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3324 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3325 if (!isSub &&
3326 (ShImm == 0 ||
3327 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3328 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3329 return 1;
3330 return 2;
3331 }
3332
3333 case ARM::LDRSB:
3334 case ARM::LDRSH:
3335 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3336
3337 case ARM::LDRSB_POST:
3338 case ARM::LDRSH_POST: {
3339 Register Rt = MI.getOperand(0).getReg();
3340 Register Rm = MI.getOperand(3).getReg();
3341 return (Rt == Rm) ? 4 : 3;
3342 }
3343
3344 case ARM::LDR_PRE_REG:
3345 case ARM::LDRB_PRE_REG: {
3346 Register Rt = MI.getOperand(0).getReg();
3347 Register Rm = MI.getOperand(3).getReg();
3348 if (Rt == Rm)
3349 return 3;
3350 unsigned ShOpVal = MI.getOperand(4).getImm();
3351 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3352 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3353 if (!isSub &&
3354 (ShImm == 0 ||
3355 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3356 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3357 return 2;
3358 return 3;
3359 }
3360
3361 case ARM::STR_PRE_REG:
3362 case ARM::STRB_PRE_REG: {
3363 unsigned ShOpVal = MI.getOperand(4).getImm();
3364 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3365 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3366 if (!isSub &&
3367 (ShImm == 0 ||
3368 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3369 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3370 return 2;
3371 return 3;
3372 }
3373
3374 case ARM::LDRH_PRE:
3375 case ARM::STRH_PRE: {
3376 Register Rt = MI.getOperand(0).getReg();
3377 Register Rm = MI.getOperand(3).getReg();
3378 if (!Rm)
3379 return 2;
3380 if (Rt == Rm)
3381 return 3;
3382 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3383 }
3384
3385 case ARM::LDR_POST_REG:
3386 case ARM::LDRB_POST_REG:
3387 case ARM::LDRH_POST: {
3388 Register Rt = MI.getOperand(0).getReg();
3389 Register Rm = MI.getOperand(3).getReg();
3390 return (Rt == Rm) ? 3 : 2;
3391 }
3392
3393 case ARM::LDR_PRE_IMM:
3394 case ARM::LDRB_PRE_IMM:
3395 case ARM::LDR_POST_IMM:
3396 case ARM::LDRB_POST_IMM:
3397 case ARM::STRB_POST_IMM:
3398 case ARM::STRB_POST_REG:
3399 case ARM::STRB_PRE_IMM:
3400 case ARM::STRH_POST:
3401 case ARM::STR_POST_IMM:
3402 case ARM::STR_POST_REG:
3403 case ARM::STR_PRE_IMM:
3404 return 2;
3405
3406 case ARM::LDRSB_PRE:
3407 case ARM::LDRSH_PRE: {
3408 Register Rm = MI.getOperand(3).getReg();
3409 if (Rm == 0)
3410 return 3;
3411 Register Rt = MI.getOperand(0).getReg();
3412 if (Rt == Rm)
3413 return 4;
3414 unsigned ShOpVal = MI.getOperand(4).getImm();
3415 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3416 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3417 if (!isSub &&
3418 (ShImm == 0 ||
3419 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3420 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3421 return 3;
3422 return 4;
3423 }
3424
3425 case ARM::LDRD: {
3426 Register Rt = MI.getOperand(0).getReg();
3427 Register Rn = MI.getOperand(2).getReg();
3428 Register Rm = MI.getOperand(3).getReg();
3429 if (Rm)
3430 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3431 : 3;
3432 return (Rt == Rn) ? 3 : 2;
3433 }
3434
3435 case ARM::STRD: {
3436 Register Rm = MI.getOperand(3).getReg();
3437 if (Rm)
3438 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3439 : 3;
3440 return 2;
3441 }
3442
3443 case ARM::LDRD_POST:
3444 case ARM::t2LDRD_POST:
3445 return 3;
3446
3447 case ARM::STRD_POST:
3448 case ARM::t2STRD_POST:
3449 return 4;
3450
3451 case ARM::LDRD_PRE: {
3452 Register Rt = MI.getOperand(0).getReg();
3453 Register Rn = MI.getOperand(3).getReg();
3454 Register Rm = MI.getOperand(4).getReg();
3455 if (Rm)
3456 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3457 : 4;
3458 return (Rt == Rn) ? 4 : 3;
3459 }
3460
3461 case ARM::t2LDRD_PRE: {
3462 Register Rt = MI.getOperand(0).getReg();
3463 Register Rn = MI.getOperand(3).getReg();
3464 return (Rt == Rn) ? 4 : 3;
3465 }
3466
3467 case ARM::STRD_PRE: {
3468 Register Rm = MI.getOperand(4).getReg();
3469 if (Rm)
3470 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3471 : 4;
3472 return 3;
3473 }
3474
3475 case ARM::t2STRD_PRE:
3476 return 3;
3477
3478 case ARM::t2LDR_POST:
3479 case ARM::t2LDRB_POST:
3480 case ARM::t2LDRB_PRE:
3481 case ARM::t2LDRSBi12:
3482 case ARM::t2LDRSBi8:
3483 case ARM::t2LDRSBpci:
3484 case ARM::t2LDRSBs:
3485 case ARM::t2LDRH_POST:
3486 case ARM::t2LDRH_PRE:
3487 case ARM::t2LDRSBT:
3488 case ARM::t2LDRSB_POST:
3489 case ARM::t2LDRSB_PRE:
3490 case ARM::t2LDRSH_POST:
3491 case ARM::t2LDRSH_PRE:
3492 case ARM::t2LDRSHi12:
3493 case ARM::t2LDRSHi8:
3494 case ARM::t2LDRSHpci:
3495 case ARM::t2LDRSHs:
3496 return 2;
3497
3498 case ARM::t2LDRDi8: {
3499 Register Rt = MI.getOperand(0).getReg();
3500 Register Rn = MI.getOperand(2).getReg();
3501 return (Rt == Rn) ? 3 : 2;
3502 }
3503
3504 case ARM::t2STRB_POST:
3505 case ARM::t2STRB_PRE:
3506 case ARM::t2STRBs:
3507 case ARM::t2STRDi8:
3508 case ARM::t2STRH_POST:
3509 case ARM::t2STRH_PRE:
3510 case ARM::t2STRHs:
3511 case ARM::t2STR_POST:
3512 case ARM::t2STR_PRE:
3513 case ARM::t2STRs:
3514 return 2;
3515 }
3516}
3517
3518// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3519// can't be easily determined return 0 (missing MachineMemOperand).
3520//
3521// FIXME: The current MachineInstr design does not support relying on machine
3522// mem operands to determine the width of a memory access. Instead, we expect
3523// the target to provide this information based on the instruction opcode and
3524// operands. However, using MachineMemOperand is the best solution now for
3525// two reasons:
3526//
3527// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3528// operands. This is much more dangerous than using the MachineMemOperand
3529// sizes because CodeGen passes can insert/remove optional machine operands. In
3530// fact, it's totally incorrect for preRA passes and appears to be wrong for
3531// postRA passes as well.
3532//
3533// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3534// machine model that calls this should handle the unknown (zero size) case.
3535//
3536// Long term, we should require a target hook that verifies MachineMemOperand
3537// sizes during MC lowering. That target hook should be local to MC lowering
3538// because we can't ensure that it is aware of other MI forms. Doing this will
3539// ensure that MachineMemOperands are correctly propagated through all passes.
3541 unsigned Size = 0;
3542 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3543 E = MI.memoperands_end();
3544 I != E; ++I) {
3545 Size += (*I)->getSize().getValue();
3546 }
3547 // FIXME: The scheduler currently can't handle values larger than 16. But
3548 // the values can actually go up to 32 for floating-point load/store
3549 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3550 // operations isn't right; we could end up with "extra" memory operands for
3551 // various reasons, like tail merge merging two memory operations.
3552 return std::min(Size / 4, 16U);
3553}
3554
3556 unsigned NumRegs) {
3557 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3558 switch (Opc) {
3559 default:
3560 break;
3561 case ARM::VLDMDIA_UPD:
3562 case ARM::VLDMDDB_UPD:
3563 case ARM::VLDMSIA_UPD:
3564 case ARM::VLDMSDB_UPD:
3565 case ARM::VSTMDIA_UPD:
3566 case ARM::VSTMDDB_UPD:
3567 case ARM::VSTMSIA_UPD:
3568 case ARM::VSTMSDB_UPD:
3569 case ARM::LDMIA_UPD:
3570 case ARM::LDMDA_UPD:
3571 case ARM::LDMDB_UPD:
3572 case ARM::LDMIB_UPD:
3573 case ARM::STMIA_UPD:
3574 case ARM::STMDA_UPD:
3575 case ARM::STMDB_UPD:
3576 case ARM::STMIB_UPD:
3577 case ARM::tLDMIA_UPD:
3578 case ARM::tSTMIA_UPD:
3579 case ARM::t2LDMIA_UPD:
3580 case ARM::t2LDMDB_UPD:
3581 case ARM::t2STMIA_UPD:
3582 case ARM::t2STMDB_UPD:
3583 ++UOps; // One for base register writeback.
3584 break;
3585 case ARM::LDMIA_RET:
3586 case ARM::tPOP_RET:
3587 case ARM::t2LDMIA_RET:
3588 UOps += 2; // One for base reg wb, one for write to pc.
3589 break;
3590 }
3591 return UOps;
3592}
3593
3595 const MachineInstr &MI) const {
3596 if (!ItinData || ItinData->isEmpty())
3597 return 1;
3598
3599 const MCInstrDesc &Desc = MI.getDesc();
3600 unsigned Class = Desc.getSchedClass();
3601 int ItinUOps = ItinData->getNumMicroOps(Class);
3602 if (ItinUOps >= 0) {
3603 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3604 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3605
3606 return ItinUOps;
3607 }
3608
3609 unsigned Opc = MI.getOpcode();
3610 switch (Opc) {
3611 default:
3612 llvm_unreachable("Unexpected multi-uops instruction!");
3613 case ARM::VLDMQIA:
3614 case ARM::VSTMQIA:
3615 return 2;
3616
3617 // The number of uOps for load / store multiple are determined by the number
3618 // registers.
3619 //
3620 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3621 // same cycle. The scheduling for the first load / store must be done
3622 // separately by assuming the address is not 64-bit aligned.
3623 //
3624 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3625 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3626 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3627 case ARM::VLDMDIA:
3628 case ARM::VLDMDIA_UPD:
3629 case ARM::VLDMDDB_UPD:
3630 case ARM::VLDMSIA:
3631 case ARM::VLDMSIA_UPD:
3632 case ARM::VLDMSDB_UPD:
3633 case ARM::VSTMDIA:
3634 case ARM::VSTMDIA_UPD:
3635 case ARM::VSTMDDB_UPD:
3636 case ARM::VSTMSIA:
3637 case ARM::VSTMSIA_UPD:
3638 case ARM::VSTMSDB_UPD: {
3639 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3640 return (NumRegs / 2) + (NumRegs % 2) + 1;
3641 }
3642
3643 case ARM::LDMIA_RET:
3644 case ARM::LDMIA:
3645 case ARM::LDMDA:
3646 case ARM::LDMDB:
3647 case ARM::LDMIB:
3648 case ARM::LDMIA_UPD:
3649 case ARM::LDMDA_UPD:
3650 case ARM::LDMDB_UPD:
3651 case ARM::LDMIB_UPD:
3652 case ARM::STMIA:
3653 case ARM::STMDA:
3654 case ARM::STMDB:
3655 case ARM::STMIB:
3656 case ARM::STMIA_UPD:
3657 case ARM::STMDA_UPD:
3658 case ARM::STMDB_UPD:
3659 case ARM::STMIB_UPD:
3660 case ARM::tLDMIA:
3661 case ARM::tLDMIA_UPD:
3662 case ARM::tSTMIA_UPD:
3663 case ARM::tPOP_RET:
3664 case ARM::tPOP:
3665 case ARM::tPUSH:
3666 case ARM::t2LDMIA_RET:
3667 case ARM::t2LDMIA:
3668 case ARM::t2LDMDB:
3669 case ARM::t2LDMIA_UPD:
3670 case ARM::t2LDMDB_UPD:
3671 case ARM::t2STMIA:
3672 case ARM::t2STMDB:
3673 case ARM::t2STMIA_UPD:
3674 case ARM::t2STMDB_UPD: {
3675 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3676 switch (Subtarget.getLdStMultipleTiming()) {
3680 // Assume the worst.
3681 return NumRegs;
3683 if (NumRegs < 4)
3684 return 2;
3685 // 4 registers would be issued: 2, 2.
3686 // 5 registers would be issued: 2, 2, 1.
3687 unsigned UOps = (NumRegs / 2);
3688 if (NumRegs % 2)
3689 ++UOps;
3690 return UOps;
3691 }
3693 unsigned UOps = (NumRegs / 2);
3694 // If there are odd number of registers or if it's not 64-bit aligned,
3695 // then it takes an extra AGU (Address Generation Unit) cycle.
3696 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3697 (*MI.memoperands_begin())->getAlign() < Align(8))
3698 ++UOps;
3699 return UOps;
3700 }
3701 }
3702 }
3703 }
3704 llvm_unreachable("Didn't find the number of microops");
3705}
3706
3707std::optional<unsigned>
3708ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3709 const MCInstrDesc &DefMCID, unsigned DefClass,
3710 unsigned DefIdx, unsigned DefAlign) const {
3711 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3712 if (RegNo <= 0)
3713 // Def is the address writeback.
3714 return ItinData->getOperandCycle(DefClass, DefIdx);
3715
3716 unsigned DefCycle;
3717 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3718 // (regno / 2) + (regno % 2) + 1
3719 DefCycle = RegNo / 2 + 1;
3720 if (RegNo % 2)
3721 ++DefCycle;
3722 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3723 DefCycle = RegNo;
3724 bool isSLoad = false;
3725
3726 switch (DefMCID.getOpcode()) {
3727 default: break;
3728 case ARM::VLDMSIA:
3729 case ARM::VLDMSIA_UPD:
3730 case ARM::VLDMSDB_UPD:
3731 isSLoad = true;
3732 break;
3733 }
3734
3735 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3736 // then it takes an extra cycle.
3737 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3738 ++DefCycle;
3739 } else {
3740 // Assume the worst.
3741 DefCycle = RegNo + 2;
3742 }
3743
3744 return DefCycle;
3745}
3746
3747std::optional<unsigned>
3748ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3749 const MCInstrDesc &DefMCID, unsigned DefClass,
3750 unsigned DefIdx, unsigned DefAlign) const {
3751 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3752 if (RegNo <= 0)
3753 // Def is the address writeback.
3754 return ItinData->getOperandCycle(DefClass, DefIdx);
3755
3756 unsigned DefCycle;
3757 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3758 // 4 registers would be issued: 1, 2, 1.
3759 // 5 registers would be issued: 1, 2, 2.
3760 DefCycle = RegNo / 2;
3761 if (DefCycle < 1)
3762 DefCycle = 1;
3763 // Result latency is issue cycle + 2: E2.
3764 DefCycle += 2;
3765 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3766 DefCycle = (RegNo / 2);
3767 // If there are odd number of registers or if it's not 64-bit aligned,
3768 // then it takes an extra AGU (Address Generation Unit) cycle.
3769 if ((RegNo % 2) || DefAlign < 8)
3770 ++DefCycle;
3771 // Result latency is AGU cycles + 2.
3772 DefCycle += 2;
3773 } else {
3774 // Assume the worst.
3775 DefCycle = RegNo + 2;
3776 }
3777
3778 return DefCycle;
3779}
3780
3781std::optional<unsigned>
3782ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3783 const MCInstrDesc &UseMCID, unsigned UseClass,
3784 unsigned UseIdx, unsigned UseAlign) const {
3785 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3786 if (RegNo <= 0)
3787 return ItinData->getOperandCycle(UseClass, UseIdx);
3788
3789 unsigned UseCycle;
3790 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3791 // (regno / 2) + (regno % 2) + 1
3792 UseCycle = RegNo / 2 + 1;
3793 if (RegNo % 2)
3794 ++UseCycle;
3795 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3796 UseCycle = RegNo;
3797 bool isSStore = false;
3798
3799 switch (UseMCID.getOpcode()) {
3800 default: break;
3801 case ARM::VSTMSIA:
3802 case ARM::VSTMSIA_UPD:
3803 case ARM::VSTMSDB_UPD:
3804 isSStore = true;
3805 break;
3806 }
3807
3808 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3809 // then it takes an extra cycle.
3810 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3811 ++UseCycle;
3812 } else {
3813 // Assume the worst.
3814 UseCycle = RegNo + 2;
3815 }
3816
3817 return UseCycle;
3818}
3819
3820std::optional<unsigned>
3821ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3822 const MCInstrDesc &UseMCID, unsigned UseClass,
3823 unsigned UseIdx, unsigned UseAlign) const {
3824 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3825 if (RegNo <= 0)
3826 return ItinData->getOperandCycle(UseClass, UseIdx);
3827
3828 unsigned UseCycle;
3829 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3830 UseCycle = RegNo / 2;
3831 if (UseCycle < 2)
3832 UseCycle = 2;
3833 // Read in E3.
3834 UseCycle += 2;
3835 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3836 UseCycle = (RegNo / 2);
3837 // If there are odd number of registers or if it's not 64-bit aligned,
3838 // then it takes an extra AGU (Address Generation Unit) cycle.
3839 if ((RegNo % 2) || UseAlign < 8)
3840 ++UseCycle;
3841 } else {
3842 // Assume the worst.
3843 UseCycle = 1;
3844 }
3845 return UseCycle;
3846}
3847
3848std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
3849 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
3850 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
3851 unsigned UseIdx, unsigned UseAlign) const {
3852 unsigned DefClass = DefMCID.getSchedClass();
3853 unsigned UseClass = UseMCID.getSchedClass();
3854
3855 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3856 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3857
3858 // This may be a def / use of a variable_ops instruction, the operand
3859 // latency might be determinable dynamically. Let the target try to
3860 // figure it out.
3861 std::optional<unsigned> DefCycle;
3862 bool LdmBypass = false;
3863 switch (DefMCID.getOpcode()) {
3864 default:
3865 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3866 break;
3867
3868 case ARM::VLDMDIA:
3869 case ARM::VLDMDIA_UPD:
3870 case ARM::VLDMDDB_UPD:
3871 case ARM::VLDMSIA:
3872 case ARM::VLDMSIA_UPD:
3873 case ARM::VLDMSDB_UPD:
3874 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3875 break;
3876
3877 case ARM::LDMIA_RET:
3878 case ARM::LDMIA:
3879 case ARM::LDMDA:
3880 case ARM::LDMDB:
3881 case ARM::LDMIB:
3882 case ARM::LDMIA_UPD:
3883 case ARM::LDMDA_UPD:
3884 case ARM::LDMDB_UPD:
3885 case ARM::LDMIB_UPD:
3886 case ARM::tLDMIA:
3887 case ARM::tLDMIA_UPD:
3888 case ARM::tPUSH:
3889 case ARM::t2LDMIA_RET:
3890 case ARM::t2LDMIA:
3891 case ARM::t2LDMDB:
3892 case ARM::t2LDMIA_UPD:
3893 case ARM::t2LDMDB_UPD:
3894 LdmBypass = true;
3895 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3896 break;
3897 }
3898
3899 if (!DefCycle)
3900 // We can't seem to determine the result latency of the def, assume it's 2.
3901 DefCycle = 2;
3902
3903 std::optional<unsigned> UseCycle;
3904 switch (UseMCID.getOpcode()) {
3905 default:
3906 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3907 break;
3908
3909 case ARM::VSTMDIA:
3910 case ARM::VSTMDIA_UPD:
3911 case ARM::VSTMDDB_UPD:
3912 case ARM::VSTMSIA:
3913 case ARM::VSTMSIA_UPD:
3914 case ARM::VSTMSDB_UPD:
3915 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3916 break;
3917
3918 case ARM::STMIA:
3919 case ARM::STMDA:
3920 case ARM::STMDB:
3921 case ARM::STMIB:
3922 case ARM::STMIA_UPD:
3923 case ARM::STMDA_UPD:
3924 case ARM::STMDB_UPD:
3925 case ARM::STMIB_UPD:
3926 case ARM::tSTMIA_UPD:
3927 case ARM::tPOP_RET:
3928 case ARM::tPOP:
3929 case ARM::t2STMIA:
3930 case ARM::t2STMDB:
3931 case ARM::t2STMIA_UPD:
3932 case ARM::t2STMDB_UPD:
3933 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3934 break;
3935 }
3936
3937 if (!UseCycle)
3938 // Assume it's read in the first stage.
3939 UseCycle = 1;
3940
3941 if (UseCycle > *DefCycle + 1)
3942 return std::nullopt;
3943
3944 UseCycle = *DefCycle - *UseCycle + 1;
3945 if (UseCycle > 0u) {
3946 if (LdmBypass) {
3947 // It's a variable_ops instruction so we can't use DefIdx here. Just use
3948 // first def operand.
3949 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3950 UseClass, UseIdx))
3951 UseCycle = *UseCycle - 1;
3952 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3953 UseClass, UseIdx)) {
3954 UseCycle = *UseCycle - 1;
3955 }
3956 }
3957
3958 return UseCycle;
3959}
3960
// getBundledDefMI - Walk backwards through the bundle that precedes this
// position and return the bundled instruction defining Reg. On return,
// DefIdx is the index of the defining operand within that instruction and
// Dist is the number of bundled instructions stepped over to reach it.
// Asserts if the bundle contains no definition of Reg.
// NOTE(review): the source extraction dropped two lines of this function —
// the opening line of the signature (`static const MachineInstr
// *getBundledDefMI(const TargetRegisterInfo *TRI,` per the continuation
// lines below) and the declaration of the iterator `I` that the std::prev
// line reads — confirm against upstream ARMBaseInstrInfo.cpp.
 3962                                            const MachineInstr *MI, unsigned Reg,
 3963                                            unsigned &DefIdx, unsigned &Dist) {
 3964   Dist = 0;
 3965
 3967   MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
 3968   assert(II->isInsideBundle() && "Empty bundle?");
 3969
 3970   int Idx = -1;
 3971   while (II->isInsideBundle()) {
        // Ask each bundled instruction whether it defines Reg; stop at the
        // first (i.e. latest) definition found while scanning backwards.
 3972     Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
 3973     if (Idx != -1)
 3974       break;
        // Step to the previous bundled instruction, counting the distance.
 3975     --II;
 3976     ++Dist;
 3977   }
 3978
 3979   assert(Idx != -1 && "Cannot find bundled definition!");
 3980   DefIdx = Idx;
 3981   return &*II;
 3982 }
3983
// getBundledUseMI - Walk forwards through the bundle following MI and
// return the first bundled instruction that uses Reg, or nullptr if none
// does. On success, UseIdx is the operand index of the use and Dist the
// number of instructions stepped over (t2IT markers are not counted); on
// failure Dist is reset to 0.
// NOTE(review): the source extraction dropped two lines of this function —
// the opening line of the signature (`static const MachineInstr
// *getBundledUseMI(const TargetRegisterInfo *TRI,` per the continuation
// lines below) and the initialization of the iterator `II` — confirm
// against upstream ARMBaseInstrInfo.cpp.
 3985                                            const MachineInstr &MI, unsigned Reg,
 3986                                            unsigned &UseIdx, unsigned &Dist) {
 3987   Dist = 0;
 3988
 3990   assert(II->isInsideBundle() && "Empty bundle?");
 3991   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
 3992
 3993   // FIXME: This doesn't properly handle multiple uses.
 3994   int Idx = -1;
 3995   while (II != E && II->isInsideBundle()) {
 3996     Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
 3997     if (Idx != -1)
 3998       break;
        // t2IT markers don't count toward the distance.
 3999     if (II->getOpcode() != ARM::t2IT)
 4000       ++Dist;
 4001     ++II;
 4002   }
 4003
 4004   if (Idx == -1) {
 4005     Dist = 0;
 4006     return nullptr;
 4007   }
 4008
 4009   UseIdx = Idx;
 4010   return &*II;
 4011 }
4012
4013/// Return the number of cycles to add to (or subtract from) the static
4014/// itinerary based on the def opcode and alignment. The caller will ensure that
4015/// adjusted latency is at least one cycle.
/// \p DefAlign is the alignment (in bytes) of \p DefMI's single memory operand
/// value, or 0 when it has none or more than one. Negative results model the
/// cheaper shifter-op load variants on A7/A8/A9-like and Swift cores; a
/// positive result models the one-cycle penalty for sub-8-byte-aligned NEON
/// VLDn accesses on cores that check VLDn access alignment.
4016static int adjustDefLatency(const ARMSubtarget &Subtarget,
4017 const MachineInstr &DefMI,
4018 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4019 int Adjust = 0;
4020 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4021 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4022 // variants are one cycle cheaper.
4023 switch (DefMCID.getOpcode()) {
4024 default: break;
4025 case ARM::LDRrs:
4026 case ARM::LDRBrs: {
4027 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4028 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4029 if (ShImm == 0 ||
4030 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4031 --Adjust;
4032 break;
4033 }
4034 case ARM::t2LDRs:
4035 case ARM::t2LDRBs:
4036 case ARM::t2LDRHs:
4037 case ARM::t2LDRSHs: {
4038 // Thumb2 mode: lsl only.
4039 unsigned ShAmt = DefMI.getOperand(3).getImm();
4040 if (ShAmt == 0 || ShAmt == 2)
4041 --Adjust;
4042 break;
4043 }
4044 }
4045 } else if (Subtarget.isSwift()) {
4046 // FIXME: Properly handle all of the latency adjustments for address
4047 // writeback.
4048 switch (DefMCID.getOpcode()) {
4049 default: break;
4050 case ARM::LDRrs:
4051 case ARM::LDRBrs: {
4052 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4053 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4054 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4055 if (!isSub &&
4056 (ShImm == 0 ||
4057 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4058 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4059 Adjust -= 2;
4060 else if (!isSub &&
4061 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4062 --Adjust;
4063 break;
4064 }
4065 case ARM::t2LDRs:
4066 case ARM::t2LDRBs:
4067 case ARM::t2LDRHs:
4068 case ARM::t2LDRSHs: {
4069 // Thumb2 mode: lsl only.
4070 unsigned ShAmt = DefMI.getOperand(3).getImm();
4071 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4072 Adjust -= 2;
4073 break;
4074 }
4075 }
4076 }
4077
// All NEON VLDn variants below pay one extra cycle when the access is not
// 64-bit aligned on subtargets that check VLDn access alignment.
4078 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4079 switch (DefMCID.getOpcode()) {
4080 default: break;
4081 case ARM::VLD1q8:
4082 case ARM::VLD1q16:
4083 case ARM::VLD1q32:
4084 case ARM::VLD1q64:
4085 case ARM::VLD1q8wb_fixed:
4086 case ARM::VLD1q16wb_fixed:
4087 case ARM::VLD1q32wb_fixed:
4088 case ARM::VLD1q64wb_fixed:
4089 case ARM::VLD1q8wb_register:
4090 case ARM::VLD1q16wb_register:
4091 case ARM::VLD1q32wb_register:
4092 case ARM::VLD1q64wb_register:
4093 case ARM::VLD2d8:
4094 case ARM::VLD2d16:
4095 case ARM::VLD2d32:
4096 case ARM::VLD2q8:
4097 case ARM::VLD2q16:
4098 case ARM::VLD2q32:
4099 case ARM::VLD2d8wb_fixed:
4100 case ARM::VLD2d16wb_fixed:
4101 case ARM::VLD2d32wb_fixed:
4102 case ARM::VLD2q8wb_fixed:
4103 case ARM::VLD2q16wb_fixed:
4104 case ARM::VLD2q32wb_fixed:
4105 case ARM::VLD2d8wb_register:
4106 case ARM::VLD2d16wb_register:
4107 case ARM::VLD2d32wb_register:
4108 case ARM::VLD2q8wb_register:
4109 case ARM::VLD2q16wb_register:
4110 case ARM::VLD2q32wb_register:
4111 case ARM::VLD3d8:
4112 case ARM::VLD3d16:
4113 case ARM::VLD3d32:
4114 case ARM::VLD1d64T:
4115 case ARM::VLD3d8_UPD:
4116 case ARM::VLD3d16_UPD:
4117 case ARM::VLD3d32_UPD:
4118 case ARM::VLD1d64Twb_fixed:
4119 case ARM::VLD1d64Twb_register:
4120 case ARM::VLD3q8_UPD:
4121 case ARM::VLD3q16_UPD:
4122 case ARM::VLD3q32_UPD:
4123 case ARM::VLD4d8:
4124 case ARM::VLD4d16:
4125 case ARM::VLD4d32:
4126 case ARM::VLD1d64Q:
4127 case ARM::VLD4d8_UPD:
4128 case ARM::VLD4d16_UPD:
4129 case ARM::VLD4d32_UPD:
4130 case ARM::VLD1d64Qwb_fixed:
4131 case ARM::VLD1d64Qwb_register:
4132 case ARM::VLD4q8_UPD:
4133 case ARM::VLD4q16_UPD:
4134 case ARM::VLD4q32_UPD:
4135 case ARM::VLD1DUPq8:
4136 case ARM::VLD1DUPq16:
4137 case ARM::VLD1DUPq32:
4138 case ARM::VLD1DUPq8wb_fixed:
4139 case ARM::VLD1DUPq16wb_fixed:
4140 case ARM::VLD1DUPq32wb_fixed:
4141 case ARM::VLD1DUPq8wb_register:
4142 case ARM::VLD1DUPq16wb_register:
4143 case ARM::VLD1DUPq32wb_register:
4144 case ARM::VLD2DUPd8:
4145 case ARM::VLD2DUPd16:
4146 case ARM::VLD2DUPd32:
4147 case ARM::VLD2DUPd8wb_fixed:
4148 case ARM::VLD2DUPd16wb_fixed:
4149 case ARM::VLD2DUPd32wb_fixed:
4150 case ARM::VLD2DUPd8wb_register:
4151 case ARM::VLD2DUPd16wb_register:
4152 case ARM::VLD2DUPd32wb_register:
4153 case ARM::VLD4DUPd8:
4154 case ARM::VLD4DUPd16:
4155 case ARM::VLD4DUPd32:
4156 case ARM::VLD4DUPd8_UPD:
4157 case ARM::VLD4DUPd16_UPD:
4158 case ARM::VLD4DUPd32_UPD:
4159 case ARM::VLD1LNd8:
4160 case ARM::VLD1LNd16:
4161 case ARM::VLD1LNd32:
4162 case ARM::VLD1LNd8_UPD:
4163 case ARM::VLD1LNd16_UPD:
4164 case ARM::VLD1LNd32_UPD:
4165 case ARM::VLD2LNd8:
4166 case ARM::VLD2LNd16:
4167 case ARM::VLD2LNd32:
4168 case ARM::VLD2LNq16:
4169 case ARM::VLD2LNq32:
4170 case ARM::VLD2LNd8_UPD:
4171 case ARM::VLD2LNd16_UPD:
4172 case ARM::VLD2LNd32_UPD:
4173 case ARM::VLD2LNq16_UPD:
4174 case ARM::VLD2LNq32_UPD:
4175 case ARM::VLD4LNd8:
4176 case ARM::VLD4LNd16:
4177 case ARM::VLD4LNd32:
4178 case ARM::VLD4LNq16:
4179 case ARM::VLD4LNq32:
4180 case ARM::VLD4LNd8_UPD:
4181 case ARM::VLD4LNd16_UPD:
4182 case ARM::VLD4LNd32_UPD:
4183 case ARM::VLD4LNq16_UPD:
4184 case ARM::VLD4LNq32_UPD:
4185 // If the address is not 64-bit aligned, the latencies of these
4186 // instructions increases by one.
4187 ++Adjust;
4188 break;
4189 }
4190 }
4191 return Adjust;
4192}
4193
// getOperandLatency (MachineInstr overload) - Resolve bundles on both the def
// and use side, short-circuit trivially-cheap defs, and delegate to
// getOperandLatencyImpl. Returns std::nullopt when no operand latency can be
// computed, in which case the caller may fall back to getInstrLatency.
// NOTE(review): the function declaration line (4194) is not visible in this
// rendering — confirm against upstream.
4195 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4196 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4197 // No operand latency. The caller may fall back to getInstrLatency.
4198 if (!ItinData || ItinData->isEmpty())
4199 return std::nullopt;
4200
4201 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4202 Register Reg = DefMO.getReg();
4203
// If the def is a bundle header, find the real defining instruction inside
// the bundle; DefIdx/DefAdj are updated to point at it.
4204 const MachineInstr *ResolvedDefMI = &DefMI;
4205 unsigned DefAdj = 0;
4206 if (DefMI.isBundle())
4207 ResolvedDefMI =
4208 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
// Copy-like defs are considered to take a single cycle.
4209 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4210 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4211 return 1;
4212 }
4213
// Similarly resolve the use side; a bundle with no use of Reg yields no
// latency at all.
4214 const MachineInstr *ResolvedUseMI = &UseMI;
4215 unsigned UseAdj = 0;
4216 if (UseMI.isBundle()) {
4217 ResolvedUseMI =
4218 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4219 if (!ResolvedUseMI)
4220 return std::nullopt;
4221 }
4222
4223 return getOperandLatencyImpl(
4224 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4225 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4226}
4227
4228std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4229 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4230 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4231 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4232 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4233 if (Reg == ARM::CPSR) {
4234 if (DefMI.getOpcode() == ARM::FMSTAT) {
4235 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4236 return Subtarget.isLikeA9() ? 1 : 20;
4237 }
4238
4239 // CPSR set and branch can be paired in the same cycle.
4240 if (UseMI.isBranch())
4241 return 0;
4242
4243 // Otherwise it takes the instruction latency (generally one).
4244 unsigned Latency = getInstrLatency(ItinData, DefMI);
4245
4246 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4247 // its uses. Instructions which are otherwise scheduled between them may
4248 // incur a code size penalty (not able to use the CPSR setting 16-bit
4249 // instructions).
4250 if (Latency > 0 && Subtarget.isThumb2()) {
4251 const MachineFunction *MF = DefMI.getParent()->getParent();
4252 if (MF->getFunction().hasOptSize())
4253 --Latency;
4254 }
4255 return Latency;
4256 }
4257
4258 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4259 return std::nullopt;
4260
4261 unsigned DefAlign = DefMI.hasOneMemOperand()
4262 ? (*DefMI.memoperands_begin())->getAlign().value()
4263 : 0;
4264 unsigned UseAlign = UseMI.hasOneMemOperand()
4265 ? (*UseMI.memoperands_begin())->getAlign().value()
4266 : 0;
4267
4268 // Get the itinerary's latency if possible, and handle variable_ops.
4269 std::optional<unsigned> Latency = getOperandLatency(
4270 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4271 // Unable to find operand latency. The caller may resort to getInstrLatency.
4272 if (!Latency)
4273 return std::nullopt;
4274
4275 // Adjust for IT block position.
4276 int Adj = DefAdj + UseAdj;
4277
4278 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4279 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4280 if (Adj >= 0 || (int)*Latency > -Adj) {
4281 return *Latency + Adj;
4282 }
4283 // Return the itinerary latency, which may be zero but not less than zero.
4284 return Latency;
4285}
4286
// getOperandLatency (SDNode overload) - Pre-RA/ISel variant of the operand
// latency query, mirroring the MachineInstr path: itinerary lookup followed by
// the same shifter-op and unaligned-VLDn adjustments (using pseudo opcodes,
// since the SelectionDAG still carries the pre-expansion forms).
// NOTE(review): the declaration line (4288) and one condition continuation
// line in the Swift LDRrs case (4362) are not visible in this rendering —
// confirm against upstream.
4287std::optional<unsigned>
4289 SDNode *DefNode, unsigned DefIdx,
4290 SDNode *UseNode, unsigned UseIdx) const {
4291 if (!DefNode->isMachineOpcode())
4292 return 1;
4293
4294 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4295
4296 if (isZeroCost(DefMCID.Opcode))
4297 return 0;
4298
// Without itinerary data, guess: loads take 3 cycles, everything else 1.
4299 if (!ItinData || ItinData->isEmpty())
4300 return DefMCID.mayLoad() ? 3 : 1;
4301
// Non-machine use (e.g. a CopyToReg): apply the subtarget's pre-ISel
// adjustment, clamping small latencies to 1.
4302 if (!UseNode->isMachineOpcode()) {
4303 std::optional<unsigned> Latency =
4304 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4305 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4306 int Threshold = 1 + Adj;
4307 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4308 }
4309
4310 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4311 auto *DefMN = cast<MachineSDNode>(DefNode);
4312 unsigned DefAlign = !DefMN->memoperands_empty()
4313 ? (*DefMN->memoperands_begin())->getAlign().value()
4314 : 0;
4315 auto *UseMN = cast<MachineSDNode>(UseNode);
4316 unsigned UseAlign = !UseMN->memoperands_empty()
4317 ? (*UseMN->memoperands_begin())->getAlign().value()
4318 : 0;
4319 std::optional<unsigned> Latency = getOperandLatency(
4320 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4321 if (!Latency)
4322 return std::nullopt;
4323
4324 if (Latency > 1U &&
4325 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4326 Subtarget.isCortexA7())) {
4327 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4328 // variants are one cycle cheaper.
4329 switch (DefMCID.getOpcode()) {
4330 default: break;
4331 case ARM::LDRrs:
4332 case ARM::LDRBrs: {
4333 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4334 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4335 if (ShImm == 0 ||
4336 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4337 Latency = *Latency - 1;
4338 break;
4339 }
4340 case ARM::t2LDRs:
4341 case ARM::t2LDRBs:
4342 case ARM::t2LDRHs:
4343 case ARM::t2LDRSHs: {
4344 // Thumb2 mode: lsl only.
4345 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4346 if (ShAmt == 0 || ShAmt == 2)
4347 Latency = *Latency - 1;
4348 break;
4349 }
4350 }
4351 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4352 // FIXME: Properly handle all of the latency adjustments for address
4353 // writeback.
4354 switch (DefMCID.getOpcode()) {
4355 default: break;
4356 case ARM::LDRrs:
4357 case ARM::LDRBrs: {
4358 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4359 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4360 if (ShImm == 0 ||
4361 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4363 Latency = *Latency - 2;
4364 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4365 Latency = *Latency - 1;
4366 break;
4367 }
4368 case ARM::t2LDRs:
4369 case ARM::t2LDRBs:
4370 case ARM::t2LDRHs:
4371 case ARM::t2LDRSHs:
4372 // Thumb2 mode: lsl 0-3 only.
4373 Latency = *Latency - 2;
4374 break;
4375 }
4376 }
4377
// All NEON VLDn variants (pre-expansion pseudos included) below pay one extra
// cycle when the access is not 64-bit aligned on cores that check alignment.
4378 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4379 switch (DefMCID.getOpcode()) {
4380 default: break;
4381 case ARM::VLD1q8:
4382 case ARM::VLD1q16:
4383 case ARM::VLD1q32:
4384 case ARM::VLD1q64:
4385 case ARM::VLD1q8wb_register:
4386 case ARM::VLD1q16wb_register:
4387 case ARM::VLD1q32wb_register:
4388 case ARM::VLD1q64wb_register:
4389 case ARM::VLD1q8wb_fixed:
4390 case ARM::VLD1q16wb_fixed:
4391 case ARM::VLD1q32wb_fixed:
4392 case ARM::VLD1q64wb_fixed:
4393 case ARM::VLD2d8:
4394 case ARM::VLD2d16:
4395 case ARM::VLD2d32:
4396 case ARM::VLD2q8Pseudo:
4397 case ARM::VLD2q16Pseudo:
4398 case ARM::VLD2q32Pseudo:
4399 case ARM::VLD2d8wb_fixed:
4400 case ARM::VLD2d16wb_fixed:
4401 case ARM::VLD2d32wb_fixed:
4402 case ARM::VLD2q8PseudoWB_fixed:
4403 case ARM::VLD2q16PseudoWB_fixed:
4404 case ARM::VLD2q32PseudoWB_fixed:
4405 case ARM::VLD2d8wb_register:
4406 case ARM::VLD2d16wb_register:
4407 case ARM::VLD2d32wb_register:
4408 case ARM::VLD2q8PseudoWB_register:
4409 case ARM::VLD2q16PseudoWB_register:
4410 case ARM::VLD2q32PseudoWB_register:
4411 case ARM::VLD3d8Pseudo:
4412 case ARM::VLD3d16Pseudo:
4413 case ARM::VLD3d32Pseudo:
4414 case ARM::VLD1d8TPseudo:
4415 case ARM::VLD1d16TPseudo:
4416 case ARM::VLD1d32TPseudo:
4417 case ARM::VLD1d64TPseudo:
4418 case ARM::VLD1d64TPseudoWB_fixed:
4419 case ARM::VLD1d64TPseudoWB_register:
4420 case ARM::VLD3d8Pseudo_UPD:
4421 case ARM::VLD3d16Pseudo_UPD:
4422 case ARM::VLD3d32Pseudo_UPD:
4423 case ARM::VLD3q8Pseudo_UPD:
4424 case ARM::VLD3q16Pseudo_UPD:
4425 case ARM::VLD3q32Pseudo_UPD:
4426 case ARM::VLD3q8oddPseudo:
4427 case ARM::VLD3q16oddPseudo:
4428 case ARM::VLD3q32oddPseudo:
4429 case ARM::VLD3q8oddPseudo_UPD:
4430 case ARM::VLD3q16oddPseudo_UPD:
4431 case ARM::VLD3q32oddPseudo_UPD:
4432 case ARM::VLD4d8Pseudo:
4433 case ARM::VLD4d16Pseudo:
4434 case ARM::VLD4d32Pseudo:
4435 case ARM::VLD1d8QPseudo:
4436 case ARM::VLD1d16QPseudo:
4437 case ARM::VLD1d32QPseudo:
4438 case ARM::VLD1d64QPseudo:
4439 case ARM::VLD1d64QPseudoWB_fixed:
4440 case ARM::VLD1d64QPseudoWB_register:
4441 case ARM::VLD1q8HighQPseudo:
4442 case ARM::VLD1q8LowQPseudo_UPD:
4443 case ARM::VLD1q8HighTPseudo:
4444 case ARM::VLD1q8LowTPseudo_UPD:
4445 case ARM::VLD1q16HighQPseudo:
4446 case ARM::VLD1q16LowQPseudo_UPD:
4447 case ARM::VLD1q16HighTPseudo:
4448 case ARM::VLD1q16LowTPseudo_UPD:
4449 case ARM::VLD1q32HighQPseudo:
4450 case ARM::VLD1q32LowQPseudo_UPD:
4451 case ARM::VLD1q32HighTPseudo:
4452 case ARM::VLD1q32LowTPseudo_UPD:
4453 case ARM::VLD1q64HighQPseudo:
4454 case ARM::VLD1q64LowQPseudo_UPD:
4455 case ARM::VLD1q64HighTPseudo:
4456 case ARM::VLD1q64LowTPseudo_UPD:
4457 case ARM::VLD4d8Pseudo_UPD:
4458 case ARM::VLD4d16Pseudo_UPD:
4459 case ARM::VLD4d32Pseudo_UPD:
4460 case ARM::VLD4q8Pseudo_UPD:
4461 case ARM::VLD4q16Pseudo_UPD:
4462 case ARM::VLD4q32Pseudo_UPD:
4463 case ARM::VLD4q8oddPseudo:
4464 case ARM::VLD4q16oddPseudo:
4465 case ARM::VLD4q32oddPseudo:
4466 case ARM::VLD4q8oddPseudo_UPD:
4467 case ARM::VLD4q16oddPseudo_UPD:
4468 case ARM::VLD4q32oddPseudo_UPD:
4469 case ARM::VLD1DUPq8:
4470 case ARM::VLD1DUPq16:
4471 case ARM::VLD1DUPq32:
4472 case ARM::VLD1DUPq8wb_fixed:
4473 case ARM::VLD1DUPq16wb_fixed:
4474 case ARM::VLD1DUPq32wb_fixed:
4475 case ARM::VLD1DUPq8wb_register:
4476 case ARM::VLD1DUPq16wb_register:
4477 case ARM::VLD1DUPq32wb_register:
4478 case ARM::VLD2DUPd8:
4479 case ARM::VLD2DUPd16:
4480 case ARM::VLD2DUPd32:
4481 case ARM::VLD2DUPd8wb_fixed:
4482 case ARM::VLD2DUPd16wb_fixed:
4483 case ARM::VLD2DUPd32wb_fixed:
4484 case ARM::VLD2DUPd8wb_register:
4485 case ARM::VLD2DUPd16wb_register:
4486 case ARM::VLD2DUPd32wb_register:
4487 case ARM::VLD2DUPq8EvenPseudo:
4488 case ARM::VLD2DUPq8OddPseudo:
4489 case ARM::VLD2DUPq16EvenPseudo:
4490 case ARM::VLD2DUPq16OddPseudo:
4491 case ARM::VLD2DUPq32EvenPseudo:
4492 case ARM::VLD2DUPq32OddPseudo:
4493 case ARM::VLD3DUPq8EvenPseudo:
4494 case ARM::VLD3DUPq8OddPseudo:
4495 case ARM::VLD3DUPq16EvenPseudo:
4496 case ARM::VLD3DUPq16OddPseudo:
4497 case ARM::VLD3DUPq32EvenPseudo:
4498 case ARM::VLD3DUPq32OddPseudo:
4499 case ARM::VLD4DUPd8Pseudo:
4500 case ARM::VLD4DUPd16Pseudo:
4501 case ARM::VLD4DUPd32Pseudo:
4502 case ARM::VLD4DUPd8Pseudo_UPD:
4503 case ARM::VLD4DUPd16Pseudo_UPD:
4504 case ARM::VLD4DUPd32Pseudo_UPD:
4505 case ARM::VLD4DUPq8EvenPseudo:
4506 case ARM::VLD4DUPq8OddPseudo:
4507 case ARM::VLD4DUPq16EvenPseudo:
4508 case ARM::VLD4DUPq16OddPseudo:
4509 case ARM::VLD4DUPq32EvenPseudo:
4510 case ARM::VLD4DUPq32OddPseudo:
4511 case ARM::VLD1LNq8Pseudo:
4512 case ARM::VLD1LNq16Pseudo:
4513 case ARM::VLD1LNq32Pseudo:
4514 case ARM::VLD1LNq8Pseudo_UPD:
4515 case ARM::VLD1LNq16Pseudo_UPD:
4516 case ARM::VLD1LNq32Pseudo_UPD:
4517 case ARM::VLD2LNd8Pseudo:
4518 case ARM::VLD2LNd16Pseudo:
4519 case ARM::VLD2LNd32Pseudo:
4520 case ARM::VLD2LNq16Pseudo:
4521 case ARM::VLD2LNq32Pseudo:
4522 case ARM::VLD2LNd8Pseudo_UPD:
4523 case ARM::VLD2LNd16Pseudo_UPD:
4524 case ARM::VLD2LNd32Pseudo_UPD:
4525 case ARM::VLD2LNq16Pseudo_UPD:
4526 case ARM::VLD2LNq32Pseudo_UPD:
4527 case ARM::VLD4LNd8Pseudo:
4528 case ARM::VLD4LNd16Pseudo:
4529 case ARM::VLD4LNd32Pseudo:
4530 case ARM::VLD4LNq16Pseudo:
4531 case ARM::VLD4LNq32Pseudo:
4532 case ARM::VLD4LNd8Pseudo_UPD:
4533 case ARM::VLD4LNd16Pseudo_UPD:
4534 case ARM::VLD4LNd32Pseudo_UPD:
4535 case ARM::VLD4LNq16Pseudo_UPD:
4536 case ARM::VLD4LNq32Pseudo_UPD:
4537 // If the address is not 64-bit aligned, the latencies of these
4538 // instructions increases by one.
4539 Latency = *Latency + 1;
4540 break;
4541 }
4542
4543 return Latency;
4544}
4545
4546unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4547 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4548 MI.isImplicitDef())
4549 return 0;
4550
4551 if (MI.isBundle())
4552 return 0;
4553
4554 const MCInstrDesc &MCID = MI.getDesc();
4555
4556 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4557 !Subtarget.cheapPredicableCPSRDef())) {
4558 // When predicated, CPSR is an additional source operand for CPSR updating
4559 // instructions, this apparently increases their latencies.
4560 return 1;
4561 }
4562 return 0;
4563}
4564
// getInstrLatency (MachineInstr) - Whole-instruction latency: copy-like
// pseudos cost 1, bundles sum their members, everything else uses the
// itinerary stage latency plus the dynamic adjustDefLatency correction.
// *PredCost, when provided, is set to the extra predication cost.
// NOTE(review): the initialization of iterator `I` in the bundle loop (line
// 4576, presumably MI's instr_iterator) is not visible in this rendering —
// confirm against upstream.
4565unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4566 const MachineInstr &MI,
4567 unsigned *PredCost) const {
4568 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4569 MI.isImplicitDef())
4570 return 1;
4571
4572 // An instruction scheduler typically runs on unbundled instructions, however
4573 // other passes may query the latency of a bundled instruction.
4574 if (MI.isBundle()) {
4575 unsigned Latency = 0;
4577 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
// Sum the latencies of the bundled instructions, skipping IT markers.
4578 while (++I != E && I->isInsideBundle()) {
4579 if (I->getOpcode() != ARM::t2IT)
4580 Latency += getInstrLatency(ItinData, *I, PredCost);
4581 }
4582 return Latency;
4583 }
4584
4585 const MCInstrDesc &MCID = MI.getDesc();
4586 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4587 !Subtarget.cheapPredicableCPSRDef()))) {
4588 // When predicated, CPSR is an additional source operand for CPSR updating
4589 // instructions, this apparently increases their latencies.
4590 *PredCost = 1;
4591 }
4592 // Be sure to call getStageLatency for an empty itinerary in case it has a
4593 // valid MinLatency property.
4594 if (!ItinData)
4595 return MI.mayLoad() ? 3 : 1;
4596
4597 unsigned Class = MCID.getSchedClass();
4598
4599 // For instructions with variable uops, use uops as latency.
4600 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4601 return getNumMicroOps(ItinData, MI);
4602
4603 // For the common case, fall back on the itinerary's latency.
4604 unsigned Latency = ItinData->getStageLatency(Class);
4605
4606 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4607 unsigned DefAlign =
4608 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4609 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
// Apply the adjustment only when it cannot drive the latency negative.
4610 if (Adj >= 0 || (int)Latency > -Adj) {
4611 return Latency + Adj;
4612 }
4613 return Latency;
4614}
4615
4616unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4617 SDNode *Node) const {
4618 if (!Node->isMachineOpcode())
4619 return 1;
4620
4621 if (!ItinData || ItinData->isEmpty())
4622 return 1;
4623
4624 unsigned Opcode = Node->getMachineOpcode();
4625 switch (Opcode) {
4626 default:
4627 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4628 case ARM::VLDMQIA:
4629 case ARM::VSTMQIA:
4630 return 2;
4631 }
4632}
4633
4634bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4635 const MachineRegisterInfo *MRI,
4636 const MachineInstr &DefMI,
4637 unsigned DefIdx,
4638 const MachineInstr &UseMI,
4639 unsigned UseIdx) const {
4640 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4641 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4642 if (Subtarget.nonpipelinedVFP() &&
4643 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4644 return true;
4645
4646 // Hoist VFP / NEON instructions with 4 or higher latency.
4647 unsigned Latency =
4648 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4649 if (Latency <= 3)
4650 return false;
4651 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4652 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4653}
4654
4655bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4656 const MachineInstr &DefMI,
4657 unsigned DefIdx) const {
4658 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4659 if (!ItinData || ItinData->isEmpty())
4660 return false;
4661
4662 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4663 if (DDomain == ARMII::DomainGeneral) {
4664 unsigned DefClass = DefMI.getDesc().getSchedClass();
4665 std::optional<unsigned> DefCycle =
4666 ItinData->getOperandCycle(DefClass, DefIdx);
4667 return DefCycle && DefCycle <= 2U;
4668 }
4669 return false;
4670}
4671
// verifyInstruction - Machine-verifier hook. Returns false and sets ErrInfo
// when MI violates an ARM-specific structural constraint; true otherwise.
4672bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4673 StringRef &ErrInfo) const {
// Pseudo flag-setting add/sub opcodes exist only in the SelectionDAG and must
// have been converted before reaching the verifier.
4674 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4675 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4676 return false;
4677 }
4678 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4679 // Make sure we don't generate a lo-lo mov that isn't supported.
4680 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4681 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4682 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4683 return false;
4684 }
4685 }
// Thumb1 push/pop may only name R0-R7, plus LR on tPUSH and PC on tPOP_RET.
4686 if (MI.getOpcode() == ARM::tPUSH ||
4687 MI.getOpcode() == ARM::tPOP ||
4688 MI.getOpcode() == ARM::tPOP_RET) {
4689 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4690 if (MO.isImplicit() || !MO.isReg())
4691 continue;
4692 Register Reg = MO.getReg();
4693 if (Reg < ARM::R0 || Reg > ARM::R7) {
4694 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4695 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4696 ErrInfo = "Unsupported register in Thumb1 push/pop";
4697 return false;
4698 }
4699 }
4700 }
4701 }
4702 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4703 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
// The two lane-index immediates must form the pair (2,0) or (3,1).
4704 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4705 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4706 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4707 return false;
4708 }
4709 }
4710
4711 // Check the address model by taking the first Imm operand and checking it is
4712 // legal for that addressing mode.
// NOTE(review): the declaration of `AddrMode` (line 4713) and several case
// labels preceding AddrModeT2_i12 (lines 4718-4724) are not visible in this
// rendering — confirm against upstream; the check likely covers additional
// T2 addressing modes as well.
4714 (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4715 switch (AddrMode) {
4716 default:
4717 break;
4725 case ARMII::AddrModeT2_i12: {
4726 uint32_t Imm = 0;
// The first immediate operand is taken as the addressing-mode offset.
4727 for (auto Op : MI.operands()) {
4728 if (Op.isImm()) {
4729 Imm = Op.getImm();
4730 break;
4731 }
4732 }
4733 if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4734 ErrInfo = "Incorrect AddrMode Imm for instruction";
4735 return false;
4736 }
4737 break;
4738 }
4739 }
4740 return true;
4741}
4742
// expandLoadStackGuardBase - Expand the LOAD_STACK_GUARD pseudo at `MI` into
// the sequence that loads the stack-guard value into its destination
// register. LoadImmOpc materializes the base (hardware-TLS MRC read, a
// movw/movt-style global-address move, or Thumb-1 tMOVi32imm) and LoadOpc
// performs the actual memory loads.
// NOTE(review): several lines are not visible in this rendering — the
// function signature head (4743), the `MachineInstrBuilder MIB` declaration
// (4752), and various builder continuation lines (predicate operands, memory
// flags, addMemOperand, and the pseudo's removal). Confirm against upstream
// before relying on the exact emitted sequence.
4744 unsigned LoadImmOpc,
4745 unsigned LoadOpc) const {
4746 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4747 "ROPI/RWPI not currently supported with stack guard");
4748
4749 MachineBasicBlock &MBB = *MI->getParent();
4750 DebugLoc DL = MI->getDebugLoc();
4751 Register Reg = MI->getOperand(0).getReg();
4753 unsigned int Offset = 0;
4754
// TLS-relative guard: read the thread pointer via MRC p15, #0, c13, c0, #3.
4755 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4756 assert(!Subtarget.isReadTPSoft() &&
4757 "TLS stack protector requires hardware TLS register");
4758
4759 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4760 .addImm(15)
4761 .addImm(0)
4762 .addImm(13)
4763 .addImm(0)
4764 .addImm(3)
4766
4767 Module &M = *MBB.getParent()->getFunction().getParent();
4768 Offset = M.getStackProtectorGuardOffset();
4769 if (Offset & ~0xfffU) {
4770 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4771 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4772 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4773 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4774 BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4775 .addReg(Reg, RegState::Kill)
4776 .addImm(Offset & ~0xfffU)
4778 .addReg(0);
4779 Offset &= 0xfffU;
4780 }
4781 } else {
// Global-variable guard: materialize the guard symbol's address, with
// GOT / DLL-import / Mach-O non-lazy indirection flags as appropriate.
4782 const GlobalValue *GV =
4783 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4784 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4785
4786 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4787 if (Subtarget.isTargetMachO()) {
4788 TargetFlags |= ARMII::MO_NONLAZY;
4789 } else if (Subtarget.isTargetCOFF()) {
4790 if (GV->hasDLLImportStorageClass())
4791 TargetFlags |= ARMII::MO_DLLIMPORT;
4792 else if (IsIndirect)
4793 TargetFlags |= ARMII::MO_COFFSTUB;
4794 } else if (IsIndirect) {
4795 TargetFlags |= ARMII::MO_GOT;
4796 }
4797
4798 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
// The expansion clobbers the flags, so save/restore APSR_nzcvq around it.
4799 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4800 auto APSREncoding =
4801 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4802 BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4803 .addImm(APSREncoding)
4805 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4806 .addGlobalAddress(GV, 0, TargetFlags);
4807 BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4808 .addImm(APSREncoding)
4809 .addReg(CPSRSaveReg, RegState::Kill)
4811 } else {
4812 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4813 .addGlobalAddress(GV, 0, TargetFlags);
4814 }
4815
4816 if (IsIndirect) {
// Dereference the GOT / stub slot to obtain the guard's real address.
4817 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4818 MIB.addReg(Reg, RegState::Kill).addImm(0);
4819 auto Flags = MachineMemOperand::MOLoad |
4822 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4823 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4825 }
4826 }
4827
// Finally load the guard value itself from [Reg + Offset].
4828 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4829 MIB.addReg(Reg, RegState::Kill)
4830 .addImm(Offset)
4831 .cloneMemRefs(*MI)
4833}
4834
4835bool
4836ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4837 unsigned &AddSubOpc,
4838 bool &NegAcc, bool &HasLane) const {
4839 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4840 if (I == MLxEntryMap.end())
4841 return false;
4842
4843 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4844 MulOpc = Entry.MulOpc;
4845 AddSubOpc = Entry.AddSubOpc;
4846 NegAcc = Entry.NegAcc;
4847 HasLane = Entry.HasLane;
4848 return true;
4849}
4850
4851//===----------------------------------------------------------------------===//
4852// Execution domains.
4853//===----------------------------------------------------------------------===//
4854//
4855// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4856// and some can go down both. The vmov instructions go down the VFP pipeline,
4857// but they can be changed to vorr equivalents that are executed by the NEON
4858// pipeline.
4859//
4860// We use the following execution domain numbering:
4861//
4867
4868//
4869// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4870//
// getExecutionDomain - Return MI's current execution domain and the bitmask
// of domains it could legally execute in. Unpredicated VFP<->core moves may
// be swizzled to NEON on subtargets that support it; everything else is
// pinned to the single domain encoded in its TSFlags.
// NOTE(review): the function header line (4872) and the `if (Domain & ...)`
// guard lines preceding the ExeNEON/ExeVFP returns (4891, 4899) are not
// visible in this rendering — confirm against upstream.
4871std::pair<uint16_t, uint16_t>
4873 // If we don't have access to NEON instructions then we won't be able
4874 // to swizzle anything to the NEON domain. Check to make sure.
4875 if (Subtarget.hasNEON()) {
4876 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4877 // if they are not predicated.
4878 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4879 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4880
4881 // CortexA9 is particularly picky about mixing the two and wants these
4882 // converted.
4883 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4884 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4885 MI.getOpcode() == ARM::VMOVS))
4886 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4887 }
4888 // No other instructions can be swizzled, so just determine their domain.
4889 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4890
4892 return std::make_pair(ExeNEON, 0);
4893
4894 // Certain instructions can go either way on Cortex-A8.
4895 // Treat them as NEON instructions.
4896 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4897 return std::make_pair(ExeNEON, 0);
4898
4900 return std::make_pair(ExeVFP, 0);
4901
4902 return std::make_pair(ExeGeneric, 0);
4903}
4904
// getCorrespondingDRegAndLane - Map an S-register to the D-register that
// contains it, setting Lane to 0 when it is the low half (ssub_0) and 1 when
// it is the high half (ssub_1). Asserts if the S-register has no D
// super-register.
// NOTE(review): the static function declaration line (4905) is not visible in
// this rendering — confirm against upstream.
4906 unsigned SReg, unsigned &Lane) {
4907 MCRegister DReg =
4908 TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4909 Lane = 0;
4910
4911 if (DReg)
4912 return DReg;
4913
// Not the low half; it must be the high lane of some D-register.
4914 Lane = 1;
4915 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4916
4917 assert(DReg && "S-register with no D super-register?");
4918 return DReg;
4919}
4920
4921/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4922/// set ImplicitSReg to a register number that must be marked as implicit-use or
4923/// zero if no register needs to be defined as implicit-use.
4924///
4925/// If the function cannot determine if an SPR should be marked implicit use or
4926/// not, it returns false.
4927///
4928/// This function handles cases where an instruction is being modified from taking
4929/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4930/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4931/// lane of the DPR).
4932///
4933/// If the other SPR is defined, an implicit-use of it should be added. Else,
4934/// (including the case where the DPR itself is defined), it should not.
4935///
// getImplicitSPRUseForDPRUse - See the documentation comment above: decide
// whether converting MI to read DReg[Lane] requires an implicit-use of the
// sibling S-register, setting ImplicitSReg accordingly (or to MCRegister()
// when none is needed). Returns false when liveness cannot be determined.
// NOTE(review): the static function declaration line (4936) and the
// declaration of `LQR` (line 4950, presumably
// MachineBasicBlock::LivenessQueryResult) are not visible in this rendering —
// confirm against upstream.
4937 MachineInstr &MI, MCRegister DReg,
4938 unsigned Lane,
4939 MCRegister &ImplicitSReg) {
4940 // If the DPR is defined or used already, the other SPR lane will be chained
4941 // correctly, so there is nothing to be done.
4942 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4943 ImplicitSReg = MCRegister();
4944 return true;
4945 }
4946
4947 // Otherwise we need to go searching to see if the SPR is set explicitly.
4948 ImplicitSReg = TRI->getSubReg(DReg,
4949 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4951 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4952
// Live: the implicit-use must be added. Unknown: give up and report failure.
4953 if (LQR == MachineBasicBlock::LQR_Live)
4954 return true;
4955 else if (LQR == MachineBasicBlock::LQR_Unknown)
4956 return false;
4957
4958 // If the register is known not to be live, there is no need to add an
4959 // implicit-use.
4960 ImplicitSReg = MCRegister();
4961 return true;
4962}
4963
4965 unsigned Domain) const {
4966 unsigned DstReg, SrcReg;
4967 MCRegister DReg;
4968 unsigned Lane;
4969 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4971 switch (MI.getOpcode()) {
4972 default:
4973 llvm_unreachable("cannot handle opcode!");
4974 break;
4975 case ARM::VMOVD:
4976 if (Domain != ExeNEON)
4977 break;
4978
4979 // Zap the predicate operands.
4980 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4981
4982 // Make sure we've got NEON instructions.
4983 assert(Subtarget.hasNEON() && "VORRd requires NEON");
4984
4985 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4986 DstReg = MI.getOperand(0).getReg();
4987 SrcReg = MI.getOperand(1).getReg();
4988
4989 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4990 MI.removeOperand(i - 1);
4991
4992 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4993 MI.setDesc(get(ARM::VORRd));
4994 MIB.addReg(DstReg, RegState::Define)
4995 .addReg(SrcReg)
4996 .addReg(SrcReg)
4998 break;
4999 case ARM::VMOVRS:
5000 if (Domain != ExeNEON)
5001 break;
5002 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5003
5004 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5005 DstReg = MI.getOperand(0).getReg();
5006 SrcReg = MI.getOperand(1).getReg();
5007
5008 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5009 MI.removeOperand(i - 1);
5010
5011 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5012
5013 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5014 // Note that DSrc has been widened and the other lane may be undef, which
5015 // contaminates the entire register.
5016 MI.setDesc(get(ARM::VGETLNi32));
5017 MIB.addReg(DstReg, RegState::Define)
5018 .addReg(DReg, RegState::Undef)
5019 .addImm(Lane)
5021
5022 // The old source should be an implicit use, otherwise we might think it
5023 // was dead before here.
5024 MIB.addReg(SrcReg, RegState::Implicit);
5025 break;
5026 case ARM::VMOVSR: {
5027 if (Domain != ExeNEON)
5028 break;
5029 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5030
5031 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5032 DstReg = MI.getOperand(0).getReg();
5033 SrcReg = MI.getOperand(1).getReg();
5034
5035 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5036
5037 MCRegister ImplicitSReg;
5038 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5039 break;
5040
5041 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5042 MI.removeOperand(i - 1);
5043
5044 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5045 // Again DDst may be undefined at the beginning of this instruction.
5046 MI.setDesc(get(ARM::VSETLNi32));
5047 MIB.addReg(DReg, RegState::Define)
5048 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5049 .addReg(SrcReg)
5050 .addImm(Lane)
5052
5053 // The narrower destination must be marked as set to keep previous chains
5054 // in place.
5056 if (ImplicitSReg)
5057 MIB.addReg(ImplicitSReg, RegState::Implicit);
5058 break;
5059 }
5060 case ARM::VMOVS: {
5061 if (Domain != ExeNEON)
5062 break;
5063
5064 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5065 DstReg = MI.getOperand(0).getReg();
5066 SrcReg = MI.getOperand(1).getReg();
5067
5068 unsigned DstLane = 0, SrcLane = 0;
5069 MCRegister DDst, DSrc;
5070 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5071 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5072
5073 MCRegister ImplicitSReg;
5074 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5075 break;
5076
5077 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5078 MI.removeOperand(i - 1);
5079
5080 if (DSrc == DDst) {
5081 // Destination can be:
5082 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5083 MI.setDesc(get(ARM::VDUPLN32d));
5084 MIB.addReg(DDst, RegState::Define)
5085 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5086 .addImm(SrcLane)
5088
5089 // Neither the source or the destination are naturally represented any
5090 // more, so add them in manually.
5092 MIB.addReg(SrcReg, RegState::Implicit);
5093 if (ImplicitSReg)
5094 MIB.addReg(ImplicitSReg, RegState::Implicit);
5095 break;
5096 }
5097
5098 // In general there's no single instruction that can perform an S <-> S
5099 // move in NEON space, but a pair of VEXT instructions *can* do the
5100 // job. It turns out that the VEXTs needed will only use DSrc once, with
5101 // the position based purely on the combination of lane-0 and lane-1
5102 // involved. For example
5103 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5104 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5105 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5106 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5107 //
5108 // Pattern of the MachineInstrs is:
5109 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5110 MachineInstrBuilder NewMIB;
5111 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
5112 DDst);
5113
5114 // On the first instruction, both DSrc and DDst may be undef if present.
5115 // Specifically when the original instruction didn't have them as an
5116 // <imp-use>.
5117 MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5118 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5119 NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5120
5121 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5122 CurUndef = !MI.readsRegister(CurReg, TRI);
5123 NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
5124 .addImm(1)
5126
5127 if (SrcLane == DstLane)
5128 NewMIB.addReg(SrcReg, RegState::Implicit);
5129
5130 MI.setDesc(get(ARM::VEXTd32));
5131 MIB.addReg(DDst, RegState::Define);
5132
5133 // On the second instruction, DDst has definitely been defined above, so
5134 // it is not undef. DSrc, if present, can be undef as above.
5135 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5136 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5137 MIB.addReg(CurReg, getUndefRegState(CurUndef));
5138
5139 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5140 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5141 MIB.addReg(CurReg, getUndefRegState(CurUndef))
5142 .addImm(1)
5144
5145 if (SrcLane != DstLane)
5146 MIB.addReg(SrcReg, RegState::Implicit);
5147
5148 // As before, the original destination is no longer represented, add it
5149 // implicitly.
5151 if (ImplicitSReg != 0)
5152 MIB.addReg(ImplicitSReg, RegState::Implicit);
5153 break;
5154 }
5155 }
5156}
5157
5158//===----------------------------------------------------------------------===//
5159// Partial register updates
5160//===----------------------------------------------------------------------===//
5161//
5162// Swift renames NEON registers with 64-bit granularity. That means any
5163// instruction writing an S-reg implicitly reads the containing D-reg. The
5164// problem is mostly avoided by translating f32 operations to v2f32 operations
5165// on D-registers, but f32 loads are still a problem.
5166//
5167// These instructions can load an f32 into a NEON register:
5168//
5169// VLDRS - Only writes S, partial D update.
5170// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5171// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5172//
5173// FCONSTD can be used as a dependency-breaking instruction.
5175 const MachineInstr &MI, unsigned OpNum,
5176 const TargetRegisterInfo *TRI) const {
5177 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5178 if (!PartialUpdateClearance)
5179 return 0;
5180
5181 assert(TRI && "Need TRI instance");
5182
5183 const MachineOperand &MO = MI.getOperand(OpNum);
5184 if (MO.readsReg())
5185 return 0;
5186 Register Reg = MO.getReg();
5187 int UseOp = -1;
5188
5189 switch (MI.getOpcode()) {
5190 // Normal instructions writing only an S-register.
5191 case ARM::VLDRS:
5192 case ARM::FCONSTS:
5193 case ARM::VMOVSR:
5194 case ARM::VMOVv8i8:
5195 case ARM::VMOVv4i16:
5196 case ARM::VMOVv2i32:
5197 case ARM::VMOVv2f32:
5198 case ARM::VMOVv1i64:
5199 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
5200 break;
5201
5202 // Explicitly reads the dependency.
5203 case ARM::VLD1LNd32:
5204 UseOp = 3;
5205 break;
5206 default:
5207 return 0;
5208 }
5209
5210 // If this instruction actually reads a value from Reg, there is no unwanted
5211 // dependency.
5212 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5213 return 0;
5214
5215 // We must be able to clobber the whole D-reg.
5216 if (Reg.isVirtual()) {
5217 // Virtual register must be a def undef foo:ssub_0 operand.
5218 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5219 return 0;
5220 } else if (ARM::SPRRegClass.contains(Reg)) {
5221 // Physical register: MI must define the full D-reg.
5222 MCRegister DReg =
5223 TRI->getMatchingSuperReg(Reg, ARM::ssub_0, &ARM::DPRRegClass);
5224 if (!DReg || !MI.definesRegister(DReg, TRI))
5225 return 0;
5226 }
5227
5228 // MI has an unwanted D-register dependency.
5229 // Avoid defs in the previous N instructrions.
5230 return PartialUpdateClearance;
5231}
5232
5233// Break a partial register dependency after getPartialRegUpdateClearance
5234// returned non-zero.
5236 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5237 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5238 assert(TRI && "Need TRI instance");
5239
5240 const MachineOperand &MO = MI.getOperand(OpNum);
5241 Register Reg = MO.getReg();
5242 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5243 unsigned DReg = Reg;
5244
5245 // If MI defines an S-reg, find the corresponding D super-register.
5246 if (ARM::SPRRegClass.contains(Reg)) {
5247 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5248 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5249 }
5250
5251 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5252 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5253
5254 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5255 // the full D-register by loading the same value to both lanes. The
5256 // instruction is micro-coded with 2 uops, so don't do this until we can
5257 // properly schedule micro-coded instructions. The dispatcher stalls cause
5258 // too big regressions.
5259
5260 // Insert the dependency-breaking FCONSTD before MI.
5261 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5262 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5263 .addImm(96)
5265 MI.addRegisterKilled(DReg, TRI, true);
5266}
5267
5269 return Subtarget.hasFeature(ARM::HasV6KOps);
5270}
5271
5273 if (MI->getNumOperands() < 4)
5274 return true;
5275 unsigned ShOpVal = MI->getOperand(3).getImm();
5276 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5277 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5278 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5279 ((ShImm == 1 || ShImm == 2) &&
5280 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5281 return true;
5282
5283 return false;
5284}
5285
5287 const MachineInstr &MI, unsigned DefIdx,
5288 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5289 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5290 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5291
5292 switch (MI.getOpcode()) {
5293 case ARM::VMOVDRR:
5294 // dX = VMOVDRR rY, rZ
5295 // is the same as:
5296 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5297 // Populate the InputRegs accordingly.
5298 // rY
5299 const MachineOperand *MOReg = &MI.getOperand(1);
5300 if (!MOReg->isUndef())
5301 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5302 MOReg->getSubReg(), ARM::ssub_0));
5303 // rZ
5304 MOReg = &MI.getOperand(2);
5305 if (!MOReg->isUndef())
5306 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5307 MOReg->getSubReg(), ARM::ssub_1));
5308 return true;
5309 }
5310 llvm_unreachable("Target dependent opcode missing");
5311}
5312
5314 const MachineInstr &MI, unsigned DefIdx,
5315 RegSubRegPairAndIdx &InputReg) const {
5316 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5317 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5318
5319 switch (MI.getOpcode()) {
5320 case ARM::VMOVRRD:
5321 // rX, rY = VMOVRRD dZ
5322 // is the same as:
5323 // rX = EXTRACT_SUBREG dZ, ssub_0
5324 // rY = EXTRACT_SUBREG dZ, ssub_1
5325 const MachineOperand &MOReg = MI.getOperand(2);
5326 if (MOReg.isUndef())
5327 return false;
5328 InputReg.Reg = MOReg.getReg();
5329 InputReg.SubReg = MOReg.getSubReg();
5330 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5331 return true;
5332 }
5333 llvm_unreachable("Target dependent opcode missing");
5334}
5335
5337 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5338 RegSubRegPairAndIdx &InsertedReg) const {
5339 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5340 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5341
5342 switch (MI.getOpcode()) {
5343 case ARM::VSETLNi32:
5344 case ARM::MVE_VMOV_to_lane_32:
5345 // dX = VSETLNi32 dY, rZ, imm
5346 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5347 const MachineOperand &MOBaseReg = MI.getOperand(1);
5348 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5349 if (MOInsertedReg.isUndef())
5350 return false;
5351 const MachineOperand &MOIndex = MI.getOperand(3);
5352 BaseReg.Reg = MOBaseReg.getReg();
5353 BaseReg.SubReg = MOBaseReg.getSubReg();
5354
5355 InsertedReg.Reg = MOInsertedReg.getReg();
5356 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5357 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5358 return true;
5359 }
5360 llvm_unreachable("Target dependent opcode missing");
5361}
5362
5363std::pair<unsigned, unsigned>
5365 const unsigned Mask = ARMII::MO_OPTION_MASK;
5366 return std::make_pair(TF & Mask, TF & ~Mask);
5367}
5368
5371 using namespace ARMII;
5372
5373 static const std::pair<unsigned, const char *> TargetFlags[] = {
5374 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5375 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5376 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5377 };
5378 return ArrayRef(TargetFlags);
5379}
5380
5383 using namespace ARMII;
5384
5385 static const std::pair<unsigned, const char *> TargetFlags[] = {
5386 {MO_COFFSTUB, "arm-coffstub"},
5387 {MO_GOT, "arm-got"},
5388 {MO_SBREL, "arm-sbrel"},
5389 {MO_DLLIMPORT, "arm-dllimport"},
5390 {MO_SECREL, "arm-secrel"},
5391 {MO_NONLAZY, "arm-nonlazy"}};
5392 return ArrayRef(TargetFlags);
5393}
5394
5395std::optional<RegImmPair>
5397 int Sign = 1;
5398 unsigned Opcode = MI.getOpcode();
5399 int64_t Offset = 0;
5400
5401 // TODO: Handle cases where Reg is a super- or sub-register of the
5402 // destination register.
5403 const MachineOperand &Op0 = MI.getOperand(0);
5404 if (!Op0.isReg() || Reg != Op0.getReg())
5405 return std::nullopt;
5406
5407 // We describe SUBri or ADDri instructions.
5408 if (Opcode == ARM::SUBri)
5409 Sign = -1;
5410 else if (Opcode != ARM::ADDri)
5411 return std::nullopt;
5412
5413 // TODO: Third operand can be global address (usually some string). Since
5414 // strings can be relocated we cannot calculate their offsets for
5415 // now.
5416 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5417 return std::nullopt;
5418
5419 Offset = MI.getOperand(2).getImm() * Sign;
5420 return RegImmPair{MI.getOperand(1).getReg(), Offset};
5421}
5422
5426 const TargetRegisterInfo *TRI) {
5427 for (auto I = From; I != To; ++I)
5428 if (I->modifiesRegister(Reg, TRI))
5429 return true;
5430 return false;
5431}
5432
5434 const TargetRegisterInfo *TRI) {
5435 // Search backwards to the instruction that defines CSPR. This may or not
5436 // be a CMP, we check that after this loop. If we find another instruction
5437 // that reads cpsr, we return nullptr.
5438 MachineBasicBlock::iterator CmpMI = Br;
5439 while (CmpMI != Br->getParent()->begin()) {
5440 --CmpMI;
5441 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5442 break;
5443 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5444 break;
5445 }
5446
5447 // Check that this inst is a CMP r[0-7], #0 and that the register
5448 // is not redefined between the cmp and the br.
5449 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5450 return nullptr;
5451 Register Reg = CmpMI->getOperand(0).getReg();
5452 Register PredReg;
5453 ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
5454 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5455 return nullptr;
5456 if (!isARMLowRegister(Reg))
5457 return nullptr;
5458 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5459 return nullptr;
5460
5461 return &*CmpMI;
5462}
5463
5465 const ARMSubtarget *Subtarget,
5466 bool ForCodesize) {
5467 if (Subtarget->isThumb()) {
5468 if (Val <= 255) // MOV
5469 return ForCodesize ? 2 : 1;
5470 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5471 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5472 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5473 return ForCodesize ? 4 : 1;
5474 if (Val <= 510) // MOV + ADDi8
5475 return ForCodesize ? 4 : 2;
5476 if (~Val <= 255) // MOV + MVN
5477 return ForCodesize ? 4 : 2;
5478 if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
5479 return ForCodesize ? 4 : 2;
5480 } else {
5481 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5482 return ForCodesize ? 4 : 1;
5483 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5484 return ForCodesize ? 4 : 1;
5485 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5486 return ForCodesize ? 4 : 1;
5487 if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
5488 return ForCodesize ? 8 : 2;
5489 if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
5490 return ForCodesize ? 8 : 2;
5491 }
5492 if (Subtarget->useMovt()) // MOVW + MOVT
5493 return ForCodesize ? 8 : 2;
5494 return ForCodesize ? 8 : 3; // Literal pool load
5495}
5496
5497bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5498 const ARMSubtarget *Subtarget,
5499 bool ForCodesize) {
5500 // Check with ForCodesize
5501 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5502 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5503 if (Cost1 < Cost2)
5504 return true;
5505 if (Cost1 > Cost2)
5506 return false;
5507
5508 // If they are equal, try with !ForCodesize
5509 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5510 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5511}
5512
5513/// Constants defining how certain sequences should be outlined.
5514/// This encompasses how an outlined function should be called, and what kind of
5515/// frame should be emitted for that outlined function.
5516///
5517/// \p MachineOutlinerTailCall implies that the function is being created from
5518/// a sequence of instructions ending in a return.
5519///
5520/// That is,
5521///
5522/// I1 OUTLINED_FUNCTION:
5523/// I2 --> B OUTLINED_FUNCTION I1
5524/// BX LR I2
5525/// BX LR
5526///
5527/// +-------------------------+--------+-----+
5528/// | | Thumb2 | ARM |
5529/// +-------------------------+--------+-----+
5530/// | Call overhead in Bytes | 4 | 4 |
5531/// | Frame overhead in Bytes | 0 | 0 |
5532/// | Stack fixup required | No | No |
5533/// +-------------------------+--------+-----+
5534///
5535/// \p MachineOutlinerThunk implies that the function is being created from
5536/// a sequence of instructions ending in a call. The outlined function is
5537/// called with a BL instruction, and the outlined function tail-calls the
5538/// original call destination.
5539///
5540/// That is,
5541///
5542/// I1 OUTLINED_FUNCTION:
5543/// I2 --> BL OUTLINED_FUNCTION I1
5544/// BL f I2
5545/// B f
5546///
5547/// +-------------------------+--------+-----+
5548/// | | Thumb2 | ARM |
5549/// +-------------------------+--------+-----+
5550/// | Call overhead in Bytes | 4 | 4 |
5551/// | Frame overhead in Bytes | 0 | 0 |
5552/// | Stack fixup required | No | No |
5553/// +-------------------------+--------+-----+
5554///
5555/// \p MachineOutlinerNoLRSave implies that the function should be called using
5556/// a BL instruction, but doesn't require LR to be saved and restored. This
5557/// happens when LR is known to be dead.
5558///
5559/// That is,
5560///
5561/// I1 OUTLINED_FUNCTION:
5562/// I2 --> BL OUTLINED_FUNCTION I1
5563/// I3 I2
5564/// I3
5565/// BX LR
5566///
5567/// +-------------------------+--------+-----+
5568/// | | Thumb2 | ARM |
5569/// +-------------------------+--------+-----+
5570/// | Call overhead in Bytes | 4 | 4 |
5571/// | Frame overhead in Bytes | 2 | 4 |
5572/// | Stack fixup required | No | No |
5573/// +-------------------------+--------+-----+
5574///
5575/// \p MachineOutlinerRegSave implies that the function should be called with a
5576/// save and restore of LR to an available register. This allows us to avoid
5577/// stack fixups. Note that this outlining variant is compatible with the
5578/// NoLRSave case.
5579///
5580/// That is,
5581///
5582/// I1 Save LR OUTLINED_FUNCTION:
5583/// I2 --> BL OUTLINED_FUNCTION I1
5584/// I3 Restore LR I2
5585/// I3
5586/// BX LR
5587///
5588/// +-------------------------+--------+-----+
5589/// | | Thumb2 | ARM |
5590/// +-------------------------+--------+-----+
5591/// | Call overhead in Bytes | 8 | 12 |
5592/// | Frame overhead in Bytes | 2 | 4 |
5593/// | Stack fixup required | No | No |
5594/// +-------------------------+--------+-----+
5595///
5596/// \p MachineOutlinerDefault implies that the function should be called with
5597/// a save and restore of LR to the stack.
5598///
5599/// That is,
5600///
5601/// I1 Save LR OUTLINED_FUNCTION:
5602/// I2 --> BL OUTLINED_FUNCTION I1
5603/// I3 Restore LR I2
5604/// I3
5605/// BX LR
5606///
5607/// +-------------------------+--------+-----+
5608/// | | Thumb2 | ARM |
5609/// +-------------------------+--------+-----+
5610/// | Call overhead in Bytes | 8 | 12 |
5611/// | Frame overhead in Bytes | 2 | 4 |
5612/// | Stack fixup required | Yes | Yes |
5613/// +-------------------------+--------+-----+
5614
5622
5628
5641
5643 : CallTailCall(target.isThumb() ? 4 : 4),
5644 FrameTailCall(target.isThumb() ? 0 : 0),
5645 CallThunk(target.isThumb() ? 4 : 4),
5646 FrameThunk(target.isThumb() ? 0 : 0),
5647 CallNoLRSave(target.isThumb() ? 4 : 4),
5648 FrameNoLRSave(target.isThumb() ? 2 : 4),
5649 CallRegSave(target.isThumb() ? 8 : 12),
5650 FrameRegSave(target.isThumb() ? 2 : 4),
5651 CallDefault(target.isThumb() ? 8 : 12),
5652 FrameDefault(target.isThumb() ? 2 : 4),
5653 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5654};
5655
5657ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5658 MachineFunction *MF = C.getMF();
5659 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
5660 const ARMBaseRegisterInfo *ARI =
5661 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5662
5663 BitVector regsReserved = ARI->getReservedRegs(*MF);
5664 // Check if there is an available register across the sequence that we can
5665 // use.
5666 for (Register Reg : ARM::rGPRRegClass) {
5667 if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
5668 Reg != ARM::LR && // LR is not reserved, but don't use it.
5669 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5670 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5671 C.isAvailableInsideSeq(Reg, TRI))
5672 return Reg;
5673 }
5674 return Register();
5675}
5676
5677// Compute liveness of LR at the point after the interval [I, E), which
5678// denotes a *backward* iteration through instructions. Used only for return
5679// basic blocks, which do not end with a tail call.
5683 // At the end of the function LR dead.
5684 bool Live = false;
5685 for (; I != E; ++I) {
5686 const MachineInstr &MI = *I;
5687
5688 // Check defs of LR.
5689 if (MI.modifiesRegister(ARM::LR, &TRI))
5690 Live = false;
5691
5692 // Check uses of LR.
5693 unsigned Opcode = MI.getOpcode();
5694 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5695 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5696 Opcode == ARM::tBXNS_RET || Opcode == ARM::t2BXAUT_RET) {
5697 // These instructions use LR, but it's not an (explicit or implicit)
5698 // operand.
5699 Live = true;
5700 continue;
5701 }
5702 if (MI.readsRegister(ARM::LR, &TRI))
5703 Live = true;
5704 }
5705 return !Live;
5706}
5707
5708std::optional<std::unique_ptr<outliner::OutlinedFunction>>
5710 const MachineModuleInfo &MMI,
5711 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
5712 unsigned MinRepeats) const {
5713 unsigned SequenceSize = 0;
5714 for (auto &MI : RepeatedSequenceLocs[0])
5715 SequenceSize += getInstSizeInBytes(MI);
5716
5717 // Properties about candidate MBBs that hold for all of them.
5718 unsigned FlagsSetInAll = 0xF;
5719
5720 // Compute liveness information for each candidate, and set FlagsSetInAll.
5722 for (outliner::Candidate &C : RepeatedSequenceLocs)
5723 FlagsSetInAll &= C.Flags;
5724
5725 // According to the ARM Procedure Call Standard, the following are
5726 // undefined on entry/exit from a function call:
5727 //
5728 // * Register R12(IP),
5729 // * Condition codes (and thus the CPSR register)
5730 //
5731 // Since we control the instructions which are part of the outlined regions
5732 // we don't need to be fully compliant with the AAPCS, but we have to
5733 // guarantee that if a veneer is inserted at link time the code is still
5734 // correct. Because of this, we can't outline any sequence of instructions
5735 // where one of these registers is live into/across it. Thus, we need to
5736 // delete those candidates.
5737 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5738 // If the unsafe registers in this block are all dead, then we don't need
5739 // to compute liveness here.
5740 if (C.Flags & UnsafeRegsDead)
5741 return false;
5742 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5743 };
5744
5745 // Are there any candidates where those registers are live?
5746 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5747 // Erase every candidate that violates the restrictions above. (It could be
5748 // true that we have viable candidates, so it's not worth bailing out in
5749 // the case that, say, 1 out of 20 candidates violate the restructions.)
5750 llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
5751
5752 // If the sequence doesn't have enough candidates left, then we're done.
5753 if (RepeatedSequenceLocs.size() < MinRepeats)
5754 return std::nullopt;
5755 }
5756
5757 // We expect the majority of the outlining candidates to be in consensus with
5758 // regard to return address sign and authentication, and branch target
5759 // enforcement, in other words, partitioning according to all the four
5760 // possible combinations of PAC-RET and BTI is going to yield one big subset
5761 // and three small (likely empty) subsets. That allows us to cull incompatible
5762 // candidates separately for PAC-RET and BTI.
5763
5764 // Partition the candidates in two sets: one with BTI enabled and one with BTI
5765 // disabled. Remove the candidates from the smaller set. If they are the same
5766 // number prefer the non-BTI ones for outlining, since they have less
5767 // overhead.
5768 auto NoBTI =
5769 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5770 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5771 return AFI.branchTargetEnforcement();
5772 });
5773 if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
5774 std::distance(NoBTI, RepeatedSequenceLocs.end()))
5775 RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
5776 else
5777 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
5778
5779 if (RepeatedSequenceLocs.size() < MinRepeats)
5780 return std::nullopt;
5781
5782 // Likewise, partition the candidates according to PAC-RET enablement.
5783 auto NoPAC =
5784 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5785 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5786 // If the function happens to not spill the LR, do not disqualify it
5787 // from the outlining.
5788 return AFI.shouldSignReturnAddress(true);
5789 });
5790 if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
5791 std::distance(NoPAC, RepeatedSequenceLocs.end()))
5792 RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
5793 else
5794 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
5795
5796 if (RepeatedSequenceLocs.size() < MinRepeats)
5797 return std::nullopt;
5798
5799 // At this point, we have only "safe" candidates to outline. Figure out
5800 // frame + call instruction information.
5801
5802 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5803
5804 // Helper lambda which sets call information for every candidate.
5805 auto SetCandidateCallInfo =
5806 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5807 for (outliner::Candidate &C : RepeatedSequenceLocs)
5808 C.setCallInfo(CallID, NumBytesForCall);
5809 };
5810
5811 OutlinerCosts Costs(Subtarget);
5812
5813 const auto &SomeMFI =
5814 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
5815 // Adjust costs to account for the BTI instructions.
5816 if (SomeMFI.branchTargetEnforcement()) {
5817 Costs.FrameDefault += 4;
5818 Costs.FrameNoLRSave += 4;
5819 Costs.FrameRegSave += 4;
5820 Costs.FrameTailCall += 4;
5821 Costs.FrameThunk += 4;
5822 }
5823
5824 // Adjust costs to account for sign and authentication instructions.
5825 if (SomeMFI.shouldSignReturnAddress(true)) {
5826 Costs.CallDefault += 8; // +PAC instr, +AUT instr
5827 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
5828 }
5829
5830 unsigned FrameID = MachineOutlinerDefault;
5831 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
5832
5833 // If the last instruction in any candidate is a terminator, then we should
5834 // tail call all of the candidates.
5835 if (RepeatedSequenceLocs[0].back().isTerminator()) {
5836 FrameID = MachineOutlinerTailCall;
5837 NumBytesToCreateFrame = Costs.FrameTailCall;
5838 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
5839 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
5840 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
5841 LastInstrOpcode == ARM::tBLXr ||
5842 LastInstrOpcode == ARM::tBLXr_noip ||
5843 LastInstrOpcode == ARM::tBLXi) {
5844 FrameID = MachineOutlinerThunk;
5845 NumBytesToCreateFrame = Costs.FrameThunk;
5846 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
5847 } else {
5848 // We need to decide how to emit calls + frames. We can always emit the same
5849 // frame if we don't need to save to the stack. If we have to save to the
5850 // stack, then we need a different frame.
5851 unsigned NumBytesNoStackCalls = 0;
5852 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5853
5854 for (outliner::Candidate &C : RepeatedSequenceLocs) {
5855 // LR liveness is overestimated in return blocks, unless they end with a
5856 // tail call.
5857 const auto Last = C.getMBB()->rbegin();
5858 const bool LRIsAvailable =
5859 C.getMBB()->isReturnBlock() && !Last->isCall()
5862 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
5863 if (LRIsAvailable) {
5864 FrameID = MachineOutlinerNoLRSave;
5865 NumBytesNoStackCalls += Costs.CallNoLRSave;
5866 C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
5867 CandidatesWithoutStackFixups.push_back(C);
5868 }
5869
5870 // Is an unused register available? If so, we won't modify the stack, so
5871 // we can outline with the same frame type as those that don't save LR.
5872 else if (findRegisterToSaveLRTo(C)) {
5873 FrameID = MachineOutlinerRegSave;
5874 NumBytesNoStackCalls += Costs.CallRegSave;
5875 C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
5876 CandidatesWithoutStackFixups.push_back(C);
5877 }
5878
5879 // Is SP used in the sequence at all? If not, we don't have to modify
5880 // the stack, so we are guaranteed to get the same frame.
5881 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
5882 NumBytesNoStackCalls += Costs.CallDefault;
5883 C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
5884 CandidatesWithoutStackFixups.push_back(C);
5885 }
5886
5887 // If we outline this, we need to modify the stack. Pretend we don't
5888 // outline this by saving all of its bytes.
5889 else
5890 NumBytesNoStackCalls += SequenceSize;
5891 }
5892
5893 // If there are no places where we have to save LR, then note that we don't
5894 // have to update the stack. Otherwise, give every candidate the default
5895 // call type
5896 if (NumBytesNoStackCalls <=
5897 RepeatedSequenceLocs.size() * Costs.CallDefault) {
5898 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5899 FrameID = MachineOutlinerNoLRSave;
5900 if (RepeatedSequenceLocs.size() < MinRepeats)
5901 return std::nullopt;
5902 } else
5903 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
5904 }
5905
5906 // Does every candidate's MBB contain a call? If so, then we might have a
5907 // call in the range.
5908 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5909 // check if the range contains a call. These require a save + restore of
5910 // the link register.
5911 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5912 if (any_of(drop_end(FirstCand),
5913 [](const MachineInstr &MI) { return MI.isCall(); }))
5914 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5915
5916 // Handle the last instruction separately. If it is tail call, then the
5917 // last instruction is a call, we don't want to save + restore in this
5918 // case. However, it could be possible that the last instruction is a
5919 // call without it being valid to tail call this sequence. We should
5920 // consider this as well.
5921 else if (FrameID != MachineOutlinerThunk &&
5922 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
5923 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5924 }
5925
5926 return std::make_unique<outliner::OutlinedFunction>(
5927 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
5928}
5929
/// Check whether the SP-relative immediate offset of \p MI can absorb an
/// extra \p Fixup bytes (the adjustment introduced when the outliner spills
/// LR), and, when \p Updt is true, rewrite the immediate in place.
/// Returns true when MI has no SP operand or its offset is/was made valid;
/// returns false when SP is used in a way this code cannot patch or the new
/// offset would not be encodable.
5930bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
5931 int64_t Fixup,
5932 bool Updt) const {
5933 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
5934 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
5935 if (SPIdx < 0)
5936 // No SP operand
5937 return true;
5938 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
5939 // If SP is not the base register we can't do much
5940 return false;
5941
5942 // Stack might be involved but addressing mode doesn't handle any offset.
5943 // Rq: AddrModeT1_[1|2|4] don't operate on SP
5944 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
5945 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
5946 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
5947 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
5948 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
5949 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
5950 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
5951 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
5952 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
// NOTE(review): upstream line 5953 (another AddrMode comparison) was lost in
// extraction here — confirm against upstream ARMBaseInstrInfo.cpp.
5954 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
5955 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
5956 return false;
5957
// The scaled immediate is, by convention of the remaining addressing modes,
// the third-from-last operand of the instruction.
5958 unsigned NumOps = MI->getDesc().getNumOperands();
5959 unsigned ImmIdx = NumOps - 3;
5960
5961 const MachineOperand &Offset = MI->getOperand(ImmIdx);
5962 assert(Offset.isImm() && "Is not an immediate");
5963 int64_t OffVal = Offset.getImm();
5964
5965 if (OffVal < 0)
5966 // Don't override data if they are below SP.
5967 return false;
5968
5969 unsigned NumBits = 0;
5970 unsigned Scale = 1;
5971
// Decode per-addressing-mode encodable bit-width and scale of the immediate.
5972 switch (AddrMode) {
5973 case ARMII::AddrMode3:
5974 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
5975 return false;
5976 OffVal = ARM_AM::getAM3Offset(OffVal);
5977 NumBits = 8;
5978 break;
5979 case ARMII::AddrMode5:
5980 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
5981 return false;
5982 OffVal = ARM_AM::getAM5Offset(OffVal);
5983 NumBits = 8;
5984 Scale = 4;
5985 break;
// NOTE(review): the case label for this arm (line 5986) was lost in
// extraction; the getAM5FP16* calls indicate AddrMode5FP16 — confirm.
5987 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
5988 return false;
5989 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
5990 NumBits = 8;
5991 Scale = 2;
5992 break;
// NOTE(review): case label(s) at line 5993 lost in extraction.
5994 NumBits = 8;
5995 break;
// NOTE(review): case label at line 5996 lost in extraction.
5997 // FIXME: Values are already scaled in this addressing mode.
5998 assert((Fixup & 3) == 0 && "Can't encode this offset!");
5999 NumBits = 10;
6000 break;
// NOTE(review): case label at line 6001 lost in extraction.
6002 NumBits = 8;
6003 Scale = 4;
6004 break;
// NOTE(review): case label(s) at lines 6005-6006 lost in extraction.
6007 NumBits = 12;
6008 break;
6009 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6010 NumBits = 8;
6011 Scale = 4;
6012 break;
6013 default:
6014 llvm_unreachable("Unsupported addressing mode!");
6015 }
6016 // Make sure the offset is encodable for instructions that scale the
6017 // immediate.
6018 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6019 "Can't encode this offset!");
6020 OffVal += Fixup / Scale;
6021
6022 unsigned Mask = (1 << NumBits) - 1;
6023
6024 if (OffVal <= Mask) {
6025 if (Updt)
6026 MI->getOperand(ImmIdx).setImm(OffVal);
6027 return true;
6028 }
6029
6030 return false;
6031}
6032
6034 Function &F, std::vector<outliner::Candidate> &Candidates) const {
// NOTE(review): the first line of this definition (upstream line 6033, the
// function name) was lost in extraction; the body propagates BTI/PAC-related
// attributes from a candidate's parent function onto the newly created
// outlined function F, then defers to the generated base implementation.
6035 outliner::Candidate &C = Candidates.front();
6036 // branch-target-enforcement is guaranteed to be consistent between all
6037 // candidates, so we only need to look at one.
6038 const Function &CFn = C.getMF()->getFunction();
6039 if (CFn.hasFnAttribute("branch-target-enforcement"))
6040 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"))
6041
6042 if (CFn.hasFnAttribute("sign-return-address"))
6043 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
6044
6045 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6046}
6047
6049 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
// Decide whether the outliner may pull code out of MF at all; this is a
// function-level filter, per-instruction legality is checked elsewhere.
6050 const Function &F = MF.getFunction();
6051
6052 // Can F be deduplicated by the linker? If it can, don't outline from it.
6053 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6054 return false;
6055
6056 // Don't outline from functions with section markings; the program could
6057 // expect that all the code is in the named section.
6058 // FIXME: Allow outlining from multiple functions with the same section
6059 // marking.
6060 if (F.hasSection())
6061 return false;
6062
6063 // FIXME: Thumb1 outlining is not handled
// NOTE(review): the guard condition (upstream line 6064, presumably a
// Thumb1-only subtarget check) was lost in extraction — confirm upstream.
6065 return false;
6066
6067 // It's safe to outline from MF.
6068 return true;
6069}
6070
6072 unsigned &Flags) const {
// Per-basic-block safety check for outlining: accumulates register liveness
// across MBB, records flags (calls present, LR/R12/CPSR availability) for
// later candidate costing, and returns false when outlining is unsafe.
6073 // Check if LR is available through all of the MBB. If it's not, then set
6074 // a flag.
6075 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6076 "Suitable Machine Function for outlining must track liveness");
6077
// NOTE(review): the LiveRegUnits declaration and the loop header feeding
// LRU.accumulate below were lost in extraction — confirm upstream.
6079
6081 LRU.accumulate(MI);
6082
6083 // Check if each of the unsafe registers are available...
6084 bool R12AvailableInBlock = LRU.available(ARM::R12);
6085 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6086
6087 // If all of these are dead (and not live out), we know we don't have to check
6088 // them later.
6089 if (R12AvailableInBlock && CPSRAvailableInBlock)
// NOTE(review): guarded statement (upstream line 6090, a Flags update) lost
// in extraction.
6091
6092 // Now, add the live outs to the set.
6093 LRU.addLiveOuts(MBB);
6094
6095 // If any of these registers is available in the MBB, but also a live out of
6096 // the block, then we know outlining is unsafe.
6097 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6098 return false;
6099 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6100 return false;
6101
6102 // Check if there's a call inside this MachineBasicBlock. If there is, then
6103 // set a flag.
6104 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
// NOTE(review): guarded statement (upstream line 6105, a Flags update) lost
// in extraction.
6106
6107 // LR liveness is overestimated in return blocks.
6108
6109 bool LRIsAvailable =
6110 MBB.isReturnBlock() && !MBB.back().isCall()
6111 ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
6112 : LRU.available(ARM::LR);
6113 if (!LRIsAvailable)
// NOTE(review): guarded statement (upstream line 6114, a Flags update) lost
// in extraction.
6115
6116 return true;
6117}
6118
6122 unsigned Flags) const {
// Classify a single instruction for the outliner (Legal / LegalTerminator /
// Illegal / Invisible). Most "return outliner::InstrType::…" statements in
// this body were lost in extraction (see NOTE(review) markers); the guard
// conditions themselves are intact.
6123 MachineInstr &MI = *MIT;
// NOTE(review): upstream line 6124 (TRI initialization used below) lost in
// extraction.
6125
6126 // PIC instructions contain labels, outlining them would break offset
6127 // computing.
6128 unsigned Opc = MI.getOpcode();
6129 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6130 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6131 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6132 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6133 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6134 Opc == ARM::t2MOV_ga_pcrel)
// NOTE(review): guarded return (upstream line 6135) lost in extraction.
6136
6137 // Be conservative with ARMv8.1 MVE instructions.
6138 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6139 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6140 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6141 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6142 Opc == ARM::t2LoopEndDec)
// NOTE(review): guarded return (upstream line 6143) lost in extraction.
6144
6145 const MCInstrDesc &MCID = MI.getDesc();
6146 uint64_t MIFlags = MCID.TSFlags;
6147 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
// NOTE(review): guarded return (upstream line 6148) lost in extraction.
6149
6150 // Is this a terminator for a basic block?
6151 if (MI.isTerminator())
6152 // TargetInstrInfo::getOutliningType has already filtered out anything
6153 // that would break this, so we can allow it here.
// NOTE(review): guarded return (upstream line 6154) lost in extraction.
6155
6156 // Don't outline if link register or program counter value are used.
6157 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
// NOTE(review): guarded return (upstream line 6158) lost in extraction.
6159
6160 if (MI.isCall()) {
6161 // Get the function associated with the call. Look at each operand and find
6162 // the one that represents the callee and get its name.
6163 const Function *Callee = nullptr;
6164 for (const MachineOperand &MOP : MI.operands()) {
6165 if (MOP.isGlobal()) {
6166 Callee = dyn_cast<Function>(MOP.getGlobal());
6167 break;
6168 }
6169 }
6170
6171 // Don't outline calls to "mcount" like functions, in particular Linux
6172 // kernel function tracing relies on it.
6173 if (Callee &&
6174 (Callee->getName() == "\01__gnu_mcount_nc" ||
6175 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
// NOTE(review): guarded return (upstream line 6176) lost in extraction.
6177
6178 // If we don't know anything about the callee, assume it depends on the
6179 // stack layout of the caller. In that case, it's only legal to outline
6180 // as a tail-call. Explicitly list the call instructions we know about so
6181 // we don't get unexpected results with call pseudo-instructions.
6182 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6183 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6184 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6185 Opc == ARM::tBLXi)
6186 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6187
6188 if (!Callee)
6189 return UnknownCallOutlineType;
6190
6191 // We have a function we have information about. Check if it's something we
6192 // can safely outline.
6193 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
6194
6195 // We don't know what's going on with the callee at all. Don't touch it.
6196 if (!CalleeMF)
6197 return UnknownCallOutlineType;
6198
6199 // Check if we know anything about the callee saves on the function. If we
6200 // don't, then don't touch it, since that implies that we haven't computed
6201 // anything about its stack frame yet.
6202 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6203 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6204 MFI.getNumObjects() > 0)
6205 return UnknownCallOutlineType;
6206
6207 // At this point, we can say that CalleeMF ought to not pass anything on the
6208 // stack. Therefore, we can outline it.
// NOTE(review): final return (upstream line 6209) lost in extraction.
6210 }
6211
6212 // Since calls are handled, don't touch LR or PC
6213 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
// NOTE(review): guarded return (upstream line 6214) lost in extraction.
6215
6216 // Does this use the stack?
6217 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6218 // True if there is no chance that any outlined candidate from this range
6219 // could require stack fixups. That is, both
6220 // * LR is available in the range (No save/restore around call)
6221 // * The range doesn't include calls (No save/restore in outlined frame)
6222 // are true.
6223 // These conditions also ensure correctness of the return address
6224 // authentication - we insert sign and authentication instructions only if
6225 // we save/restore LR on stack, but then this condition ensures that the
6226 // outlined range does not modify the SP, therefore the SP value used for
6227 // signing is the same as the one used for authentication.
6228 // FIXME: This is very restrictive; the flags check the whole block,
6229 // not just the bit we will try to outline.
6230 bool MightNeedStackFixUp =
// NOTE(review): the initializer (upstream lines 6231-6232, a Flags test)
// lost in extraction.
6233
6234 if (!MightNeedStackFixUp)
// NOTE(review): guarded return (upstream line 6235) lost in extraction.
6236
6237 // Any modification of SP will break our code to save/restore LR.
6238 // FIXME: We could handle some instructions which add a constant offset to
6239 // SP, with a bit more work.
6240 if (MI.modifiesRegister(ARM::SP, TRI))
// NOTE(review): guarded return (upstream line 6241) lost in extraction.
6242
6243 // At this point, we have a stack instruction that we might need to fix
6244 // up. We'll handle it if it's a load or store.
6245 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6246 false))
// NOTE(review): guarded return (upstream line 6247) lost in extraction.
6248
6249 // We can't fix it up, so don't outline it.
// NOTE(review): return (upstream line 6250) lost in extraction.
6251 }
6252
6253 // Be conservative with IT blocks.
6254 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6255 MI.modifiesRegister(ARM::ITSTATE, TRI))
// NOTE(review): guarded return (upstream line 6256) lost in extraction.
6257
6258 // Don't outline CFI instructions.
6259 if (MI.isCFIInstruction())
// NOTE(review): guarded return (upstream line 6260) lost in extraction.
6261
// NOTE(review): final return (upstream line 6262) lost in extraction.
6263}
6264
6265void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6266 for (MachineInstr &MI : MBB) {
6267 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6268 }
6269}
6270
/// Emit code before \p It that pushes LR (and, when \p Auth, a PAC computed
/// in R12) onto the stack with a pre-indexed store, optionally emitting
/// FrameSetup CFI describing the new CFA offset and save locations.
6271void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6272 MachineBasicBlock::iterator It, bool CFI,
6273 bool Auth) const {
6274 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6275 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6276 assert(Align >= 8 && Align <= 256);
6277 if (Auth) {
6278 assert(Subtarget.isThumb2());
6279 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6280 // sequence.
6281 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6282 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6283 .addReg(ARM::R12, RegState::Kill)
6284 .addReg(ARM::LR, RegState::Kill)
6285 .addReg(ARM::SP)
6286 .addImm(-Align)
// NOTE(review): a builder line (upstream 6287, presumably predicate
// operands) was lost in extraction.
6288 .setMIFlags(MIFlags);
6289 } else {
6290 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6291 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6292 .addReg(ARM::LR, RegState::Kill)
6293 .addReg(ARM::SP)
6294 .addImm(-Align)
// NOTE(review): a builder line (upstream 6295) was lost in extraction.
6296 .setMIFlags(MIFlags);
6297 }
6298
6299 if (!CFI)
6300 return;
6301
6302 // Add a CFI, saying CFA is offset by Align bytes from SP.
6303 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
6304 CFIBuilder.buildDefCFAOffset(Align);
6305
6306 // Add a CFI saying that the LR that we want to find is now higher than
6307 // before.
6308 int LROffset = Auth ? Align - 4 : Align;
6309 CFIBuilder.buildOffset(ARM::LR, -LROffset);
6310 if (Auth) {
6311 // Add a CFI for the location of the return address PAC.
6312 CFIBuilder.buildOffset(ARM::RA_AUTH_CODE, -Align);
6313 }
6314}
6315
/// Emit code before the iterator that pops LR (and, when \p Auth, the saved
/// PAC into R12 followed by a t2AUT authentication) with a post-indexed
/// load, optionally emitting FrameDestroy CFI undoing saveLROnStack's CFI.
6316void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
// NOTE(review): the iterator parameter line (upstream 6317) was lost in
// extraction.
6318 bool CFI, bool Auth) const {
6319 int Align = Subtarget.getStackAlignment().value();
6320 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6321 if (Auth) {
6322 assert(Subtarget.isThumb2());
6323 // Restore return address PAC and LR.
6324 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6325 .addReg(ARM::R12, RegState::Define)
6326 .addReg(ARM::LR, RegState::Define)
6327 .addReg(ARM::SP, RegState::Define)
6328 .addReg(ARM::SP)
6329 .addImm(Align)
// NOTE(review): a builder line (upstream 6330) was lost in extraction.
6331 .setMIFlags(MIFlags);
6332 // LR authentication is after the CFI instructions, below.
6333 } else {
6334 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6335 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6336 .addReg(ARM::SP, RegState::Define)
6337 .addReg(ARM::SP);
6338 if (!Subtarget.isThumb())
6339 MIB.addReg(0);
6340 MIB.addImm(Subtarget.getStackAlignment().value())
// NOTE(review): a builder line (upstream 6341) was lost in extraction.
6342 .setMIFlags(MIFlags);
6343 }
6344
6345 if (CFI) {
6346 // Now stack has moved back up and we have restored LR.
6347 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameDestroy);
6348 CFIBuilder.buildDefCFAOffset(0);
6349 CFIBuilder.buildRestore(ARM::LR);
6350 if (Auth)
6351 CFIBuilder.buildUndefined(ARM::RA_AUTH_CODE);
6352 }
6353
6354 if (Auth)
6355 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6356}
6357
6360 const outliner::OutlinedFunction &OF) const {
// Finalize the body of an outlined function: turn a trailing call into a
// tail call for thunk frames, wrap any interior calls with an LR
// save/restore (fixing up SP-relative offsets), and append a return unless
// the frame already tail-calls out.
6361 // For thunk outlining, rewrite the last instruction from a call to a
6362 // tail-call.
6363 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6364 MachineInstr *Call = &*--MBB.instr_end();
6365 bool isThumb = Subtarget.isThumb();
6366 unsigned FuncOp = isThumb ? 2 : 0;
6367 unsigned Opc = Call->getOperand(FuncOp).isReg()
6368 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6369 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6370 : ARM::tTAILJMPdND
6371 : ARM::TAILJMPd;
6372 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6373 .add(Call->getOperand(FuncOp));
6374 if (isThumb && !Call->getOperand(FuncOp).isReg())
6375 MIB.add(predOps(ARMCC::AL));
6376 Call->eraseFromParent();
6377 }
6378
6379 // Is there a call in the outlined range?
6380 auto IsNonTailCall = [](MachineInstr &MI) {
6381 return MI.isCall() && !MI.isReturn();
6382 };
6383 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6384 MachineBasicBlock::iterator It = MBB.begin();
// NOTE(review): the declaration of Et (upstream line 6385) was lost in
// extraction.
6386
6387 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6388 OF.FrameConstructionID == MachineOutlinerThunk)
6389 Et = std::prev(MBB.end());
6390
6391 // We have to save and restore LR, we need to add it to the liveins if it
6392 // is not already part of the set. This is sufficient since outlined
6393 // functions only have one block.
6394 if (!MBB.isLiveIn(ARM::LR))
6395 MBB.addLiveIn(ARM::LR);
6396
6397 // Insert a save before the outlined region
6398 bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true);
6399 saveLROnStack(MBB, It, true, Auth);
6400
6401 // Fix up the instructions in the range, since we're going to modify the
6402 // stack.
6403 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6404 "Can only fix up stack references once");
6405 fixupPostOutline(MBB);
6406
6407 // Insert a restore before the terminator for the function. Restore LR.
6408 restoreLRFromStack(MBB, Et, true, Auth);
6409 }
6410
6411 // If this is a tail call outlined function, then there's already a return.
6412 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6413 OF.FrameConstructionID == MachineOutlinerThunk)
6414 return;
6415
6416 // Here we have to insert the return ourselves. Get the correct opcode from
6417 // current feature set.
6418 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
// NOTE(review): a builder line (upstream 6419, presumably predicate
// operands) was lost in extraction.
6420
6421 // Did we have to modify the stack by saving the link register?
6422 if (OF.FrameConstructionID != MachineOutlinerDefault &&
6423 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6424 return;
6425
6426 // We modified the stack.
6427 // Walk over the basic block and fix up all the stack accesses.
6428 fixupPostOutline(MBB);
6429}
6430
// NOTE(review): the signature of this definition (upstream lines 6431-6435,
// insertOutlinedCall per the candidate call-construction IDs handled below)
// was lost in extraction. It inserts the call/branch to the outlined
// function at It, arranging LR preservation per C.CallConstructionID.
6436 unsigned Opc;
6437 bool isThumb = Subtarget.isThumb();
6438
6439 // Are we tail calling?
6440 if (C.CallConstructionID == MachineOutlinerTailCall) {
6441 // If yes, then we can just branch to the label.
6442 Opc = isThumb
6443 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6444 : ARM::TAILJMPd;
6445 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6446 .addGlobalAddress(M.getNamedValue(MF.getName()));
6447 if (isThumb)
6448 MIB.add(predOps(ARMCC::AL));
6449 It = MBB.insert(It, MIB);
6450 return It;
6451 }
6452
6453 // Create the call instruction.
6454 Opc = isThumb ? ARM::tBL : ARM::BL;
6455 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6456 if (isThumb)
6457 CallMIB.add(predOps(ARMCC::AL));
6458 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6459
6460 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6461 C.CallConstructionID == MachineOutlinerThunk) {
6462 // No, so just insert the call.
6463 It = MBB.insert(It, CallMIB);
6464 return It;
6465 }
6466
6467 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6468 // Can we save to a register?
6469 if (C.CallConstructionID == MachineOutlinerRegSave) {
6470 Register Reg = findRegisterToSaveLRTo(C);
6471 assert(Reg != 0 && "No callee-saved register available?");
6472
6473 // Save and restore LR from that register.
6474 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6475 if (!AFI.isLRSpilled())
// NOTE(review): the CFI builder expression this chains from (upstream line
// 6476) was lost in extraction.
6477 .buildRegister(ARM::LR, Reg);
6478 CallPt = MBB.insert(It, CallMIB);
6479 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6480 if (!AFI.isLRSpilled())
// NOTE(review): the guarded CFI statement (upstream line 6481) was lost in
// extraction.
6482 It--;
6483 return CallPt;
6484 }
6485 // We have the default case. Save and restore from SP.
6486 if (!MBB.isLiveIn(ARM::LR))
6487 MBB.addLiveIn(ARM::LR);
6488 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6489 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6490 CallPt = MBB.insert(It, CallMIB);
6491 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6492 It--;
6493 return CallPt;
6494}
6495
6497 MachineFunction &MF) const {
// Outline by default only on M-class subtargets when the function is
// optimized for minimum size.
6498 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6499}
6500
6501bool ARMBaseInstrInfo::isReMaterializableImpl(
6502 const MachineInstr &MI) const {
6503 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6504 // the tail predication conversion. This means that the element count
6505 // register has to be live for longer, but that has to be better than
6506 // spill/restore and VPT predication.
6507 return (isVCTP(&MI) && !isPredicated(MI)) ||
// NOTE(review): the second operand of the || (upstream line 6508,
// presumably the base-class check) was lost in extraction.
6509}
6510
// SLS hardening: when hardenSlsBlr is enabled, select the BLX variant that
// avoids the attackable register. (The enclosing signature line was lost in
// extraction.)
6512 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6513 : ARM::BLX;
6514}
6515
// Thumb counterpart of the selection above: pick tBLXr_noip under SLS-BLR
// hardening. (The enclosing signature line was lost in extraction.)
6517 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6518 : ARM::tBLXr;
6519}
6520
// Predicated BLX selection under SLS-BLR hardening. (The enclosing signature
// line was lost in extraction.)
6522 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6523 : ARM::BLX_pred;
6524}
6525
6526namespace {
/// PipelinerLoopInfo implementation for ARM loops terminated either by a
/// t2Bcc (CC-setter driven) or a t2LoopEnd/t2LoopDec pair, with a register
/// pressure heuristic to veto schedules that would over-subscribe registers.
6527class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6528 MachineInstr *EndLoop, *LoopCount;
6529 MachineFunction *MF;
6530 const TargetInstrInfo *TII;
6531
6532 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6533 // [LAST_IS_USE] : last reference to register in schedule is a use
6534 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6535 static int constexpr MAX_STAGES = 30;
6536 static int constexpr LAST_IS_USE = MAX_STAGES;
6537 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6538 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6539 typedef std::map<Register, IterNeed> IterNeeds;
6540
6541 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6542 const IterNeeds &CIN);
6543 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6544
6545 // Meanings of the various stuff with loop types:
6546 // t2Bcc:
6547 // EndLoop = branch at end of original BB that will become a kernel
6548 // LoopCount = CC setter live into branch
6549 // t2LoopEnd:
6550 // EndLoop = branch at end of original BB
6551 // LoopCount = t2LoopDec
6552public:
6553 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6554 : EndLoop(EndLoop), LoopCount(LoopCount),
6555 MF(EndLoop->getParent()->getParent()),
6556 TII(MF->getSubtarget().getInstrInfo()) {}
6557
6558 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6559 // Only ignore the terminator.
6560 return MI == EndLoop || MI == LoopCount;
6561 }
6562
6563 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
// Reject a proposed software-pipelining schedule when the register
// pressure estimate exceeds the target's limits.
6564 if (tooMuchRegisterPressure(SSD, SMS))
6565 return false;
6566
6567 return true;
6568 }
6569
6570 std::optional<bool> createTripCountGreaterCondition(
6571 int TC, MachineBasicBlock &MBB,
6572 SmallVectorImpl<MachineOperand> &Cond) override {
6573
6574 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6575 Cond.push_back(EndLoop->getOperand(1));
6576 Cond.push_back(EndLoop->getOperand(2));
6577 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
// NOTE(review): the guarded statement (upstream line 6578, presumably a
// condition reversal) was lost in extraction.
6579 }
6580 return {};
6581 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6582 // General case just lets the unrolled t2LoopDec do the subtraction and
6583 // therefore just needs to check if zero has been reached.
6584 MachineInstr *LoopDec = nullptr;
6585 for (auto &I : MBB.instrs())
6586 if (I.getOpcode() == ARM::t2LoopDec)
6587 LoopDec = &I;
6588 assert(LoopDec && "Unable to find copied LoopDec");
6589 // Check if we're done with the loop.
6590 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6591 .addReg(LoopDec->getOperand(0).getReg())
6592 .addImm(0)
// NOTE(review): a builder line (upstream 6593) was lost in extraction.
6594 .addReg(ARM::NoRegister);
// NOTE(review): upstream line 6595 (a Cond.push_back) was lost in
// extraction.
6596 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6597 return {};
6598 } else
6599 llvm_unreachable("Unknown EndLoop");
6600 }
6601
6602 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6603
6604 void adjustTripCount(int TripCountAdjust) override {}
6605};
6606
/// Temporarily add (then remove) the pressure contributed by loop-carried
/// registers in CIN to the tracker, so recorded max pressure accounts for
/// values live across pipeline stages.
6607void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6608 const IterNeeds &CIN) {
6609 // Increase pressure by the amounts in CrossIterationNeeds
6610 for (const auto &N : CIN) {
6611 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6612 for (int I = 0; I < Cnt; ++I)
// NOTE(review): the loop body (upstream lines 6613-6614, the pressure
// increase call) was lost in extraction.
6615 }
6616 // Decrease pressure by the amounts in CrossIterationNeeds
6617 for (const auto &N : CIN) {
6618 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6619 for (int I = 0; I < Cnt; ++I)
// NOTE(review): the loop body (upstream lines 6620-6621, the pressure
// decrease call) was lost in extraction.
6622 }
6623}
6624
/// Estimate the register pressure of the proposed pipelined schedule and
/// return true when any pressure set would exceed its limit, in which case
/// the schedule should be rejected.
6625bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6626 SMSchedule &SMS) {
6627 IterNeeds CrossIterationNeeds;
6628
6629 // Determine which values will be loop-carried after the schedule is
6630 // applied
6631
6632 for (auto &SU : SSD.SUnits) {
6633 const MachineInstr *MI = SU.getInstr();
6634 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6635 for (auto &S : SU.Succs)
6636 if (MI->isPHI() && S.getKind() == SDep::Anti) {
// PHI anti-dependences mark a value needed on the next iteration (bit 0).
6637 Register Reg = S.getReg();
6638 if (Reg.isVirtual())
6639 CrossIterationNeeds[Reg.id()].set(0);
6640 } else if (S.isAssignedRegDep()) {
6641 int OStg = SMS.stageScheduled(S.getSUnit());
6642 if (OStg >= 0 && OStg != Stg) {
// A cross-stage data dependence keeps the value live for the stage gap.
6643 Register Reg = S.getReg();
6644 if (Reg.isVirtual())
6645 CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
6646 }
6647 }
6648 }
6649
6650 // Determine more-or-less what the proposed schedule (reversed) is going to
6651 // be; it might not be quite the same because the within-cycle ordering
6652 // created by SMSchedule depends upon changes to help with address offsets and
6653 // the like.
6654 std::vector<SUnit *> ProposedSchedule;
6655 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6656 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6657 ++Stage) {
6658 std::deque<SUnit *> Instrs =
6659 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6660 std::sort(Instrs.begin(), Instrs.end(),
6661 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6662 llvm::append_range(ProposedSchedule, Instrs);
6663 }
6664
6665 // Learn whether the last use/def of each cross-iteration register is a use or
6666 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6667 // and we do not have to add the pressure.
6668 for (auto *SU : ProposedSchedule)
6669 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6670 ++OperI) {
6671 auto MO = *OperI;
6672 if (!MO.isReg() || !MO.getReg())
6673 continue;
6674 Register Reg = MO.getReg();
6675 auto CIter = CrossIterationNeeds.find(Reg.id());
6676 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6677 CIter->second[SEEN_AS_LIVE])
6678 continue;
6679 if (MO.isDef() && !MO.isDead())
6680 CIter->second.set(SEEN_AS_LIVE);
6681 else if (MO.isUse())
6682 CIter->second.set(LAST_IS_USE);
6683 }
// LAST_IS_USE was only needed to decide SEEN_AS_LIVE; clear it before the
// pressure walk below.
6684 for (auto &CI : CrossIterationNeeds)
6685 CI.second.reset(LAST_IS_USE);
6686
6687 RegionPressure RecRegPressure;
6688 RegPressureTracker RPTracker(RecRegPressure);
6689 RegisterClassInfo RegClassInfo;
6690 RegClassInfo.runOnMachineFunction(*MF);
6691 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6692 EndLoop->getParent()->end(), false, false);
6693
6694 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6695
// Walk the proposed schedule bottom-up, receding the tracker one
// instruction at a time while keeping cross-iteration needs up to date.
6696 for (auto *SU : ProposedSchedule) {
6697 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6698 RPTracker.setPos(std::next(CurInstI));
6699 RPTracker.recede();
6700
6701 // Track what cross-iteration registers would be seen as live
6702 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6703 auto MO = *OperI;
6704 if (!MO.isReg() || !MO.getReg())
6705 continue;
6706 Register Reg = MO.getReg();
6707 if (MO.isDef() && !MO.isDead()) {
6708 auto CIter = CrossIterationNeeds.find(Reg.id());
6709 if (CIter != CrossIterationNeeds.end()) {
6710 CIter->second.reset(0);
6711 CIter->second.reset(SEEN_AS_LIVE);
6712 }
6713 }
6714 }
6715 for (auto &S : SU->Preds) {
6716 auto Stg = SMS.stageScheduled(SU);
6717 if (S.isAssignedRegDep()) {
6718 Register Reg = S.getReg();
6719 auto CIter = CrossIterationNeeds.find(Reg.id());
6720 if (CIter != CrossIterationNeeds.end()) {
6721 auto Stg2 = SMS.stageScheduled(S.getSUnit());
6722 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6723 if (Stg - Stg2 < MAX_STAGES)
6724 CIter->second.set(Stg - Stg2);
6725 CIter->second.set(SEEN_AS_LIVE);
6726 }
6727 }
6728 }
6729
6730 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6731 }
6732
6733 auto &P = RPTracker.getPressure().MaxSetPressure;
6734 for (unsigned I = 0, E = P.size(); I < E; ++I) {
6735 // Exclude some Neon register classes.
6736 if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
6737 I == ARM::DTriple_with_qsub_0_in_QPR)
6738 continue;
6739
6740 if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
6741 return true;
6742 }
6743 }
6744 return false;
6745}
6746
6747} // namespace
6748
6749std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
// NOTE(review): the signature continuation (upstream lines 6750-6751,
// including the declarations of LoopBB and iterator I used below) was lost
// in extraction. The body builds an ARMPipelinerLoopInfo for loops ending in
// either a t2Bcc with an identifiable CPSR setter or a t2LoopEnd/t2LoopDec
// pair with a t2DoLoopStart in the preheader; otherwise returns nullptr.
6752 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6753 if (Preheader == LoopBB)
6754 Preheader = *std::next(LoopBB->pred_begin());
6755
6756 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6757 // If the branch is a Bcc, then the CPSR should be set somewhere within the
6758 // block. We need to determine the reaching definition of CPSR so that
6759 // it can be marked as non-pipelineable, allowing the pipeliner to force
6760 // it into stage 0 or give up if it cannot or will not do so.
6761 MachineInstr *CCSetter = nullptr;
6762 for (auto &L : LoopBB->instrs()) {
6763 if (L.isCall())
6764 return nullptr;
6765 if (isCPSRDefined(L))
6766 CCSetter = &L;
6767 }
6768 if (CCSetter)
6769 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
6770 else
6771 return nullptr; // Unable to find the CC setter, so unable to guarantee
6772 // that pipeline will work
6773 }
6774
6775 // Recognize:
6776 // preheader:
6777 // %1 = t2DoLoopStart %0
6778 // loop:
6779 // %2 = phi %1, <not loop>, %..., %loop
6780 // %3 = t2LoopDec %2, <imm>
6781 // t2LoopEnd %3, %loop
6782
6783 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
6784 for (auto &L : LoopBB->instrs())
6785 if (L.isCall())
6786 return nullptr;
6787 else if (isVCTP(&L))
6788 return nullptr;
6789 Register LoopDecResult = I->getOperand(0).getReg();
6790 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
6791 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
6792 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
6793 return nullptr;
6794 MachineInstr *LoopStart = nullptr;
6795 for (auto &J : Preheader->instrs())
6796 if (J.getOpcode() == ARM::t2DoLoopStart)
6797 LoopStart = &J;
6798 if (!LoopStart)
6799 return nullptr;
6800 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
6801 }
6802 return nullptr;
6803}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isThumb(const MCSubtargetInfo &STI)
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, MachineInstr &MI, MCRegister DReg, unsigned Lane, MCRegister &ImplicitSReg)
getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, set ImplicitSReg to a register n...
static const MachineInstr * getBundledUseMI(const TargetRegisterInfo *TRI, const MachineInstr &MI, unsigned Reg, unsigned &UseIdx, unsigned &Dist)
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI)
Create a copy of a const pool value.
static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, int CmpMask, bool CommonUse)
isSuitableForMask - Identify a suitable 'and' instruction that operates on the given source register ...
static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr &DefMI, const MCInstrDesc &DefMCID, unsigned DefAlign)
Return the number of cycles to add to (or subtract from) the static itinerary based on the def opcode...
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, const MachineInstr &MI)
static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, unsigned SReg, unsigned &Lane)
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[]
static bool isEligibleForITBlock(const MachineInstr *MI)
static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC)
getCmpToAddCondition - assume the flags are set by CMP(a,b), return the condition code if we modify t...
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1)
static bool isLRAvailable(const TargetRegisterInfo &TRI, MachineBasicBlock::reverse_iterator I, MachineBasicBlock::reverse_iterator E)
static const ARM_MLxEntry ARM_MLxTable[]
static bool isRedundantFlagInstr(const MachineInstr *CmpI, Register SrcReg, Register SrcReg2, int64_t ImmValue, const MachineInstr *OI, bool &IsThumb1)
isRedundantFlagInstr - check whether the first instruction, whose only purpose is to update flags,...
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, unsigned NumRegs)
static const MachineInstr * getBundledDefMI(const TargetRegisterInfo *TRI, const MachineInstr *MI, unsigned Reg, unsigned &DefIdx, unsigned &Dist)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define P(N)
PowerPC TLS Dynamic Call Fixup
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static bool isCPSRDefined(const MachineInstr &MI)
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction to set the zero flag so that we can remove a "comparis...
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
foldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immedia...
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister DestReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getPartialRegUpdateClearance(const MachineInstr &, unsigned, const TargetRegisterInfo *) const override
unsigned getNumLDMAddresses(const MachineInstr &MI) const
Get the number of addresses by LDM or VLDM or zero for unknown.
MachineInstr * optimizeSelect(MachineInstr &MI, SmallPtrSetImpl< MachineInstr * > &SeenMIs, bool) const override
bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI) const override
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Returns the size of the specified MachineInstr.
void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister SrcReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
const MachineInstrBuilder & AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, RegState State) const
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
ARM supports the MachineOutliner.
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
Enable outlining by default at -Oz.
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const override
bool isPredicated(const MachineInstr &MI) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, unsigned LoadImmOpc, unsigned LoadOpc) const
bool isPredicable(const MachineInstr &MI) const override
isPredicable - Return true if the specified instruction can be predicated.
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
Specialization of TargetInstrInfo::describeLoadedValue, used to enhance debug entry value description...
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, unsigned NumInsts) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
const ARMBaseRegisterInfo & getRegisterInfo() const
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to determine if two loads are lo...
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool getRegSequenceLikeInputs(const MachineInstr &MI, unsigned DefIdx, SmallVectorImpl< RegSubRegPairAndIdx > &InputRegs) const override
Build the equivalent inputs of a REG_SEQUENCE for the given MI and DefIdx.
unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override
bool getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override
Build the equivalent inputs of a INSERT_SUBREG for the given MI and DefIdx.
bool expandPostRAPseudo(MachineInstr &MI) const override
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to determine (in conjunction w...
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
VFP/NEON execution domains.
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
bool isSwiftFastImmShift(const MachineInstr *MI) const
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const override
ARMBaseInstrInfo(const ARMSubtarget &STI, const ARMBaseRegisterInfo &TRI)
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void breakPartialRegDependency(MachineInstr &, unsigned, const TargetRegisterInfo *TRI) const override
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
const ARMSubtarget & getSubtarget() const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPairAndIdx &InputReg) const override
Build the equivalent inputs of a EXTRACT_SUBREG for the given MI and DefIdx.
bool shouldSink(const MachineInstr &MI) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMCP::ARMCPModifier getModifier() const
virtual bool hasSameValue(ARMConstantPoolValue *ACPV)
hasSameValue - Return true if this ARM constpool value can share the same constantpool entry as anoth...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isCortexA7() const
bool isSwift() const
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb1Only() const
bool isThumb2() const
bool isLikeA9() const
Align getStackAlignment() const
getStackAlignment - Returns the minimum alignment known to hold of the stack frame on entry to the fu...
bool enableMachinePipeliner() const override
Returns true if machine pipeliner should be enabled.
bool hasMinSize() const
bool isCortexA8() const
@ DoubleIssueCheckUnalignedAccess
Can load/store 2 registers/cycle, but needs an extra cycle if the access is not 64-bit aligned.
@ SingleIssue
Can load/store 1 register/cycle.
@ DoubleIssue
Can load/store 2 registers/cycle.
@ SingleIssuePlusExtras
Can load/store 1 register/cycle, but needs an extra cycle for address computation and potentially als...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type size() const
size - Returns the number of bits in this bitvector.
Definition BitVector.h:178
LLVM_ABI uint64_t scale(uint64_t Num) const
Scale a large integer.
BranchProbability getCompl() const
Helper class for creating CFI instructions and inserting them into MIR.
void buildRegister(MCRegister Reg1, MCRegister Reg2) const
void buildRestore(MCRegister Reg) const
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A debug info location.
Definition DebugLoc.h:123
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool hasDLLImportStorageClass() const
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
Itinerary data supplied by a subtarget to be used by a target.
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
unsigned getStageLatency(unsigned ItinClassIndx) const
Return the total stage latency of the given class.
std::optional< unsigned > getOperandLatency(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Compute and return the use operand latency of a given itinerary class and operand index if the value ...
bool hasPipelineForwarding(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Return true if there is a pipeline forwarding between instructions of itinerary classes DefClass and ...
bool isEmpty() const
Returns true if there are no itineraries.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
Describe properties that are true of each instruction in the target description file.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayLoad() const
Return true if this instruction could possibly read memory.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
bool isCall() const
Return true if the instruction is a call.
unsigned getOpcode() const
Return the opcode number for this descriptor.
LLVM_ABI bool hasImplicitDefOfPhysReg(MCRegister Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool isValid() const
isValid - Returns true until all the operands have been visited.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
@ LQR_Live
Register is known to be (at least partially) live.
@ LQR_Unknown
Register liveness not decidable from local neighborhood.
This class is a data container for one entry in a MachineConstantPool.
union llvm::MachineConstantPoolEntry::@004270020304201266316354007027341142157160323045 Val
The constant itself.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
MachineConstantPoolValue * MachineCPVal
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
ArrayRef< MachineMemOperand * >::iterator mmo_iterator
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isCall(QueryType Type=AnyInBundle) const
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool isRegSequence() const
bool isInsertSubreg() const
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI void clearKillInfo()
Clears kill flags on all operands.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void AddHazardRecognizer(std::unique_ptr< ScheduleHazardRecognizer > &&)
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void increaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
LLVM_ABI void decreaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition Register.h:60
constexpr unsigned id() const
Definition Register.h:100
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
This class represents the scheduled code.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Scheduling unit. This is a node in the scheduling DAG.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
std::vector< SUnit > SUnits
The scheduling units.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const InstrItineraryData * getInstrItineraries() const
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Wrapper class representing a virtual register or register unit.
Definition Register.h:181
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static CondCodes getOppositeCondition(CondCodes CC)
Definition ARMBaseInfo.h:48
ARMII - This namespace holds all of the target specific flags that instruction info tracks.
@ ThumbArithFlagSetting
@ MO_OPTION_MASK
MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects just that part of the flag set.
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
AddrMode
ARM Addressing Modes.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned char getAM5FP16Offset(unsigned AM5Opc)
unsigned getSORegOffset(unsigned Op)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getAM2Offset(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
ShiftOpc getSORegShOp(unsigned Op)
AddrOpc getAM5Op(unsigned AM5Opc)
bool isSOImmTwoPartValNeg(unsigned V)
isSOImmTwoPartValNeg - Return true if the specified value can be obtained by two SOImmVal,...
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
bool isSOImmTwoPartVal(unsigned V)
isSOImmTwoPartVal - Return true if the specified value can be obtained by or'ing together two SOImmVa...
AddrOpc getAM5FP16Op(unsigned AM5Opc)
unsigned getT2SOImmTwoPartSecond(unsigned Imm)
unsigned getT2SOImmTwoPartFirst(unsigned Imm)
bool isT2SOImmTwoPartVal(unsigned Imm)
unsigned char getAM5Offset(unsigned AM5Opc)
unsigned getSOImmTwoPartFirst(unsigned V)
getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, return the first chunk of it...
AddrOpc getAM2Op(unsigned AM2Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
Define some predicates that are used for node matching.
Definition ARMEHABI.h:25
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
InstrType
Represents how an instruction should be mapped by the outliner.
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
constexpr T rotr(T V, int R)
Definition bit.h:382
static bool isIndirectCall(const MachineInstr &MI)
MachineInstr * findCMPToFoldIntoCBZ(MachineInstr *Br, const TargetRegisterInfo *TRI)
Search backwards from a tBcc to find a tCMPi8 against 0, meaning we can convert them to a tCBZ or tCB...
static bool isCondBranchOpcode(int Opc)
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
static bool isPushOpcode(int Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond)
static bool isVCTP(const MachineInstr *MI)
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
bool IsCPSRDead< MachineInstr >(const MachineInstr *MI)
constexpr RegState getKillRegState(bool B)
unsigned getBLXpredOpcode(const MachineFunction &MF)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
static bool isIndirectBranchOpcode(int Opc)
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI)
Return true if Reg is defined between From and To.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Op::Description Desc
static bool isSEHInstruction(const MachineInstr &MI)
static bool isCalleeSavedRegister(MCRegister Reg, const MCPhysReg *CSRegs)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
static bool isJumpTableBranchOpcode(int Opc)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static bool isPopOpcode(int Opc)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, unsigned Inactive)
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Register FrameReg, int &Offset, const ARMBaseInstrInfo &TII)
rewriteARMFrameIndex / rewriteT2FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
static bool isIndirectControlFlowNotComingBack(const MachineInstr &MI)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
unsigned getMatchingCondBranchOpcode(unsigned Opc)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
auto partition(R &&Range, UnaryPredicate P)
Provide wrappers to std::partition which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:2033
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
static const char * ARMCondCodeToString(ARMCC::CondCodes CC)
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned gettBLXrOpcode(const MachineFunction &MF)
static bool isSpeculationBarrierEndBBOpcode(int Opc)
unsigned getBLXOpcode(const MachineFunction &MF)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
bool isV8EligibleForIT(const InstrType *Instr)
Definition ARMFeatures.h:24
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
constexpr RegState getUndefRegState(bool B)
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
#define N
ARM_MLxEntry - Record information about MLA / MLS instructions.
Map pseudo instructions that imply an 'S' bit onto real opcodes.
OutlinerCosts(const ARMSubtarget &target)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr LaneBitmask getAll()
Definition LaneBitmask.h:82
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.