//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MVETailPredUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MultiHazardRecognizer.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <new>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "arm-instrinfo"

#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;    // MLA / MLS opcode
  uint16_t MulOpc;    // Expanded multiplication opcode
  uint16_t AddSubOpc; // Expanded add / sub opcode
  bool NegAcc;        // True if the acc is negated before the add / sub.
  bool HasLane;       // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,       MulOpc,        AddSubOpc,   NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,    ARM::VMULS,    ARM::VADDS,  false,  false },
  { ARM::VMLSS,    ARM::VMULS,    ARM::VSUBS,  false,  false },
  { ARM::VMLAD,    ARM::VMULD,    ARM::VADDD,  false,  false },
  { ARM::VMLSD,    ARM::VMULD,    ARM::VSUBD,  false,  false },
  { ARM::VNMLAS,   ARM::VNMULS,   ARM::VSUBS,  true,   false },
  { ARM::VNMLSS,   ARM::VMULS,    ARM::VSUBS,  true,   false },
  { ARM::VNMLAD,   ARM::VNMULD,   ARM::VSUBD,  true,   false },
  { ARM::VNMLSD,   ARM::VMULD,    ARM::VSUBD,  true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,   ARM::VMULfd,   ARM::VADDfd, false,  false },
  { ARM::VMLSfd,   ARM::VMULfd,   ARM::VSUBfd, false,  false },
  { ARM::VMLAfq,   ARM::VMULfq,   ARM::VADDfq, false,  false },
  { ARM::VMLSfq,   ARM::VMULfq,   ARM::VSUBfq, false,  false },
  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false,  true  },
  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false,  true  },
  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false,  true  },
  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false,  true  },
};
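
// As an illustrative reading of the table (example registers chosen here,
// not taken from the surrounding code): a scheduling hazard on
//   VMLAS s0, s1, s2    ; s0 += s1 * s2
// is resolved by expanding through the first entry into
//   VMULS s3, s1, s2
//   VADDS s0, s0, s3
// NegAcc entries subtract the accumulator instead, and HasLane marks the
// variants (e.g. VMLAslfd) that carry an extra lane operand.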

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
    : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
      Subtarget(STI) {
  for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *
ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                               const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II =
        static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}

// Called during:
// - pre-RA scheduling
// - post-RA scheduling when FeatureUseMISched is set
ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
    const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();

  // We would like to restrict this hazard recognizer to only
  // post-RA scheduling; we can tell that we're post-RA because we don't
  // track VRegLiveness.
  // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
  // banks banked on bit 2. Assume that TCMs are in use.
  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
    MHR->AddHazardRecognizer(
        std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));

  // Not inserting ARMHazardRecognizerFPMLx because that would change
  // legacy behavior

  auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
  return MHR;
}

// Called during post-RA scheduling when FeatureUseMISched is not set
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();

  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
    MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());

  auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
  if (BHR)
    MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
  return MHR;
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                        LiveIntervals *LIS) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return nullptr;

  MachineFunction &MF = *MI.getParent()->getParent();
  uint64_t TSFlags = MI.getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return nullptr;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
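  // For example (an illustrative sketch; registers chosen arbitrarily), the
  // pre-indexed store
  //   STR r3, [r0, #8]!
  // becomes the un-indexed pair
  //   ADD r0, r0, #8
  //   STR r3, [r0]
  // provided the offset can still be encoded (see the so_imm check below).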
  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
  if (MemOpc == 0)
    return nullptr;

  MachineInstr *UpdateMI = nullptr;
  MachineInstr *MemMI = nullptr;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI.getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI.mayStore();
  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(2);
  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
  Register WBReg = WB.getReg();
  Register BaseReg = Base.getReg();
  Register OffReg = Offset.getReg();
  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return nullptr;
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
                     .addReg(BaseReg)
                     .addImm(Amt)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .addReg(0)
                     .addImm(SOOpc)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    } else
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    break;
  }
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
                     .addReg(BaseReg)
                     .addImm(Amt)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    else
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI =
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
              .addReg(WBReg)
              .addImm(0)
              .addImm(Pred);
    else
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
                  .addReg(WBReg)
                  .addReg(0)
                  .addImm(0)
                  .addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI =
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
              .addReg(BaseReg)
              .addImm(0)
              .addImm(Pred);
    else
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
                  .addReg(BaseReg)
                  .addReg(0)
                  .addImm(0)
                  .addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (const MachineOperand &MO : MI.operands()) {
      if (MO.isReg() && MO.getReg().isVirtual()) {
        Register Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, *NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, *NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MachineBasicBlock &MBB = *MI.getParent();
  MBB.insert(MI, NewMIs[1]);
  MBB.insert(MI, NewMIs[0]);
  return NewMIs[0];
}

// Branch analysis.
// Cond vector output format:
//   0 elements indicates an unconditional branch
//   2 elements indicates a conditional branch; the elements are
//     the condition to check and the CPSR.
//   3 elements indicates a hardware loop end; the elements
//     are the opcode, the operand value to test, and a dummy
//     operand used to pad out to 3 operands.
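// For example (a sketch, not tied to a particular function): for a
// conditional branch "Bcc %bb.1" taken when Z is clear, Cond holds
//   { imm ARMCC::NE, reg $cpsr }
// while for a t2LoopEnd hardware-loop branch it holds
//   { imm t2LoopEnd-opcode, the counter register, imm 0 (padding) }.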
bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  TBB = nullptr;
  FBB = nullptr;

  MachineBasicBlock::instr_iterator I = MBB.instr_end();
  if (I == MBB.instr_begin())
    return false; // Empty blocks are easy.
  --I;

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
    // Flag to be raised on unanalyzeable instructions. This is useful in cases
    // where we want to clean up on the end of the basic block before we bail
    // out.
    bool CantAnalyze = false;

    // Skip over DEBUG values, predicated nonterminators and speculation
    // barrier terminators.
    while (I->isDebugInstr() || !I->isTerminator() ||
           isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
           I->getOpcode() == ARM::t2DoLoopStartTP) {
      if (I == MBB.instr_begin())
        return false;
      --I;
    }

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
      CantAnalyze = true;
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      if (!Cond.empty())
        return true;

      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
      CantAnalyze = true;
    } else if (I->getOpcode() == ARM::t2LoopEnd &&
               MBB.getParent()
                   ->getSubtarget<ARMSubtarget>()
                   .enableMachinePipeliner()) {
      if (!Cond.empty())
        return true;
      FBB = TBB;
      TBB = I->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
      Cond.push_back(I->getOperand(0));
      Cond.push_back(MachineOperand::CreateImm(0));
    } else {
      // We encountered an unrecognized terminator. Bail out immediately.
      return true;
    }

    // Cleanup code - to be run for unpredicated unconditional branches and
    // returns.
    if (!isPredicated(*I) &&
        (isUncondBranchOpcode(I->getOpcode()) ||
         isIndirectBranchOpcode(I->getOpcode()) ||
         isJumpTableBranchOpcode(I->getOpcode()) ||
         I->isReturn())) {
      // Forget any previous condition branch information - it no longer
      // applies.
      Cond.clear();
      FBB = nullptr;

      // If we can modify the function, delete everything below this
      // unconditional branch.
      if (AllowModify) {
        MachineBasicBlock::iterator DI = std::next(I);
        while (DI != MBB.instr_end()) {
          MachineInstr &InstToDelete = *DI;
          ++DI;
          // Speculation barriers must not be deleted.
          if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
            continue;
          InstToDelete.eraseFromParent();
        }
      }
    }

    if (CantAnalyze) {
      // We may not be able to analyze the block, but we could still have
      // an unconditional branch as the last instruction in the block, which
      // just branches to layout successor. If this is the case, then just
      // remove it if we're allowed to make modifications.
      if (AllowModify && !isPredicated(MBB.back()) &&
          isUncondBranchOpcode(MBB.back().getOpcode()) &&
          TBB && MBB.isLayoutSuccessor(TBB))
        removeBranch(MBB);
      return true;
    }

    if (I == MBB.instr_begin())
      return false;

    --I;
  }

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.
  return false;
}

unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
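
// Illustrative behaviour (assumed block shapes): for a block ending in
//   t2Bcc %bb.2; t2B %bb.3
// the unconditional t2B is erased first, then the t2Bcc, and 2 is returned;
// a block ending in a lone t2B yields 1, and a block with no terminating
// branch yields 0.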

unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL,
                                        int *BytesAdded) const {
  assert(!BytesAdded && "code size not handled");
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc = !AFI->isThumbFunction()
      ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
      ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
         "ARM branch conditions have two or three components!");

  // For conditional branches, we use addOperand to preserve CPSR flags.

  if (!FBB) {
    if (Cond.empty()) { // Unconditional branch?
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else if (Cond.size() == 2) {
      BuildMI(&MBB, DL, get(BccOpc))
          .addMBB(TBB)
          .addImm(Cond[0].getImm())
          .add(Cond[1]);
    } else
      BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
    return 1;
  }

  // Two-way conditional branch.
  if (Cond.size() == 2)
    BuildMI(&MBB, DL, get(BccOpc))
        .addMBB(TBB)
        .addImm(Cond[0].getImm())
        .add(Cond[1]);
  else if (Cond.size() == 3)
    BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond.size() == 2) {
    ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
    Cond[0].setImm(ARMCC::getOppositeCondition(CC));
    return false;
  }
  return true;
}
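
// Example (sketch): a two-element Cond of { ARMCC::EQ, $cpsr } is rewritten
// in place to { ARMCC::NE, $cpsr } and false (success) is returned; the
// three-element hardware-loop form is not reversible here, so true is
// returned for it.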

bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
  if (MI.isBundle()) {
    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI.findFirstPredOperandIdx();
  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
}

std::string ARMBaseInstrInfo::createMIROperandComment(
    const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
    const TargetRegisterInfo *TRI) const {

  // First, let's see if there is a generic comment for this operand
  std::string GenericComment =
      TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
  if (!GenericComment.empty())
    return GenericComment;

  // If not, check if we have an immediate operand.
  if (!Op.isImm())
    return std::string();

  // And print its corresponding condition code if the immediate is a
  // predicate.
  int FirstPredOp = MI.findFirstPredOperandIdx();
  if (FirstPredOp != (int) OpIdx)
    return std::string();

  std::string CC = "CC::";
  CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
  return CC;
}

bool ARMBaseInstrInfo::PredicateInstruction(
    MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
  unsigned Opc = MI.getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Pred[0].getImm())
        .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI.findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI.getOperand(PIdx+1).setReg(Pred[1].getReg());

    // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
    // IT block. This affects how they are printed.
    const MCInstrDesc &MCID = MI.getDesc();
    if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
      assert(MCID.operands()[1].isOptionalDef() &&
             "CPSR def isn't expected operand");
      assert((MI.getOperand(1).isDead() ||
              MI.getOperand(1).getReg() != ARM::CPSR) &&
             "if conversion tried to stop defining used CPSR");
      MI.getOperand(1).setReg(ARM::NoRegister);
    }

    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
                                         ArrayRef<MachineOperand> Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}
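
// Example (sketch): SubsumesPredicate({HS, $cpsr}, {HI, $cpsr}) is true,
// since any flags state satisfying HI (unsigned higher) also satisfies HS
// (unsigned higher or same), while the reverse query is false.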

bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
                                         std::vector<MachineOperand> &Pred,
                                         bool SkipDead) const {
  bool Found = false;
  for (const MachineOperand &MO : MI.operands()) {
    bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
    bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
    if (ClobbersCPSR || IsCPSR) {

      // Filter out T1 instructions that have a dead CPSR,
      // allowing IT blocks to be generated containing T1 instructions
      const MCInstrDesc &MCID = MI.getDesc();
      if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
          SkipDead)
        continue;

      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
  for (const auto &MO : MI.operands())
    if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
      return true;
  return false;
}

static bool isEligibleForITBlock(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return true;
  case ARM::tADC:   // ADC (register) T1
  case ARM::tADDi3: // ADD (immediate) T1
  case ARM::tADDi8: // ADD (immediate) T2
  case ARM::tADDrr: // ADD (register) T1
  case ARM::tAND:   // AND (register) T1
  case ARM::tASRri: // ASR (immediate) T1
  case ARM::tASRrr: // ASR (register) T1
  case ARM::tBIC:   // BIC (register) T1
  case ARM::tEOR:   // EOR (register) T1
  case ARM::tLSLri: // LSL (immediate) T1
  case ARM::tLSLrr: // LSL (register) T1
  case ARM::tLSRri: // LSR (immediate) T1
  case ARM::tLSRrr: // LSR (register) T1
  case ARM::tMUL:   // MUL T1
  case ARM::tMVN:   // MVN (register) T1
  case ARM::tORR:   // ORR (register) T1
  case ARM::tROR:   // ROR (register) T1
  case ARM::tRSB:   // RSB (immediate) T1
  case ARM::tSBC:   // SBC (register) T1
  case ARM::tSUBi3: // SUB (immediate) T1
  case ARM::tSUBi8: // SUB (immediate) T2
  case ARM::tSUBrr: // SUB (register) T1
    return !ARMBaseInstrInfo::isCPSRDefined(*MI);
  }
}
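
// Example (sketch): tADDrr ("adds r0, r0, r1") normally sets CPSR, but its
// 16-bit encoding inside an IT block does not; so it is IT-eligible exactly
// when its CPSR def is dead, which is what the isCPSRDefined check above
// tests.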

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
  if (!MI.isPredicable())
    return false;

  if (MI.isBundle())
    return false;

  if (!isEligibleForITBlock(&MI))
    return false;

  const MachineFunction *MF = MI.getParent()->getParent();
  const ARMFunctionInfo *AFI =
      MF->getInfo<ARMFunctionInfo>();

  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
  // In their ARM encoding, they can't be encoded in a conditional form.
  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
    return false;

  // Make indirect control flow changes unpredicable when SLS mitigation is
  // enabled.
  const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
  if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
    return false;
  if (ST.hardenSlsBlr() && isIndirectCall(MI))
    return false;

  if (AFI->isThumb2Function()) {
    if (getSubtarget().restrictIT())
      return isV8EligibleForIT(&MI);
  }

  return true;
}

namespace llvm {

template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    if (!MO.isDead())
      return false;
  }
  // all definitions of CPSR are dead
  return true;
}

} // end namespace llvm

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI.getDesc();

  switch (MI.getOpcode()) {
  default:
    // Return the size specified in .td file. If there's none, return 0, as we
    // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
    // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
    // contrast to AArch64 instructions which have a default size of 4 bytes
    // for example.
    return MCID.getSize();
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::CONSTPOOL_ENTRY:
  case ARM::JUMPTABLE_INSTS:
  case ARM::JUMPTABLE_ADDRS:
  case ARM::JUMPTABLE_TBB:
  case ARM::JUMPTABLE_TBH:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI.getOperand(2).getImm();
  case ARM::SPACE:
    return MI.getOperand(1).getImm();
  case ARM::INLINEASM:
  case ARM::INLINEASM_BR: {
    // If this machine instr is an inline asm, measure it.
    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
      Size = alignTo(Size, 4);
    return Size;
  }
  }
}

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, bool KillSrc,
                                    const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
                     : ARM::MRS;

  MachineInstrBuilder MIB =
      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);

  // There is only 1 A/R class MRS instruction, and it always refers to
  // APSR. However, there are lots of other possibilities on M-class cores.
  if (Subtarget.isMClass())
    MIB.addImm(0x800);

  MIB.add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
}

void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned SrcReg, bool KillSrc,
                                  const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
                     : ARM::MSR;

  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));

  if (Subtarget.isMClass())
    MIB.addImm(0x800);
  else
    MIB.addImm(8);

  MIB.addReg(SrcReg, getKillRegState(KillSrc))
      .add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}

void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
  MIB.addImm(ARMVCC::None);
  MIB.addReg(0);
  MIB.addReg(0); // tp_reg
}

void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
                                      Register DestReg) {
  addUnpredicatedMveVpredNOp(MIB);
  MIB.addReg(DestReg, RegState::Undef);
}

void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
  MIB.addImm(Cond);
  MIB.addReg(ARM::VPR, RegState::Implicit);
  MIB.addReg(0); // tp_reg
}

void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
                                    unsigned Cond, unsigned Inactive) {
  addPredicatedMveVpredNOp(MIB, Cond);
  MIB.addReg(Inactive);
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, MCRegister DestReg,
                                   MCRegister SrcReg, bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(MIB, DestReg);
    else if (Opc != ARM::MQPRCopy)
      MIB.add(predOps(ARMCC::AL));
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
    BeginIdx = ARM::qsub_0;
    SubRegs = 2;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
    BeginIdx = ARM::qsub_0;
    SubRegs = 4;
    // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
    SubRegs = 2;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
    Spacing = 2;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
    Spacing = 2;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
    Spacing = 2;
  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
             !Subtarget.hasFP64()) {
    Opc = ARM::VMOVS;
    BeginIdx = ARM::ssub_0;
    SubRegs = 2;
  } else if (SrcReg == ARM::CPSR) {
    copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
    return;
  } else if (DestReg == ARM::CPSR) {
    copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
    return;
  } else if (DestReg == ARM::VPR) {
    assert(ARM::GPRRegClass.contains(SrcReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  } else if (SrcReg == ARM::VPR) {
    assert(ARM::GPRRegClass.contains(DestReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  } else if (DestReg == ARM::FPSCR_NZCV) {
    assert(ARM::GPRRegClass.contains(SrcReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  } else if (SrcReg == ARM::FPSCR_NZCV) {
    assert(ARM::GPRRegClass.contains(DestReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  }

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with
  // SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR (NEON or MVE) takes two source operands.
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
      Mov.addReg(Src);
    }
    // MVE VORR takes predicate operands in place of an ordinary condition.
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(Mov, Dst);
    else
      Mov = Mov.add(predOps(ARMCC::AL));
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = Mov.add(condCodeOp());
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}
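
// Illustrative expansion (example registers, not from this function): with
// NEON, copying the Q-register pair q0_q1 into q2_q3 emits one VORRq per
// Q sub-register:
//   VORRq q2, q0, q0
//   VORRq q3, q1, q1
// If instead q0_q1 were copied into q1_q2, the first destination
// sub-register (q1) overlaps the source, so the loop above runs backwards to
// avoid clobbering not-yet-copied parts.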

std::optional<DestSourcePair>
ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  // VMOVRRD is also a copy instruction, but it requires special handling:
  // it is a more complex form of copy, so we do not consider it here. To
  // recognize such instructions, the isExtractSubregLike MI interface
  // function could be used.
  // VORRq is considered a move only if its two inputs are the same register.
  if (!MI.isMoveReg() ||
      (MI.getOpcode() == ARM::VORRq &&
       MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
    return std::nullopt;
  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}

std::optional<ParamLoadedValue>
ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
                                      Register Reg) const {
  if (auto DstSrcPair = isCopyInstrImpl(MI)) {
    Register DstReg = DstSrcPair->Destination->getReg();

    // TODO: We don't handle cases where the forwarding reg is narrower/wider
    // than the copy registers. Consider for example:
    //
    //   s16 = VMOVS s0
    //   s17 = VMOVS s1
    //   call @callee(d0)
    //
    // We'd like to describe the call site value of d0 as d8, but this requires
    // gathering and merging the descriptions for the two VMOVS instructions.
    //
    // We also don't handle the reverse situation, where the forwarding reg is
    // narrower than the copy destination:
    //
    //   d8 = VMOVD d0
    //   call @callee(s1)
    //
    // We need to produce a fragment description (the call site value of s1 is
    // /not/ just d8).
    if (DstReg != Reg)
      return std::nullopt;
  }
  return TargetInstrInfo::describeLoadedValue(MI, Reg);
}

const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (Register::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           Register SrcReg, bool isKill, int FI,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
                                           Register VReg) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align Alignment = MFI.getObjectAlign(FI);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      MFI.getObjectSize(FI), Alignment);

  switch (TRI->getSpillSize(*RC)) {
  case 2:
    if (ARM::HPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      if (Subtarget.hasV5TEOps()) {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        // Fallback to STM instruction, which has existed since the dawn of
        // time.
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
      // Use aligned spills if the stack can be realigned.
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
      MIB.addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      // Use aligned spills if the stack can be realigned.
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::MQQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        // FIXME: It's possible to only store part of the QQ register if the
        // spilled def has a sub-register index.
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else if (Subtarget.hasMVEIntegerOps()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO);
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
        Subtarget.hasMVEIntegerOps()) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addMemOperand(MMO);
    } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown reg class!");
  }
}
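
// Spill-selection sketch (illustrative summary of the cases above): a
// 16-byte DPair spill under NEON uses the aligned VST1q64 when the slot is
// 16-byte aligned and the stack can be realigned, falling back to VSTMQIA
// otherwise, while the same-size MVE QPR spill is emitted as MVE_VSTRWU32
// with an unpredicated VPT-block operand set.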

Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
  case ARM::VSTR_P0_off:
  case ARM::MVE_VSTRWU32:
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
      FrameIndex = MI.getOperand(0).getIndex();
      return MI.getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::MQQPRStore:
  case ARM::MQQQQPRStore:
    if (MI.getOperand(1).isFI()) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

bool ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
      Accesses.size() == 1) {
    FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
    return true;
  }
  return false;
}

void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator I,
                                            Register DestReg, int FI,
                                            const TargetRegisterClass *RC,
                                            const TargetRegisterInfo *TRI,
                                            Register VReg) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const Align Alignment = MFI.getObjectAlign(FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
      MFI.getObjectSize(FI), Alignment);

  switch (TRI->getSpillSize(*RC)) {
  case 2:
    if (ARM::HPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        // Fallback to LDM instruction, which has existed since the dawn of
        // time.
        MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
                  .addFrameIndex(FI)
                  .addMemOperand(MMO)
                  .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      if (DestReg.isPhysical())
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
      MIB.addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (DestReg.isPhysical())
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::MQQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else if (Subtarget.hasMVEIntegerOps()) {
        BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
            .addFrameIndex(FI)
            .addMemOperand(MMO);
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (DestReg.isPhysical())
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
        Subtarget.hasMVEIntegerOps()) {
      BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
          .addFrameIndex(FI)
          .addMemOperand(MMO);
    } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (DestReg.isPhysical())
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::VLDR_P0_off:
  case ARM::MVE_VLDRWU32:
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d8TPseudo:
  case ARM::VLD1d16TPseudo:
  case ARM::VLD1d32TPseudo:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d8QPseudo:
  case ARM::VLD1d16QPseudo:
  case ARM::VLD1d32QPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::MQQPRLoad:
  case ARM::MQQQQPRLoad:
    if (MI.getOperand(1).isFI()) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

bool ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                 int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
      Accesses.size() == 1) {
    FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
    return true;
  }
  return false;
}

/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
/// depending on whether the result is used.
void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
  bool isThumb1 = Subtarget.isThumb1Only();
  bool isThumb2 = Subtarget.isThumb2();
  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();

  DebugLoc dl = MI->getDebugLoc();
  MachineBasicBlock *BB = MI->getParent();

  MachineInstrBuilder LDM, STM;
  if (isThumb1 || !MI->getOperand(1).isDead()) {
    MachineOperand LDWb(MI->getOperand(1));
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
                                        : isThumb1 ? ARM::tLDMIA_UPD
                                                   : ARM::LDMIA_UPD))
              .add(LDWb);
  } else {
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
  }

  if (isThumb1 || !MI->getOperand(0).isDead()) {
    MachineOperand STWb(MI->getOperand(0));
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
                                        : isThumb1 ? ARM::tSTMIA_UPD
                                                   : ARM::STMIA_UPD))
              .add(STWb);
  } else {
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
  }

  MachineOperand LDBase(MI->getOperand(3));
  LDM.add(LDBase).add(predOps(ARMCC::AL));

  MachineOperand STBase(MI->getOperand(2));
  STM.add(STBase).add(predOps(ARMCC::AL));

  // Sort the scratch registers into ascending order.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  SmallVector<unsigned, 6> ScratchRegs;
  for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
    ScratchRegs.push_back(MO.getReg());
  llvm::sort(ScratchRegs,
             [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
               return TRI.getEncodingValue(Reg1) <
                      TRI.getEncodingValue(Reg2);
             });

  for (const auto &Reg : ScratchRegs) {
    LDM.addReg(Reg, RegState::Define);
    STM.addReg(Reg, RegState::Kill);
  }

  BB->erase(MI);
}
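
// Illustrative expansion (operand layout as consumed above; registers are
// examples): an ARM-mode MEMCPY whose write-back results are live, copying
// four words with scratch registers r4-r7, becomes
//   LDMIA_UPD r1!, {r4, r5, r6, r7}
//   STMIA_UPD r0!, {r4, r5, r6, r7}
// with the scratch list sorted by encoding so the LDM/STM register lists
// are ascending and therefore encodable.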

bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
    expandLoadStackGuard(MI);
    MI.getParent()->erase(MI);
    return true;
  }

  if (MI.getOpcode() == ARM::MEMCPY) {
    expandMEMCPY(MI);
    return true;
  }

  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
  // widened to VMOVD. We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
    return false;

  // Look for a copy between even S-registers. That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
  Register DstRegS = MI.getOperand(0).getReg();
  Register SrcRegS = MI.getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI.getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  LLVM_DEBUG(dbgs() << "widening:    " << MI);
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);

  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
  if (ImpDefIdx != -1)
    MI.removeOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI.setDesc(get(ARM::VMOVD));
  MI.getOperand(0).setReg(DstRegD);
  MI.getOperand(1).setReg(SrcRegD);
  MIB.add(predOps(ARMCC::AL));

  // We are now reading SrcRegD instead of SrcRegS. This may upset the
  // register scavenger and machine verifier, so we need to indicate that we
  // are reading an undefined value from SrcRegD, but a proper value from
  // SrcRegS.
  MI.getOperand(1).setIsUndef();
  MIB.addReg(SrcRegS, RegState::Implicit);

  // SrcRegD may actually contain an unrelated value in the ssub_1
  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
  if (MI.getOperand(1).isKill()) {
    MI.getOperand(1).setIsKill(false);
    MI.addRegisterKilled(SrcRegS, TRI, true);
  }

  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
  return true;
}
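
// Widening sketch in MIR terms (example registers): the S-register copy
//   $s0 = COPY killed $s2
// is rewritten above into
//   $d0 = VMOVD undef $d1, 14, $noreg, implicit killed $s2
// reading the containing D-register as undef while still recording the real
// read (and kill) of $s2 for the scavenger and verifier.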

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
      static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = nullptr;

  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::Create(
        cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
        4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::
        Create(MF.getFunction().getContext(),
               cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::
        Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(),
               PCLabelId, ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::
        Create(MF.getFunction().getContext(),
               cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
  return PCLabelId;
}

void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     Register DestReg, unsigned SubIdx,
                                     const MachineInstr &Orig,
                                     const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig.getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
    MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig.getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
        .addConstantPoolIndex(CPI)
        .addImm(PCLabelId)
        .cloneMemRefs(Orig);
    break;
  }
  }
}

MachineInstr &
ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator InsertBefore,
                            const MachineInstr &Orig) const {
  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
  for (;;) {
    switch (I->getOpcode()) {
    case ARM::tLDRpci_pic:
    case ARM::t2LDRpci_pic: {
      MachineFunction &MF = *MBB.getParent();
      unsigned CPI = I->getOperand(1).getIndex();
      unsigned PCLabelId = duplicateCPV(MF, CPI);
      I->getOperand(1).setIndex(CPI);
      I->getOperand(2).setImm(PCLabelId);
      break;
    }
    }
    if (!I->isBundledWithSucc())
      break;
    ++I;
  }
  return Cloned;
}
1859
1861 const MachineInstr &MI1,
1862 const MachineRegisterInfo *MRI) const {
1863 unsigned Opcode = MI0.getOpcode();
1864 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1865 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1866 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1867 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1868 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1869 Opcode == ARM::t2MOV_ga_pcrel) {
1870 if (MI1.getOpcode() != Opcode)
1871 return false;
1872 if (MI0.getNumOperands() != MI1.getNumOperands())
1873 return false;
1874
1875 const MachineOperand &MO0 = MI0.getOperand(1);
1876 const MachineOperand &MO1 = MI1.getOperand(1);
1877 if (MO0.getOffset() != MO1.getOffset())
1878 return false;
1879
1880 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1881 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1882 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1883 Opcode == ARM::t2MOV_ga_pcrel)
1884 // Ignore the PC labels.
1885 return MO0.getGlobal() == MO1.getGlobal();
1886
1887 const MachineFunction *MF = MI0.getParent()->getParent();
1888 const MachineConstantPool *MCP = MF->getConstantPool();
1889 int CPI0 = MO0.getIndex();
1890 int CPI1 = MO1.getIndex();
1891 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1892 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1893 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1894 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1895 if (isARMCP0 && isARMCP1) {
1896 ARMConstantPoolValue *ACPV0 =
1897 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1898 ARMConstantPoolValue *ACPV1 =
1899 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1900 return ACPV0->hasSameValue(ACPV1);
1901 } else if (!isARMCP0 && !isARMCP1) {
1902 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1903 }
1904 return false;
1905 } else if (Opcode == ARM::PICLDR) {
1906 if (MI1.getOpcode() != Opcode)
1907 return false;
1908 if (MI0.getNumOperands() != MI1.getNumOperands())
1909 return false;
1910
1911 Register Addr0 = MI0.getOperand(1).getReg();
1912 Register Addr1 = MI1.getOperand(1).getReg();
1913 if (Addr0 != Addr1) {
1914 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1915 return false;
1916
1917 // This assumes SSA form.
1918 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1919 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1920 // Check if the loaded values, e.g. a constantpool entry or a global
1921 // address, are the same.
1922 if (!produceSameValue(*Def0, *Def1, MRI))
1923 return false;
1924 }
1925
1926 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1927 // %12 = PICLDR %11, 0, 14, %noreg
1928 const MachineOperand &MO0 = MI0.getOperand(i);
1929 const MachineOperand &MO1 = MI1.getOperand(i);
1930 if (!MO0.isIdenticalTo(MO1))
1931 return false;
1932 }
1933 return true;
1934 }
1935
1936 return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1937}
1938
1939/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1940/// determine if two loads are loading from the same base address. It should
1941/// only return true if the base pointers are the same and the only difference
1942/// between the two addresses is the offset. It also returns the offsets by
1943/// reference.
1944///
1945/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1946/// is permanently disabled.
1947bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1948 int64_t &Offset1,
1949 int64_t &Offset2) const {
1950 // Don't worry about Thumb: just ARM and Thumb2.
1951 if (Subtarget.isThumb1Only()) return false;
1952
1953 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1954 return false;
1955
1956 auto IsLoadOpcode = [&](unsigned Opcode) {
1957 switch (Opcode) {
1958 default:
1959 return false;
1960 case ARM::LDRi12:
1961 case ARM::LDRBi12:
1962 case ARM::LDRD:
1963 case ARM::LDRH:
1964 case ARM::LDRSB:
1965 case ARM::LDRSH:
1966 case ARM::VLDRD:
1967 case ARM::VLDRS:
1968 case ARM::t2LDRi8:
1969 case ARM::t2LDRBi8:
1970 case ARM::t2LDRDi8:
1971 case ARM::t2LDRSHi8:
1972 case ARM::t2LDRi12:
1973 case ARM::t2LDRBi12:
1974 case ARM::t2LDRSHi12:
1975 return true;
1976 }
1977 };
1978
1979 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1980 !IsLoadOpcode(Load2->getMachineOpcode()))
1981 return false;
1982
1983 // Check if base addresses and chain operands match.
1984 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1985 Load1->getOperand(4) != Load2->getOperand(4))
1986 return false;
1987
1988 // Index should be Reg0.
1989 if (Load1->getOperand(3) != Load2->getOperand(3))
1990 return false;
1991
1992 // Determine the offsets.
1993 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1994 isa<ConstantSDNode>(Load2->getOperand(1))) {
1995 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1996 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1997 return true;
1998 }
1999
2000 return false;
2001}
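// Illustrative example (not from the original source): two DAG nodes such as
// (LDRi12 %base, 4) and (LDRi12 %base, 8), with matching chains and a zero
// index register, share a base pointer, so this returns true with
// Offset1 = 4 and Offset2 = 8.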
2002
2003/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
2004/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
2005/// be scheduled together. On some targets if two loads are loading from
2006/// addresses in the same cache line, it's better if they are scheduled
2007/// together. This function takes two integers that represent the load offsets
2008/// from the common base address. It returns true if it decides it's desirable
2009/// to schedule the two loads together. "NumLoads" is the number of loads that
2010/// have already been scheduled after Load1.
2011///
2012/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2013/// is permanently disabled.
2014bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2015 int64_t Offset1, int64_t Offset2,
2016 unsigned NumLoads) const {
2017 // Don't worry about Thumb: just ARM and Thumb2.
2018 if (Subtarget.isThumb1Only()) return false;
2019
2020 assert(Offset2 > Offset1);
2021
2022 if ((Offset2 - Offset1) / 8 > 64)
2023 return false;
2024
2025 // Check if the machine opcodes are different. If they are different
2026 // then we consider them to not be of the same base address,
2027 // EXCEPT in the case of Thumb2 byte loads where one is t2LDRBi8 and the
2028 // other is t2LDRBi12. In this case, they are considered to be the same
2029 // because they are different encoding forms of the same basic instruction.
2030 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2031 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2032 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2033 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2034 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2035 return false; // FIXME: overly conservative?
2036
2037 // Four loads in a row should be sufficient.
2038 if (NumLoads >= 3)
2039 return false;
2040
2041 return true;
2042}
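// Illustrative example: with Offset1 = 0, Offset2 = 8, matching opcodes and
// NumLoads = 1, the offset delta is well within the window checked above and
// fewer than four loads are in flight, so the loads are scheduled together.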
2043
2044bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2045 const MachineBasicBlock *MBB,
2046 const MachineFunction &MF) const {
2047 // Debug info is never a scheduling boundary. It's necessary to be explicit
2048 // due to the special treatment of IT instructions below, otherwise a
2049 // dbg_value followed by an IT will result in the IT instruction being
2050 // considered a scheduling hazard, which is wrong. It should be the actual
2051 // instruction preceding the dbg_value instruction(s), just like it is
2052 // when debug info is not present.
2053 if (MI.isDebugInstr())
2054 return false;
2055
2056 // Terminators and labels can't be scheduled around.
2057 if (MI.isTerminator() || MI.isPosition())
2058 return true;
2059
2060 // INLINEASM_BR can jump to another block
2061 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2062 return true;
2063
2064 if (isSEHInstruction(MI))
2065 return true;
2066
2067 // Treat the start of the IT block as a scheduling boundary, but schedule
2068 // t2IT along with all instructions following it.
2069 // FIXME: This is a big hammer. But the alternative is to add all potential
2070 // true and anti dependencies to IT block instructions as implicit operands
2071 // to the t2IT instruction. The added compile time and complexity does not
2072 // seem worth it.
2073 MachineBasicBlock::const_iterator I = MI;
2074 // Make sure to skip any debug instructions
2075 while (++I != MBB->end() && I->isDebugInstr())
2076 ;
2077 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2078 return true;
2079
2080 // Don't attempt to schedule around any instruction that defines
2081 // a stack-oriented pointer, as it's unlikely to be profitable. This
2082 // saves compile time, because it doesn't require every single
2083 // stack slot reference to depend on the instruction that does the
2084 // modification.
2085 // Calls don't actually change the stack pointer, even if they have imp-defs.
2086 // No ARM calling conventions change the stack pointer. (X86 calling
2087 // conventions sometimes do).
2088 if (!MI.isCall() && MI.definesRegister(ARM::SP))
2089 return true;
2090
2091 return false;
2092}
2093
2094bool ARMBaseInstrInfo::
2095isProfitableToIfCvt(MachineBasicBlock &MBB,
2096 unsigned NumCycles, unsigned ExtraPredCycles,
2097 BranchProbability Probability) const {
2098 if (!NumCycles)
2099 return false;
2100
2101 // If we are optimizing for size, see if the branch in the predecessor can be
2102 // lowered to cbn?z by the constant island lowering pass, and return false if
2103 // so. This results in a shorter instruction sequence.
2104 if (MBB.getParent()->getFunction().hasOptSize()) {
2105 MachineBasicBlock *Pred = *MBB.pred_begin();
2106 if (!Pred->empty()) {
2107 MachineInstr *LastMI = &*Pred->rbegin();
2108 if (LastMI->getOpcode() == ARM::t2Bcc) {
2109 const TargetRegisterInfo *TRI = &getRegisterInfo();
2110 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2111 if (CmpMI)
2112 return false;
2113 }
2114 }
2115 }
2116 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2117 MBB, 0, 0, Probability);
2118}
2119
2120bool ARMBaseInstrInfo::
2121isProfitableToIfCvt(MachineBasicBlock &TBB,
2122 unsigned TCycles, unsigned TExtra,
2123 MachineBasicBlock &FBB,
2124 unsigned FCycles, unsigned FExtra,
2125 BranchProbability Probability) const {
2126 if (!TCycles)
2127 return false;
2128
2129 // In thumb code we often end up trading one branch for an IT block, and
2130 // if we end up cloning instructions this can increase code size. Prevent
2131 // blocks with multiple predecessors from being ifcvted to prevent this
2132 // cloning.
2133 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2134 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2135 return false;
2136 }
2137
2138 // Attempt to estimate the relative costs of predication versus branching.
2139 // Here we scale up each component of UnpredCost to avoid precision issues when
2140 // scaling TCycles/FCycles by Probability.
2141 const unsigned ScalingUpFactor = 1024;
2142
2143 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2144 unsigned UnpredCost;
2145 if (!Subtarget.hasBranchPredictor()) {
2146 // When we don't have a branch predictor it's always cheaper to not take a
2147 // branch than take it, so we have to take that into account.
2148 unsigned NotTakenBranchCost = 1;
2149 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2150 unsigned TUnpredCycles, FUnpredCycles;
2151 if (!FCycles) {
2152 // Triangle: TBB is the fallthrough
2153 TUnpredCycles = TCycles + NotTakenBranchCost;
2154 FUnpredCycles = TakenBranchCost;
2155 } else {
2156 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2157 TUnpredCycles = TCycles + TakenBranchCost;
2158 FUnpredCycles = FCycles + NotTakenBranchCost;
2159 // The branch at the end of FBB will disappear when it's predicated, so
2160 // discount it from PredCost.
2161 PredCost -= 1 * ScalingUpFactor;
2162 }
2163 // The total cost is the cost of each path scaled by their probabilities.
2164 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2165 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2166 UnpredCost = TUnpredCost + FUnpredCost;
2167 // When predicating, assume that the first IT can be folded away but later
2168 // ones cost one cycle each.
2169 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2170 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2171 }
2172 } else {
2173 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2174 unsigned FUnpredCost =
2175 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2176 UnpredCost = TUnpredCost + FUnpredCost;
2177 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2178 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2179 }
2180
2181 return PredCost <= UnpredCost;
2182}
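// Rough worked example (illustrative, assuming a branch predictor and a
// hypothetical misprediction penalty of 10 cycles): for a diamond with
// TCycles = FCycles = 2, no extra cycles and a 50% probability,
// PredCost = 4 * 1024 and UnpredCost = 1024 + 1024 + 1024 (the branch)
// + 1024 (penalty / 10), so predication is considered profitable.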
2183
2184unsigned
2185ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2186 unsigned NumInsts) const {
2187 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2188 // ARM has a condition code field in every predicable instruction, using it
2189 // doesn't change code size.
2190 if (!Subtarget.isThumb2())
2191 return 0;
2192
2193 // It's possible that the size of the IT is restricted to a single block.
2194 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2195 return divideCeil(NumInsts, MaxInsts) * 2;
2196}
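// For example, predicating 5 instructions without restrictIT() needs
// divideCeil(5, 4) = 2 IT instructions, i.e. 4 extra bytes.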
2197
2198unsigned
2199ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2200 // If this branch is likely to be folded into the comparison to form a
2201 // CB(N)Z, then removing it won't reduce code size at all, because that will
2202 // just replace the CB(N)Z with a CMP.
2203 if (MI.getOpcode() == ARM::t2Bcc &&
2204 findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2205 return 0;
2206
2207 unsigned Size = getInstSizeInBytes(MI);
2208
2209 // For Thumb2, all branches are 32-bit instructions during the if conversion
2210 // pass, but may be replaced with 16-bit instructions during size reduction.
2211 // Since the branches considered by if conversion tend to be forward branches
2212 // over small basic blocks, they are very likely to be in range for the
2213 // narrow instructions, so we assume the final code size will be half what it
2214 // currently is.
2215 if (Subtarget.isThumb2())
2216 Size /= 2;
2217
2218 return Size;
2219}
2220
2221bool
2222ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2223 MachineBasicBlock &FMBB) const {
2224 // Reduce false anti-dependencies to let the target's out-of-order execution
2225 // engine do its thing.
2226 return Subtarget.isProfitableToUnpredicate();
2227}
2228
2229/// getInstrPredicate - If instruction is predicated, returns its predicate
2230/// condition, otherwise returns AL. It also returns the condition code
2231/// register by reference.
2232ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2233 Register &PredReg) {
2234 int PIdx = MI.findFirstPredOperandIdx();
2235 if (PIdx == -1) {
2236 PredReg = 0;
2237 return ARMCC::AL;
2238 }
2239
2240 PredReg = MI.getOperand(PIdx+1).getReg();
2241 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2242}
2243
2244unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2245 if (Opc == ARM::B)
2246 return ARM::Bcc;
2247 if (Opc == ARM::tB)
2248 return ARM::tBcc;
2249 if (Opc == ARM::t2B)
2250 return ARM::t2Bcc;
2251
2252 llvm_unreachable("Unknown unconditional branch opcode!");
2253}
2254
2255MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2256 bool NewMI,
2257 unsigned OpIdx1,
2258 unsigned OpIdx2) const {
2259 switch (MI.getOpcode()) {
2260 case ARM::MOVCCr:
2261 case ARM::t2MOVCCr: {
2262 // MOVCC can be commuted by inverting the condition.
2263 Register PredReg;
2264 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2265 // MOVCC AL can't be inverted. Shouldn't happen.
2266 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2267 return nullptr;
2268 MachineInstr *CommutedMI =
2269 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2270 if (!CommutedMI)
2271 return nullptr;
2272 // After swapping the MOVCC operands, also invert the condition.
2273 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2274 .setImm(ARMCC::getOppositeCondition(CC));
2275 return CommutedMI;
2276 }
2277 }
2278 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2279}
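// Illustrative example: commuting the two source registers of a MOVCCr and
// inverting its condition (e.g. EQ -> NE), as the code above does, yields an
// equivalent select.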
2280
2281/// Identify instructions that can be folded into a MOVCC instruction, and
2282/// return the defining instruction.
2283MachineInstr *
2284ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2285 const TargetInstrInfo *TII) const {
2286 if (!Reg.isVirtual())
2287 return nullptr;
2288 if (!MRI.hasOneNonDBGUse(Reg))
2289 return nullptr;
2290 MachineInstr *MI = MRI.getVRegDef(Reg);
2291 if (!MI)
2292 return nullptr;
2293 // Check if MI can be predicated and folded into the MOVCC.
2294 if (!isPredicable(*MI))
2295 return nullptr;
2296 // Check if MI has any non-dead defs or physreg uses. This also detects
2297 // predicated instructions which will be reading CPSR.
2298 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2299 // Reject frame index operands, PEI can't handle the predicated pseudos.
2300 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2301 return nullptr;
2302 if (!MO.isReg())
2303 continue;
2304 // MI can't have any tied operands, that would conflict with predication.
2305 if (MO.isTied())
2306 return nullptr;
2307 if (MO.getReg().isPhysical())
2308 return nullptr;
2309 if (MO.isDef() && !MO.isDead())
2310 return nullptr;
2311 }
2312 bool DontMoveAcrossStores = true;
2313 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2314 return nullptr;
2315 return MI;
2316}
2317
2318bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2319 SmallVectorImpl<MachineOperand> &Cond,
2320 unsigned &TrueOp, unsigned &FalseOp,
2321 bool &Optimizable) const {
2322 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2323 "Unknown select instruction");
2324 // MOVCC operands:
2325 // 0: Def.
2326 // 1: True use.
2327 // 2: False use.
2328 // 3: Condition code.
2329 // 4: CPSR use.
2330 TrueOp = 1;
2331 FalseOp = 2;
2332 Cond.push_back(MI.getOperand(3));
2333 Cond.push_back(MI.getOperand(4));
2334 // We can always fold a def.
2335 Optimizable = true;
2336 return false;
2337}
2338
2339MachineInstr *
2340ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2341 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2342 bool PreferFalse) const {
2343 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2344 "Unknown select instruction");
2345 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2346 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2347 bool Invert = !DefMI;
2348 if (!DefMI)
2349 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2350 if (!DefMI)
2351 return nullptr;
2352
2353 // Find new register class to use.
2354 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2355 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2356 Register DestReg = MI.getOperand(0).getReg();
2357 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2358 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2359 if (!MRI.constrainRegClass(DestReg, FalseClass))
2360 return nullptr;
2361 if (!MRI.constrainRegClass(DestReg, TrueClass))
2362 return nullptr;
2363
2364 // Create a new predicated version of DefMI.
2365 // Rfalse is the first use.
2366 MachineInstrBuilder NewMI =
2367 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2368
2369 // Copy all the DefMI operands, excluding its (null) predicate.
2370 const MCInstrDesc &DefDesc = DefMI->getDesc();
2371 for (unsigned i = 1, e = DefDesc.getNumOperands();
2372 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2373 NewMI.add(DefMI->getOperand(i));
2374
2375 unsigned CondCode = MI.getOperand(3).getImm();
2376 if (Invert)
2377 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2378 else
2379 NewMI.addImm(CondCode);
2380 NewMI.add(MI.getOperand(4));
2381
2382 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2383 if (NewMI->hasOptionalDef())
2384 NewMI.add(condCodeOp());
2385
2386 // The output register value when the predicate is false is an implicit
2387 // register operand tied to the first def.
2388 // The tie makes the register allocator ensure the FalseReg is allocated the
2389 // same register as operand 0.
2390 FalseReg.setImplicit();
2391 NewMI.add(FalseReg);
2392 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2393
2394 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2395 SeenMIs.insert(NewMI);
2396 SeenMIs.erase(DefMI);
2397
2398 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2399 // DefMI would be invalid when transferred inside the loop. Checking for a
2400 // loop is expensive, but at least remove kill flags if they are in different
2401 // BBs.
2402 if (DefMI->getParent() != MI.getParent())
2403 NewMI->clearKillInfo();
2404
2405 // The caller will erase MI, but not DefMI.
2406 DefMI->eraseFromParent();
2407 return NewMI;
2408}
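// Rough example of the transformation above (virtual register names are
// hypothetical):
//   %x = t2ADDri %a, 1, 14, $noreg, $noreg
//   %d = t2MOVCCr %b, %x, 1 /* ne */, $cpsr
// becomes a single predicated add, with %b tied to the result as the value
// when the predicate is false:
//   %d = t2ADDri %a, 1, 1 /* ne */, $cpsr, $noreg, implicit %b (tied)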
2409
2410/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2411/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2412/// def operand.
2413///
2414/// This will go away once we can teach tblgen how to set the optional CPSR def
2415/// operand itself.
2416struct AddSubFlagsOpcodePair {
2417 uint16_t PseudoOpc;
2418 uint16_t MachineOpc;
2419};
2420
2421static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2422 {ARM::ADDSri, ARM::ADDri},
2423 {ARM::ADDSrr, ARM::ADDrr},
2424 {ARM::ADDSrsi, ARM::ADDrsi},
2425 {ARM::ADDSrsr, ARM::ADDrsr},
2426
2427 {ARM::SUBSri, ARM::SUBri},
2428 {ARM::SUBSrr, ARM::SUBrr},
2429 {ARM::SUBSrsi, ARM::SUBrsi},
2430 {ARM::SUBSrsr, ARM::SUBrsr},
2431
2432 {ARM::RSBSri, ARM::RSBri},
2433 {ARM::RSBSrsi, ARM::RSBrsi},
2434 {ARM::RSBSrsr, ARM::RSBrsr},
2435
2436 {ARM::tADDSi3, ARM::tADDi3},
2437 {ARM::tADDSi8, ARM::tADDi8},
2438 {ARM::tADDSrr, ARM::tADDrr},
2439 {ARM::tADCS, ARM::tADC},
2440
2441 {ARM::tSUBSi3, ARM::tSUBi3},
2442 {ARM::tSUBSi8, ARM::tSUBi8},
2443 {ARM::tSUBSrr, ARM::tSUBrr},
2444 {ARM::tSBCS, ARM::tSBC},
2445 {ARM::tRSBS, ARM::tRSB},
2446 {ARM::tLSLSri, ARM::tLSLri},
2447
2448 {ARM::t2ADDSri, ARM::t2ADDri},
2449 {ARM::t2ADDSrr, ARM::t2ADDrr},
2450 {ARM::t2ADDSrs, ARM::t2ADDrs},
2451
2452 {ARM::t2SUBSri, ARM::t2SUBri},
2453 {ARM::t2SUBSrr, ARM::t2SUBrr},
2454 {ARM::t2SUBSrs, ARM::t2SUBrs},
2455
2456 {ARM::t2RSBSri, ARM::t2RSBri},
2457 {ARM::t2RSBSrs, ARM::t2RSBrs},
2458};
2459
2460unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2461 for (const auto &Entry : AddSubFlagsOpcodeMap)
2462 if (OldOpc == Entry.PseudoOpc)
2463 return Entry.MachineOpc;
2464 return 0;
2465}
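// For example, convertAddSubFlagsOpcode(ARM::t2ADDSri) returns ARM::t2ADDri
// per the table above, and opcodes with no table entry return 0.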
2466
2467void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2468 MachineBasicBlock::iterator &MBBI,
2469 const DebugLoc &dl, Register DestReg,
2470 Register BaseReg, int NumBytes,
2471 ARMCC::CondCodes Pred, Register PredReg,
2472 const ARMBaseInstrInfo &TII,
2473 unsigned MIFlags) {
2474 if (NumBytes == 0 && DestReg != BaseReg) {
2475 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2476 .addReg(BaseReg, RegState::Kill)
2477 .add(predOps(Pred, PredReg))
2478 .add(condCodeOp())
2479 .setMIFlags(MIFlags);
2480 return;
2481 }
2482
2483 bool isSub = NumBytes < 0;
2484 if (isSub) NumBytes = -NumBytes;
2485
2486 while (NumBytes) {
2487 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2488 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2489 assert(ThisVal && "Didn't extract field correctly");
2490
2491 // We will handle these bits from offset, clear them.
2492 NumBytes &= ~ThisVal;
2493
2494 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2495
2496 // Build the new ADD / SUB.
2497 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2498 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2499 .addReg(BaseReg, RegState::Kill)
2500 .addImm(ThisVal)
2501 .add(predOps(Pred, PredReg))
2502 .add(condCodeOp())
2503 .setMIFlags(MIFlags);
2504 BaseReg = DestReg;
2505 }
2506}
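// Illustrative example: NumBytes = 0x1004 is not encodable as a single
// so_imm, so the loop above emits two adds, e.g. "add rD, rB, #0x4" followed
// by "add rD, rD, #0x1000" (the chunk order depends on the rotation chosen
// by getSOImmValRotate).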
2507
2508bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2509 MachineFunction &MF, MachineInstr *MI,
2510 unsigned NumBytes) {
2511 // This optimisation potentially adds lots of load and store
2512 // micro-operations; its only real benefit is code size.
2513 if (!Subtarget.hasMinSize())
2514 return false;
2515
2516 // If only one register is pushed/popped, LLVM can use an LDR/STR
2517 // instead. We can't modify those so make sure we're dealing with an
2518 // instruction we understand.
2519 bool IsPop = isPopOpcode(MI->getOpcode());
2520 bool IsPush = isPushOpcode(MI->getOpcode());
2521 if (!IsPush && !IsPop)
2522 return false;
2523
2524 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2525 MI->getOpcode() == ARM::VLDMDIA_UPD;
2526 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2527 MI->getOpcode() == ARM::tPOP ||
2528 MI->getOpcode() == ARM::tPOP_RET;
2529
2530 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2531 MI->getOperand(1).getReg() == ARM::SP)) &&
2532 "trying to fold sp update into non-sp-updating push/pop");
2533
2534 // The VFP push & pop act on D-registers, so we can only correctly fold in
2535 // an adjustment that is a multiple of 8 bytes; similarly, GPRs are 4 bytes.
2536 // Don't try if this is violated.
2537 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2538 return false;
2539
2540 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2541 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2542 int RegListIdx = IsT1PushPop ? 2 : 4;
2543
2544 // Calculate the space we'll need in terms of registers.
2545 unsigned RegsNeeded;
2546 const TargetRegisterClass *RegClass;
2547 if (IsVFPPushPop) {
2548 RegsNeeded = NumBytes / 8;
2549 RegClass = &ARM::DPRRegClass;
2550 } else {
2551 RegsNeeded = NumBytes / 4;
2552 RegClass = &ARM::GPRRegClass;
2553 }
2554
2555 // We're going to have to strip all list operands off before
2556 // re-adding them since the order matters, so save the existing ones
2557 // for later.
2558 SmallVector<MachineOperand, 4> RegList;
2559
2560 // We're also going to need the first register transferred by this
2561 // instruction, which won't necessarily be the first register in the list.
2562 unsigned FirstRegEnc = -1;
2563
2565 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2566 MachineOperand &MO = MI->getOperand(i);
2567 RegList.push_back(MO);
2568
2569 if (MO.isReg() && !MO.isImplicit() &&
2570 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2571 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2572 }
2573
2574 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2575
2576 // Now try to find enough space in the reglist to allocate NumBytes.
2577 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2578 --CurRegEnc) {
2579 unsigned CurReg = RegClass->getRegister(CurRegEnc);
2580 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2581 continue;
2582 if (!IsPop) {
2583 // Pushing any register is completely harmless; mark the register involved
2584 // as undef since we don't care about its value and must not restore it
2585 // during stack unwinding.
2586 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2587 false, false, true));
2588 --RegsNeeded;
2589 continue;
2590 }
2591
2592 // However, we can only pop an extra register if it's not live. For
2593 // registers live within the function we might clobber a return value
2594 // register; the other way a register can be live here is if it's
2595 // callee-saved.
2596 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2597 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2598 MachineBasicBlock::LQR_Dead) {
2599 // VFP pops don't allow holes in the register list, so any skip is fatal
2600 // for our transformation. GPR pops do, so we should just keep looking.
2601 if (IsVFPPushPop)
2602 return false;
2603 else
2604 continue;
2605 }
2606
2607 // Mark the unimportant registers as <def,dead> in the POP.
2608 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2609 true));
2610 --RegsNeeded;
2611 }
2612
2613 if (RegsNeeded > 0)
2614 return false;
2615
2616 // Finally we know we can profitably perform the optimisation so go
2617 // ahead: strip all existing registers off and add them back again
2618 // in the right order.
2619 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2620 MI->removeOperand(i);
2621
2622 // Add the complete list back in.
2623 MachineInstrBuilder MIB(MF, &*MI);
2624 for (const MachineOperand &MO : llvm::reverse(RegList))
2625 MIB.add(MO);
2626
2627 return true;
2628}
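// Illustrative example: under minsize, a prologue such as
//   push {r4, r5}
//   sub sp, sp, #8
// can have the SP update folded to
//   push {r2, r3, r4, r5}
// where r2/r3 are pushed as undef; the matching pop marks the extra
// registers dead instead, and is only performed when they are free.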
2629
2630bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2631 Register FrameReg, int &Offset,
2632 const ARMBaseInstrInfo &TII) {
2633 unsigned Opcode = MI.getOpcode();
2634 const MCInstrDesc &Desc = MI.getDesc();
2635 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2636 bool isSub = false;
2637
2638 // Memory operands in inline assembly always use AddrMode2.
2639 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2640 AddrMode = ARMII::AddrMode2;
2641
2642 if (Opcode == ARM::ADDri) {
2643 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2644 if (Offset == 0) {
2645 // Turn it into a move.
2646 MI.setDesc(TII.get(ARM::MOVr));
2647 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2648 MI.removeOperand(FrameRegIdx+1);
2649 Offset = 0;
2650 return true;
2651 } else if (Offset < 0) {
2652 Offset = -Offset;
2653 isSub = true;
2654 MI.setDesc(TII.get(ARM::SUBri));
2655 }
2656
2657 // Common case: small offset, fits into instruction.
2658 if (ARM_AM::getSOImmVal(Offset) != -1) {
2659 // Replace the FrameIndex with sp / fp
2660 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2661 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2662 Offset = 0;
2663 return true;
2664 }
2665
2666 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2667 // as possible.
2668 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2669 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2670
2671 // We will handle these bits from offset, clear them.
2672 Offset &= ~ThisImmVal;
2673
2674 // Get the properly encoded SOImmVal field.
2675 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2676 "Bit extraction didn't work?");
2677 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2678 } else {
2679 unsigned ImmIdx = 0;
2680 int InstrOffs = 0;
2681 unsigned NumBits = 0;
2682 unsigned Scale = 1;
2683 switch (AddrMode) {
2684 case ARMII::AddrMode_i12:
2685 ImmIdx = FrameRegIdx + 1;
2686 InstrOffs = MI.getOperand(ImmIdx).getImm();
2687 NumBits = 12;
2688 break;
2689 case ARMII::AddrMode2:
2690 ImmIdx = FrameRegIdx+2;
2691 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2692 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2693 InstrOffs *= -1;
2694 NumBits = 12;
2695 break;
2696 case ARMII::AddrMode3:
2697 ImmIdx = FrameRegIdx+2;
2698 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2699 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2700 InstrOffs *= -1;
2701 NumBits = 8;
2702 break;
2703 case ARMII::AddrMode4:
2704 case ARMII::AddrMode6:
2705 // Can't fold any offset even if it's zero.
2706 return false;
2707 case ARMII::AddrMode5:
2708 ImmIdx = FrameRegIdx+1;
2709 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2710 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2711 InstrOffs *= -1;
2712 NumBits = 8;
2713 Scale = 4;
2714 break;
2715 case ARMII::AddrMode5FP16:
2716 ImmIdx = FrameRegIdx+1;
2717 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2718 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2719 InstrOffs *= -1;
2720 NumBits = 8;
2721 Scale = 2;
2722 break;
2723 case ARMII::AddrModeT2_i7:
2724 case ARMII::AddrModeT2_i7s2:
2725 case ARMII::AddrModeT2_i7s4:
2726 ImmIdx = FrameRegIdx+1;
2727 InstrOffs = MI.getOperand(ImmIdx).getImm();
2728 NumBits = 7;
2729 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2730 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2731 break;
2732 default:
2733 llvm_unreachable("Unsupported addressing mode!");
2734 }
2735
2736 Offset += InstrOffs * Scale;
2737 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2738 if (Offset < 0) {
2739 Offset = -Offset;
2740 isSub = true;
2741 }
2742
2743 // Attempt to fold the address computation if the opcode has offset bits.
2744 if (NumBits > 0) {
2745 // Common case: small offset, fits into instruction.
2746 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2747 int ImmedOffset = Offset / Scale;
2748 unsigned Mask = (1 << NumBits) - 1;
2749 if ((unsigned)Offset <= Mask * Scale) {
2750 // Replace the FrameIndex with sp
2751 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2752 // FIXME: When addrmode2 goes away, this will simplify (like the
2753 // T2 version), as the LDR.i12 versions don't need the encoding
2754 // tricks for the offset value.
2755 if (isSub) {
2756 if (AddrMode == ARMII::AddrMode_i12)
2757 ImmedOffset = -ImmedOffset;
2758 else
2759 ImmedOffset |= 1 << NumBits;
2760 }
2761 ImmOp.ChangeToImmediate(ImmedOffset);
2762 Offset = 0;
2763 return true;
2764 }
2765
2766 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2767 ImmedOffset = ImmedOffset & Mask;
2768 if (isSub) {
2769 if (AddrMode == ARMII::AddrMode_i12)
2770 ImmedOffset = -ImmedOffset;
2771 else
2772 ImmedOffset |= 1 << NumBits;
2773 }
2774 ImmOp.ChangeToImmediate(ImmedOffset);
2775 Offset &= ~(Mask*Scale);
2776 }
2777 }
2778
2779 Offset = (isSub) ? -Offset : Offset;
2780 return Offset == 0;
2781}
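// Illustrative example: given FrameReg = $sp and an incoming Offset of 8, an
// "ADDri %dst, %fi, 4" is rewritten above to "add rD, sp, #12", with Offset
// left at 0 and true returned because the immediate fits.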
2782
2783/// analyzeCompare - For a comparison instruction, return the source registers
2784/// in SrcReg and SrcReg2 if having two register operands, and the value it
2785/// compares against in CmpValue. Return true if the comparison instruction
2786/// can be analyzed.
2787bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2788 Register &SrcReg2, int64_t &CmpMask,
2789 int64_t &CmpValue) const {
2790 switch (MI.getOpcode()) {
2791 default: break;
2792 case ARM::CMPri:
2793 case ARM::t2CMPri:
2794 case ARM::tCMPi8:
2795 SrcReg = MI.getOperand(0).getReg();
2796 SrcReg2 = 0;
2797 CmpMask = ~0;
2798 CmpValue = MI.getOperand(1).getImm();
2799 return true;
2800 case ARM::CMPrr:
2801 case ARM::t2CMPrr:
2802 case ARM::tCMPr:
2803 SrcReg = MI.getOperand(0).getReg();
2804 SrcReg2 = MI.getOperand(1).getReg();
2805 CmpMask = ~0;
2806 CmpValue = 0;
2807 return true;
2808 case ARM::TSTri:
2809 case ARM::t2TSTri:
2810 SrcReg = MI.getOperand(0).getReg();
2811 SrcReg2 = 0;
2812 CmpMask = MI.getOperand(1).getImm();
2813 CmpValue = 0;
2814 return true;
2815 }
2816
2817 return false;
2818}
2819
2820/// isSuitableForMask - Identify a suitable 'and' instruction that
2821/// operates on the given source register and applies the same mask
2822/// as a 'tst' instruction. Provide a limited look-through for copies.
2823/// When successful, MI will hold the found instruction.
2824static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2825 int CmpMask, bool CommonUse) {
2826 switch (MI->getOpcode()) {
2827 case ARM::ANDri:
2828 case ARM::t2ANDri:
2829 if (CmpMask != MI->getOperand(2).getImm())
2830 return false;
2831 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2832 return true;
2833 break;
2834 }
2835
2836 return false;
2837}
2838
2839/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2840/// the condition code if we modify the instructions such that flags are
2841/// set by ADD(a,b,X).
2842inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2843 switch (CC) {
2844 default: return ARMCC::AL;
2845 case ARMCC::HS: return ARMCC::LO;
2846 case ARMCC::LO: return ARMCC::HS;
2847 case ARMCC::VS: return ARMCC::VS;
2848 case ARMCC::VC: return ARMCC::VC;
2849 }
2850}
2851
2852/// isRedundantFlagInstr - check whether the first instruction, whose only
2853/// purpose is to update flags, can be made redundant.
2854/// CMPrr can be made redundant by SUBrr if the operands are the same.
2855/// CMPri can be made redundant by SUBri if the operands are the same.
2856/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2857/// This function can be extended later on.
2858inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2859 Register SrcReg, Register SrcReg2,
2860 int64_t ImmValue,
2861 const MachineInstr *OI,
2862 bool &IsThumb1) {
2863 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2864 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2865 ((OI->getOperand(1).getReg() == SrcReg &&
2866 OI->getOperand(2).getReg() == SrcReg2) ||
2867 (OI->getOperand(1).getReg() == SrcReg2 &&
2868 OI->getOperand(2).getReg() == SrcReg))) {
2869 IsThumb1 = false;
2870 return true;
2871 }
2872
2873 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2874 ((OI->getOperand(2).getReg() == SrcReg &&
2875 OI->getOperand(3).getReg() == SrcReg2) ||
2876 (OI->getOperand(2).getReg() == SrcReg2 &&
2877 OI->getOperand(3).getReg() == SrcReg))) {
2878 IsThumb1 = true;
2879 return true;
2880 }
2881
2882 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2883 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2884 OI->getOperand(1).getReg() == SrcReg &&
2885 OI->getOperand(2).getImm() == ImmValue) {
2886 IsThumb1 = false;
2887 return true;
2888 }
2889
2890 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2891 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2892 OI->getOperand(2).getReg() == SrcReg &&
2893 OI->getOperand(3).getImm() == ImmValue) {
2894 IsThumb1 = true;
2895 return true;
2896 }
2897
2898 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2899 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2900 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2901 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2902 OI->getOperand(0).getReg() == SrcReg &&
2903 OI->getOperand(1).getReg() == SrcReg2) {
2904 IsThumb1 = false;
2905 return true;
2906 }
2907
2908 if (CmpI->getOpcode() == ARM::tCMPr &&
2909 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2910 OI->getOpcode() == ARM::tADDrr) &&
2911 OI->getOperand(0).getReg() == SrcReg &&
2912 OI->getOperand(2).getReg() == SrcReg2) {
2913 IsThumb1 = true;
2914 return true;
2915 }
2916
2917 return false;
2918}
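// For example, given "%d = SUBrr %a, %b" followed by "CMPrr %a, %b", the
// compare is redundant once the subtract is converted to SUBSrr; the swapped
// form "CMPrr %b, %a" also qualifies, provided the callers adjust the
// condition codes of the flag users.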
2919
2920static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2921 switch (MI->getOpcode()) {
2922 default: return false;
2923 case ARM::tLSLri:
2924 case ARM::tLSRri:
2925 case ARM::tLSLrr:
2926 case ARM::tLSRrr:
2927 case ARM::tSUBrr:
2928 case ARM::tADDrr:
2929 case ARM::tADDi3:
2930 case ARM::tADDi8:
2931 case ARM::tSUBi3:
2932 case ARM::tSUBi8:
2933 case ARM::tMUL:
2934 case ARM::tADC:
2935 case ARM::tSBC:
2936 case ARM::tRSB:
2937 case ARM::tAND:
2938 case ARM::tORR:
2939 case ARM::tEOR:
2940 case ARM::tBIC:
2941 case ARM::tMVN:
2942 case ARM::tASRri:
2943 case ARM::tASRrr:
2944 case ARM::tROR:
2945 IsThumb1 = true;
2946 [[fallthrough]];
2947 case ARM::RSBrr:
2948 case ARM::RSBri:
2949 case ARM::RSCrr:
2950 case ARM::RSCri:
2951 case ARM::ADDrr:
2952 case ARM::ADDri:
2953 case ARM::ADCrr:
2954 case ARM::ADCri:
2955 case ARM::SUBrr:
2956 case ARM::SUBri:
2957 case ARM::SBCrr:
2958 case ARM::SBCri:
2959 case ARM::t2RSBri:
2960 case ARM::t2ADDrr:
2961 case ARM::t2ADDri:
2962 case ARM::t2ADCrr:
2963 case ARM::t2ADCri:
2964 case ARM::t2SUBrr:
2965 case ARM::t2SUBri:
2966 case ARM::t2SBCrr:
2967 case ARM::t2SBCri:
2968 case ARM::ANDrr:
2969 case ARM::ANDri:
2970 case ARM::ANDrsr:
2971 case ARM::ANDrsi:
2972 case ARM::t2ANDrr:
2973 case ARM::t2ANDri:
2974 case ARM::t2ANDrs:
2975 case ARM::ORRrr:
2976 case ARM::ORRri:
2977 case ARM::ORRrsr:
2978 case ARM::ORRrsi:
2979 case ARM::t2ORRrr:
2980 case ARM::t2ORRri:
2981 case ARM::t2ORRrs:
2982 case ARM::EORrr:
2983 case ARM::EORri:
2984 case ARM::EORrsr:
2985 case ARM::EORrsi:
2986 case ARM::t2EORrr:
2987 case ARM::t2EORri:
2988 case ARM::t2EORrs:
2989 case ARM::BICri:
2990 case ARM::BICrr:
2991 case ARM::BICrsi:
2992 case ARM::BICrsr:
2993 case ARM::t2BICri:
2994 case ARM::t2BICrr:
2995 case ARM::t2BICrs:
2996 case ARM::t2LSRri:
2997 case ARM::t2LSRrr:
2998 case ARM::t2LSLri:
2999 case ARM::t2LSLrr:
3000 case ARM::MOVsr:
3001 case ARM::MOVsi:
3002 return true;
3003 }
3004}
3005
3006/// optimizeCompareInstr - Convert the instruction supplying the argument to the
3007/// comparison into one that sets the zero bit in the flags register;
3008/// Remove a redundant Compare instruction if an earlier instruction can set the
3009/// flags in the same way as Compare.
3010/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
3011/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
3012/// condition code of instructions which use the flags.
3013bool ARMBaseInstrInfo::optimizeCompareInstr(
3014 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
3015 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
3016 // Get the unique definition of SrcReg.
3017 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3018 if (!MI) return false;
3019
3020 // Masked compares sometimes use the same register as the corresponding 'and'.
3021 if (CmpMask != ~0) {
3022 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
3023 MI = nullptr;
3024 for (MachineRegisterInfo::use_instr_iterator
3025 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3026 UI != UE; ++UI) {
3027 if (UI->getParent() != CmpInstr.getParent())
3028 continue;
3029 MachineInstr *PotentialAND = &*UI;
3030 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
3031 isPredicated(*PotentialAND))
3032 continue;
3033 MI = PotentialAND;
3034 break;
3035 }
3036 if (!MI) return false;
3037 }
3038 }
3039
3040 // Get ready to iterate backward from CmpInstr.
3041 MachineBasicBlock::iterator I = CmpInstr, E = MI,
3042 B = CmpInstr.getParent()->begin();
3043
3044 // Early exit if CmpInstr is at the beginning of the BB.
3045 if (I == B) return false;
3046
3047 // There are two possible candidates which can be changed to set CPSR:
3048 // One is MI, the other is a SUB or ADD instruction.
3049 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3050 // ADDr[ri](r1, r2, X).
3051 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3052 MachineInstr *SubAdd = nullptr;
3053 if (SrcReg2 != 0)
3054 // MI is not a candidate for CMPrr.
3055 MI = nullptr;
3056 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3057 // Conservatively refuse to convert an instruction which isn't in the same
3058 // BB as the comparison.
3059 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3060 // Thus we cannot return here.
3061 if (CmpInstr.getOpcode() == ARM::CMPri ||
3062 CmpInstr.getOpcode() == ARM::t2CMPri ||
3063 CmpInstr.getOpcode() == ARM::tCMPi8)
3064 MI = nullptr;
3065 else
3066 return false;
3067 }
3068
3069 bool IsThumb1 = false;
3070 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3071 return false;
3072
3073 // We also want to do this peephole for cases like this: if (a*b == 0),
3074 // and optimise away the CMP instruction from the generated code sequence:
3075 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3076 // resulting from the select instruction, but these MOVS instructions for
3077 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3078 // However, if we only have MOVS instructions in between the CMP and the
3079 // other instruction (the MULS in this example), then the CPSR is dead so we
3080 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3081 // reordering and then continue the analysis hoping we can eliminate the
3082 // CMP. This peephole works on the vregs, so is still in SSA form. As a
3083 // consequence, the movs won't redefine/kill the MUL operands which would
3084 // make this reordering illegal.
3085 const TargetRegisterInfo *TRI = &getRegisterInfo();
3086 if (MI && IsThumb1) {
3087 --I;
3088 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3089 bool CanReorder = true;
3090 for (; I != E; --I) {
3091 if (I->getOpcode() != ARM::tMOVi8) {
3092 CanReorder = false;
3093 break;
3094 }
3095 }
3096 if (CanReorder) {
3097 MI = MI->removeFromParent();
3098 E = CmpInstr;
3099 CmpInstr.getParent()->insert(E, MI);
3100 }
3101 }
3102 I = CmpInstr;
3103 E = MI;
3104 }
3105
3106 // Check that CPSR isn't set between the comparison instruction and the one we
3107 // want to change. At the same time, search for SubAdd.
3108 bool SubAddIsThumb1 = false;
3109 do {
3110 const MachineInstr &Instr = *--I;
3111
3112 // Check whether CmpInstr can be made redundant by the current instruction.
3113 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3114 SubAddIsThumb1)) {
3115 SubAdd = &*I;
3116 break;
3117 }
3118
3119 // Allow E (which was initially MI) to be SubAdd but do not search before E.
3120 if (I == E)
3121 break;
3122
3123 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3124 Instr.readsRegister(ARM::CPSR, TRI))
3125 // This instruction modifies or uses CPSR after the one we want to
3126 // change. We can't do this transformation.
3127 return false;
3128
3129 if (I == B) {
3130 // In some cases, we scan the use-list of an instruction for an AND;
3131 // that AND is in the same BB, but may not be scheduled before the
3132 // corresponding TST. In that case, bail out.
3133 //
3134 // FIXME: We could try to reschedule the AND.
3135 return false;
3136 }
3137 } while (true);
3138
3139 // Return false if no candidates exist.
3140 if (!MI && !SubAdd)
3141 return false;
3142
3143 // If we found a SubAdd, use it as it will be closer to the CMP
3144 if (SubAdd) {
3145 MI = SubAdd;
3146 IsThumb1 = SubAddIsThumb1;
3147 }
3148
3149 // We can't use a predicated instruction - it doesn't always write the flags.
3150 if (isPredicated(*MI))
3151 return false;
3152
3153 // Scan forward for the use of CPSR
3154 // When checking against MI: if it's a conditional code that requires
3155 // checking of the V bit or C bit, then this is not safe to do.
3156 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3157 // If we are done with the basic block, we need to check whether CPSR is
3158 // live-out.
3159 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3160 OperandsToUpdate;
3161 bool isSafe = false;
3162 I = CmpInstr;
3163 E = CmpInstr.getParent()->end();
3164 while (!isSafe && ++I != E) {
3165 const MachineInstr &Instr = *I;
3166 for (unsigned IO = 0, EO = Instr.getNumOperands();
3167 !isSafe && IO != EO; ++IO) {
3168 const MachineOperand &MO = Instr.getOperand(IO);
3169 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3170 isSafe = true;
3171 break;
3172 }
3173 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3174 continue;
3175 if (MO.isDef()) {
3176 isSafe = true;
3177 break;
3178 }
3179 // Condition code is after the operand before CPSR except for VSELs.
3180 ARMCC::CondCodes CC;
3181 bool IsInstrVSel = true;
3182 switch (Instr.getOpcode()) {
3183 default:
3184 IsInstrVSel = false;
3185 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3186 break;
3187 case ARM::VSELEQD:
3188 case ARM::VSELEQS:
3189 case ARM::VSELEQH:
3190 CC = ARMCC::EQ;
3191 break;
3192 case ARM::VSELGTD:
3193 case ARM::VSELGTS:
3194 case ARM::VSELGTH:
3195 CC = ARMCC::GT;
3196 break;
3197 case ARM::VSELGED:
3198 case ARM::VSELGES:
3199 case ARM::VSELGEH:
3200 CC = ARMCC::GE;
3201 break;
3202 case ARM::VSELVSD:
3203 case ARM::VSELVSS:
3204 case ARM::VSELVSH:
3205 CC = ARMCC::VS;
3206 break;
3207 }
3208
3209 if (SubAdd) {
3210 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3211 // on CMP needs to be updated to be based on SUB.
3212 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3213 // needs to be modified.
3214 // Push the condition code operands to OperandsToUpdate.
3215 // If it is safe to remove CmpInstr, the condition code of these
3216 // operands will be modified.
3217 unsigned Opc = SubAdd->getOpcode();
3218 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3219 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3220 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3221 Opc == ARM::tSUBi8;
3222 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3223 if (!IsSub ||
3224 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3225 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3226 // VSel doesn't support condition code update.
3227 if (IsInstrVSel)
3228 return false;
3229 // Ensure we can swap the condition.
3230 ARMCC::CondCodes NewCC = IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC);
3231 if (NewCC == ARMCC::AL)
3232 return false;
3233 OperandsToUpdate.push_back(
3234 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3235 }
3236 } else {
3237 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3238 switch (CC) {
3239 case ARMCC::EQ: // Z
3240 case ARMCC::NE: // Z
3241 case ARMCC::MI: // N
3242 case ARMCC::PL: // N
3243 case ARMCC::AL: // none
3244 // CPSR can be used multiple times, we should continue.
3245 break;
3246 case ARMCC::HS: // C
3247 case ARMCC::LO: // C
3248 case ARMCC::VS: // V
3249 case ARMCC::VC: // V
3250 case ARMCC::HI: // C Z
3251 case ARMCC::LS: // C Z
3252 case ARMCC::GE: // N V
3253 case ARMCC::LT: // N V
3254 case ARMCC::GT: // Z N V
3255 case ARMCC::LE: // Z N V
3256 // The instruction uses the V bit or C bit which is not safe.
3257 return false;
3258 }
3259 }
3260 }
3261 }
3262
3263 // If CPSR is not killed nor re-defined, we should check whether it is
3264 // live-out. If it is live-out, do not optimize.
3265 if (!isSafe) {
3266 MachineBasicBlock *MBB = CmpInstr.getParent();
3267 for (MachineBasicBlock *Succ : MBB->successors())
3268 if (Succ->isLiveIn(ARM::CPSR))
3269 return false;
3270 }
3271
3272 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3273 // set CPSR so this is represented as an explicit output)
3274 if (!IsThumb1) {
3275 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3276 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3277 MI->getOperand(CPSRRegNum).setIsDef(true);
3278 }
3279 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3280 CmpInstr.eraseFromParent();
3281
3282 // Modify the condition code of operands in OperandsToUpdate.
3283 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3284 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3285 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3286 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3287
3288 MI->clearRegisterDeads(ARM::CPSR);
3289
3290 return true;
3291}
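// Illustrative end-to-end example of this optimization:
//   sub r2, r0, r1
//   cmp r0, r1
//   beq .Ltaken
// becomes
//   subs r2, r0, r1
//   beq .Ltaken
// once the checks above prove CPSR is not otherwise set, read or live-out.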
3292
3293bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3294 // Do not sink MI if it might be used to optimize a redundant compare.
3295 // We heuristically only look at the instruction immediately following MI to
3296 // avoid potentially searching the entire basic block.
3297 if (isPredicated(MI))
3298 return true;
3299 MachineBasicBlock::const_iterator Next = &MI;
3300 ++Next;
3301 Register SrcReg, SrcReg2;
3302 int64_t CmpMask, CmpValue;
3303 bool IsThumb1;
3304 if (Next != MI.getParent()->end() &&
3305 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3306 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3307 return false;
3308 return true;
3309}
3310
3311bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3312 Register Reg,
3313 MachineRegisterInfo *MRI) const {
3314 // Fold large immediates into add, sub, or, xor.
3315 unsigned DefOpc = DefMI.getOpcode();
3316 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3317 DefOpc != ARM::tMOVi32imm)
3318 return false;
3319 if (!DefMI.getOperand(1).isImm())
3320 // Could be t2MOVi32imm @xx
3321 return false;
3322
3323 if (!MRI->hasOneNonDBGUse(Reg))
3324 return false;
3325
3326 const MCInstrDesc &DefMCID = DefMI.getDesc();
3327 if (DefMCID.hasOptionalDef()) {
3328 unsigned NumOps = DefMCID.getNumOperands();
3329 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3330 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3331 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3332 // to delete DefMI.
3333 return false;
3334 }
3335
3336 const MCInstrDesc &UseMCID = UseMI.getDesc();
3337 if (UseMCID.hasOptionalDef()) {
3338 unsigned NumOps = UseMCID.getNumOperands();
3339 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3340 // If the instruction sets the flag, do not attempt this optimization
3341 // since it may change the semantics of the code.
3342 return false;
3343 }
3344
3345 unsigned UseOpc = UseMI.getOpcode();
3346 unsigned NewUseOpc = 0;
3347 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3348 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3349 bool Commute = false;
3350 switch (UseOpc) {
3351 default: return false;
3352 case ARM::SUBrr:
3353 case ARM::ADDrr:
3354 case ARM::ORRrr:
3355 case ARM::EORrr:
3356 case ARM::t2SUBrr:
3357 case ARM::t2ADDrr:
3358 case ARM::t2ORRrr:
3359 case ARM::t2EORrr: {
3360 Commute = UseMI.getOperand(2).getReg() != Reg;
3361 switch (UseOpc) {
3362 default: break;
3363 case ARM::ADDrr:
3364 case ARM::SUBrr:
3365 if (UseOpc == ARM::SUBrr && Commute)
3366 return false;
3367
3368 // ADD/SUB are special because they're essentially the same operation, so
3369 // we can handle a larger range of immediates.
3370 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3371 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3372 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3373 ImmVal = -ImmVal;
3374 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3375 } else
3376 return false;
3377 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3378 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3379 break;
3380 case ARM::ORRrr:
3381 case ARM::EORrr:
3382 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3383 return false;
3384 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3385 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3386 switch (UseOpc) {
3387 default: break;
3388 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3389 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3390 }
3391 break;
3392 case ARM::t2ADDrr:
3393 case ARM::t2SUBrr: {
3394 if (UseOpc == ARM::t2SUBrr && Commute)
3395 return false;
3396
3397 // ADD/SUB are special because they're essentially the same operation, so
3398 // we can handle a larger range of immediates.
3399 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3400 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3401 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3402 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3403 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3404 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3405 ImmVal = -ImmVal;
3406 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3407 } else
3408 return false;
3409 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3410 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3411 break;
3412 }
3413 case ARM::t2ORRrr:
3414 case ARM::t2EORrr:
3415 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3416 return false;
3417 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3418 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3419 switch (UseOpc) {
3420 default: break;
3421 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3422 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3423 }
3424 break;
3425 }
3426 }
3427 }
3428
3429 unsigned OpIdx = Commute ? 2 : 1;
3430 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3431 bool isKill = UseMI.getOperand(OpIdx).isKill();
3432 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3433 Register NewReg = MRI->createVirtualRegister(TRC);
3434 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3435 NewReg)
3436 .addReg(Reg1, getKillRegState(isKill))
3437 .addImm(SOImmValV1)
3438 .add(predOps(ARMCC::AL))
3439 .add(condCodeOp());
3440 UseMI.setDesc(get(NewUseOpc));
3441 UseMI.getOperand(1).setReg(NewReg);
3442 UseMI.getOperand(1).setIsKill();
3443 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3444 DefMI.eraseFromParent();
3445 // FIXME: t2ADDrr should be split, as different rules apply when writing to
3446 // SP, just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3447 // Then the below code will not be needed, as the input/output register
3448 // classes will be rgpr or gprSP.
3449 // For now, we fix the UseMI operand explicitly here:
3450 switch(NewUseOpc){
3451 case ARM::t2ADDspImm:
3452 case ARM::t2SUBspImm:
3453 case ARM::t2ADDri:
3454 case ARM::t2SUBri:
3455 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3456 }
3457 return true;
3458}
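// Illustrative example: with a two-part so_imm constant (register names are
// hypothetical),
//   %c = MOVi32imm 0xF000F0
//   %d = ADDrr %a, %c
// can be rewritten by the code above into two immediate adds:
//   %n = ADDri %a, 0xF0
//   %d = ADDri %n, 0xF00000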
3459
3460static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3461 const MachineInstr &MI) {
3462 switch (MI.getOpcode()) {
3463 default: {
3464 const MCInstrDesc &Desc = MI.getDesc();
3465 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3466 assert(UOps >= 0 && "bad # UOps");
3467 return UOps;
3468 }
3469
3470 case ARM::LDRrs:
3471 case ARM::LDRBrs:
3472 case ARM::STRrs:
3473 case ARM::STRBrs: {
3474 unsigned ShOpVal = MI.getOperand(3).getImm();
3475 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3476 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3477 if (!isSub &&
3478 (ShImm == 0 ||
3479 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3480 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3481 return 1;
3482 return 2;
3483 }
3484
3485 case ARM::LDRH:
3486 case ARM::STRH: {
3487 if (!MI.getOperand(2).getReg())
3488 return 1;
3489
3490 unsigned ShOpVal = MI.getOperand(3).getImm();
3491 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3492 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3493 if (!isSub &&
3494 (ShImm == 0 ||
3495 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3496 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3497 return 1;
3498 return 2;
3499 }
3500
3501 case ARM::LDRSB:
3502 case ARM::LDRSH:
3503 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3504
3505 case ARM::LDRSB_POST:
3506 case ARM::LDRSH_POST: {
3507 Register Rt = MI.getOperand(0).getReg();
3508 Register Rm = MI.getOperand(3).getReg();
3509 return (Rt == Rm) ? 4 : 3;
3510 }
3511
3512 case ARM::LDR_PRE_REG:
3513 case ARM::LDRB_PRE_REG: {
3514 Register Rt = MI.getOperand(0).getReg();
3515 Register Rm = MI.getOperand(3).getReg();
3516 if (Rt == Rm)
3517 return 3;
3518 unsigned ShOpVal = MI.getOperand(4).getImm();
3519 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3520 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3521 if (!isSub &&
3522 (ShImm == 0 ||
3523 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3524 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3525 return 2;
3526 return 3;
3527 }
3528
3529 case ARM::STR_PRE_REG:
3530 case ARM::STRB_PRE_REG: {
3531 unsigned ShOpVal = MI.getOperand(4).getImm();
3532 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3533 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3534 if (!isSub &&
3535 (ShImm == 0 ||
3536 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3537 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3538 return 2;
3539 return 3;
3540 }
3541
3542 case ARM::LDRH_PRE:
3543 case ARM::STRH_PRE: {
3544 Register Rt = MI.getOperand(0).getReg();
3545 Register Rm = MI.getOperand(3).getReg();
3546 if (!Rm)
3547 return 2;
3548 if (Rt == Rm)
3549 return 3;
3550 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3551 }
3552
3553 case ARM::LDR_POST_REG:
3554 case ARM::LDRB_POST_REG:
3555 case ARM::LDRH_POST: {
3556 Register Rt = MI.getOperand(0).getReg();
3557 Register Rm = MI.getOperand(3).getReg();
3558 return (Rt == Rm) ? 3 : 2;
3559 }
3560
3561 case ARM::LDR_PRE_IMM:
3562 case ARM::LDRB_PRE_IMM:
3563 case ARM::LDR_POST_IMM:
3564 case ARM::LDRB_POST_IMM:
3565 case ARM::STRB_POST_IMM:
3566 case ARM::STRB_POST_REG:
3567 case ARM::STRB_PRE_IMM:
3568 case ARM::STRH_POST:
3569 case ARM::STR_POST_IMM:
3570 case ARM::STR_POST_REG:
3571 case ARM::STR_PRE_IMM:
3572 return 2;
3573
3574 case ARM::LDRSB_PRE:
3575 case ARM::LDRSH_PRE: {
3576 Register Rm = MI.getOperand(3).getReg();
3577 if (Rm == 0)
3578 return 3;
3579 Register Rt = MI.getOperand(0).getReg();
3580 if (Rt == Rm)
3581 return 4;
3582 unsigned ShOpVal = MI.getOperand(4).getImm();
3583 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3584 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3585 if (!isSub &&
3586 (ShImm == 0 ||
3587 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3588 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3589 return 3;
3590 return 4;
3591 }
3592
3593 case ARM::LDRD: {
3594 Register Rt = MI.getOperand(0).getReg();
3595 Register Rn = MI.getOperand(2).getReg();
3596 Register Rm = MI.getOperand(3).getReg();
3597 if (Rm)
3598 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3599 : 3;
3600 return (Rt == Rn) ? 3 : 2;
3601 }
3602
3603 case ARM::STRD: {
3604 Register Rm = MI.getOperand(3).getReg();
3605 if (Rm)
3606 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3607 : 3;
3608 return 2;
3609 }
3610
3611 case ARM::LDRD_POST:
3612 case ARM::t2LDRD_POST:
3613 return 3;
3614
3615 case ARM::STRD_POST:
3616 case ARM::t2STRD_POST:
3617 return 4;
3618
3619 case ARM::LDRD_PRE: {
3620 Register Rt = MI.getOperand(0).getReg();
3621 Register Rn = MI.getOperand(3).getReg();
3622 Register Rm = MI.getOperand(4).getReg();
3623 if (Rm)
3624 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3625 : 4;
3626 return (Rt == Rn) ? 4 : 3;
3627 }
3628
3629 case ARM::t2LDRD_PRE: {
3630 Register Rt = MI.getOperand(0).getReg();
3631 Register Rn = MI.getOperand(3).getReg();
3632 return (Rt == Rn) ? 4 : 3;
3633 }
3634
3635 case ARM::STRD_PRE: {
3636 Register Rm = MI.getOperand(4).getReg();
3637 if (Rm)
3638 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3639 : 4;
3640 return 3;
3641 }
3642
3643 case ARM::t2STRD_PRE:
3644 return 3;
3645
3646 case ARM::t2LDR_POST:
3647 case ARM::t2LDRB_POST:
3648 case ARM::t2LDRB_PRE:
3649 case ARM::t2LDRSBi12:
3650 case ARM::t2LDRSBi8:
3651 case ARM::t2LDRSBpci:
3652 case ARM::t2LDRSBs:
3653 case ARM::t2LDRH_POST:
3654 case ARM::t2LDRH_PRE:
3655 case ARM::t2LDRSBT:
3656 case ARM::t2LDRSB_POST:
3657 case ARM::t2LDRSB_PRE:
3658 case ARM::t2LDRSH_POST:
3659 case ARM::t2LDRSH_PRE:
3660 case ARM::t2LDRSHi12:
3661 case ARM::t2LDRSHi8:
3662 case ARM::t2LDRSHpci:
3663 case ARM::t2LDRSHs:
3664 return 2;
3665
3666 case ARM::t2LDRDi8: {
3667 Register Rt = MI.getOperand(0).getReg();
3668 Register Rn = MI.getOperand(2).getReg();
3669 return (Rt == Rn) ? 3 : 2;
3670 }
3671
3672 case ARM::t2STRB_POST:
3673 case ARM::t2STRB_PRE:
3674 case ARM::t2STRBs:
3675 case ARM::t2STRDi8:
3676 case ARM::t2STRH_POST:
3677 case ARM::t2STRH_PRE:
3678 case ARM::t2STRHs:
3679 case ARM::t2STR_POST:
3680 case ARM::t2STR_PRE:
3681 case ARM::t2STRs:
3682 return 2;
3683 }
3684}
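// For example, on Swift "ldr r0, [r1, r2]" or "ldr r0, [r1, r2, lsl #2]"
// decodes to a single uop, while a subtracted index or an unusual shift
// such as "ldr r0, [r1, -r2]" or "ldr r0, [r1, r2, lsr #4]" takes two
// (register names illustrative only).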
3685
3686// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3687// can't be easily determined return 0 (missing MachineMemOperand).
3688//
3689// FIXME: The current MachineInstr design does not support relying on machine
3690// mem operands to determine the width of a memory access. Instead, we expect
3691// the target to provide this information based on the instruction opcode and
3692// operands. However, using MachineMemOperand is the best solution now for
3693// two reasons:
3694//
3695// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3696// operands. This is much more dangerous than using the MachineMemOperand
3697// sizes because CodeGen passes can insert/remove optional machine operands. In
3698// fact, it's totally incorrect for preRA passes and appears to be wrong for
3699// postRA passes as well.
3700//
3701// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3702// machine model that calls this should handle the unknown (zero size) case.
3703//
3704// Long term, we should require a target hook that verifies MachineMemOperand
3705// sizes during MC lowering. That target hook should be local to MC lowering
3706// because we can't ensure that it is aware of other MI forms. Doing this will
3707// ensure that MachineMemOperands are correctly propagated through all passes.
3708unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3709 unsigned Size = 0;
3710 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3711 E = MI.memoperands_end();
3712 I != E; ++I) {
3713 Size += (*I)->getSize().getValue();
3714 }
3715 // FIXME: The scheduler currently can't handle values larger than 16. But
3716 // the values can actually go up to 32 for floating-point load/store
3717 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3718 // operations isn't right; we could end up with "extra" memory operands for
3719 // various reasons, like tail merge merging two memory operations.
3720 return std::min(Size / 4, 16U);
3721}
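// For example, an LDMIA carrying four 4-byte MachineMemOperands sums to
// Size = 16 and reports 16 / 4 = 4 addresses, while an LDM whose mem
// operands were dropped by an earlier pass conservatively reports 0.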
3722
3723static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3724 unsigned NumRegs) {
3725 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3726 switch (Opc) {
3727 default:
3728 break;
3729 case ARM::VLDMDIA_UPD:
3730 case ARM::VLDMDDB_UPD:
3731 case ARM::VLDMSIA_UPD:
3732 case ARM::VLDMSDB_UPD:
3733 case ARM::VSTMDIA_UPD:
3734 case ARM::VSTMDDB_UPD:
3735 case ARM::VSTMSIA_UPD:
3736 case ARM::VSTMSDB_UPD:
3737 case ARM::LDMIA_UPD:
3738 case ARM::LDMDA_UPD:
3739 case ARM::LDMDB_UPD:
3740 case ARM::LDMIB_UPD:
3741 case ARM::STMIA_UPD:
3742 case ARM::STMDA_UPD:
3743 case ARM::STMDB_UPD:
3744 case ARM::STMIB_UPD:
3745 case ARM::tLDMIA_UPD:
3746 case ARM::tSTMIA_UPD:
3747 case ARM::t2LDMIA_UPD:
3748 case ARM::t2LDMDB_UPD:
3749 case ARM::t2STMIA_UPD:
3750 case ARM::t2STMDB_UPD:
3751 ++UOps; // One for base register writeback.
3752 break;
3753 case ARM::LDMIA_RET:
3754 case ARM::tPOP_RET:
3755 case ARM::t2LDMIA_RET:
3756 UOps += 2; // One for base reg wb, one for write to pc.
3757 break;
3758 }
3759 return UOps;
3760}
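// For example, a 3-register LDMIA_UPD costs 1 (address generation) +
// 3 (registers) + 1 (base writeback) = 5 uops under this model, and a
// 3-register tPOP_RET costs 1 + 3 + 2 = 6 (writeback plus the write to pc).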
3761
3762unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3763 const MachineInstr &MI) const {
3764 if (!ItinData || ItinData->isEmpty())
3765 return 1;
3766
3767 const MCInstrDesc &Desc = MI.getDesc();
3768 unsigned Class = Desc.getSchedClass();
3769 int ItinUOps = ItinData->getNumMicroOps(Class);
3770 if (ItinUOps >= 0) {
3771 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3772 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3773
3774 return ItinUOps;
3775 }
3776
3777 unsigned Opc = MI.getOpcode();
3778 switch (Opc) {
3779 default:
3780 llvm_unreachable("Unexpected multi-uops instruction!");
3781 case ARM::VLDMQIA:
3782 case ARM::VSTMQIA:
3783 return 2;
3784
3785 // The number of uOps for load / store multiple is determined by the number
3786 // of registers.
3787 //
3788 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3789 // same cycle. The scheduling for the first load / store must be done
3790 // separately by assuming the address is not 64-bit aligned.
3791 //
3792 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3793 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3794 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3795 case ARM::VLDMDIA:
3796 case ARM::VLDMDIA_UPD:
3797 case ARM::VLDMDDB_UPD:
3798 case ARM::VLDMSIA:
3799 case ARM::VLDMSIA_UPD:
3800 case ARM::VLDMSDB_UPD:
3801 case ARM::VSTMDIA:
3802 case ARM::VSTMDIA_UPD:
3803 case ARM::VSTMDDB_UPD:
3804 case ARM::VSTMSIA:
3805 case ARM::VSTMSIA_UPD:
3806 case ARM::VSTMSDB_UPD: {
3807 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3808 return (NumRegs / 2) + (NumRegs % 2) + 1;
3809 }
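// Worked example of the VFP / NEON formula above: a 5-register VLDMDIA
// takes (5 / 2) + (5 % 2) + 1 = 2 + 1 + 1 = 4 uops.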
3810
3811 case ARM::LDMIA_RET:
3812 case ARM::LDMIA:
3813 case ARM::LDMDA:
3814 case ARM::LDMDB:
3815 case ARM::LDMIB:
3816 case ARM::LDMIA_UPD:
3817 case ARM::LDMDA_UPD:
3818 case ARM::LDMDB_UPD:
3819 case ARM::LDMIB_UPD:
3820 case ARM::STMIA:
3821 case ARM::STMDA:
3822 case ARM::STMDB:
3823 case ARM::STMIB:
3824 case ARM::STMIA_UPD:
3825 case ARM::STMDA_UPD:
3826 case ARM::STMDB_UPD:
3827 case ARM::STMIB_UPD:
3828 case ARM::tLDMIA:
3829 case ARM::tLDMIA_UPD:
3830 case ARM::tSTMIA_UPD:
3831 case ARM::tPOP_RET:
3832 case ARM::tPOP:
3833 case ARM::tPUSH:
3834 case ARM::t2LDMIA_RET:
3835 case ARM::t2LDMIA:
3836 case ARM::t2LDMDB:
3837 case ARM::t2LDMIA_UPD:
3838 case ARM::t2LDMDB_UPD:
3839 case ARM::t2STMIA:
3840 case ARM::t2STMDB:
3841 case ARM::t2STMIA_UPD:
3842 case ARM::t2STMDB_UPD: {
3843 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3844 switch (Subtarget.getLdStMultipleTiming()) {
3845 case ARMSubtarget::SingleIssuePlusExtras:
3846 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3847 case ARMSubtarget::SingleIssue:
3848 // Assume the worst.
3849 return NumRegs;
3850 case ARMSubtarget::DoubleIssue: {
3851 if (NumRegs < 4)
3852 return 2;
3853 // 4 registers would be issued: 2, 2.
3854 // 5 registers would be issued: 2, 2, 1.
3855 unsigned UOps = (NumRegs / 2);
3856 if (NumRegs % 2)
3857 ++UOps;
3858 return UOps;
3859 }
3860 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3861 unsigned UOps = (NumRegs / 2);
3862 // If there is an odd number of registers or the address is not 64-bit
3863 // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3864 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3865 (*MI.memoperands_begin())->getAlign() < Align(8))
3866 ++UOps;
3867 return UOps;
3868 }
3869 }
3870 }
3871 }
3872 llvm_unreachable("Didn't find the number of microops");
3873}
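// For instance, under DoubleIssueCheckUnalignedAccess a 4-register LDMIA
// through a 64-bit-aligned address is 4 / 2 = 2 uops, while the same LDM
// with an odd register count or unknown alignment pays one extra AGU uop.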
3874
3875std::optional<unsigned>
3876ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3877 const MCInstrDesc &DefMCID, unsigned DefClass,
3878 unsigned DefIdx, unsigned DefAlign) const {
3879 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3880 if (RegNo <= 0)
3881 // Def is the address writeback.
3882 return ItinData->getOperandCycle(DefClass, DefIdx);
3883
3884 unsigned DefCycle;
3885 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3886 // (regno / 2) + (regno % 2) + 1
3887 DefCycle = RegNo / 2 + 1;
3888 if (RegNo % 2)
3889 ++DefCycle;
3890 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3891 DefCycle = RegNo;
3892 bool isSLoad = false;
3893
3894 switch (DefMCID.getOpcode()) {
3895 default: break;
3896 case ARM::VLDMSIA:
3897 case ARM::VLDMSIA_UPD:
3898 case ARM::VLDMSDB_UPD:
3899 isSLoad = true;
3900 break;
3901 }
3902
3903 // If there is an odd number of 'S' registers or the address is not 64-bit
3904 // aligned, then it takes an extra cycle.
3905 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3906 ++DefCycle;
3907 } else {
3908 // Assume the worst.
3909 DefCycle = RegNo + 2;
3910 }
3911
3912 return DefCycle;
3913}
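// Worked example: on Cortex-A8 the register at position 3 of a VLDMSIA
// becomes available at cycle 3 / 2 + 1 + 1 = 3 (an odd position adds a
// cycle); on an unrecognized core the conservative answer is 3 + 2 = 5.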
3914
3915std::optional<unsigned>
3916ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3917 const MCInstrDesc &DefMCID, unsigned DefClass,
3918 unsigned DefIdx, unsigned DefAlign) const {
3919 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3920 if (RegNo <= 0)
3921 // Def is the address writeback.
3922 return ItinData->getOperandCycle(DefClass, DefIdx);
3923
3924 unsigned DefCycle;
3925 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3926 // 4 registers would be issued: 1, 2, 1.
3927 // 5 registers would be issued: 1, 2, 2.
3928 DefCycle = RegNo / 2;
3929 if (DefCycle < 1)
3930 DefCycle = 1;
3931 // Result latency is issue cycle + 2: E2.
3932 DefCycle += 2;
3933 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3934 DefCycle = (RegNo / 2);
3935 // If there is an odd number of registers or the address is not 64-bit
3936 // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3937 if ((RegNo % 2) || DefAlign < 8)
3938 ++DefCycle;
3939 // Result latency is AGU cycles + 2.
3940 DefCycle += 2;
3941 } else {
3942 // Assume the worst.
3943 DefCycle = RegNo + 2;
3944 }
3945
3946 return DefCycle;
3947}
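// Worked example: for an LDM on Cortex-A8, the register at position 4 is
// ready at cycle max(4 / 2, 1) + 2 = 4 (issue cycle plus 2 for stage E2);
// on Cortex-A9 an odd count or unaligned base adds one AGU cycle first.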
3948
3949std::optional<unsigned>
3950ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3951 const MCInstrDesc &UseMCID, unsigned UseClass,
3952 unsigned UseIdx, unsigned UseAlign) const {
3953 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3954 if (RegNo <= 0)
3955 return ItinData->getOperandCycle(UseClass, UseIdx);
3956
3957 unsigned UseCycle;
3958 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3959 // (regno / 2) + (regno % 2) + 1
3960 UseCycle = RegNo / 2 + 1;
3961 if (RegNo % 2)
3962 ++UseCycle;
3963 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3964 UseCycle = RegNo;
3965 bool isSStore = false;
3966
3967 switch (UseMCID.getOpcode()) {
3968 default: break;
3969 case ARM::VSTMSIA:
3970 case ARM::VSTMSIA_UPD:
3971 case ARM::VSTMSDB_UPD:
3972 isSStore = true;
3973 break;
3974 }
3975
3976 // If there is an odd number of 'S' registers or the address is not 64-bit
3977 // aligned, then it takes an extra cycle.
3978 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3979 ++UseCycle;
3980 } else {
3981 // Assume the worst.
3982 UseCycle = RegNo + 2;
3983 }
3984
3985 return UseCycle;
3986}
3987
3988std::optional<unsigned>
3989ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3990 const MCInstrDesc &UseMCID, unsigned UseClass,
3991 unsigned UseIdx, unsigned UseAlign) const {
3992 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3993 if (RegNo <= 0)
3994 return ItinData->getOperandCycle(UseClass, UseIdx);
3995
3996 unsigned UseCycle;
3997 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3998 UseCycle = RegNo / 2;
3999 if (UseCycle < 2)
4000 UseCycle = 2;
4001 // Read in E3.
4002 UseCycle += 2;
4003 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
4004 UseCycle = (RegNo / 2);
4005 // If there is an odd number of registers or the address is not 64-bit
4006 // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
4007 if ((RegNo % 2) || UseAlign < 8)
4008 ++UseCycle;
4009 } else {
4010 // Assume the worst.
4011 UseCycle = 1;
4012 }
4013 return UseCycle;
4014}
4015
4016std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4017 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
4018 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
4019 unsigned UseIdx, unsigned UseAlign) const {
4020 unsigned DefClass = DefMCID.getSchedClass();
4021 unsigned UseClass = UseMCID.getSchedClass();
4022
4023 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
4024 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4025
4026 // This may be a def / use of a variable_ops instruction, the operand
4027 // latency might be determinable dynamically. Let the target try to
4028 // figure it out.
4029 std::optional<unsigned> DefCycle;
4030 bool LdmBypass = false;
4031 switch (DefMCID.getOpcode()) {
4032 default:
4033 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4034 break;
4035
4036 case ARM::VLDMDIA:
4037 case ARM::VLDMDIA_UPD:
4038 case ARM::VLDMDDB_UPD:
4039 case ARM::VLDMSIA:
4040 case ARM::VLDMSIA_UPD:
4041 case ARM::VLDMSDB_UPD:
4042 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4043 break;
4044
4045 case ARM::LDMIA_RET:
4046 case ARM::LDMIA:
4047 case ARM::LDMDA:
4048 case ARM::LDMDB:
4049 case ARM::LDMIB:
4050 case ARM::LDMIA_UPD:
4051 case ARM::LDMDA_UPD:
4052 case ARM::LDMDB_UPD:
4053 case ARM::LDMIB_UPD:
4054 case ARM::tLDMIA:
4055 case ARM::tLDMIA_UPD:
4056 case ARM::tPUSH:
4057 case ARM::t2LDMIA_RET:
4058 case ARM::t2LDMIA:
4059 case ARM::t2LDMDB:
4060 case ARM::t2LDMIA_UPD:
4061 case ARM::t2LDMDB_UPD:
4062 LdmBypass = true;
4063 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4064 break;
4065 }
4066
4067 if (!DefCycle)
4068 // We can't seem to determine the result latency of the def, assume it's 2.
4069 DefCycle = 2;
4070
4071 std::optional<unsigned> UseCycle;
4072 switch (UseMCID.getOpcode()) {
4073 default:
4074 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4075 break;
4076
4077 case ARM::VSTMDIA:
4078 case ARM::VSTMDIA_UPD:
4079 case ARM::VSTMDDB_UPD:
4080 case ARM::VSTMSIA:
4081 case ARM::VSTMSIA_UPD:
4082 case ARM::VSTMSDB_UPD:
4083 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4084 break;
4085
4086 case ARM::STMIA:
4087 case ARM::STMDA:
4088 case ARM::STMDB:
4089 case ARM::STMIB:
4090 case ARM::STMIA_UPD:
4091 case ARM::STMDA_UPD:
4092 case ARM::STMDB_UPD:
4093 case ARM::STMIB_UPD:
4094 case ARM::tSTMIA_UPD:
4095 case ARM::tPOP_RET:
4096 case ARM::tPOP:
4097 case ARM::t2STMIA:
4098 case ARM::t2STMDB:
4099 case ARM::t2STMIA_UPD:
4100 case ARM::t2STMDB_UPD:
4101 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4102 break;
4103 }
4104
4105 if (!UseCycle)
4106 // Assume it's read in the first stage.
4107 UseCycle = 1;
4108
4109 if (UseCycle > *DefCycle + 1)
4110 return std::nullopt;
4111
4112 UseCycle = *DefCycle - *UseCycle + 1;
4113 if (UseCycle > 0u) {
4114 if (LdmBypass) {
4115 // It's a variable_ops instruction so we can't use DefIdx here. Just use
4116 // first def operand.
4117 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4118 UseClass, UseIdx))
4119 UseCycle = *UseCycle - 1;
4120 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4121 UseClass, UseIdx)) {
4122 UseCycle = *UseCycle - 1;
4123 }
4124 }
4125
4126 return UseCycle;
4127}
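// Putting the pieces above together (an illustrative trace): for an LDMIA
// def feeding an STMIA use, DefCycle comes from getLDMDefCycle and UseCycle
// from getSTMUseCycle; the operand latency is then DefCycle - UseCycle + 1,
// less one more cycle when the itinerary reports pipeline forwarding
// between the two scheduling classes.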
4128
4129static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4130 const MachineInstr *MI, unsigned Reg,
4131 unsigned &DefIdx, unsigned &Dist) {
4132 Dist = 0;
4133
4134 MachineBasicBlock::const_iterator I = MI; ++I;
4135 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4136 assert(II->isInsideBundle() && "Empty bundle?");
4137
4138 int Idx = -1;
4139 while (II->isInsideBundle()) {
4140 Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
4141 if (Idx != -1)
4142 break;
4143 --II;
4144 ++Dist;
4145 }
4146
4147 assert(Idx != -1 && "Cannot find bundled definition!");
4148 DefIdx = Idx;
4149 return &*II;
4150}
4151
4152static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4153 const MachineInstr &MI, unsigned Reg,
4154 unsigned &UseIdx, unsigned &Dist) {
4155 Dist = 0;
4156
4157 MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4158 assert(II->isInsideBundle() && "Empty bundle?");
4159 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4160
4161 // FIXME: This doesn't properly handle multiple uses.
4162 int Idx = -1;
4163 while (II != E && II->isInsideBundle()) {
4164 Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
4165 if (Idx != -1)
4166 break;
4167 if (II->getOpcode() != ARM::t2IT)
4168 ++Dist;
4169 ++II;
4170 }
4171
4172 if (Idx == -1) {
4173 Dist = 0;
4174 return nullptr;
4175 }
4176
4177 UseIdx = Idx;
4178 return &*II;
4179}
4180
4181/// Return the number of cycles to add to (or subtract from) the static
4182/// itinerary based on the def opcode and alignment. The caller will ensure that
4183/// adjusted latency is at least one cycle.
4184static int adjustDefLatency(const ARMSubtarget &Subtarget,
4185 const MachineInstr &DefMI,
4186 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4187 int Adjust = 0;
4188 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4189 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4190 // variants are one cycle cheaper.
4191 switch (DefMCID.getOpcode()) {
4192 default: break;
4193 case ARM::LDRrs:
4194 case ARM::LDRBrs: {
4195 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4196 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4197 if (ShImm == 0 ||
4198 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4199 --Adjust;
4200 break;
4201 }
4202 case ARM::t2LDRs:
4203 case ARM::t2LDRBs:
4204 case ARM::t2LDRHs:
4205 case ARM::t2LDRSHs: {
4206 // Thumb2 mode: lsl only.
4207 unsigned ShAmt = DefMI.getOperand(3).getImm();
4208 if (ShAmt == 0 || ShAmt == 2)
4209 --Adjust;
4210 break;
4211 }
4212 }
4213 } else if (Subtarget.isSwift()) {
4214 // FIXME: Properly handle all of the latency adjustments for address
4215 // writeback.
4216 switch (DefMCID.getOpcode()) {
4217 default: break;
4218 case ARM::LDRrs:
4219 case ARM::LDRBrs: {
4220 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4221 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4222 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4223 if (!isSub &&
4224 (ShImm == 0 ||
4225 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4226 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4227 Adjust -= 2;
4228 else if (!isSub &&
4229 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4230 --Adjust;
4231 break;
4232 }
4233 case ARM::t2LDRs:
4234 case ARM::t2LDRBs:
4235 case ARM::t2LDRHs:
4236 case ARM::t2LDRSHs: {
4237 // Thumb2 mode: lsl only.
4238 unsigned ShAmt = DefMI.getOperand(3).getImm();
4239 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4240 Adjust -= 2;
4241 break;
4242 }
4243 }
4244 }
4245
4246 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4247 switch (DefMCID.getOpcode()) {
4248 default: break;
4249 case ARM::VLD1q8:
4250 case ARM::VLD1q16:
4251 case ARM::VLD1q32:
4252 case ARM::VLD1q64:
4253 case ARM::VLD1q8wb_fixed:
4254 case ARM::VLD1q16wb_fixed:
4255 case ARM::VLD1q32wb_fixed:
4256 case ARM::VLD1q64wb_fixed:
4257 case ARM::VLD1q8wb_register:
4258 case ARM::VLD1q16wb_register:
4259 case ARM::VLD1q32wb_register:
4260 case ARM::VLD1q64wb_register:
4261 case ARM::VLD2d8:
4262 case ARM::VLD2d16:
4263 case ARM::VLD2d32:
4264 case ARM::VLD2q8:
4265 case ARM::VLD2q16:
4266 case ARM::VLD2q32:
4267 case ARM::VLD2d8wb_fixed:
4268 case ARM::VLD2d16wb_fixed:
4269 case ARM::VLD2d32wb_fixed:
4270 case ARM::VLD2q8wb_fixed:
4271 case ARM::VLD2q16wb_fixed:
4272 case ARM::VLD2q32wb_fixed:
4273 case ARM::VLD2d8wb_register:
4274 case ARM::VLD2d16wb_register:
4275 case ARM::VLD2d32wb_register:
4276 case ARM::VLD2q8wb_register:
4277 case ARM::VLD2q16wb_register:
4278 case ARM::VLD2q32wb_register:
4279 case ARM::VLD3d8:
4280 case ARM::VLD3d16:
4281 case ARM::VLD3d32:
4282 case ARM::VLD1d64T:
4283 case ARM::VLD3d8_UPD:
4284 case ARM::VLD3d16_UPD:
4285 case ARM::VLD3d32_UPD:
4286 case ARM::VLD1d64Twb_fixed:
4287 case ARM::VLD1d64Twb_register:
4288 case ARM::VLD3q8_UPD:
4289 case ARM::VLD3q16_UPD:
4290 case ARM::VLD3q32_UPD:
4291 case ARM::VLD4d8:
4292 case ARM::VLD4d16:
4293 case ARM::VLD4d32:
4294 case ARM::VLD1d64Q:
4295 case ARM::VLD4d8_UPD:
4296 case ARM::VLD4d16_UPD:
4297 case ARM::VLD4d32_UPD:
4298 case ARM::VLD1d64Qwb_fixed:
4299 case ARM::VLD1d64Qwb_register:
4300 case ARM::VLD4q8_UPD:
4301 case ARM::VLD4q16_UPD:
4302 case ARM::VLD4q32_UPD:
4303 case ARM::VLD1DUPq8:
4304 case ARM::VLD1DUPq16:
4305 case ARM::VLD1DUPq32:
4306 case ARM::VLD1DUPq8wb_fixed:
4307 case ARM::VLD1DUPq16wb_fixed:
4308 case ARM::VLD1DUPq32wb_fixed:
4309 case ARM::VLD1DUPq8wb_register:
4310 case ARM::VLD1DUPq16wb_register:
4311 case ARM::VLD1DUPq32wb_register:
4312 case ARM::VLD2DUPd8:
4313 case ARM::VLD2DUPd16:
4314 case ARM::VLD2DUPd32:
4315 case ARM::VLD2DUPd8wb_fixed:
4316 case ARM::VLD2DUPd16wb_fixed:
4317 case ARM::VLD2DUPd32wb_fixed:
4318 case ARM::VLD2DUPd8wb_register:
4319 case ARM::VLD2DUPd16wb_register:
4320 case ARM::VLD2DUPd32wb_register:
4321 case ARM::VLD4DUPd8:
4322 case ARM::VLD4DUPd16:
4323 case ARM::VLD4DUPd32:
4324 case ARM::VLD4DUPd8_UPD:
4325 case ARM::VLD4DUPd16_UPD:
4326 case ARM::VLD4DUPd32_UPD:
4327 case ARM::VLD1LNd8:
4328 case ARM::VLD1LNd16:
4329 case ARM::VLD1LNd32:
4330 case ARM::VLD1LNd8_UPD:
4331 case ARM::VLD1LNd16_UPD:
4332 case ARM::VLD1LNd32_UPD:
4333 case ARM::VLD2LNd8:
4334 case ARM::VLD2LNd16:
4335 case ARM::VLD2LNd32:
4336 case ARM::VLD2LNq16:
4337 case ARM::VLD2LNq32:
4338 case ARM::VLD2LNd8_UPD:
4339 case ARM::VLD2LNd16_UPD:
4340 case ARM::VLD2LNd32_UPD:
4341 case ARM::VLD2LNq16_UPD:
4342 case ARM::VLD2LNq32_UPD:
4343 case ARM::VLD4LNd8:
4344 case ARM::VLD4LNd16:
4345 case ARM::VLD4LNd32:
4346 case ARM::VLD4LNq16:
4347 case ARM::VLD4LNq32:
4348 case ARM::VLD4LNd8_UPD:
4349 case ARM::VLD4LNd16_UPD:
4350 case ARM::VLD4LNd32_UPD:
4351 case ARM::VLD4LNq16_UPD:
4352 case ARM::VLD4LNq32_UPD:
4353 // If the address is not 64-bit aligned, the latencies of these
4354 // instructions increase by one.
4355 ++Adjust;
4356 break;
4357 }
4358 }
4359 return Adjust;
4360}
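// For example, on Swift an LDRrs with a simple "[r + r, lsl #2]" offset is
// credited two cycles (Adjust -= 2), while a VLD2d32 from an address not
// known to be 8-byte aligned pays one extra cycle on cores that check
// VLDn access alignment (++Adjust).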
4361
4362std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4363 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4364 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4365 // No operand latency. The caller may fall back to getInstrLatency.
4366 if (!ItinData || ItinData->isEmpty())
4367 return std::nullopt;
4368
4369 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4370 Register Reg = DefMO.getReg();
4371
4372 const MachineInstr *ResolvedDefMI = &DefMI;
4373 unsigned DefAdj = 0;
4374 if (DefMI.isBundle())
4375 ResolvedDefMI =
4376 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4377 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4378 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4379 return 1;
4380 }
4381
4382 const MachineInstr *ResolvedUseMI = &UseMI;
4383 unsigned UseAdj = 0;
4384 if (UseMI.isBundle()) {
4385 ResolvedUseMI =
4386 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4387 if (!ResolvedUseMI)
4388 return std::nullopt;
4389 }
4390
4391 return getOperandLatencyImpl(
4392 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4393 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4394}
4395
4396std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4397 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4398 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4399 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4400 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4401 if (Reg == ARM::CPSR) {
4402 if (DefMI.getOpcode() == ARM::FMSTAT) {
4403 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4404 return Subtarget.isLikeA9() ? 1 : 20;
4405 }
4406
4407 // CPSR set and branch can be paired in the same cycle.
4408 if (UseMI.isBranch())
4409 return 0;
4410
4411 // Otherwise it takes the instruction latency (generally one).
4412 unsigned Latency = getInstrLatency(ItinData, DefMI);
4413
4414 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4415 // its uses. Instructions which are otherwise scheduled between them may
4416 // incur a code size penalty (not able to use the CPSR setting 16-bit
4417 // instructions).
4418 if (Latency > 0 && Subtarget.isThumb2()) {
4419 const MachineFunction *MF = DefMI.getParent()->getParent();
4420 // FIXME: Use Function::hasOptSize().
4421 if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4422 --Latency;
4423 }
4424 return Latency;
4425 }
4426
4427 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4428 return std::nullopt;
4429
4430 unsigned DefAlign = DefMI.hasOneMemOperand()
4431 ? (*DefMI.memoperands_begin())->getAlign().value()
4432 : 0;
4433 unsigned UseAlign = UseMI.hasOneMemOperand()
4434 ? (*UseMI.memoperands_begin())->getAlign().value()
4435 : 0;
4436
4437 // Get the itinerary's latency if possible, and handle variable_ops.
4438 std::optional<unsigned> Latency = getOperandLatency(
4439 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4440 // Unable to find operand latency. The caller may resort to getInstrLatency.
4441 if (!Latency)
4442 return std::nullopt;
4443
4444 // Adjust for IT block position.
4445 int Adj = DefAdj + UseAdj;
4446
4447 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4448 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4449 if (Adj >= 0 || (int)*Latency > -Adj) {
4450 return *Latency + Adj;
4451 }
4452 // Return the itinerary latency, which may be zero but not less than zero.
4453 return Latency;
4454}
4455
4456std::optional<unsigned>
4457ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4458 SDNode *DefNode, unsigned DefIdx,
4459 SDNode *UseNode, unsigned UseIdx) const {
4460 if (!DefNode->isMachineOpcode())
4461 return 1;
4462
4463 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4464
4465 if (isZeroCost(DefMCID.Opcode))
4466 return 0;
4467
4468 if (!ItinData || ItinData->isEmpty())
4469 return DefMCID.mayLoad() ? 3 : 1;
4470
4471 if (!UseNode->isMachineOpcode()) {
4472 std::optional<unsigned> Latency =
4473 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4474 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4475 int Threshold = 1 + Adj;
4476 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4477 }
4478
4479 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4480 auto *DefMN = cast<MachineSDNode>(DefNode);
4481 unsigned DefAlign = !DefMN->memoperands_empty()
4482 ? (*DefMN->memoperands_begin())->getAlign().value()
4483 : 0;
4484 auto *UseMN = cast<MachineSDNode>(UseNode);
4485 unsigned UseAlign = !UseMN->memoperands_empty()
4486 ? (*UseMN->memoperands_begin())->getAlign().value()
4487 : 0;
4488 std::optional<unsigned> Latency = getOperandLatency(
4489 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4490 if (!Latency)
4491 return std::nullopt;
4492
4493 if (Latency > 1U &&
4494 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4495 Subtarget.isCortexA7())) {
4496 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4497 // variants are one cycle cheaper.
4498 switch (DefMCID.getOpcode()) {
4499 default: break;
4500 case ARM::LDRrs:
4501 case ARM::LDRBrs: {
4502 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4503 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4504 if (ShImm == 0 ||
4505 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4506 Latency = *Latency - 1;
4507 break;
4508 }
4509 case ARM::t2LDRs:
4510 case ARM::t2LDRBs:
4511 case ARM::t2LDRHs:
4512 case ARM::t2LDRSHs: {
4513 // Thumb2 mode: lsl only.
4514 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4515 if (ShAmt == 0 || ShAmt == 2)
4516 Latency = *Latency - 1;
4517 break;
4518 }
4519 }
4520 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4521 // FIXME: Properly handle all of the latency adjustments for address
4522 // writeback.
4523 switch (DefMCID.getOpcode()) {
4524 default: break;
4525 case ARM::LDRrs:
4526 case ARM::LDRBrs: {
4527 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4528 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4529 if (ShImm == 0 ||
4530 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4531 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4532 Latency = *Latency - 2;
4533 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4534 Latency = *Latency - 1;
4535 break;
4536 }
4537 case ARM::t2LDRs:
4538 case ARM::t2LDRBs:
4539 case ARM::t2LDRHs:
4540 case ARM::t2LDRSHs:
4541 // Thumb2 mode: lsl 0-3 only.
4542 Latency = *Latency - 2;
4543 break;
4544 }
4545 }
4546
4547 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4548 switch (DefMCID.getOpcode()) {
4549 default: break;
4550 case ARM::VLD1q8:
4551 case ARM::VLD1q16:
4552 case ARM::VLD1q32:
4553 case ARM::VLD1q64:
4554 case ARM::VLD1q8wb_register:
4555 case ARM::VLD1q16wb_register:
4556 case ARM::VLD1q32wb_register:
4557 case ARM::VLD1q64wb_register:
4558 case ARM::VLD1q8wb_fixed:
4559 case ARM::VLD1q16wb_fixed:
4560 case ARM::VLD1q32wb_fixed:
4561 case ARM::VLD1q64wb_fixed:
4562 case ARM::VLD2d8:
4563 case ARM::VLD2d16:
4564 case ARM::VLD2d32:
4565 case ARM::VLD2q8Pseudo:
4566 case ARM::VLD2q16Pseudo:
4567 case ARM::VLD2q32Pseudo:
4568 case ARM::VLD2d8wb_fixed:
4569 case ARM::VLD2d16wb_fixed:
4570 case ARM::VLD2d32wb_fixed:
4571 case ARM::VLD2q8PseudoWB_fixed:
4572 case ARM::VLD2q16PseudoWB_fixed:
4573 case ARM::VLD2q32PseudoWB_fixed:
4574 case ARM::VLD2d8wb_register:
4575 case ARM::VLD2d16wb_register:
4576 case ARM::VLD2d32wb_register:
4577 case ARM::VLD2q8PseudoWB_register:
4578 case ARM::VLD2q16PseudoWB_register:
4579 case ARM::VLD2q32PseudoWB_register:
4580 case ARM::VLD3d8Pseudo:
4581 case ARM::VLD3d16Pseudo:
4582 case ARM::VLD3d32Pseudo:
4583 case ARM::VLD1d8TPseudo:
4584 case ARM::VLD1d16TPseudo:
4585 case ARM::VLD1d32TPseudo:
4586 case ARM::VLD1d64TPseudo:
4587 case ARM::VLD1d64TPseudoWB_fixed:
4588 case ARM::VLD1d64TPseudoWB_register:
4589 case ARM::VLD3d8Pseudo_UPD:
4590 case ARM::VLD3d16Pseudo_UPD:
4591 case ARM::VLD3d32Pseudo_UPD:
4592 case ARM::VLD3q8Pseudo_UPD:
4593 case ARM::VLD3q16Pseudo_UPD:
4594 case ARM::VLD3q32Pseudo_UPD:
4595 case ARM::VLD3q8oddPseudo:
4596 case ARM::VLD3q16oddPseudo:
4597 case ARM::VLD3q32oddPseudo:
4598 case ARM::VLD3q8oddPseudo_UPD:
4599 case ARM::VLD3q16oddPseudo_UPD:
4600 case ARM::VLD3q32oddPseudo_UPD:
4601 case ARM::VLD4d8Pseudo:
4602 case ARM::VLD4d16Pseudo:
4603 case ARM::VLD4d32Pseudo:
4604 case ARM::VLD1d8QPseudo:
4605 case ARM::VLD1d16QPseudo:
4606 case ARM::VLD1d32QPseudo:
4607 case ARM::VLD1d64QPseudo:
4608 case ARM::VLD1d64QPseudoWB_fixed:
4609 case ARM::VLD1d64QPseudoWB_register:
4610 case ARM::VLD1q8HighQPseudo:
4611 case ARM::VLD1q8LowQPseudo_UPD:
4612 case ARM::VLD1q8HighTPseudo:
4613 case ARM::VLD1q8LowTPseudo_UPD:
4614 case ARM::VLD1q16HighQPseudo:
4615 case ARM::VLD1q16LowQPseudo_UPD:
4616 case ARM::VLD1q16HighTPseudo:
4617 case ARM::VLD1q16LowTPseudo_UPD:
4618 case ARM::VLD1q32HighQPseudo:
4619 case ARM::VLD1q32LowQPseudo_UPD:
4620 case ARM::VLD1q32HighTPseudo:
4621 case ARM::VLD1q32LowTPseudo_UPD:
4622 case ARM::VLD1q64HighQPseudo:
4623 case ARM::VLD1q64LowQPseudo_UPD:
4624 case ARM::VLD1q64HighTPseudo:
4625 case ARM::VLD1q64LowTPseudo_UPD:
4626 case ARM::VLD4d8Pseudo_UPD:
4627 case ARM::VLD4d16Pseudo_UPD:
4628 case ARM::VLD4d32Pseudo_UPD:
4629 case ARM::VLD4q8Pseudo_UPD:
4630 case ARM::VLD4q16Pseudo_UPD:
4631 case ARM::VLD4q32Pseudo_UPD:
4632 case ARM::VLD4q8oddPseudo:
4633 case ARM::VLD4q16oddPseudo:
4634 case ARM::VLD4q32oddPseudo:
4635 case ARM::VLD4q8oddPseudo_UPD:
4636 case ARM::VLD4q16oddPseudo_UPD:
4637 case ARM::VLD4q32oddPseudo_UPD:
4638 case ARM::VLD1DUPq8:
4639 case ARM::VLD1DUPq16:
4640 case ARM::VLD1DUPq32:
4641 case ARM::VLD1DUPq8wb_fixed:
4642 case ARM::VLD1DUPq16wb_fixed:
4643 case ARM::VLD1DUPq32wb_fixed:
4644 case ARM::VLD1DUPq8wb_register:
4645 case ARM::VLD1DUPq16wb_register:
4646 case ARM::VLD1DUPq32wb_register:
4647 case ARM::VLD2DUPd8:
4648 case ARM::VLD2DUPd16:
4649 case ARM::VLD2DUPd32:
4650 case ARM::VLD2DUPd8wb_fixed:
4651 case ARM::VLD2DUPd16wb_fixed:
4652 case ARM::VLD2DUPd32wb_fixed:
4653 case ARM::VLD2DUPd8wb_register:
4654 case ARM::VLD2DUPd16wb_register:
4655 case ARM::VLD2DUPd32wb_register:
4656 case ARM::VLD2DUPq8EvenPseudo:
4657 case ARM::VLD2DUPq8OddPseudo:
4658 case ARM::VLD2DUPq16EvenPseudo:
4659 case ARM::VLD2DUPq16OddPseudo:
4660 case ARM::VLD2DUPq32EvenPseudo:
4661 case ARM::VLD2DUPq32OddPseudo:
4662 case ARM::VLD3DUPq8EvenPseudo:
4663 case ARM::VLD3DUPq8OddPseudo:
4664 case ARM::VLD3DUPq16EvenPseudo:
4665 case ARM::VLD3DUPq16OddPseudo:
4666 case ARM::VLD3DUPq32EvenPseudo:
4667 case ARM::VLD3DUPq32OddPseudo:
4668 case ARM::VLD4DUPd8Pseudo:
4669 case ARM::VLD4DUPd16Pseudo:
4670 case ARM::VLD4DUPd32Pseudo:
4671 case ARM::VLD4DUPd8Pseudo_UPD:
4672 case ARM::VLD4DUPd16Pseudo_UPD:
4673 case ARM::VLD4DUPd32Pseudo_UPD:
4674 case ARM::VLD4DUPq8EvenPseudo:
4675 case ARM::VLD4DUPq8OddPseudo:
4676 case ARM::VLD4DUPq16EvenPseudo:
4677 case ARM::VLD4DUPq16OddPseudo:
4678 case ARM::VLD4DUPq32EvenPseudo:
4679 case ARM::VLD4DUPq32OddPseudo:
4680 case ARM::VLD1LNq8Pseudo:
4681 case ARM::VLD1LNq16Pseudo:
4682 case ARM::VLD1LNq32Pseudo:
4683 case ARM::VLD1LNq8Pseudo_UPD:
4684 case ARM::VLD1LNq16Pseudo_UPD:
4685 case ARM::VLD1LNq32Pseudo_UPD:
4686 case ARM::VLD2LNd8Pseudo:
4687 case ARM::VLD2LNd16Pseudo:
4688 case ARM::VLD2LNd32Pseudo:
4689 case ARM::VLD2LNq16Pseudo:
4690 case ARM::VLD2LNq32Pseudo:
4691 case ARM::VLD2LNd8Pseudo_UPD:
4692 case ARM::VLD2LNd16Pseudo_UPD:
4693 case ARM::VLD2LNd32Pseudo_UPD:
4694 case ARM::VLD2LNq16Pseudo_UPD:
4695 case ARM::VLD2LNq32Pseudo_UPD:
4696 case ARM::VLD4LNd8Pseudo:
4697 case ARM::VLD4LNd16Pseudo:
4698 case ARM::VLD4LNd32Pseudo:
4699 case ARM::VLD4LNq16Pseudo:
4700 case ARM::VLD4LNq32Pseudo:
4701 case ARM::VLD4LNd8Pseudo_UPD:
4702 case ARM::VLD4LNd16Pseudo_UPD:
4703 case ARM::VLD4LNd32Pseudo_UPD:
4704 case ARM::VLD4LNq16Pseudo_UPD:
4705 case ARM::VLD4LNq32Pseudo_UPD:
4706 // If the address is not 64-bit aligned, the latencies of these
4707 // instructions increase by one.
4708 Latency = *Latency + 1;
4709 break;
4710 }
4711
4712 return Latency;
4713}
4714
4715unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4716 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4717 MI.isImplicitDef())
4718 return 0;
4719
4720 if (MI.isBundle())
4721 return 0;
4722
4723 const MCInstrDesc &MCID = MI.getDesc();
4724
4725 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4726 !Subtarget.cheapPredicableCPSRDef())) {
4727 // When predicated, CPSR is an additional source operand for CPSR updating
4728 // instructions; this apparently increases their latencies.
4729 return 1;
4730 }
4731 return 0;
4732}
4733
4734unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4735 const MachineInstr &MI,
4736 unsigned *PredCost) const {
4737 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4738 MI.isImplicitDef())
4739 return 1;
4740
4741 // An instruction scheduler typically runs on unbundled instructions; however,
4742 // other passes may query the latency of a bundled instruction.
4743 if (MI.isBundle()) {
4744 unsigned Latency = 0;
4745 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4746 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4747 while (++I != E && I->isInsideBundle()) {
4748 if (I->getOpcode() != ARM::t2IT)
4749 Latency += getInstrLatency(ItinData, *I, PredCost);
4750 }
4751 return Latency;
4752 }
4753
4754 const MCInstrDesc &MCID = MI.getDesc();
4755 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4756 !Subtarget.cheapPredicableCPSRDef()))) {
4757 // When predicated, CPSR is an additional source operand for CPSR updating
4758 // instructions; this apparently increases their latencies.
4759 *PredCost = 1;
4760 }
4761 // Be sure to call getStageLatency for an empty itinerary in case it has a
4762 // valid MinLatency property.
4763 if (!ItinData)
4764 return MI.mayLoad() ? 3 : 1;
4765
4766 unsigned Class = MCID.getSchedClass();
4767
4768 // For instructions with variable uops, use uops as latency.
4769 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4770 return getNumMicroOps(ItinData, MI);
4771
4772 // For the common case, fall back on the itinerary's latency.
4773 unsigned Latency = ItinData->getStageLatency(Class);
4774
4775 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4776 unsigned DefAlign =
4777 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4778 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4779 if (Adj >= 0 || (int)Latency > -Adj) {
4780 return Latency + Adj;
4781 }
4782 return Latency;
4783}
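// For example, querying a bundle of { t2IT, t2ADDri, t2SUBri } sums the
// latencies of the two arithmetic instructions and skips the t2IT marker
// itself, typically yielding 1 + 1 = 2 cycles (a sketch; the exact values
// come from the active itinerary).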
4784
4785unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4786 SDNode *Node) const {
4787 if (!Node->isMachineOpcode())
4788 return 1;
4789
4790 if (!ItinData || ItinData->isEmpty())
4791 return 1;
4792
4793 unsigned Opcode = Node->getMachineOpcode();
4794 switch (Opcode) {
4795 default:
4796 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4797 case ARM::VLDMQIA:
4798 case ARM::VSTMQIA:
4799 return 2;
4800 }
4801}
4802
4803bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4804 const MachineRegisterInfo *MRI,
4805 const MachineInstr &DefMI,
4806 unsigned DefIdx,
4807 const MachineInstr &UseMI,
4808 unsigned UseIdx) const {
4809 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4810 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4811 if (Subtarget.nonpipelinedVFP() &&
4812 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4813 return true;
4814
4815 // Hoist VFP / NEON instructions with 4 or higher latency.
4816 unsigned Latency =
4817 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4818 if (Latency <= 3)
4819 return false;
4820 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4821 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4822}
4823
4824bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4825 const MachineInstr &DefMI,
4826 unsigned DefIdx) const {
4827 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4828 if (!ItinData || ItinData->isEmpty())
4829 return false;
4830
4831 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4832 if (DDomain == ARMII::DomainGeneral) {
4833 unsigned DefClass = DefMI.getDesc().getSchedClass();
4834 std::optional<unsigned> DefCycle =
4835 ItinData->getOperandCycle(DefClass, DefIdx);
4836 return DefCycle && DefCycle <= 2U;
4837 }
4838 return false;
4839}
4840
4841bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4842 StringRef &ErrInfo) const {
4843 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4844 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4845 return false;
4846 }
4847 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4848 // Make sure we don't generate a lo-lo mov that isn't supported.
4849 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4850 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4851 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4852 return false;
4853 }
4854 }
4855 if (MI.getOpcode() == ARM::tPUSH ||
4856 MI.getOpcode() == ARM::tPOP ||
4857 MI.getOpcode() == ARM::tPOP_RET) {
4858 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4859 if (MO.isImplicit() || !MO.isReg())
4860 continue;
4861 Register Reg = MO.getReg();
4862 if (Reg < ARM::R0 || Reg > ARM::R7) {
4863 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4864 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4865 ErrInfo = "Unsupported register in Thumb1 push/pop";
4866 return false;
4867 }
4868 }
4869 }
4870 }
4871 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4872 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4873 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4874 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4875 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4876 return false;
4877 }
4878 }
4879
4880 // Check the address model by taking the first Imm operand and checking it is
4881 // legal for that addressing mode.
4882 ARMII::AddrMode AddrMode =
4883 (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4884 switch (AddrMode) {
4885 default:
4886 break;
4887 case ARMII::AddrModeT2_i7:
4888 case ARMII::AddrModeT2_i7s2:
4889 case ARMII::AddrModeT2_i7s4:
4890 case ARMII::AddrModeT2_i8:
4891 case ARMII::AddrModeT2_i8pos:
4892 case ARMII::AddrModeT2_i8neg:
4893 case ARMII::AddrModeT2_i8s4:
4894 case ARMII::AddrModeT2_i12: {
4895 uint32_t Imm = 0;
4896 for (auto Op : MI.operands()) {
4897 if (Op.isImm()) {
4898 Imm = Op.getImm();
4899 break;
4900 }
4901 }
4902 if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4903 ErrInfo = "Incorrect AddrMode Imm for instruction";
4904 return false;
4905 }
4906 break;
4907 }
4908 }
4909 return true;
4910}
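// For example, a t2LDRi12 whose immediate operand is 4096 fails this
// check: AddrModeT2_i12 encodes only offsets 0-4095, so isLegalAddressImm
// rejects it and the verifier reports the AddrMode error above.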
4911
4912void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4913 unsigned LoadImmOpc,
4914 unsigned LoadOpc) const {
4915 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4916 "ROPI/RWPI not currently supported with stack guard");
4917
4918 MachineBasicBlock &MBB = *MI->getParent();
4919 DebugLoc DL = MI->getDebugLoc();
4920 Register Reg = MI->getOperand(0).getReg();
4921 MachineInstrBuilder MIB;
4922 unsigned int Offset = 0;
4923
4924 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4925 assert(!Subtarget.isReadTPSoft() &&
4926 "TLS stack protector requires hardware TLS register");
4927
4928 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4929 .addImm(15)
4930 .addImm(0)
4931 .addImm(13)
4932 .addImm(0)
4933 .addImm(3)
4934 .add(predOps(ARMCC::AL));
4935
4936 Module &M = *MBB.getParent()->getFunction().getParent();
4937 Offset = M.getStackProtectorGuardOffset();
4938 if (Offset & ~0xfffU) {
4939 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4940 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4941 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4942 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4943 BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4944 .addReg(Reg, RegState::Kill)
4945 .addImm(Offset & ~0xfffU)
4947 .addReg(0);
4948 Offset &= 0xfffU;
4949 }
4950 } else {
4951 const GlobalValue *GV =
4952 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4953 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4954
4955 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4956 if (Subtarget.isTargetMachO()) {
4957 TargetFlags |= ARMII::MO_NONLAZY;
4958 } else if (Subtarget.isTargetCOFF()) {
4959 if (GV->hasDLLImportStorageClass())
4960 TargetFlags |= ARMII::MO_DLLIMPORT;
4961 else if (IsIndirect)
4962 TargetFlags |= ARMII::MO_COFFSTUB;
4963 } else if (IsIndirect) {
4964 TargetFlags |= ARMII::MO_GOT;
4965 }
4966
4967 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4968 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4969 auto APSREncoding =
4970 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4971 BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4972 .addImm(APSREncoding)
4973 .add(predOps(ARMCC::AL));
4974 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4975 .addGlobalAddress(GV, 0, TargetFlags);
4976 BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4977 .addImm(APSREncoding)
4978 .addReg(CPSRSaveReg, RegState::Kill)
4979 .add(predOps(ARMCC::AL));
4980 } else {
4981 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4982 .addGlobalAddress(GV, 0, TargetFlags);
4983 }
4984
4985 if (IsIndirect) {
4986 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4987 MIB.addReg(Reg, RegState::Kill).addImm(0);
4988 auto Flags = MachineMemOperand::MOLoad |
4989 MachineMemOperand::MODereferenceable |
4990 MachineMemOperand::MOInvariant;
4991 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4992 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4993 MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4994 }
4995 }
4996
4997 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4998 MIB.addReg(Reg, RegState::Kill)
4999 .addImm(Offset)
5000 .cloneMemRefs(*MI)
5001 .add(predOps(ARMCC::AL));
5002}
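// Offset example for the TLS-guard path above: a guard offset of 0x1028
// exceeds the 12-bit LDR immediate, so the expansion emits
// "add Reg, Reg, #0x1000" followed by "ldr Reg, [Reg, #0x28]".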
5003
5004bool
5005ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
5006 unsigned &AddSubOpc,
5007 bool &NegAcc, bool &HasLane) const {
5008 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
5009 if (I == MLxEntryMap.end())
5010 return false;
5011
5012 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
5013 MulOpc = Entry.MulOpc;
5014 AddSubOpc = Entry.AddSubOpc;
5015 NegAcc = Entry.NegAcc;
5016 HasLane = Entry.HasLane;
5017 return true;
5018}
5019
5020//===----------------------------------------------------------------------===//
5021// Execution domains.
5022//===----------------------------------------------------------------------===//
5023//
5024// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
5025// and some can go down both. The vmov instructions go down the VFP pipeline,
5026// but they can be changed to vorr equivalents that are executed by the NEON
5027// pipeline.
5028//
5029// We use the following execution domain numbering:
5030//
5031enum ARMExeDomain {
5032 ExeGeneric = 0,
5033 ExeVFP = 1,
5034 ExeNEON = 2
5035};
5036
5037//
5038// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
5039//
5040std::pair<uint16_t, uint16_t>
5041ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
5042 // If we don't have access to NEON instructions then we won't be able
5043 // to swizzle anything to the NEON domain. Check to make sure.
5044 if (Subtarget.hasNEON()) {
5045 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
5046 // if they are not predicated.
5047 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
5048 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5049
5050 // CortexA9 is particularly picky about mixing the two and wants these
5051 // converted.
5052 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
5053 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
5054 MI.getOpcode() == ARM::VMOVS))
5055 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5056 }
5057 // No other instructions can be swizzled, so just determine their domain.
5058 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
5059
5060 if (Domain & ARMII::DomainNEON)
5061 return std::make_pair(ExeNEON, 0);
5062
5063 // Certain instructions can go either way on Cortex-A8.
5064 // Treat them as NEON instructions.
5065 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
5066 return std::make_pair(ExeNEON, 0);
5067
5068 if (Domain & ARMII::DomainVFP)
5069 return std::make_pair(ExeVFP, 0);
5070
5071 return std::make_pair(ExeGeneric, 0);
5072}
5073
5074static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
5075 unsigned SReg, unsigned &Lane) {
5076 unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
5077 Lane = 0;
5078
5079 if (DReg != ARM::NoRegister)
5080 return DReg;
5081
5082 Lane = 1;
5083 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
5084
5085 assert(DReg && "S-register with no D super-register?");
5086 return DReg;
5087}
5088
5089/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
5090/// set ImplicitSReg to a register number that must be marked as implicit-use,
5091/// or zero if no register needs to be marked as implicit-use.
5092///
5093/// If the function cannot determine if an SPR should be marked implicit use or
5094/// not, it returns false.
5095///
5096/// This function handles cases where an instruction is being modified from taking
5097/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
5098/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
5099/// lane of the DPR).
5100///
5101/// If the other SPR is defined, an implicit-use of it should be added.
5102/// Otherwise (including the case where the DPR itself is defined), it should not.
5103///
5104static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
5105 MachineInstr &MI, unsigned DReg,
5106 unsigned Lane, unsigned &ImplicitSReg) {
5107 // If the DPR is defined or used already, the other SPR lane will be chained
5108 // correctly, so there is nothing to be done.
5109 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
5110 ImplicitSReg = 0;
5111 return true;
5112 }
5113
5114 // Otherwise we need to go searching to see if the SPR is set explicitly.
5115 ImplicitSReg = TRI->getSubReg(DReg,
5116 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
5117 MachineBasicBlock::LivenessQueryResult LQR =
5118 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5119
5120 if (LQR == MachineBasicBlock::LQR_Live)
5121 return true;
5122 else if (LQR == MachineBasicBlock::LQR_Unknown)
5123 return false;
5124
5125 // If the register is known not to be live, there is no need to add an
5126 // implicit-use.
5127 ImplicitSReg = 0;
5128 return true;
5129}
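// For example, turning "vmov s1, r0" (VMOVSR) into a VSETLNi32 on d0[1]
// must add an implicit-use of s0 when s0 has a live definition, so that
// the earlier write of s0 stays chained to the new full-D-register write.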
5130
5131void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
5132 unsigned Domain) const {
5133 unsigned DstReg, SrcReg, DReg;
5134 unsigned Lane;
5135 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
5136 const TargetRegisterInfo *TRI = &getRegisterInfo();
5137 switch (MI.getOpcode()) {
5138 default:
5139 llvm_unreachable("cannot handle opcode!");
5140 break;
5141 case ARM::VMOVD:
5142 if (Domain != ExeNEON)
5143 break;
5144
5145 // Zap the predicate operands.
5146 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
5147
5148 // Make sure we've got NEON instructions.
5149 assert(Subtarget.hasNEON() && "VORRd requires NEON");
5150
5151 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5152 DstReg = MI.getOperand(0).getReg();
5153 SrcReg = MI.getOperand(1).getReg();
5154
5155 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5156 MI.removeOperand(i - 1);
5157
5158 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5159 MI.setDesc(get(ARM::VORRd));
5160 MIB.addReg(DstReg, RegState::Define)
5161 .addReg(SrcReg)
5162 .addReg(SrcReg)
5163 .add(predOps(ARMCC::AL));
5164 break;
5165 case ARM::VMOVRS:
5166 if (Domain != ExeNEON)
5167 break;
5168 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5169
5170 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5171 DstReg = MI.getOperand(0).getReg();
5172 SrcReg = MI.getOperand(1).getReg();
5173
5174 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5175 MI.removeOperand(i - 1);
5176
5177 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5178
5179 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5180 // Note that DSrc has been widened and the other lane may be undef, which
5181 // contaminates the entire register.
5182 MI.setDesc(get(ARM::VGETLNi32));
5183 MIB.addReg(DstReg, RegState::Define)
5184 .addReg(DReg, RegState::Undef)
5185 .addImm(Lane)
5186 .add(predOps(ARMCC::AL));
5187
5188 // The old source should be an implicit use, otherwise we might think it
5189 // was dead before here.
5190 MIB.addReg(SrcReg, RegState::Implicit);
5191 break;
5192 case ARM::VMOVSR: {
5193 if (Domain != ExeNEON)
5194 break;
5195 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5196
5197 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5198 DstReg = MI.getOperand(0).getReg();
5199 SrcReg = MI.getOperand(1).getReg();
5200
5201 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5202
5203 unsigned ImplicitSReg;
5204 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5205 break;
5206
5207 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5208 MI.removeOperand(i - 1);
5209
5210 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5211 // Again DDst may be undefined at the beginning of this instruction.
5212 MI.setDesc(get(ARM::VSETLNi32));
5213 MIB.addReg(DReg, RegState::Define)
5214 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5215 .addReg(SrcReg)
5216 .addImm(Lane)
5217 .add(predOps(ARMCC::AL));
5218
5219 // The narrower destination must be marked as set to keep previous chains
5220 // in place.
5221 MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
5222 if (ImplicitSReg != 0)
5223 MIB.addReg(ImplicitSReg, RegState::Implicit);
5224 break;
5225 }
5226 case ARM::VMOVS: {
5227 if (Domain != ExeNEON)
5228 break;
5229
5230 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5231 DstReg = MI.getOperand(0).getReg();
5232 SrcReg = MI.getOperand(1).getReg();
5233
5234 unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
5235 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5236 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5237
5238 unsigned ImplicitSReg;
5239 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5240 break;
5241
5242 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5243 MI.removeOperand(i - 1);
5244
5245 if (DSrc == DDst) {
5246 // Destination can be:
5247 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5248 MI.setDesc(get(ARM::VDUPLN32d));
5249 MIB.addReg(DDst, RegState::Define)
5250 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5251 .addImm(SrcLane)
5252 .add(predOps(ARMCC::AL));
5253
5254 // Neither the source nor the destination is naturally represented any
5255 // more, so add them in manually.
5256 MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
5257 MIB.addReg(SrcReg, RegState::Implicit);
5258 if (ImplicitSReg != 0)
5259 MIB.addReg(ImplicitSReg, RegState::Implicit);
5260 break;
5261 }
5262
5263 // In general there's no single instruction that can perform an S <-> S
5264 // move in NEON space, but a pair of VEXT instructions *can* do the
5265 // job. It turns out that the VEXTs needed will only use DSrc once, with
5266 // the position based purely on the combination of lane-0 and lane-1
5267 // involved. For example
5268 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5269 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5270 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5271 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5272 //
5273 // Pattern of the MachineInstrs is:
5274 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5275 MachineInstrBuilder NewMIB;
5276 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
5277 DDst);
5278
5279 // On the first instruction, both DSrc and DDst may be undef if present.
5280 // Specifically when the original instruction didn't have them as an
5281 // <imp-use>.
5282 unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5283 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5284 NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5285
5286 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5287 CurUndef = !MI.readsRegister(CurReg, TRI);
5288 NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
5289 .addImm(1)
5290 .add(predOps(ARMCC::AL));
5291
5292 if (SrcLane == DstLane)
5293 NewMIB.addReg(SrcReg, RegState::Implicit);
5294
5295 MI.setDesc(get(ARM::VEXTd32));
5296 MIB.addReg(DDst, RegState::Define);
5297
5298 // On the second instruction, DDst has definitely been defined above, so
5299 // it is not undef. DSrc, if present, can be undef as above.
5300 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5301 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5302 MIB.addReg(CurReg, getUndefRegState(CurUndef));
5303
5304 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5305 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5306 MIB.addReg(CurReg, getUndefRegState(CurUndef))
5307 .addImm(1)
5308 .add(predOps(ARMCC::AL));
5309
5310 if (SrcLane != DstLane)
5311 MIB.addReg(SrcReg, RegState::Implicit);
5312
5313 // As before, the original destination is no longer represented, add it
5314 // implicitly.
5315 MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
5316 if (ImplicitSReg != 0)
5317 MIB.addReg(ImplicitSReg, RegState::Implicit);
5318 break;
5319 }
5320 }
5321}
5322
5323//===----------------------------------------------------------------------===//
5324// Partial register updates
5325//===----------------------------------------------------------------------===//
5326//
5327// Swift renames NEON registers with 64-bit granularity. That means any
5328// instruction writing an S-reg implicitly reads the containing D-reg. The
5329// problem is mostly avoided by translating f32 operations to v2f32 operations
5330// on D-registers, but f32 loads are still a problem.
5331//
5332// These instructions can load an f32 into a NEON register:
5333//
5334// VLDRS - Only writes S, partial D update.
5335// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5336// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5337//
5338// FCONSTD can be used as a dependency-breaking instruction.
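// An illustrative sequence (editorial example, not from the original source):
//
//   vldr s0, [r0]        ; writes only s0, so on Swift it implicitly
//                        ; depends on the previous writer of d0
//
// becomes dependency-free once the full D-register is clobbered first:
//
//   vmov.f64 d0, #0.5    ; FCONSTD defines all of d0; the value is irrelevant
//   vldr s0, [r0]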
5339unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
5340 const MachineInstr &MI, unsigned OpNum,
5341 const TargetRegisterInfo *TRI) const {
5342 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5343 if (!PartialUpdateClearance)
5344 return 0;
5345
5346 assert(TRI && "Need TRI instance");
5347
5348 const MachineOperand &MO = MI.getOperand(OpNum);
5349 if (MO.readsReg())
5350 return 0;
5351 Register Reg = MO.getReg();
5352 int UseOp = -1;
5353
5354 switch (MI.getOpcode()) {
5355 // Normal instructions writing only an S-register.
5356 case ARM::VLDRS:
5357 case ARM::FCONSTS:
5358 case ARM::VMOVSR:
5359 case ARM::VMOVv8i8:
5360 case ARM::VMOVv4i16:
5361 case ARM::VMOVv2i32:
5362 case ARM::VMOVv2f32:
5363 case ARM::VMOVv1i64:
5364 UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
5365 break;
5366
5367 // Explicitly reads the dependency.
5368 case ARM::VLD1LNd32:
5369 UseOp = 3;
5370 break;
5371 default:
5372 return 0;
5373 }
5374
5375 // If this instruction actually reads a value from Reg, there is no unwanted
5376 // dependency.
5377 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5378 return 0;
5379
5380 // We must be able to clobber the whole D-reg.
5381 if (Reg.isVirtual()) {
5382 // Virtual register must be a def undef foo:ssub_0 operand.
5383 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5384 return 0;
5385 } else if (ARM::SPRRegClass.contains(Reg)) {
5386 // Physical register: MI must define the full D-reg.
5387 unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
5388 &ARM::DPRRegClass);
5389 if (!DReg || !MI.definesRegister(DReg, TRI))
5390 return 0;
5391 }
5392
5393 // MI has an unwanted D-register dependency.
5394 // Avoid defs in the previous N instructions.
5395 return PartialUpdateClearance;
5396}
5397
5398// Break a partial register dependency after getPartialRegUpdateClearance
5399// returned non-zero.
5400void ARMBaseInstrInfo::breakPartialRegDependency(
5401 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5402 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5403 assert(TRI && "Need TRI instance");
5404
5405 const MachineOperand &MO = MI.getOperand(OpNum);
5406 Register Reg = MO.getReg();
5407 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5408 unsigned DReg = Reg;
5409
5410 // If MI defines an S-reg, find the corresponding D super-register.
5411 if (ARM::SPRRegClass.contains(Reg)) {
5412 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5413 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5414 }
5415
5416 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5417 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5418
5419 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5420 // the full D-register by loading the same value to both lanes. The
5421 // instruction is micro-coded with 2 uops, so don't do this until we can
5422 // properly schedule micro-coded instructions. The dispatcher stalls cause
5423 // too big regressions.
5424
5425 // Insert the dependency-breaking FCONSTD before MI.
5426 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5427 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5428 .addImm(96)
5429 .add(predOps(ARMCC::AL));
5430 MI.addRegisterKilled(DReg, TRI, true);
5431}
5432
5433bool ARMBaseInstrInfo::hasNOP() const {
5434 return Subtarget.hasFeature(ARM::HasV6KOps);
5435}
5436
5437bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
5438 if (MI->getNumOperands() < 4)
5439 return true;
5440 unsigned ShOpVal = MI->getOperand(3).getImm();
5441 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5442 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5443 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5444 ((ShImm == 1 || ShImm == 2) &&
5445 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5446 return true;
5447
5448 return false;
5449}
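// Editorial examples: "add r0, r1, r2, lsl #2" and "add r0, r1, r2, lsr #1"
// take the fast path above, while "add r0, r1, r2, lsl #3" does not.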
5450
5451bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5452 const MachineInstr &MI, unsigned DefIdx,
5453 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5454 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5455 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5456
5457 switch (MI.getOpcode()) {
5458 case ARM::VMOVDRR:
5459 // dX = VMOVDRR rY, rZ
5460 // is the same as:
5461 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5462 // Populate the InputRegs accordingly.
5463 // rY
5464 const MachineOperand *MOReg = &MI.getOperand(1);
5465 if (!MOReg->isUndef())
5466 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5467 MOReg->getSubReg(), ARM::ssub_0));
5468 // rZ
5469 MOReg = &MI.getOperand(2);
5470 if (!MOReg->isUndef())
5471 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5472 MOReg->getSubReg(), ARM::ssub_1));
5473 return true;
5474 }
5475 llvm_unreachable("Target dependent opcode missing");
5476}
5477
5478bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5479 const MachineInstr &MI, unsigned DefIdx,
5480 RegSubRegPairAndIdx &InputReg) const {
5481 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5482 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5483
5484 switch (MI.getOpcode()) {
5485 case ARM::VMOVRRD:
5486 // rX, rY = VMOVRRD dZ
5487 // is the same as:
5488 // rX = EXTRACT_SUBREG dZ, ssub_0
5489 // rY = EXTRACT_SUBREG dZ, ssub_1
5490 const MachineOperand &MOReg = MI.getOperand(2);
5491 if (MOReg.isUndef())
5492 return false;
5493 InputReg.Reg = MOReg.getReg();
5494 InputReg.SubReg = MOReg.getSubReg();
5495 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5496 return true;
5497 }
5498 llvm_unreachable("Target dependent opcode missing");
5499}
5500
5501bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5502 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5503 RegSubRegPairAndIdx &InsertedReg) const {
5504 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5505 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5506
5507 switch (MI.getOpcode()) {
5508 case ARM::VSETLNi32:
5509 case ARM::MVE_VMOV_to_lane_32:
5510 // dX = VSETLNi32 dY, rZ, imm
5511 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5512 const MachineOperand &MOBaseReg = MI.getOperand(1);
5513 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5514 if (MOInsertedReg.isUndef())
5515 return false;
5516 const MachineOperand &MOIndex = MI.getOperand(3);
5517 BaseReg.Reg = MOBaseReg.getReg();
5518 BaseReg.SubReg = MOBaseReg.getSubReg();
5519
5520 InsertedReg.Reg = MOInsertedReg.getReg();
5521 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5522 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5523 return true;
5524 }
5525 llvm_unreachable("Target dependent opcode missing");
5526}
5527
5528std::pair<unsigned, unsigned>
5529ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5530 const unsigned Mask = ARMII::MO_OPTION_MASK;
5531 return std::make_pair(TF & Mask, TF & ~Mask);
5532}
5533
5534ArrayRef<std::pair<unsigned, const char *>>
5535ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5536 using namespace ARMII;
5537
5538 static const std::pair<unsigned, const char *> TargetFlags[] = {
5539 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5540 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5541 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5542 };
5543 return ArrayRef(TargetFlags);
5544}
5545
5546ArrayRef<std::pair<unsigned, const char *>>
5547ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5548 using namespace ARMII;
5549
5550 static const std::pair<unsigned, const char *> TargetFlags[] = {
5551 {MO_COFFSTUB, "arm-coffstub"},
5552 {MO_GOT, "arm-got"},
5553 {MO_SBREL, "arm-sbrel"},
5554 {MO_DLLIMPORT, "arm-dllimport"},
5555 {MO_SECREL, "arm-secrel"},
5556 {MO_NONLAZY, "arm-nonlazy"}};
5557 return ArrayRef(TargetFlags);
5558}
5559
5560std::optional<RegImmPair>
5561ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
5562 int Sign = 1;
5563 unsigned Opcode = MI.getOpcode();
5564 int64_t Offset = 0;
5565
5566 // TODO: Handle cases where Reg is a super- or sub-register of the
5567 // destination register.
5568 const MachineOperand &Op0 = MI.getOperand(0);
5569 if (!Op0.isReg() || Reg != Op0.getReg())
5570 return std::nullopt;
5571
5572 // We describe SUBri or ADDri instructions.
5573 if (Opcode == ARM::SUBri)
5574 Sign = -1;
5575 else if (Opcode != ARM::ADDri)
5576 return std::nullopt;
5577
5578 // TODO: Third operand can be global address (usually some string). Since
5579 // strings can be relocated we cannot calculate their offsets for
5580 // now.
5581 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5582 return std::nullopt;
5583
5584 Offset = MI.getOperand(2).getImm() * Sign;
5585 return RegImmPair{MI.getOperand(1).getReg(), Offset};
5586}
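// Editorial example: for "%r0 = ADDri %r1, 16, 14, $noreg, $noreg",
// isAddImmediate(MI, r0) yields {Reg: r1, Imm: +16}; the SUBri form yields
// the negated offset {Reg: r1, Imm: -16}.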
5587
5588bool llvm::registerDefinedBetween(unsigned Reg,
5589 MachineBasicBlock::iterator From,
5590 MachineBasicBlock::iterator To,
5591 const TargetRegisterInfo *TRI) {
5592 for (auto I = From; I != To; ++I)
5593 if (I->modifiesRegister(Reg, TRI))
5594 return true;
5595 return false;
5596}
5597
5598MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
5599 const TargetRegisterInfo *TRI) {
5600 // Search backwards to the instruction that defines CPSR. This may or may
5601 // not be a CMP; we check that after this loop. If we find another
5602 // instruction that reads CPSR, we return nullptr.
5603 MachineBasicBlock::iterator CmpMI = Br;
5604 while (CmpMI != Br->getParent()->begin()) {
5605 --CmpMI;
5606 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5607 break;
5608 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5609 break;
5610 }
5611
5612 // Check that this inst is a CMP r[0-7], #0 and that the register
5613 // is not redefined between the cmp and the br.
5614 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5615 return nullptr;
5616 Register Reg = CmpMI->getOperand(0).getReg();
5617 Register PredReg;
5618 ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
5619 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5620 return nullptr;
5621 if (!isARMLowRegister(Reg))
5622 return nullptr;
5623 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5624 return nullptr;
5625
5626 return &*CmpMI;
5627}
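// Editorial example: given
//   tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
//   tBcc %bb.2, 0 /* eq */, $cpsr
// this returns the tCMPi8, which a caller may fold with the branch into a
// single "cbz r3, .LBB0_2".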
5628
5629unsigned llvm::ConstantMaterializationCost(unsigned Val,
5630 const ARMSubtarget *Subtarget,
5631 bool ForCodesize) {
5632 if (Subtarget->isThumb()) {
5633 if (Val <= 255) // MOV
5634 return ForCodesize ? 2 : 1;
5635 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5636 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5637 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5638 return ForCodesize ? 4 : 1;
5639 if (Val <= 510) // MOV + ADDi8
5640 return ForCodesize ? 4 : 2;
5641 if (~Val <= 255) // MOV + MVN
5642 return ForCodesize ? 4 : 2;
5643 if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
5644 return ForCodesize ? 4 : 2;
5645 } else {
5646 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5647 return ForCodesize ? 4 : 1;
5648 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5649 return ForCodesize ? 4 : 1;
5650 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5651 return ForCodesize ? 4 : 1;
5652 if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
5653 return ForCodesize ? 8 : 2;
5654 if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
5655 return ForCodesize ? 8 : 2;
5656 }
5657 if (Subtarget->useMovt()) // MOVW + MOVT
5658 return ForCodesize ? 8 : 2;
5659 return ForCodesize ? 8 : 3; // Literal pool load
5660}
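// Worked examples (editorial), assuming ARM mode with MOVW/MOVT available:
// 0xFF00FF00 is not an SO-immediate but splits into the SO-immediates
// 0xFF000000 + 0x0000FF00, so isSOImmTwoPartVal() gives 8 bytes / 2 instrs;
// 0x12345678 admits no such split and falls through to MOVW + MOVT, also
// 8 bytes / 2 instrs; without MOVT it would be a literal pool load
// (8 bytes, cost 3).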
5661
5662bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5663 const ARMSubtarget *Subtarget,
5664 bool ForCodesize) {
5665 // Check with ForCodesize
5666 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5667 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5668 if (Cost1 < Cost2)
5669 return true;
5670 if (Cost1 > Cost2)
5671 return false;
5672
5673 // If they are equal, try with !ForCodesize
5674 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5675 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5676}
5677
5678/// Constants defining how certain sequences should be outlined.
5679/// This encompasses how an outlined function should be called, and what kind of
5680/// frame should be emitted for that outlined function.
5681///
5682/// \p MachineOutlinerTailCall implies that the function is being created from
5683/// a sequence of instructions ending in a return.
5684///
5685/// That is,
5686///
5687/// I1 OUTLINED_FUNCTION:
5688/// I2 --> B OUTLINED_FUNCTION I1
5689/// BX LR I2
5690/// BX LR
5691///
5692/// +-------------------------+--------+-----+
5693/// | | Thumb2 | ARM |
5694/// +-------------------------+--------+-----+
5695/// | Call overhead in Bytes | 4 | 4 |
5696/// | Frame overhead in Bytes | 0 | 0 |
5697/// | Stack fixup required | No | No |
5698/// +-------------------------+--------+-----+
5699///
5700/// \p MachineOutlinerThunk implies that the function is being created from
5701/// a sequence of instructions ending in a call. The outlined function is
5702/// called with a BL instruction, and the outlined function tail-calls the
5703/// original call destination.
5704///
5705/// That is,
5706///
5707/// I1 OUTLINED_FUNCTION:
5708/// I2 --> BL OUTLINED_FUNCTION I1
5709/// BL f I2
5710/// B f
5711///
5712/// +-------------------------+--------+-----+
5713/// | | Thumb2 | ARM |
5714/// +-------------------------+--------+-----+
5715/// | Call overhead in Bytes | 4 | 4 |
5716/// | Frame overhead in Bytes | 0 | 0 |
5717/// | Stack fixup required | No | No |
5718/// +-------------------------+--------+-----+
5719///
5720/// \p MachineOutlinerNoLRSave implies that the function should be called using
5721/// a BL instruction, but doesn't require LR to be saved and restored. This
5722/// happens when LR is known to be dead.
5723///
5724/// That is,
5725///
5726/// I1 OUTLINED_FUNCTION:
5727/// I2 --> BL OUTLINED_FUNCTION I1
5728/// I3 I2
5729/// I3
5730/// BX LR
5731///
5732/// +-------------------------+--------+-----+
5733/// | | Thumb2 | ARM |
5734/// +-------------------------+--------+-----+
5735/// | Call overhead in Bytes | 4 | 4 |
5736/// | Frame overhead in Bytes | 2 | 4 |
5737/// | Stack fixup required | No | No |
5738/// +-------------------------+--------+-----+
5739///
5740/// \p MachineOutlinerRegSave implies that the function should be called with a
5741/// save and restore of LR to an available register. This allows us to avoid
5742/// stack fixups. Note that this outlining variant is compatible with the
5743/// NoLRSave case.
5744///
5745/// That is,
5746///
5747/// I1 Save LR OUTLINED_FUNCTION:
5748/// I2 --> BL OUTLINED_FUNCTION I1
5749/// I3 Restore LR I2
5750/// I3
5751/// BX LR
5752///
5753/// +-------------------------+--------+-----+
5754/// | | Thumb2 | ARM |
5755/// +-------------------------+--------+-----+
5756/// | Call overhead in Bytes | 8 | 12 |
5757/// | Frame overhead in Bytes | 2 | 4 |
5758/// | Stack fixup required | No | No |
5759/// +-------------------------+--------+-----+
5760///
5761/// \p MachineOutlinerDefault implies that the function should be called with
5762/// a save and restore of LR to the stack.
5763///
5764/// That is,
5765///
5766/// I1 Save LR OUTLINED_FUNCTION:
5767/// I2 --> BL OUTLINED_FUNCTION I1
5768/// I3 Restore LR I2
5769/// I3
5770/// BX LR
5771///
5772/// +-------------------------+--------+-----+
5773/// | | Thumb2 | ARM |
5774/// +-------------------------+--------+-----+
5775/// | Call overhead in Bytes | 8 | 12 |
5776/// | Frame overhead in Bytes | 2 | 4 |
5777/// | Stack fixup required | Yes | Yes |
5778/// +-------------------------+--------+-----+
5779
5780enum MachineOutlinerClass {
5781 MachineOutlinerTailCall, /// Only emit a branch.
5782 MachineOutlinerThunk, /// Emit a call and tail-call.
5783 MachineOutlinerNoLRSave, /// Emit a call and return.
5784 MachineOutlinerRegSave, /// Same as above, but saves LR to a register.
5785 MachineOutlinerDefault /// Emit a save, restore, call, and return.
5786};
5787
5788enum MachineOutlinerMBBFlags {
5789 LRUnavailableSomewhere = 0x2,
5790 HasCalls = 0x4,
5791 UnsafeRegsDead = 0x8
5792};
5793
5794struct OutlinerCosts {
5795 int CallTailCall;
5796 int FrameTailCall;
5797 int CallThunk;
5798 int FrameThunk;
5799 int CallNoLRSave;
5800 int FrameNoLRSave;
5801 int CallRegSave;
5802 int FrameRegSave;
5803 int CallDefault;
5804 int FrameDefault;
5805 int SaveRestoreLROnStack;
5806
5807 OutlinerCosts(const ARMSubtarget &target)
5808 : CallTailCall(target.isThumb() ? 4 : 4),
5809 FrameTailCall(target.isThumb() ? 0 : 0),
5810 CallThunk(target.isThumb() ? 4 : 4),
5811 FrameThunk(target.isThumb() ? 0 : 0),
5812 CallNoLRSave(target.isThumb() ? 4 : 4),
5813 FrameNoLRSave(target.isThumb() ? 2 : 4),
5814 CallRegSave(target.isThumb() ? 8 : 12),
5815 FrameRegSave(target.isThumb() ? 2 : 4),
5816 CallDefault(target.isThumb() ? 8 : 12),
5817 FrameDefault(target.isThumb() ? 2 : 4),
5818 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5819};
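// Editorial note: these constants mirror the cost tables in the comment block
// above, e.g. a Thumb2 MachineOutlinerDefault call costs 8 bytes
// (save + BL + restore) and its frame adds 2 bytes for the return.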
5820
5821Register
5822ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5823 MachineFunction *MF = C.getMF();
5824 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
5825 const ARMBaseRegisterInfo *ARI =
5826 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5827
5828 BitVector regsReserved = ARI->getReservedRegs(*MF);
5829 // Check if there is an available register across the sequence that we can
5830 // use.
5831 for (Register Reg : ARM::rGPRRegClass) {
5832 if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
5833 Reg != ARM::LR && // LR is not reserved, but don't use it.
5834 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5835 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5836 C.isAvailableInsideSeq(Reg, TRI))
5837 return Reg;
5838 }
5839 return Register();
5840}
5841
5842// Compute liveness of LR at the point after the interval [I, E), which
5843// denotes a *backward* iteration through instructions. Used only for return
5844// basic blocks, which do not end with a tail call.
5845static bool isLRAvailable(const TargetRegisterInfo &TRI,
5846 MachineBasicBlock::reverse_iterator I,
5847 MachineBasicBlock::reverse_iterator E) {
5848 // At the end of the function LR is dead.
5849 bool Live = false;
5850 for (; I != E; ++I) {
5851 const MachineInstr &MI = *I;
5852
5853 // Check defs of LR.
5854 if (MI.modifiesRegister(ARM::LR, &TRI))
5855 Live = false;
5856
5857 // Check uses of LR.
5858 unsigned Opcode = MI.getOpcode();
5859 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5860 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5861 Opcode == ARM::tBXNS_RET) {
5862 // These instructions use LR, but it's not an (explicit or implicit)
5863 // operand.
5864 Live = true;
5865 continue;
5866 }
5867 if (MI.readsRegister(ARM::LR, &TRI))
5868 Live = true;
5869 }
5870 return !Live;
5871}
5872
5873std::optional<outliner::OutlinedFunction>
5875 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5876 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5877
5878 unsigned SequenceSize = 0;
5879 for (auto &MI : FirstCand)
5880 SequenceSize += getInstSizeInBytes(MI);
5881
5882 // Properties about candidate MBBs that hold for all of them.
5883 unsigned FlagsSetInAll = 0xF;
5884
5885 // Compute liveness information for each candidate, and set FlagsSetInAll.
5886 const TargetRegisterInfo &TRI = getRegisterInfo();
5887 for (outliner::Candidate &C : RepeatedSequenceLocs)
5888 FlagsSetInAll &= C.Flags;
5889
5890 // According to the ARM Procedure Call Standard, the following are
5891 // undefined on entry/exit from a function call:
5892 //
5893 // * Register R12(IP),
5894 // * Condition codes (and thus the CPSR register)
5895 //
5896 // Since we control the instructions which are part of the outlined regions
5897 // we don't need to be fully compliant with the AAPCS, but we have to
5898 // guarantee that if a veneer is inserted at link time the code is still
5899 // correct. Because of this, we can't outline any sequence of instructions
5900 // where one of these registers is live into/across it. Thus, we need to
5901 // delete those candidates.
5902 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5903 // If the unsafe registers in this block are all dead, then we don't need
5904 // to compute liveness here.
5905 if (C.Flags & UnsafeRegsDead)
5906 return false;
5907 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5908 };
5909
5910 // Are there any candidates where those registers are live?
5911 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5912 // Erase every candidate that violates the restrictions above. (It could be
5913 // true that we have viable candidates, so it's not worth bailing out in
5914 // the case that, say, 1 out of 20 candidates violate the restrictions.)
5915 llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
5916
5917 // If the sequence doesn't have enough candidates left, then we're done.
5918 if (RepeatedSequenceLocs.size() < 2)
5919 return std::nullopt;
5920 }
5921
5922 // We expect the majority of the outlining candidates to be in consensus with
5923 // regard to return address sign and authentication, and branch target
5924 // enforcement; in other words, partitioning according to all four
5925 // possible combinations of PAC-RET and BTI is going to yield one big subset
5926 // and three small (likely empty) subsets. That allows us to cull incompatible
5927 // candidates separately for PAC-RET and BTI.
5928
5929 // Partition the candidates in two sets: one with BTI enabled and one with BTI
5930 // disabled. Remove the candidates from the smaller set. If both sets have
5931 // the same size, prefer the non-BTI ones for outlining, since they have
5932 // less overhead.
5933 auto NoBTI =
5934 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5935 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5936 return AFI.branchTargetEnforcement();
5937 });
5938 if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
5939 std::distance(NoBTI, RepeatedSequenceLocs.end()))
5940 RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
5941 else
5942 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
5943
5944 if (RepeatedSequenceLocs.size() < 2)
5945 return std::nullopt;
5946
5947 // Likewise, partition the candidates according to PAC-RET enablement.
5948 auto NoPAC =
5949 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5950 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5951 // If the function happens to not spill the LR, do not disqualify it
5952 // from the outlining.
5953 return AFI.shouldSignReturnAddress(true);
5954 });
5955 if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
5956 std::distance(NoPAC, RepeatedSequenceLocs.end()))
5957 RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
5958 else
5959 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
5960
5961 if (RepeatedSequenceLocs.size() < 2)
5962 return std::nullopt;
5963
5964 // At this point, we have only "safe" candidates to outline. Figure out
5965 // frame + call instruction information.
5966
5967 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5968
5969 // Helper lambda which sets call information for every candidate.
5970 auto SetCandidateCallInfo =
5971 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5972 for (outliner::Candidate &C : RepeatedSequenceLocs)
5973 C.setCallInfo(CallID, NumBytesForCall);
5974 };
5975
5976 OutlinerCosts Costs(Subtarget);
5977
5978 const auto &SomeMFI =
5979 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
5980 // Adjust costs to account for the BTI instructions.
5981 if (SomeMFI.branchTargetEnforcement()) {
5982 Costs.FrameDefault += 4;
5983 Costs.FrameNoLRSave += 4;
5984 Costs.FrameRegSave += 4;
5985 Costs.FrameTailCall += 4;
5986 Costs.FrameThunk += 4;
5987 }
5988
5989 // Adjust costs to account for sign and authentication instructions.
5990 if (SomeMFI.shouldSignReturnAddress(true)) {
5991 Costs.CallDefault += 8; // +PAC instr, +AUT instr
5992 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
5993 }
5994
5995 unsigned FrameID = MachineOutlinerDefault;
5996 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
5997
5998 // If the last instruction in any candidate is a terminator, then we should
5999 // tail call all of the candidates.
6000 if (RepeatedSequenceLocs[0].back().isTerminator()) {
6001 FrameID = MachineOutlinerTailCall;
6002 NumBytesToCreateFrame = Costs.FrameTailCall;
6003 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
6004 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
6005 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
6006 LastInstrOpcode == ARM::tBLXr ||
6007 LastInstrOpcode == ARM::tBLXr_noip ||
6008 LastInstrOpcode == ARM::tBLXi) {
6009 FrameID = MachineOutlinerThunk;
6010 NumBytesToCreateFrame = Costs.FrameThunk;
6011 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
6012 } else {
6013 // We need to decide how to emit calls + frames. We can always emit the same
6014 // frame if we don't need to save to the stack. If we have to save to the
6015 // stack, then we need a different frame.
6016 unsigned NumBytesNoStackCalls = 0;
6017 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
6018
6019 for (outliner::Candidate &C : RepeatedSequenceLocs) {
6020 // LR liveness is overestimated in return blocks, unless they end with a
6021 // tail call.
6022 const auto Last = C.getMBB()->rbegin();
6023 const bool LRIsAvailable =
6024 C.getMBB()->isReturnBlock() && !Last->isCall()
6025 ? isLRAvailable(TRI, Last,
6026 (MachineBasicBlock::reverse_iterator)C.begin())
6027 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
6028 if (LRIsAvailable) {
6029 FrameID = MachineOutlinerNoLRSave;
6030 NumBytesNoStackCalls += Costs.CallNoLRSave;
6031 C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
6032 CandidatesWithoutStackFixups.push_back(C);
6033 }
6034
6035 // Is an unused register available? If so, we won't modify the stack, so
6036 // we can outline with the same frame type as those that don't save LR.
6037 else if (findRegisterToSaveLRTo(C)) {
6038 FrameID = MachineOutlinerRegSave;
6039 NumBytesNoStackCalls += Costs.CallRegSave;
6040 C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
6041 CandidatesWithoutStackFixups.push_back(C);
6042 }
6043
6044 // Is SP used in the sequence at all? If not, we don't have to modify
6045 // the stack, so we are guaranteed to get the same frame.
6046 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
6047 NumBytesNoStackCalls += Costs.CallDefault;
6048 C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6049 CandidatesWithoutStackFixups.push_back(C);
6050 }
6051
6052 // If we outline this, we need to modify the stack. Pretend we don't
6053 // outline this by saving all of its bytes.
6054 else
6055 NumBytesNoStackCalls += SequenceSize;
6056 }
6057
6058 // If there are no places where we have to save LR, then note that we don't
6059 // have to update the stack. Otherwise, give every candidate the default
6060 // call type.
6061 if (NumBytesNoStackCalls <=
6062 RepeatedSequenceLocs.size() * Costs.CallDefault) {
6063 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
6064 FrameID = MachineOutlinerNoLRSave;
6065 } else
6066 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6067 }
6068
6069 // Does every candidate's MBB contain a call? If so, then we might have a
6070 // call in the range.
6071 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6072 // check if the range contains a call. These require a save + restore of
6073 // the link register.
6074 if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
6075 [](const MachineInstr &MI) { return MI.isCall(); }))
6076 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6077
6078 // Handle the last instruction separately. If it is tail call, then the
6079 // last instruction is a call, we don't want to save + restore in this
6080 // case. However, it could be possible that the last instruction is a
6081 // call without it being valid to tail call this sequence. We should
6082 // consider this as well.
6083 else if (FrameID != MachineOutlinerThunk &&
6084 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
6085 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6086 }
6087
6088 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
6089 NumBytesToCreateFrame, FrameID);
6090}
6091
6092bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
6093 int64_t Fixup,
6094 bool Updt) const {
6095 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP);
6096 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
6097 if (SPIdx < 0)
6098 // No SP operand
6099 return true;
6100 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
6101 // If SP is not the base register we can't do much
6102 return false;
6103
6104 // Stack might be involved but addressing mode doesn't handle any offset.
6105 // Note: AddrModeT1_[1|2|4] don't operate on SP
6106 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
6107 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
6108 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
6109 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as based register
6110 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
6111 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
6112 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
6113 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
6114 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
6115 AddrMode == ARMII::AddrModeNone ||
6116 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
6117 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
6118 return false;
6119
6120 unsigned NumOps = MI->getDesc().getNumOperands();
6121 unsigned ImmIdx = NumOps - 3;
6122
6123 const MachineOperand &Offset = MI->getOperand(ImmIdx);
6124 assert(Offset.isImm() && "Is not an immediate");
6125 int64_t OffVal = Offset.getImm();
6126
6127 if (OffVal < 0)
6128 // Don't override data if they are below SP.
6129 return false;
6130
6131 unsigned NumBits = 0;
6132 unsigned Scale = 1;
6133
6134 switch (AddrMode) {
6135 case ARMII::AddrMode3:
6136 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
6137 return false;
6138 OffVal = ARM_AM::getAM3Offset(OffVal);
6139 NumBits = 8;
6140 break;
6141 case ARMII::AddrMode5:
6142 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
6143 return false;
6144 OffVal = ARM_AM::getAM5Offset(OffVal);
6145 NumBits = 8;
6146 Scale = 4;
6147 break;
6148 case ARMII::AddrMode5FP16:
6149 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
6150 return false;
6151 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
6152 NumBits = 8;
6153 Scale = 2;
6154 break;
6155 case ARMII::AddrModeT2_i8pos:
6156 NumBits = 8;
6157 break;
6158 case ARMII::AddrModeT2_i8s4:
6159 // FIXME: Values are already scaled in this addressing mode.
6160 assert((Fixup & 3) == 0 && "Can't encode this offset!");
6161 NumBits = 10;
6162 break;
6163 case ARMII::AddrModeT2_ldrex:
6164 NumBits = 8;
6165 Scale = 4;
6166 break;
6167 case ARMII::AddrModeT2_i12:
6168 case ARMII::AddrMode_i12:
6169 NumBits = 12;
6170 break;
6171 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6172 NumBits = 8;
6173 Scale = 4;
6174 break;
6175 default:
6176 llvm_unreachable("Unsupported addressing mode!");
6177 }
6178 // Make sure the offset is encodable for instructions that scale the
6179 // immediate.
6180 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6181 "Can't encode this offset!");
6182 OffVal += Fixup / Scale;
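 // Worked example (editorial): for AddrMode5 (NumBits = 8, Scale = 4), an
 // encoded offset of 4 (16 bytes) plus an 8-byte stack fixup becomes
 // OffVal = 4 + 8/4 = 6, i.e. 24 bytes, which still fits in 8 bits.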
6183
6184 unsigned Mask = (1 << NumBits) - 1;
6185
6186 if (OffVal <= Mask) {
6187 if (Updt)
6188 MI->getOperand(ImmIdx).setImm(OffVal);
6189 return true;
6190 }
6191
6192 return false;
6193}
6194
6195void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
6196 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6197 outliner::Candidate &C = Candidates.front();
6198 // branch-target-enforcement is guaranteed to be consistent between all
6199 // candidates, so we only need to look at one.
6200 const Function &CFn = C.getMF()->getFunction();
6201 if (CFn.hasFnAttribute("branch-target-enforcement"))
6202 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
6203
6204 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6205}
6206
6207bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
6208 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6209 const Function &F = MF.getFunction();
6210
6211 // Can F be deduplicated by the linker? If it can, don't outline from it.
6212 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6213 return false;
6214
6215 // Don't outline from functions with section markings; the program could
6216 // expect that all the code is in the named section.
6217 // FIXME: Allow outlining from multiple functions with the same section
6218 // marking.
6219 if (F.hasSection())
6220 return false;
6221
6222 // FIXME: Thumb1 outlining is not handled
6223 if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
6224 return false;
6225
6226 // It's safe to outline from MF.
6227 return true;
6228}
6229
6230bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
6231 unsigned &Flags) const {
6232 // Check if LR is available through all of the MBB. If it's not, then set
6233 // a flag.
6234 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6235 "Suitable Machine Function for outlining must track liveness");
6236
6237 LiveRegUnits LRU(getRegisterInfo());
6238
6239 for (MachineInstr &MI : llvm::reverse(MBB))
6240 LRU.accumulate(MI);
6241
6242 // Check if each of the unsafe registers are available...
6243 bool R12AvailableInBlock = LRU.available(ARM::R12);
6244 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6245
6246 // If all of these are dead (and not live out), we know we don't have to check
6247 // them later.
6248 if (R12AvailableInBlock && CPSRAvailableInBlock)
6249 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6250
6251 // Now, add the live outs to the set.
6252 LRU.addLiveOuts(MBB);
6253
6254 // If any of these registers is available in the MBB, but also a live out of
6255 // the block, then we know outlining is unsafe.
6256 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6257 return false;
6258 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6259 return false;
6260
6261 // Check if there's a call inside this MachineBasicBlock. If there is, then
6262 // set a flag.
6263 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6264 Flags |= MachineOutlinerMBBFlags::HasCalls;
6265
6266 // LR liveness is overestimated in return blocks.
6267
6268 bool LRIsAvailable =
6269 MBB.isReturnBlock() && !MBB.back().isCall()
6270 ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
6271 : LRU.available(ARM::LR);
6272 if (!LRIsAvailable)
6273 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6274
6275 return true;
6276}
6277
6278outliner::InstrType
6279ARMBaseInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
6280 unsigned Flags) const {
6281 MachineInstr &MI = *MIT;
6282 const TargetRegisterInfo *TRI = &getRegisterInfo();
6283
6284 // PIC instructions contain labels, outlining them would break offset
6285 // computing.
6286 unsigned Opc = MI.getOpcode();
6287 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6288 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6289 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6290 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6291 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6292 Opc == ARM::t2MOV_ga_pcrel)
6293 return outliner::InstrType::Illegal;
6294
6295 // Be conservative with ARMv8.1 MVE instructions.
6296 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6297 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6298 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6299 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6300 Opc == ARM::t2LoopEndDec)
6301 return outliner::InstrType::Illegal;
6302
6303 const MCInstrDesc &MCID = MI.getDesc();
6304 uint64_t MIFlags = MCID.TSFlags;
6305 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
6306 return outliner::InstrType::Illegal;
6307
6308 // Is this a terminator for a basic block?
6309 if (MI.isTerminator())
6310 // TargetInstrInfo::getOutliningType has already filtered out anything
6311 // that would break this, so we can allow it here.
6312 return outliner::InstrType::Legal;
6313
6314 // Don't outline if link register or program counter value are used.
6315 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
6316 return outliner::InstrType::Illegal;
6317
6318 if (MI.isCall()) {
6319 // Get the function associated with the call. Look at each operand and find
6320 // the one that represents the callee and get its name.
6321 const Function *Callee = nullptr;
6322 for (const MachineOperand &MOP : MI.operands()) {
6323 if (MOP.isGlobal()) {
6324 Callee = dyn_cast<Function>(MOP.getGlobal());
6325 break;
6326 }
6327 }
6328
6329 // Don't outline calls to "mcount"-like functions; in particular, Linux
6330 // kernel function tracing relies on it.
6331 if (Callee &&
6332 (Callee->getName() == "\01__gnu_mcount_nc" ||
6333 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6334 return outliner::InstrType::Illegal;
6335
6336 // If we don't know anything about the callee, assume it depends on the
6337 // stack layout of the caller. In that case, it's only legal to outline
6338 // as a tail-call. Explicitly list the call instructions we know about so
6339 // we don't get unexpected results with call pseudo-instructions.
6340 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6341 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6342 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6343 Opc == ARM::tBLXi)
6344 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6345
6346 if (!Callee)
6347 return UnknownCallOutlineType;
6348
6349 // We have a function we have information about. Check if it's something we
6350 // can safely outline.
6351 MachineFunction *MF = MI.getParent()->getParent();
6352 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
6353
6354 // We don't know what's going on with the callee at all. Don't touch it.
6355 if (!CalleeMF)
6356 return UnknownCallOutlineType;
6357
6358 // Check if we know anything about the callee saves on the function. If we
6359 // don't, then don't touch it, since that implies that we haven't computed
6360 // anything about its stack frame yet.
6361 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6362 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6363 MFI.getNumObjects() > 0)
6364 return UnknownCallOutlineType;
6365
6366 // At this point, we can say that CalleeMF ought to not pass anything on the
6367 // stack. Therefore, we can outline it.
6368 return outliner::InstrType::Legal;
6369 }
6370
6371 // Since calls are handled, don't touch LR or PC
6372 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
6373 return outliner::InstrType::Illegal;
6374
6375 // Does this use the stack?
6376 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6377 // True if there is no chance that any outlined candidate from this range
6378 // could require stack fixups. That is, both
6379 // * LR is available in the range (No save/restore around call)
6380 // * The range doesn't include calls (No save/restore in outlined frame)
6381 // are true.
6382 // These conditions also ensure correctness of the return address
6383 // authentication - we insert sign and authentication instructions only if
6384 // we save/restore LR on stack, but then this condition ensures that the
6385 // outlined range does not modify the SP, therefore the SP value used for
6386 // signing is the same as the one used for authentication.
6387 // FIXME: This is very restrictive; the flags check the whole block,
6388 // not just the bit we will try to outline.
6389 bool MightNeedStackFixUp =
6390 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
6391 MachineOutlinerMBBFlags::HasCalls));
6392
6393 if (!MightNeedStackFixUp)
6394 return outliner::InstrType::Legal;
6395
6396 // Any modification of SP will break our code to save/restore LR.
6397 // FIXME: We could handle some instructions which add a constant offset to
6398 // SP, with a bit more work.
6399 if (MI.modifiesRegister(ARM::SP, TRI))
6400 return outliner::InstrType::Illegal;
6401
6402 // At this point, we have a stack instruction that we might need to fix
6403 // up. We'll handle it if it's a load or store.
6404 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6405 false))
6406 return outliner::InstrType::Legal;
6407
6408 // We can't fix it up, so don't outline it.
6409 return outliner::InstrType::Illegal;
6410 }
6411
6412 // Be conservative with IT blocks.
6413 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6414 MI.modifiesRegister(ARM::ITSTATE, TRI))
6415 return outliner::InstrType::Illegal;
6416
6417 // Don't outline CFI instructions.
6418 if (MI.isCFIInstruction())
6419 return outliner::InstrType::Illegal;
6420
6421 return outliner::InstrType::Legal;
6422}
6423
6424void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6425 for (MachineInstr &MI : MBB) {
6426 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6427 }
6428}
6429
6430void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6431 MachineBasicBlock::iterator It, bool CFI,
6432 bool Auth) const {
6433 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6434 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6435 assert(Align >= 8 && Align <= 256);
6436 if (Auth) {
6437 assert(Subtarget.isThumb2());
6438 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6439 // sequence.
6440 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6441 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6442 .addReg(ARM::R12, RegState::Kill)
6443 .addReg(ARM::LR, RegState::Kill)
6444 .addReg(ARM::SP)
6445 .addImm(-Align)
6446 .add(predOps(ARMCC::AL))
6447 .setMIFlags(MIFlags);
6448 } else {
6449 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6450 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6451 .addReg(ARM::LR, RegState::Kill)
6452 .addReg(ARM::SP)
6453 .addImm(-Align)
6454 .add(predOps(ARMCC::AL))
6455 .setMIFlags(MIFlags);
6456 }
6457
6458 if (!CFI)
6459 return;
6460
6461 MachineFunction &MF = *MBB.getParent();
6462
6463 // Add a CFI, saying CFA is offset by Align bytes from SP.
6464 int64_t StackPosEntry =
6465 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align));
6466 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6467 .addCFIIndex(StackPosEntry)
6468 .setMIFlags(MachineInstr::FrameSetup);
6469
6470 // Add a CFI saying that the LR that we want to find is now higher than
6471 // before.
6472 int LROffset = Auth ? Align - 4 : Align;
6473 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6474 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6475 int64_t LRPosEntry = MF.addFrameInst(
6476 MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset));
6477 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6478 .addCFIIndex(LRPosEntry)
6479 .setMIFlags(MachineInstr::FrameSetup);
6480 if (Auth) {
6481 // Add a CFI for the location of the return address PAC.
6482 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6483 int64_t RACPosEntry = MF.addFrameInst(
6484 MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align));
6485 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6486 .addCFIIndex(RACPosEntry)
6487 .setMIFlags(MachineInstr::FrameSetup);
6488 }
6489}
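// Editorial sketch of the Auth variant's Thumb2 output (assuming Align == 8):
//   pac                       ; compute the PAC for LR into r12
//   strd r12, lr, [sp, #-8]!  ; push PAC and LR together
// followed by CFI records for the CFA offset, LR, and RA_AUTH_CODE.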
6490
6491void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
6492 MachineBasicBlock::iterator It,
6493 Register Reg) const {
6494 MachineFunction &MF = *MBB.getParent();
6495 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6496 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6497 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
6498
6499 int64_t LRPosEntry = MF.addFrameInst(
6500 MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
6501 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6502 .addCFIIndex(LRPosEntry)
6503 .setMIFlags(MachineInstr::FrameSetup);
6504}
6505
6506void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6507 MachineBasicBlock::iterator It,
6508 bool CFI, bool Auth) const {
6509 int Align = Subtarget.getStackAlignment().value();
6510 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6511 if (Auth) {
6512 assert(Subtarget.isThumb2());
6513 // Restore return address PAC and LR.
6514 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6515 .addReg(ARM::R12, RegState::Define)
6516 .addReg(ARM::LR, RegState::Define)
6517 .addReg(ARM::SP, RegState::Define)
6518 .addReg(ARM::SP)
6519 .addImm(Align)
6520 .add(predOps(ARMCC::AL))
6521 .setMIFlags(MIFlags);
6522 // LR authentication is after the CFI instructions, below.
6523 } else {
6524 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6525 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6526 .addReg(ARM::SP, RegState::Define)
6527 .addReg(ARM::SP);
6528 if (!Subtarget.isThumb())
6529 MIB.addReg(0);
6530 MIB.addImm(Subtarget.getStackAlignment().value())
6531 .add(predOps(ARMCC::AL))
6532 .setMIFlags(MIFlags);
6533 }
6534
6535 if (CFI) {
6536 // Now stack has moved back up...
6537 MachineFunction &MF = *MBB.getParent();
6538 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6539 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6540 int64_t StackPosEntry =
6541 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
6542 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6543 .addCFIIndex(StackPosEntry)
6544 .setMIFlags(MachineInstr::FrameDestroy);
6545
6546 // ... and we have restored LR.
6547 int64_t LRPosEntry =
6548 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6549 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6550 .addCFIIndex(LRPosEntry)
6551 .setMIFlags(MachineInstr::FrameDestroy);
6552
6553 if (Auth) {
6554 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6555 int64_t Entry =
6556 MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, DwarfRAC));
6557 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6558 .addCFIIndex(Entry)
6559 .setMIFlags(MachineInstr::FrameDestroy);
6560 }
6561 }
6562
6563 if (Auth)
6564 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6565}
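// Editorial sketch of the Auth variant's Thumb2 output (Align == 8):
//   ldrd r12, lr, [sp], #8    ; pop the PAC and LR together
//   aut                       ; authenticate LR against the PAC in r12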
6566
6567void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
6568 MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
6569 MachineFunction &MF = *MBB.getParent();
6570 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6571 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6572
6573 int64_t LRPosEntry =
6574 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6575 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6576 .addCFIIndex(LRPosEntry)
6577 .setMIFlags(MachineInstr::FrameDestroy);
6578}
6579
6580void ARMBaseInstrInfo::buildOutlinedFrame(
6581 MachineBasicBlock &MBB, MachineFunction &MF,
6582 const outliner::OutlinedFunction &OF) const {
6583 // For thunk outlining, rewrite the last instruction from a call to a
6584 // tail-call.
6585 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6586 MachineInstr *Call = &*--MBB.instr_end();
6587 bool isThumb = Subtarget.isThumb();
6588 unsigned FuncOp = isThumb ? 2 : 0;
6589 unsigned Opc = Call->getOperand(FuncOp).isReg()
6590 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6591 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6592 : ARM::tTAILJMPdND
6593 : ARM::TAILJMPd;
6594 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6595 .add(Call->getOperand(FuncOp));
6596 if (isThumb && !Call->getOperand(FuncOp).isReg())
6597 MIB.add(predOps(ARMCC::AL));
6598 Call->eraseFromParent();
6599 }
6600
6601 // Is there a call in the outlined range?
6602 auto IsNonTailCall = [](MachineInstr &MI) {
6603 return MI.isCall() && !MI.isReturn();
6604 };
6605 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6606 MachineBasicBlock::iterator It = MBB.begin();
6607 MachineBasicBlock::iterator Et = MBB.end();
6608
6609 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6610 OF.FrameConstructionID == MachineOutlinerThunk)
6611 Et = std::prev(MBB.end());
6612
6613 // We have to save and restore LR, we need to add it to the liveins if it
6614 // is not already part of the set. This is sufficient, since outlined
6615 // functions only have one block.
6616 if (!MBB.isLiveIn(ARM::LR))
6617 MBB.addLiveIn(ARM::LR);
6618
6619 // Insert a save before the outlined region
6620 bool Auth = OF.Candidates.front()
6621 .getMF()
6622 ->getInfo<ARMFunctionInfo>()
6623 ->shouldSignReturnAddress(true);
6624 saveLROnStack(MBB, It, true, Auth);
6625
6626 // Fix up the instructions in the range, since we're going to modify the
6627 // stack.
6628 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6629 "Can only fix up stack references once");
6630 fixupPostOutline(MBB);
6631
6632 // Insert a restore before the terminator for the function. Restore LR.
6633 restoreLRFromStack(MBB, Et, true, Auth);
6634 }
6635
6636 // If this is a tail call outlined function, then there's already a return.
6637 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6638 OF.FrameConstructionID == MachineOutlinerThunk)
6639 return;
6640
6641 // Here we have to insert the return ourselves. Get the correct opcode from
6642 // current feature set.
6643 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
6644 .add(predOps(ARMCC::AL));
6645
6646 // Did we have to modify the stack by saving the link register?
6647 if (OF.FrameConstructionID != MachineOutlinerDefault &&
6648 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6649 return;
6650
6651 // We modified the stack.
6652 // Walk over the basic block and fix up all the stack accesses.
6653 fixupPostOutline(MBB);
6654}
6655
6656MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
6657 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6658 MachineFunction &MF, outliner::Candidate &C) const {
6659 MachineInstrBuilder MIB;
6660 MachineBasicBlock::iterator CallPt;
6661 unsigned Opc;
6662 bool isThumb = Subtarget.isThumb();
6663
6664 // Are we tail calling?
6665 if (C.CallConstructionID == MachineOutlinerTailCall) {
6666 // If yes, then we can just branch to the label.
6667 Opc = isThumb
6668 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6669 : ARM::TAILJMPd;
6670 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6671 .addGlobalAddress(M.getNamedValue(MF.getName()));
6672 if (isThumb)
6673 MIB.add(predOps(ARMCC::AL));
6674 It = MBB.insert(It, MIB);
6675 return It;
6676 }
6677
6678 // Create the call instruction.
6679 Opc = isThumb ? ARM::tBL : ARM::BL;
6680 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6681 if (isThumb)
6682 CallMIB.add(predOps(ARMCC::AL));
6683 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6684
6685 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6686 C.CallConstructionID == MachineOutlinerThunk) {
6687 // No, so just insert the call.
6688 It = MBB.insert(It, CallMIB);
6689 return It;
6690 }
6691
6692 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6693 // Can we save to a register?
6694 if (C.CallConstructionID == MachineOutlinerRegSave) {
6695 Register Reg = findRegisterToSaveLRTo(C);
6696 assert(Reg != 0 && "No callee-saved register available?");
6697
6698 // Save and restore LR from that register.
6699 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6700 if (!AFI.isLRSpilled())
6701 emitCFIForLRSaveToReg(MBB, It, Reg);
6702 CallPt = MBB.insert(It, CallMIB);
6703 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6704 if (!AFI.isLRSpilled())
6705 emitCFIForLRRestoreFromReg(MBB, It);
6706 It--;
6707 return CallPt;
6708 }
6709 // We have the default case. Save and restore from SP.
6710 if (!MBB.isLiveIn(ARM::LR))
6711 MBB.addLiveIn(ARM::LR);
6712 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6713 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6714 CallPt = MBB.insert(It, CallMIB);
6715 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6716 It--;
6717 return CallPt;
6718}
6719
6720bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
6721 MachineFunction &MF) const {
6722 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6723}
6724
6725bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
6726 const MachineInstr &MI) const {
6727 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6728 // the tail predication conversion. This means that the element count
6729 // register has to be live for longer, but that has to be better than
6730 // spill/restore and VPT predication.
6731 return (isVCTP(&MI) && !isPredicated(MI)) ||
6732 TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
6733}
6734
6735unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
6736 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6737 : ARM::BLX;
6738}
6739
6740unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
6741 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6742 : ARM::tBLXr;
6743}
6744
6745unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
6746 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6747 : ARM::BLX_pred;
6748}
6749
6750namespace {
6751class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6752 MachineInstr *EndLoop, *LoopCount;
6753 MachineFunction *MF;
6754 const TargetInstrInfo *TII;
6755
6756 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6757 // [LAST_IS_USE] : last reference to register in schedule is a use
6758 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6759 static int constexpr MAX_STAGES = 30;
6760 static int constexpr LAST_IS_USE = MAX_STAGES;
6761 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6762 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6763 typedef std::map<unsigned, IterNeed> IterNeeds;
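 // Editorial example: a value defined in stage 0 whose last cross-iteration
 // use is in stage 2 gets bits 0 and 1 set, i.e. two extra iterations' worth
 // of that register are live through the kernel.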
6764
6765 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6766 const IterNeeds &CIN);
6767 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6768
6769 // Meaning of the fields for each loop type:
6770 // t2Bcc:
6771 // EndLoop = branch at end of original BB that will become a kernel
6772 // LoopCount = CC setter live into branch
6773 // t2LoopEnd:
6774 // EndLoop = branch at end of original BB
6775 // LoopCount = t2LoopDec
6776public:
6777 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6778 : EndLoop(EndLoop), LoopCount(LoopCount),
6779 MF(EndLoop->getParent()->getParent()),
6780 TII(MF->getSubtarget().getInstrInfo()) {}
6781
6782 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6783 // Only ignore the terminator.
6784 return MI == EndLoop || MI == LoopCount;
6785 }
6786
6787 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
6788 if (tooMuchRegisterPressure(SSD, SMS))
6789 return false;
6790
6791 return true;
6792 }
6793
6794 std::optional<bool> createTripCountGreaterCondition(
6795 int TC, MachineBasicBlock &MBB,
6796 SmallVectorImpl<MachineOperand> &Cond) override {
6797
6798 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6799 Cond.push_back(EndLoop->getOperand(1));
6800 Cond.push_back(EndLoop->getOperand(2));
6801 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6802 TII->reverseBranchCondition(Cond);
6803 }
6804 return {};
6805 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6806 // General case just lets the unrolled t2LoopDec do the subtraction and
6807 // therefore just needs to check if zero has been reached.
6808 MachineInstr *LoopDec = nullptr;
6809 for (auto &I : MBB.instrs())
6810 if (I.getOpcode() == ARM::t2LoopDec)
6811 LoopDec = &I;
6812 assert(LoopDec && "Unable to find copied LoopDec");
6813 // Check if we're done with the loop.
6814 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6815 .addReg(LoopDec->getOperand(0).getReg())
6816 .addImm(0)
6817 .addImm(ARMCC::AL)
6818 .addReg(ARM::NoRegister);
6819 Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
6820 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6821 return {};
6822 } else
6823 llvm_unreachable("Unknown EndLoop");
6824 }
6825
6826 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6827
6828 void adjustTripCount(int TripCountAdjust) override {}
6829
6830 void disposed() override {}
6831};
6832
6833void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6834 const IterNeeds &CIN) {
6835 // Increase pressure by the amounts in CrossIterationNeeds
6836 for (const auto &N : CIN) {
6837 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6838 for (int I = 0; I < Cnt; ++I)
6839 RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
6840 LaneBitmask::getAll());
6841 }
6842 // Decrease pressure by the amounts in CrossIterationNeeds
6843 for (const auto &N : CIN) {
6844 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6845 for (int I = 0; I < Cnt; ++I)
6846 RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
6847 LaneBitmask::getNone());
6848 }
6849}
6850
6851bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6852 SMSchedule &SMS) {
6853 IterNeeds CrossIterationNeeds;
6854
6855 // Determine which values will be loop-carried after the schedule is
6856 // applied
6857
6858 for (auto &SU : SSD.SUnits) {
6859 const MachineInstr *MI = SU.getInstr();
6860 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6861 for (auto &S : SU.Succs)
6862 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6863 Register Reg = S.getReg();
6864 if (Reg.isVirtual())
6865 CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
6866 .first->second.set(0);
6867 } else if (S.isAssignedRegDep()) {
6868 int OStg = SMS.stageScheduled(S.getSUnit());
6869 if (OStg >= 0 && OStg != Stg) {
6870 Register Reg = S.getReg();
6871 if (Reg.isVirtual())
6872 CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
6873 .first->second |= ((1 << (OStg - Stg)) - 1);
6874 }
6875 }
6876 }
6877
6878 // Determine more-or-less what the proposed schedule (reversed) is going to
6879 // be; it might not be quite the same because the within-cycle ordering
6880 // created by SMSchedule depends upon changes to help with address offsets and
6881 // the like.
6882 std::vector<SUnit *> ProposedSchedule;
6883 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6884 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6885 ++Stage) {
6886 std::deque<SUnit *> Instrs =
6887 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6888 std::sort(Instrs.begin(), Instrs.end(),
6889 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6890 for (SUnit *SU : Instrs)
6891 ProposedSchedule.push_back(SU);
6892 }
6893
6894 // Learn whether the last use/def of each cross-iteration register is a use or
6895 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6896 // and we do not have to add the pressure.
6897 for (auto *SU : ProposedSchedule)
6898 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6899 ++OperI) {
6900 auto MO = *OperI;
6901 if (!MO.isReg() || !MO.getReg())
6902 continue;
6903 Register Reg = MO.getReg();
6904 auto CIter = CrossIterationNeeds.find(Reg.id());
6905 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6906 CIter->second[SEEN_AS_LIVE])
6907 continue;
6908 if (MO.isDef() && !MO.isDead())
6909 CIter->second.set(SEEN_AS_LIVE);
6910 else if (MO.isUse())
6911 CIter->second.set(LAST_IS_USE);
6912 }
6913 for (auto &CI : CrossIterationNeeds)
6914 CI.second.reset(LAST_IS_USE);
6915
6916 RegionPressure RecRegPressure;
6917 RegPressureTracker RPTracker(RecRegPressure);
6918 RegisterClassInfo RegClassInfo;
6919 RegClassInfo.runOnMachineFunction(*MF);
6920 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6921 EndLoop->getParent()->end(), false, false);
6922 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
6923
6924 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6925
6926 for (auto *SU : ProposedSchedule) {
6927 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6928 RPTracker.setPos(std::next(CurInstI));
6929 RPTracker.recede();
6930
6931 // Track what cross-iteration registers would be seen as live
6932 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6933 auto MO = *OperI;
6934 if (!MO.isReg() || !MO.getReg())
6935 continue;
6936 Register Reg = MO.getReg();
6937 if (MO.isDef() && !MO.isDead()) {
6938 auto CIter = CrossIterationNeeds.find(Reg.id());
6939 if (CIter != CrossIterationNeeds.end()) {
6940 CIter->second.reset(0);
6941 CIter->second.reset(SEEN_AS_LIVE);
6942 }
6943 }
6944 }
6945 for (auto &S : SU->Preds) {
6946 auto Stg = SMS.stageScheduled(SU);
6947 if (S.isAssignedRegDep()) {
6948 Register Reg = S.getReg();
6949 auto CIter = CrossIterationNeeds.find(Reg.id());
6950 if (CIter != CrossIterationNeeds.end()) {
6951 auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
6952 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6953 if (Stg - Stg2 < MAX_STAGES)
6954 CIter->second.set(Stg - Stg2);
6955 CIter->second.set(SEEN_AS_LIVE);
6956 }
6957 }
6958 }
6959
6960 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6961 }
6962
6963 auto &P = RPTracker.getPressure().MaxSetPressure;
6964 for (unsigned I = 0, E = P.size(); I < E; ++I)
6965 if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
6966 return true;
6967 }
6968 return false;
6969}
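// Note: the schedule is rejected when its maximum pressure exceeds any
// register-pressure-set limit for the target. The shouldUseSchedule()
// override earlier in this file (not shown here) is expected to return
// !tooMuchRegisterPressure(SSD, SMS), so the pipeliner backs off rather than
// commit to a schedule that is likely to spill.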
6970
6971} // namespace
6972
6973std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6974 ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
6975 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
6976 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6977 if (Preheader == LoopBB)
6978 Preheader = *std::next(LoopBB->pred_begin());
6979
6980 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6981 // If the branch is a Bcc, then the CPSR should be set somewhere within the
6982 // block. We need to determine the reaching definition of CPSR so that
6983 // it can be marked as non-pipelineable, allowing the pipeliner to force
6984 // it into stage 0 or give up if it cannot or will not do so.
6985 MachineInstr *CCSetter = nullptr;
6986 for (auto &L : LoopBB->instrs()) {
6987 if (L.isCall())
6988 return nullptr;
6989 if (isCPSRDefined(L))
6990 CCSetter = &L;
6991 }
6992 if (CCSetter)
6993 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
6994 else
6995 return nullptr; // Unable to find the CC setter, so unable to guarantee
6996 // that the pipeline will work
6997 }
6998
6999 // Recognize:
7000 // preheader:
7001 // %1 = t2DoLoopStart %0
7002 // loop:
7003 // %2 = phi %1, <not loop>, %..., %loop
7004 // %3 = t2LoopDec %2, <imm>
7005 // t2LoopEnd %3, %loop
7006
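// Note: t2DoLoopStart, t2LoopDec and t2LoopEnd are the pseudo-instructions
// modeling Armv8.1-M low-overhead loops (the DLS/LE branch instructions);
// recognizing this exact pattern lets the pipeliner treat t2LoopDec as the
// loop-control instruction.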
7007 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
7008 for (auto &L : LoopBB->instrs())
7009 if (L.isCall())
7010 return nullptr;
7011 else if (isVCTP(&L))
7012 return nullptr;
7013 Register LoopDecResult = I->getOperand(0).getReg();
7014 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
7015 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
7016 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
7017 return nullptr;
7018 MachineInstr *LoopStart = nullptr;
7019 for (auto &J : Preheader->instrs())
7020 if (J.getOpcode() == ARM::t2DoLoopStart)
7021 LoopStart = &J;
7022 if (!LoopStart)
7023 return nullptr;
7024 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
7025 }
7026 return nullptr;
7027}
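// Note: a rough sketch of how the MachinePipeliner pass consumes this hook,
// assuming the generic TargetInstrInfo::PipelinerLoopInfo flow (the names
// below come from that interface, not from this file):
//
//   if (auto LoopInfo = TII->analyzeLoopForPipelining(LoopBB)) {
//     // shouldIgnoreForPipelining() keeps the CC setter / t2LoopDec in
//     // stage 0, and createTripCountGreaterCondition() guards the prologue
//     // copies emitted by modulo expansion.
//   }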