LLVM 22.0.0git
HexagonSubtarget.cpp
Go to the documentation of this file.
1//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Hexagon specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#include "HexagonSubtarget.h"
14#include "HexagonInstrInfo.h"
15#include "HexagonRegisterInfo.h"
17#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/StringRef.h"
25#include "llvm/IR/IntrinsicsHexagon.h"
29#include <algorithm>
30#include <cassert>
31#include <optional>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "hexagon-subtarget"
36
37#define GET_SUBTARGETINFO_CTOR
38#define GET_SUBTARGETINFO_TARGET_DESC
39#include "HexagonGenSubtargetInfo.inc"
40
41static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
42 cl::init(true));
43
44static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
45 cl::init(false));
46
47static cl::opt<bool>
48 EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
49 cl::desc("Enable the scheduler to generate .cur"));
50
51static cl::opt<bool>
52 DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
53 cl::desc("Disable Hexagon MI Scheduling"));
54
56 "hexagon-long-calls", cl::Hidden,
57 cl::desc("If present, forces/disables the use of long calls"));
58
59static cl::opt<bool>
60 EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
61 cl::desc("Consider calls to be predicable"));
62
63static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
64 cl::init(true));
65
66static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
67 cl::Hidden, cl::init(true));
68
70 "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
71 cl::desc("Enable checking for cache bank conflicts"));
72
74 StringRef FS, const TargetMachine &TM)
75 : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
76 OptLevel(TM.getOptLevel()),
77 CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
78 TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
79 RegInfo(getHwMode()), TLInfo(TM, *this),
80 InstrItins(getInstrItineraryForCPU(CPUString)) {
82 // Beware of the default constructor of InstrItineraryData: it will
83 // reset all members to 0.
84 assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
85}
86
89 std::optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
90 if (ArchVer)
91 HexagonArchVersion = *ArchVer;
92 else
93 llvm_unreachable("Unrecognized Hexagon processor version");
94
95 UseHVX128BOps = false;
96 UseHVX64BOps = false;
97 UseAudioOps = false;
98 UseLongCalls = false;
99
100 SubtargetFeatures Features(FS);
101
102 // Turn on QFloat if the HVX version is v68+.
103 // The function ParseSubtargetFeatures will set feature bits and initialize
104 // subtarget's variables all in one, so there isn't a good way to preprocess
105 // the feature string, other than by tinkering with it directly.
106 auto IsQFloatFS = [](StringRef F) {
107 return F == "+hvx-qfloat" || F == "-hvx-qfloat";
108 };
109 if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
110 auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
111 for (StringRef F : llvm::reverse(Features.getFeatures())) {
112 if (F.starts_with("+hvxv"))
113 return F;
114 }
115 for (StringRef F : llvm::reverse(Features.getFeatures())) {
116 if (F == "-hvx")
117 return StringRef();
118 if (F.starts_with("+hvx") || F == "-hvx")
119 return F.take_front(4); // Return "+hvx" or "-hvx".
120 }
121 return StringRef();
122 };
123
124 bool AddQFloat = false;
125 StringRef HvxVer = getHvxVersion(FS);
126 if (HvxVer.starts_with("+hvxv")) {
127 int Ver = 0;
128 if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
129 AddQFloat = true;
130 } else if (HvxVer == "+hvx") {
131 if (hasV68Ops())
132 AddQFloat = true;
133 }
134
135 if (AddQFloat)
136 Features.AddFeature("+hvx-qfloat");
137 }
138
139 std::string FeatureString = Features.getString();
140 ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);
141
142 if (useHVXV68Ops())
143 UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;
144
145 if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
147 dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");
148
149 if (OverrideLongCalls.getPosition())
150 UseLongCalls = OverrideLongCalls;
151
153
154 if (isTinyCore()) {
155 // Tiny core has a single thread, so back-to-back scheduling is enabled by
156 // default.
157 if (!EnableBSBSched.getPosition())
158 UseBSBScheduling = false;
159 }
160
161 FeatureBitset FeatureBits = getFeatureBits();
163 setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
164 setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));
165
166 return *this;
167}
168
169bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
170 if (!useHVXOps())
171 return false;
172 if (Ty.isVector())
173 Ty = Ty.getVectorElementType();
174 if (IncludeBool && Ty == MVT::i1)
175 return true;
176 ArrayRef<MVT> ElemTypes = getHVXElementTypes();
177 return llvm::is_contained(ElemTypes, Ty);
178}
179
180bool HexagonSubtarget::isHVXVectorType(EVT VecTy, bool IncludeBool) const {
181 if (!VecTy.isSimple())
182 return false;
183 if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
184 return false;
185 MVT ElemTy = VecTy.getSimpleVT().getVectorElementType();
186 if (!IncludeBool && ElemTy == MVT::i1)
187 return false;
188
189 unsigned HwLen = getVectorLength();
190 unsigned NumElems = VecTy.getVectorNumElements();
191 ArrayRef<MVT> ElemTypes = getHVXElementTypes();
192
193 if (IncludeBool && ElemTy == MVT::i1) {
194 // Boolean HVX vector types are formed from regular HVX vector types
195 // by replacing the element type with i1.
196 for (MVT T : ElemTypes)
197 if (NumElems * T.getSizeInBits() == 8 * HwLen)
198 return true;
199 return false;
200 }
201
202 unsigned VecWidth = VecTy.getSizeInBits();
203 if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
204 return false;
205 return llvm::is_contained(ElemTypes, ElemTy);
206}
207
208bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
209 if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
210 return false;
211 // Avoid types like <2 x i32*>.
212 Type *ScalTy = VecTy->getScalarType();
213 if (!ScalTy->isIntegerTy() &&
214 !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
215 return false;
216 // The given type may be something like <17 x i32>, which is not MVT,
217 // but can be represented as (non-simple) EVT.
218 EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/false);
219 if (!Ty.getVectorElementType().isSimple())
220 return false;
221
222 auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
223 if (isHVXVectorType(SimpleTy, IncludeBool))
224 return true;
225 auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
227 };
228
229 // Round up EVT to have power-of-2 elements, and keep checking if it
230 // qualifies for HVX, dividing it in half after each step.
231 MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
232 unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
233 while (VecLen > 1) {
234 MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
235 if (SimpleTy.isValid() && isHvxTy(SimpleTy))
236 return true;
237 VecLen /= 2;
238 }
239
240 return false;
241}
242
244 for (SUnit &SU : DAG->SUnits) {
245 if (!SU.isInstr())
246 continue;
248 for (auto &D : SU.Preds)
249 if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
250 Erase.push_back(D);
251 for (auto &E : Erase)
252 SU.removePred(E);
253 }
254}
255
257 for (SUnit &SU : DAG->SUnits) {
258 // Update the latency of chain edges between v60 vector load or store
259 // instructions to be 1. These instruction cannot be scheduled in the
260 // same packet.
261 MachineInstr &MI1 = *SU.getInstr();
262 auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
263 bool IsStoreMI1 = MI1.mayStore();
264 bool IsLoadMI1 = MI1.mayLoad();
265 if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
266 continue;
267 for (SDep &SI : SU.Succs) {
268 if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
269 continue;
270 MachineInstr &MI2 = *SI.getSUnit()->getInstr();
271 if (!QII->isHVXVec(MI2))
272 continue;
273 if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
274 SI.setLatency(1);
275 SU.setHeightDirty();
276 // Change the dependence in the opposite direction too.
277 for (SDep &PI : SI.getSUnit()->Preds) {
278 if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
279 continue;
280 PI.setLatency(1);
281 SI.getSUnit()->setDepthDirty();
282 }
283 }
284 }
285 }
286}
287
288// Check if a call and subsequent A2_tfrpi instructions should maintain
289// scheduling affinity. We are looking for the TFRI to be consumed in
290// the next instruction. This should help reduce the instances of
291// double register pairs being allocated and scheduled before a call
292// when not used until after the call. This situation is exacerbated
293// by the fact that we allocate the pair from the callee saves list,
294// leading to excess spills and restores.
295bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
296 const HexagonInstrInfo &HII, const SUnit &Inst1,
297 const SUnit &Inst2) const {
298 if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
299 return false;
300
301 // TypeXTYPE are 64 bit operations.
302 unsigned Type = HII.getType(*Inst2.getInstr());
305}
306
308 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
309 SUnit* LastSequentialCall = nullptr;
310 // Map from virtual register to physical register from the copy.
311 DenseMap<unsigned, unsigned> VRegHoldingReg;
312 // Map from the physical register to the instruction that uses virtual
313 // register. This is used to create the barrier edge.
314 DenseMap<unsigned, SUnit *> LastVRegUse;
315 auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
316 auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
317
318 // Currently we only catch the situation when compare gets scheduled
319 // before preceding call.
320 for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
321 // Remember the call.
322 if (DAG->SUnits[su].getInstr()->isCall())
323 LastSequentialCall = &DAG->SUnits[su];
324 // Look for a compare that defines a predicate.
325 else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
326 DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
327 // Look for call and tfri* instructions.
328 else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
329 shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
330 DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
331 // Prevent redundant register copies due to reads and writes of physical
332 // registers. The original motivation for this was the code generated
333 // between two calls, which are caused both the return value and the
334 // argument for the next call being in %r0.
335 // Example:
336 // 1: <call1>
337 // 2: %vreg = COPY %r0
338 // 3: <use of %vreg>
339 // 4: %r0 = ...
340 // 5: <call2>
341 // The scheduler would often swap 3 and 4, so an additional register is
342 // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
343 // this.
344 // The code below checks for all the physical registers, not just R0/D0/V0.
345 else if (SchedRetvalOptimization) {
346 const MachineInstr *MI = DAG->SUnits[su].getInstr();
347 if (MI->isCopy() && MI->getOperand(1).getReg().isPhysical()) {
348 // %vregX = COPY %r0
349 VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
350 LastVRegUse.erase(MI->getOperand(1).getReg());
351 } else {
352 for (const MachineOperand &MO : MI->operands()) {
353 if (!MO.isReg())
354 continue;
355 if (MO.isUse() && !MI->isCopy() &&
356 VRegHoldingReg.count(MO.getReg())) {
357 // <use of %vregX>
358 LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
359 } else if (MO.isDef() && MO.getReg().isPhysical()) {
360 for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
361 ++AI) {
362 if (auto It = LastVRegUse.find(*AI); It != LastVRegUse.end()) {
363 if (It->second != &DAG->SUnits[su])
364 // %r0 = ...
365 DAG->addEdge(&DAG->SUnits[su],
366 SDep(It->second, SDep::Barrier));
367 LastVRegUse.erase(It);
368 }
369 }
370 }
371 }
372 }
373 }
374 }
375}
376
379 return;
380
381 const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);
382
383 // Create artificial edges between loads that could likely cause a bank
384 // conflict. Since such loads would normally not have any dependency
385 // between them, we cannot rely on existing edges.
386 for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
387 SUnit &S0 = DAG->SUnits[i];
388 MachineInstr &L0 = *S0.getInstr();
389 if (!L0.mayLoad() || L0.mayStore() ||
391 continue;
392 int64_t Offset0;
394 MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
395 // Is the access size is longer than the L1 cache line, skip the check.
396 if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() ||
397 Size0.getValue() >= 32)
398 continue;
399 // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
400 for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
401 SUnit &S1 = DAG->SUnits[j];
402 MachineInstr &L1 = *S1.getInstr();
403 if (!L1.mayLoad() || L1.mayStore() ||
405 continue;
406 int64_t Offset1;
408 MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
409 if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size0.hasValue() ||
410 Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg())
411 continue;
412 // Check bits 3 and 4 of the offset: if they differ, a bank conflict
413 // is unlikely.
414 if (((Offset0 ^ Offset1) & 0x18) != 0)
415 continue;
416 // Bits 3 and 4 are the same, add an artificial edge and set extra
417 // latency.
418 SDep A(&S0, SDep::Artificial);
419 A.setLatency(1);
420 S1.addPred(A, true);
421 }
422 }
423}
424
425/// Enable use of alias analysis during code generation (during MI
426/// scheduling, DAGCombine, etc.).
429 return true;
430 return false;
431}
432
433/// Perform target specific adjustments to the latency of a schedule
434/// dependency.
436 SUnit *Src, int SrcOpIdx, SUnit *Dst, int DstOpIdx, SDep &Dep,
437 const TargetSchedModel *SchedModel) const {
438 if (!Src->isInstr() || !Dst->isInstr())
439 return;
440
441 MachineInstr *SrcInst = Src->getInstr();
442 MachineInstr *DstInst = Dst->getInstr();
443 const HexagonInstrInfo *QII = getInstrInfo();
444
445 // Instructions with .new operands have zero latency.
448 if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
449 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
450 Dep.setLatency(0);
451 return;
452 }
453
454 // Set the latency for a copy to zero since we hope that is will get
455 // removed.
456 if (DstInst->isCopy())
457 Dep.setLatency(0);
458
459 // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
460 // the correct latency.
461 // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency
462 // only if the latencies on all the uses are equal, otherwise set it to
463 // default.
464 if ((DstInst->isRegSequence() || DstInst->isCopy())) {
465 Register DReg = DstInst->getOperand(0).getReg();
466 std::optional<unsigned> DLatency;
467 for (const auto &DDep : Dst->Succs) {
468 MachineInstr *DDst = DDep.getSUnit()->getInstr();
469 int UseIdx = -1;
470 for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
471 const MachineOperand &MO = DDst->getOperand(OpNum);
472 if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
473 UseIdx = OpNum;
474 break;
475 }
476 }
477
478 if (UseIdx == -1)
479 continue;
480
481 std::optional<unsigned> Latency =
482 InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);
483
484 // Set DLatency for the first time.
485 if (!DLatency)
486 DLatency = Latency;
487
488 // For multiple uses, if the Latency is different across uses, reset
489 // DLatency.
490 if (DLatency != Latency) {
491 DLatency = std::nullopt;
492 break;
493 }
494 }
495 Dep.setLatency(DLatency.value_or(0));
496 }
497
498 // Try to schedule uses near definitions to generate .cur.
499 ExclSrc.clear();
500 ExclDst.clear();
501 if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
502 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
503 Dep.setLatency(0);
504 return;
505 }
506 int Latency = Dep.getLatency();
507 bool IsArtificial = Dep.isArtificial();
508 Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
509 Dep.setLatency(Latency);
510}
511
513 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
514 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
515 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
516 Mutations.push_back(std::make_unique<BankConflictMutation>());
517}
518
520 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
521 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
522 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
523}
524
// Pin the vtable to this file.
// Out-of-line key function so the compiler emits HexagonSubtarget's vtable
// in this translation unit only.
void HexagonSubtarget::anchor() {}
527
529 if (DisableHexagonMISched.getNumOccurrences())
530 return !DisableHexagonMISched;
531 return true;
532}
533
537
538int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
539 MachineInstr &DstInst, bool IsArtificial,
540 int Latency) const {
541 if (IsArtificial)
542 return 1;
543 if (!hasV60Ops())
544 return Latency;
545
546 const HexagonInstrInfo &QII = *getInstrInfo();
547 // BSB scheduling.
548 if (QII.isHVXVec(SrcInst) || useBSBScheduling())
549 Latency = (Latency + 1) >> 1;
550 return Latency;
551}
552
// Recompute the itinerary-based latency of the register dependence(s) from
// Src to Dst (undoing an earlier changeLatency), and mirror the new value
// onto the corresponding predecessor edge stored on Dst.
void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
  MachineInstr *SrcI = Src->getInstr();
  for (auto &I : Src->Succs) {
    // Only assigned register dependences that lead to Dst are restored.
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    Register DepR = I.getReg();
    // Locate the operand of SrcI that defines DepR (or a super-register of
    // it, for physical registers).
    int DefIdx = -1;
    for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = SrcI->getOperand(OpNum);
      bool IsSameOrSubReg = false;
      if (MO.isReg()) {
        Register MOReg = MO.getReg();
        if (DepR.isVirtual()) {
          IsSameOrSubReg = (MOReg == DepR);
        } else {
          IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
        }
        if (MO.isDef() && IsSameOrSubReg)
          DefIdx = OpNum;
      }
    }
    assert(DefIdx >= 0 && "Def Reg not found in Src MI");
    MachineInstr *DstI = Dst->getInstr();
    // Copy the edge before mutating it, so the predecessor-edge lookup at
    // the bottom matches on the original latency value.
    SDep T = I;
    for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = DstI->getOperand(OpNum);
      if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
        std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
            &InstrItins, *SrcI, DefIdx, *DstI, OpNum);

        // For some instructions (ex: COPY), we might end up with < 0 latency
        // as they don't have any Itinerary class associated with them.
        if (!Latency)
          Latency = 0;
        bool IsArtificial = I.isArtificial();
        Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
        I.setLatency(*Latency);
      }
    }

    // Update the latency of opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(I.getLatency());
  }
}
600
601/// Change the latency between the two SUnits.
602void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
603 const {
604 for (auto &I : Src->Succs) {
605 if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
606 continue;
607 SDep T = I;
608 I.setLatency(Lat);
609
610 // Update the latency of opposite edge too.
611 T.setSUnit(Src);
612 auto F = find(Dst->Preds, T);
613 assert(F != Dst->Preds.end());
614 F->setLatency(Lat);
615 }
616}
617
618/// If the SUnit has a zero latency edge, return the other SUnit.
620 for (auto &I : Deps)
621 if (I.isAssignedRegDep() && I.getLatency() == 0 &&
622 !I.getSUnit()->getInstr()->isPseudo())
623 return I.getSUnit();
624 return nullptr;
625}
626
// Return true if these are the best two instructions to schedule
// together with a zero latency. Only one dependence should have a zero
// latency. If there are multiple choices, choose the best, and change
// the others, if needed.
bool HexagonSubtarget::isBestZeroLatency(
    SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII,
    SmallPtrSet<SUnit *, 4> &ExclSrc, SmallPtrSet<SUnit *, 4> &ExclDst) const {
  MachineInstr &SrcInst = *Src->getInstr();
  MachineInstr &DstInst = *Dst->getInstr();

  // Ignore Boundary SU nodes as these have null instructions.
  if (Dst->isBoundaryNode())
    return false;

  if (SrcInst.isPHI() || DstInst.isPHI())
    return false;

  // Only pairs that can be scheduled ASAP or bundled together are
  // zero-latency candidates at all.
  if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
      !TII->canExecuteInBundle(SrcInst, DstInst))
    return false;

  // The architecture doesn't allow three dependent instructions in the same
  // packet. So, if the destination has a zero latency successor, then it's
  // not a candidate for a zero latency predecessor.
  if (getZeroLatency(Dst, Dst->Succs) != nullptr)
    return false;

  // Check if the Dst instruction is the best candidate first.
  // "Best" is decided by node number: prefer the earliest source and the
  // latest destination among the current zero-latency edges.
  SUnit *Best = nullptr;
  SUnit *DstBest = nullptr;
  SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
  if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
    // Check that Src doesn't have a better candidate.
    DstBest = getZeroLatency(Src, Src->Succs);
    if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
      Best = Dst;
  }
  if (Best != Dst)
    return false;

  // The caller frequently adds the same dependence twice. If so, then
  // return true for this case too.
  // NOTE(review): the `Dst == nullptr` clause below can never be true here
  // (Dst was dereferenced above) — confirm whether `DstBest == nullptr`
  // was intended.
  if ((Src == SrcBest && Dst == DstBest ) ||
      (SrcBest == nullptr && Dst == DstBest) ||
      (Src == SrcBest && Dst == nullptr))
    return true;

  // Reassign the latency for the previous bests, which requires setting
  // the dependence edge in both directions.
  if (SrcBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(SrcBest, Dst, 1);
    else
      restoreLatency(SrcBest, Dst);
  }
  if (DstBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(Src, DstBest, 1);
    else
      restoreLatency(Src, DstBest);
  }

  // Attempt to find another opportunity for zero latency in a different
  // dependence.
  if (SrcBest && DstBest)
    // If there is an edge from SrcBest to DstBst, then try to change that
    // to 0 now.
    changeLatency(SrcBest, DstBest, 0);
  else if (DstBest) {
    // Check if the previous best destination instruction has a new zero
    // latency dependence opportunity.
    // ExclSrc/ExclDst prevent infinite recursion over already-visited nodes.
    ExclSrc.insert(Src);
    for (auto &I : DstBest->Preds)
      if (ExclSrc.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
        changeLatency(I.getSUnit(), DstBest, 0);
  } else if (SrcBest) {
    // Check if previous best source instruction has a new zero latency
    // dependence opportunity.
    ExclDst.insert(Dst);
    for (auto &I : SrcBest->Succs)
      if (ExclDst.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
        changeLatency(SrcBest, I.getSUnit(), 0);
  }

  return true;
}
715
717 return 32;
718}
719
721 return 32;
722}
723
724bool HexagonSubtarget::enableSubRegLiveness() const { return true; }
725
727 struct Scalar {
728 unsigned Opcode;
729 Intrinsic::ID IntId;
730 };
731 struct Hvx {
732 unsigned Opcode;
733 Intrinsic::ID Int64Id, Int128Id;
734 };
735
736 static Scalar ScalarInts[] = {
737#define GET_SCALAR_INTRINSICS
739#undef GET_SCALAR_INTRINSICS
740 };
741
742 static Hvx HvxInts[] = {
743#define GET_HVX_INTRINSICS
745#undef GET_HVX_INTRINSICS
746 };
747
748 const auto CmpOpcode = [](auto A, auto B) { return A.Opcode < B.Opcode; };
749 [[maybe_unused]] static bool SortedScalar =
750 (llvm::sort(ScalarInts, CmpOpcode), true);
751 [[maybe_unused]] static bool SortedHvx =
752 (llvm::sort(HvxInts, CmpOpcode), true);
753
754 auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
755 auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));
756
757 auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
758 if (FoundScalar != ES && FoundScalar->Opcode == Opc)
759 return FoundScalar->IntId;
760
761 auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
762 if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
763 unsigned HwLen = getVectorLength();
764 if (HwLen == 64)
765 return FoundHvx->Int64Id;
766 if (HwLen == 128)
767 return FoundHvx->Int128Id;
768 }
769
770 std::string error = "Invalid opcode (" + std::to_string(Opc) + ")";
771 llvm_unreachable(error.c_str());
772 return 0;
773}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static cl::opt< bool > DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::desc("Disable Hexagon MI Scheduling"))
static cl::opt< bool > EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true), cl::desc("Enable the scheduler to generate .cur"))
static cl::opt< bool > EnableCheckBankConflict("hexagon-check-bank-conflict", cl::Hidden, cl::init(true), cl::desc("Enable checking for cache bank conflicts"))
static cl::opt< bool > OverrideLongCalls("hexagon-long-calls", cl::Hidden, cl::desc("If present, forces/disables the use of long calls"))
static cl::opt< bool > SchedPredsCloser("sched-preds-closer", cl::Hidden, cl::init(true))
static cl::opt< bool > SchedRetvalOptimization("sched-retval-optimization", cl::Hidden, cl::init(true))
static cl::opt< bool > EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden, cl::init(false))
static cl::opt< bool > EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::init(true))
static SUnit * getZeroLatency(SUnit *N, SmallVector< SDep, 4 > &Deps)
If the SUnit has a zero latency edge, return the other SUnit.
static cl::opt< bool > EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden, cl::desc("Consider calls to be predicable"))
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
#define T
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define error(X)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:322
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
Container class for subtarget features.
constexpr FeatureBitset & reset(unsigned I)
unsigned getAddrMode(const MachineInstr &MI) const
bool canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const
Can these instructions execute at the same time in a bundle.
bool isHVXVec(const MachineInstr &MI) const
bool isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const
MachineOperand * getBaseAndOffset(const MachineInstr &MI, int64_t &Offset, LocationSize &AccessSize) const
uint64_t getType(const MachineInstr &MI) const
Hexagon::ArchEnum HexagonArchVersion
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
Perform target specific adjustments to the latency of a schedule dependency.
const HexagonInstrInfo * getInstrInfo() const override
const HexagonRegisterInfo * getRegisterInfo() const override
void getSMSMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
const HexagonTargetLowering * getTargetLowering() const override
bool UseBSBScheduling
True if the target should use Back-Skip-Back scheduling.
unsigned getL1PrefetchDistance() const
ArrayRef< MVT > getHVXElementTypes() const
bool enableSubRegLiveness() const override
unsigned getVectorLength() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
ParseSubtargetFeatures - Parses features string setting specified subtarget options.
unsigned getL1CacheLineSize() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
Intrinsic::ID getIntrinsicId(unsigned Opc) const
HexagonSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
bool enableMachineScheduler() const override
bool isHVXElementType(MVT Ty, bool IncludeBool=false) const
bool useAA() const override
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool hasValue() const
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
MCRegAliasIterator enumerates all registers aliasing Reg.
Machine Value Type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isValid() const
Return true if this is a valid simple valuetype.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isCopy() const
unsigned getNumOperands() const
Retuns the total number of operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isRegSequence() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Scheduling dependency.
Definition ScheduleDAG.h:51
@ Output
A register output-dependence (aka WAW).
Definition ScheduleDAG.h:57
@ Order
Any other ordering dependency.
Definition ScheduleDAG.h:58
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
Definition ScheduleDAG.h:71
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition ScheduleDAG.h:74
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
LLVM_ABI void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next time getHeight() is called.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the given MachineSchedStrategy.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition StringRef.h:501
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
LLVM_ABI std::string getString() const
Returns features as a string.
LLVM_ABI void AddFeature(StringRef String, bool Enable=true)
Adds Features.
Primary interface to the complete machine description for the target machine.
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS)
FeatureBitset completeHVXFeatures(const FeatureBitset &FB)
std::optional< Hexagon::ArchEnum > getCpu(StringRef CPU)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa&lt;X&gt; - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
cl::opt< bool > HexagonDisableDuplex
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override