//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Hexagon specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "HexagonSubtarget.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "hexagon-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"

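// HexagonGenSubtargetInfo.inc is generated by TableGen from the Hexagon .td
// files; the GET_SUBTARGETINFO_* macros above select the subtarget feature
// tables and constructor support used by this file (ParseSubtargetFeatures
// among them).
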
static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
                                    cl::init(true));

static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
                                          cl::init(false));

static cl::opt<bool>
    EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
                      cl::desc("Enable the scheduler to generate .cur"));

static cl::opt<bool>
    DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
                          cl::desc("Disable Hexagon MI Scheduling"));

static cl::opt<bool> OverrideLongCalls(
    "hexagon-long-calls", cl::Hidden,
    cl::desc("If present, forces/disables the use of long calls"));

static cl::opt<bool>
    EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
                          cl::desc("Consider calls to be predicable"));

static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
                                      cl::init(true));

static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
                                             cl::Hidden, cl::init(true));

static cl::opt<bool> EnableCheckBankConflict(
    "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
    cl::desc("Enable checking for cache bank conflicts"));

HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
                                   StringRef FS, const TargetMachine &TM)
    : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
      OptLevel(TM.getOptLevel()),
      CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
      TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
      TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) {
  Hexagon_MC::addArchSubtarget(this, FS);
  // Beware of the default constructor of InstrItineraryData: it will
  // reset all members to 0.
  assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
}

HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  std::optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
  if (ArchVer)
    HexagonArchVersion = *ArchVer;
  else
    llvm_unreachable("Unrecognized Hexagon processor version");

  UseHVX128BOps = false;
  UseHVX64BOps = false;
  UseAudioOps = false;
  UseLongCalls = false;

  SubtargetFeatures Features(FS);

  // Turn on QFloat if the HVX version is v68+.
  // The function ParseSubtargetFeatures will set feature bits and initialize
  // subtarget's variables all in one, so there isn't a good way to preprocess
  // the feature string, other than by tinkering with it directly.
  auto IsQFloatFS = [](StringRef F) {
    return F == "+hvx-qfloat" || F == "-hvx-qfloat";
  };
  if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
    auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F.starts_with("+hvxv"))
          return F;
      }
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F == "-hvx")
          return StringRef();
        if (F.starts_with("+hvx") || F == "-hvx")
          return F.take_front(4); // Return "+hvx" or "-hvx".
      }
      return StringRef();
    };
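    // Worked example of the logic above: for FS = "+hvx,+hvxv68" the first
    // loop finds "+hvxv68"; for FS = "+hvx" alone it falls through to the
    // second loop and returns "+hvx"; an explicit "-hvx" yields an empty
    // StringRef, leaving AddQFloat false.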

    bool AddQFloat = false;
    StringRef HvxVer = getHvxVersion(FS);
    if (HvxVer.starts_with("+hvxv")) {
      int Ver = 0;
      if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
        AddQFloat = true;
    } else if (HvxVer == "+hvx") {
      if (hasV68Ops())
        AddQFloat = true;
    }

    if (AddQFloat)
      Features.AddFeature("+hvx-qfloat");
  }

  std::string FeatureString = Features.getString();
  ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);

  if (useHVXV68Ops())
    UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;

  if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
    LLVM_DEBUG(
        dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");

  if (OverrideLongCalls.getPosition())
    UseLongCalls = OverrideLongCalls;

  UseBSBScheduling = hasV60Ops() && EnableBSBSched;

  if (isTinyCore()) {
    // Tiny core has a single thread, so back-to-back scheduling is enabled by
    // default.
    if (!EnableBSBSched.getPosition())
      UseBSBScheduling = false;
  }

  FeatureBitset FeatureBits = getFeatureBits();
  if (HexagonDisableDuplex)
    setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
  setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));

  return *this;
}

bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
  if (!useHVXOps())
    return false;
  if (Ty.isVector())
    Ty = Ty.getVectorElementType();
  if (IncludeBool && Ty == MVT::i1)
    return true;
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();
  return llvm::is_contained(ElemTypes, Ty);
}

bool HexagonSubtarget::isHVXVectorType(EVT VecTy, bool IncludeBool) const {
  if (!VecTy.isSimple())
    return false;
  if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
    return false;
  MVT ElemTy = VecTy.getSimpleVT().getVectorElementType();
  if (!IncludeBool && ElemTy == MVT::i1)
    return false;

  unsigned HwLen = getVectorLength();
  unsigned NumElems = VecTy.getVectorNumElements();
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();

  if (IncludeBool && ElemTy == MVT::i1) {
    // Boolean HVX vector types are formed from regular HVX vector types
    // by replacing the element type with i1.
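    // E.g. with a 128-byte vector length (8 * HwLen = 1024 bits), v128i1,
    // v64i1, and v32i1 match the bit widths of v128i8, v64i16, and v32i32.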
    for (MVT T : ElemTypes)
      if (NumElems * T.getSizeInBits() == 8 * HwLen)
        return true;
    return false;
  }

  unsigned VecWidth = VecTy.getSizeInBits();
  if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
    return false;
  return llvm::is_contained(ElemTypes, ElemTy);
}

bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
  if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
    return false;
  // Avoid types like <2 x i32*>.
  Type *ScalTy = VecTy->getScalarType();
  if (!ScalTy->isIntegerTy() &&
      !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
    return false;
  // The given type may be something like <17 x i32>, which is not MVT,
  // but can be represented as (non-simple) EVT.
  EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/false);
  if (!Ty.getVectorElementType().isSimple())
    return false;

  auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
    if (isHVXVectorType(SimpleTy, IncludeBool))
      return true;
    auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
    return Action == TargetLoweringBase::TypeWidenVector;
  };

  // Round up EVT to have power-of-2 elements, and keep checking if it
  // qualifies for HVX, dividing it in half after each step.
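  // E.g. <17 x i32> is first widened to v32i32, then v16i32, v8i32, etc.
  // are tried until one qualifies or the element count reaches 1.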
  MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
  unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
  while (VecLen > 1) {
    MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
    if (SimpleTy.isValid() && isHvxTy(SimpleTy))
      return true;
    VecLen /= 2;
  }

  return false;
}

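// Remove output (WAW) dependencies on the USR overflow bit (USR_OVF) from
// each node's predecessor list, presumably so that updates of this sticky
// status flag do not serialize the schedule.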
void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    if (!SU.isInstr())
      continue;
    SmallVector<SDep, 4> Erase;
    for (auto &D : SU.Preds)
      if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
        Erase.push_back(D);
    for (auto &E : Erase)
      SU.removePred(E);
  }
}

void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    // Update the latency of chain edges between v60 vector load or store
    // instructions to be 1. These instructions cannot be scheduled in the
    // same packet.
    MachineInstr &MI1 = *SU.getInstr();
    auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
    bool IsStoreMI1 = MI1.mayStore();
    bool IsLoadMI1 = MI1.mayLoad();
    if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
      continue;
    for (SDep &SI : SU.Succs) {
      if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
        continue;
      MachineInstr &MI2 = *SI.getSUnit()->getInstr();
      if (!QII->isHVXVec(MI2))
        continue;
      if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
        SI.setLatency(1);
        SU.setHeightDirty();
        // Change the dependence in the opposite direction too.
        for (SDep &PI : SI.getSUnit()->Preds) {
          if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
            continue;
          PI.setLatency(1);
          SI.getSUnit()->setDepthDirty();
        }
      }
    }
  }
}

// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
// the next instruction. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call
// when not used until after the call. This situation is exacerbated
// by the fact that we allocate the pair from the callee saves list,
// leading to excess spills and restores.
bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
    const HexagonInstrInfo &HII, const SUnit &Inst1,
    const SUnit &Inst2) const {
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // TypeXTYPE are 64 bit operations.
  unsigned Type = HII.getType(*Inst2.getInstr());
  return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
         Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM;
}

void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  SUnit* LastSequentialCall = nullptr;
  // Map from virtual register to physical register from the copy.
  DenseMap<unsigned, unsigned> VRegHoldingReg;
  // Map from the physical register to the instruction that uses virtual
  // register. This is used to create the barrier edge.
  DenseMap<unsigned, SUnit *> LastVRegUse;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when a compare gets scheduled
  // before the preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // Remember the call.
    if (DAG->SUnits[su].getInstr()->isCall())
      LastSequentialCall = &DAG->SUnits[su];
    // Look for a compare that defines a predicate.
    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
      DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
      DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
    // Prevent redundant register copies due to reads and writes of physical
    // registers. The original motivation for this was the code generated
    // between two calls, which is caused by both the return value and the
    // argument for the next call being in %r0.
    // Example:
    //   1: <call1>
    //   2: %vreg = COPY %r0
    //   3: <use of %vreg>
    //   4: %r0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
    // this.
    // The code below checks for all the physical registers, not just R0/D0/V0.
    else if (SchedRetvalOptimization) {
      const MachineInstr *MI = DAG->SUnits[su].getInstr();
      if (MI->isCopy() && MI->getOperand(1).getReg().isPhysical()) {
        // %vregX = COPY %r0
        VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
        LastVRegUse.erase(MI->getOperand(1).getReg());
      } else {
        for (const MachineOperand &MO : MI->operands()) {
          if (!MO.isReg())
            continue;
          if (MO.isUse() && !MI->isCopy() &&
              VRegHoldingReg.count(MO.getReg())) {
            // <use of %vregX>
            LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
          } else if (MO.isDef() && MO.getReg().isPhysical()) {
            for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
                 ++AI) {
              if (auto It = LastVRegUse.find(*AI); It != LastVRegUse.end()) {
                if (It->second != &DAG->SUnits[su])
                  // %r0 = ...
                  DAG->addEdge(&DAG->SUnits[su],
                               SDep(It->second, SDep::Barrier));
                LastVRegUse.erase(It);
              }
            }
          }
        }
      }
    }
  }
}


void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
  if (!EnableCheckBankConflict)
    return;

  const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);

  // Create artificial edges between loads that could likely cause a bank
  // conflict. Since such loads would normally not have any dependency
  // between them, we cannot rely on existing edges.
  for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
    SUnit &S0 = DAG->SUnits[i];
    MachineInstr &L0 = *S0.getInstr();
    if (!L0.mayLoad() || L0.mayStore() ||
        HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
      continue;
    int64_t Offset0;
    LocationSize Size0 = LocationSize::precise(0);
    MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
    // If the access size is longer than the L1 cache line, skip the check.
    if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() ||
        Size0.getValue() >= 32)
      continue;
    // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
    for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
      SUnit &S1 = DAG->SUnits[j];
      MachineInstr &L1 = *S1.getInstr();
      if (!L1.mayLoad() || L1.mayStore() ||
          HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
        continue;
      int64_t Offset1;
      LocationSize Size1 = LocationSize::precise(0);
      MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
      if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size1.hasValue() ||
          Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg())
        continue;
      // Check bits 3 and 4 of the offset: if they differ, a bank conflict
      // is unlikely.
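      // (This assumes a 32-byte L1 line divided into 8-byte banks, so that
      // bits 3..4 of the offset select the bank -- hence the 0x18 mask.)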
      if (((Offset0 ^ Offset1) & 0x18) != 0)
        continue;
      // Bits 3 and 4 are the same, add an artificial edge and set extra
      // latency.
      SDep A(&S0, SDep::Artificial);
      A.setLatency(1);
      S1.addPred(A, true);
    }
  }
}


/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool HexagonSubtarget::useAA() const {
  if (OptLevel != CodeGenOptLevel::None)
    return true;
  return false;
}

/// Perform target specific adjustments to the latency of a schedule
/// dependency.
void HexagonSubtarget::adjustSchedDependency(
    SUnit *Src, int SrcOpIdx, SUnit *Dst, int DstOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!Src->isInstr() || !Dst->isInstr())
    return;

  MachineInstr *SrcInst = Src->getInstr();
  MachineInstr *DstInst = Dst->getInstr();
  const HexagonInstrInfo *QII = getInstrInfo();

  // Instructions with .new operands have zero latency.
  SmallPtrSet<SUnit *, 4> ExclSrc;
  SmallPtrSet<SUnit *, 4> ExclDst;
  if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }

  // Set the latency for a copy to zero since we hope that it will get
  // removed.
  if (DstInst->isCopy())
    Dep.setLatency(0);

  // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
  // the correct latency.
  // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency
  // only if the latencies on all the uses are equal, otherwise set it to
  // default.
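  // E.g. for %1 = COPY %0 with two users whose operand latencies from the
  // copy differ, DLatency resets to std::nullopt and the edge falls back
  // to latency 0 via value_or(0) below.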
  if ((DstInst->isRegSequence() || DstInst->isCopy())) {
    Register DReg = DstInst->getOperand(0).getReg();
    std::optional<unsigned> DLatency;
    for (const auto &DDep : Dst->Succs) {
      MachineInstr *DDst = DDep.getSUnit()->getInstr();
      int UseIdx = -1;
      for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
        const MachineOperand &MO = DDst->getOperand(OpNum);
        if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
          UseIdx = OpNum;
          break;
        }
      }

      if (UseIdx == -1)
        continue;

      std::optional<unsigned> Latency =
          InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);

      // Set DLatency for the first time.
      if (!DLatency)
        DLatency = Latency;

      // For multiple uses, if the Latency is different across uses, reset
      // DLatency.
      if (DLatency != Latency) {
        DLatency = std::nullopt;
        break;
      }
    }
    Dep.setLatency(DLatency.value_or(0));
  }

  // Try to schedule uses near definitions to generate .cur.
  ExclSrc.clear();
  ExclDst.clear();
  if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }
  int Latency = Dep.getLatency();
  bool IsArtificial = Dep.isArtificial();
  Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
  Dep.setLatency(Latency);
}

void HexagonSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
  Mutations.push_back(std::make_unique<BankConflictMutation>());
}

void HexagonSubtarget::getSMSMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
}

// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}

bool HexagonSubtarget::enableMachineScheduler() const {
  if (DisableHexagonMISched.getNumOccurrences())
    return !DisableHexagonMISched;
  return true;
}

bool HexagonSubtarget::usePredicatedCalls() const {
  return EnablePredicatedCalls;
}

int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
                                    MachineInstr &DstInst, bool IsArtificial,
                                    int Latency) const {
  if (IsArtificial)
    return 1;
  if (!hasV60Ops())
    return Latency;

  const HexagonInstrInfo &QII = *getInstrInfo();
  // BSB scheduling.
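  // Under back-skip-back scheduling (or for HVX vector instructions), halve
  // the latency, rounding up: e.g. 3 -> 2, 4 -> 2.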
  if (QII.isHVXVec(SrcInst) || useBSBScheduling())
    Latency = (Latency + 1) >> 1;
  return Latency;
}

void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
  MachineInstr *SrcI = Src->getInstr();
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    Register DepR = I.getReg();
    int DefIdx = -1;
    for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = SrcI->getOperand(OpNum);
      bool IsSameOrSubReg = false;
      if (MO.isReg()) {
        Register MOReg = MO.getReg();
        if (DepR.isVirtual()) {
          IsSameOrSubReg = (MOReg == DepR);
        } else {
          IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
        }
        if (MO.isDef() && IsSameOrSubReg)
          DefIdx = OpNum;
      }
    }
    assert(DefIdx >= 0 && "Def Reg not found in Src MI");
    MachineInstr *DstI = Dst->getInstr();
    SDep T = I;
    for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = DstI->getOperand(OpNum);
      if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
        std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
            &InstrItins, *SrcI, DefIdx, *DstI, OpNum);

        // For some instructions (ex: COPY), we might end up with < 0 latency
        // as they don't have any Itinerary class associated with them.
        if (!Latency)
          Latency = 0;
        bool IsArtificial = I.isArtificial();
        Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
        I.setLatency(*Latency);
      }
    }

    // Update the latency of opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(I.getLatency());
  }
}

/// Change the latency between the two SUnits.
void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
      const {
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    SDep T = I;
    I.setLatency(Lat);

    // Update the latency of opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(Lat);
  }
}

/// If the SUnit has a zero latency edge, return the other SUnit.
static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) {
  for (auto &I : Deps)
    if (I.isAssignedRegDep() && I.getLatency() == 0 &&
        !I.getSUnit()->getInstr()->isPseudo())
      return I.getSUnit();
  return nullptr;
}

// Return true if these are the best two instructions to schedule
// together with a zero latency. Only one dependence should have a zero
// latency. If there are multiple choices, choose the best, and change
// the others, if needed.
bool HexagonSubtarget::isBestZeroLatency(
    SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII,
    SmallPtrSet<SUnit *, 4> &ExclSrc, SmallPtrSet<SUnit *, 4> &ExclDst) const {
  MachineInstr &SrcInst = *Src->getInstr();
  MachineInstr &DstInst = *Dst->getInstr();

  // Ignore Boundary SU nodes as these have null instructions.
  if (Dst->isBoundaryNode())
    return false;

  if (SrcInst.isPHI() || DstInst.isPHI())
    return false;

  if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
      !TII->canExecuteInBundle(SrcInst, DstInst))
    return false;

  // The architecture doesn't allow three dependent instructions in the same
  // packet. So, if the destination has a zero latency successor, then it's
  // not a candidate for a zero latency predecessor.
  if (getZeroLatency(Dst, Dst->Succs) != nullptr)
    return false;

  // Check if the Dst instruction is the best candidate first.
  SUnit *Best = nullptr;
  SUnit *DstBest = nullptr;
  SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
  if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
    // Check that Src doesn't have a better candidate.
    DstBest = getZeroLatency(Src, Src->Succs);
    if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
      Best = Dst;
  }
  if (Best != Dst)
    return false;

  // The caller frequently adds the same dependence twice. If so, then
  // return true for this case too.
  if ((Src == SrcBest && Dst == DstBest) ||
      (SrcBest == nullptr && Dst == DstBest) ||
      (Src == SrcBest && Dst == nullptr))
    return true;

  // Reassign the latency for the previous bests, which requires setting
  // the dependence edge in both directions.
  if (SrcBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(SrcBest, Dst, 1);
    else
      restoreLatency(SrcBest, Dst);
  }
  if (DstBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(Src, DstBest, 1);
    else
      restoreLatency(Src, DstBest);
  }

  // Attempt to find another opportunity for zero latency in a different
  // dependence.
  if (SrcBest && DstBest)
    // If there is an edge from SrcBest to DstBest, then try to change that
    // to 0 now.
    changeLatency(SrcBest, DstBest, 0);
  else if (DstBest) {
    // Check if the previous best destination instruction has a new zero
    // latency dependence opportunity.
    ExclSrc.insert(Src);
    for (auto &I : DstBest->Preds)
      if (ExclSrc.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
        changeLatency(I.getSUnit(), DstBest, 0);
  } else if (SrcBest) {
    // Check if the previous best source instruction has a new zero latency
    // dependence opportunity.
    ExclDst.insert(Dst);
    for (auto &I : SrcBest->Succs)
      if (ExclDst.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
        changeLatency(SrcBest, I.getSUnit(), 0);
  }

  return true;
}

unsigned HexagonSubtarget::getL1CacheLineSize() const {
  return 32;
}

unsigned HexagonSubtarget::getL1PrefetchDistance() const {
  return 32;
}

bool HexagonSubtarget::enableSubRegLiveness() const { return true; }

Intrinsic::ID HexagonSubtarget::getIntrinsicId(unsigned Opc) const {
  struct Scalar {
    unsigned Opcode;
    Intrinsic::ID IntId;
  };
  struct Hvx {
    unsigned Opcode;
    Intrinsic::ID Int64Id, Int128Id;
  };

  static Scalar ScalarInts[] = {
#define GET_SCALAR_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_SCALAR_INTRINSICS
  };

  static Hvx HvxInts[] = {
#define GET_HVX_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_HVX_INTRINSICS
  };

  const auto CmpOpcode = [](auto A, auto B) { return A.Opcode < B.Opcode; };
  [[maybe_unused]] static bool SortedScalar =
      (llvm::sort(ScalarInts, CmpOpcode), true);
  [[maybe_unused]] static bool SortedHvx =
      (llvm::sort(HvxInts, CmpOpcode), true);
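  // The comma expressions above run llvm::sort exactly once, on first use,
  // leaving both tables sorted by opcode for the std::lower_bound searches
  // below.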

  auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
  auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));

  auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
  if (FoundScalar != ES && FoundScalar->Opcode == Opc)
    return FoundScalar->IntId;

  auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
  if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
    unsigned HwLen = getVectorLength();
    if (HwLen == 64)
      return FoundHvx->Int64Id;
    if (HwLen == 128)
      return FoundHvx->Int128Id;
  }

  std::string error = "Invalid opcode (" + std::to_string(Opc) + ")";
  llvm_unreachable(error.c_str());
  return 0;
}