LLVM 19.0.0git
HexagonSubtarget.cpp
Go to the documentation of this file.
1//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Hexagon specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
#include "HexagonSubtarget.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <map>
#include <optional>
35
36using namespace llvm;
37
38#define DEBUG_TYPE "hexagon-subtarget"
39
40#define GET_SUBTARGETINFO_CTOR
41#define GET_SUBTARGETINFO_TARGET_DESC
42#include "HexagonGenSubtargetInfo.inc"
43
44static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
45 cl::init(true));
46
47static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
48 cl::init(false));
49
50static cl::opt<bool>
51 EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
52 cl::desc("Enable the scheduler to generate .cur"));
53
54static cl::opt<bool>
55 DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
56 cl::desc("Disable Hexagon MI Scheduling"));
57
59 "hexagon-subreg-liveness", cl::Hidden, cl::init(true),
60 cl::desc("Enable subregister liveness tracking for Hexagon"));
61
63 "hexagon-long-calls", cl::Hidden,
64 cl::desc("If present, forces/disables the use of long calls"));
65
66static cl::opt<bool>
67 EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
68 cl::desc("Consider calls to be predicable"));
69
70static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
71 cl::init(true));
72
73static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
74 cl::Hidden, cl::init(true));
75
77 "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
78 cl::desc("Enable checking for cache bank conflicts"));
79
81 StringRef FS, const TargetMachine &TM)
82 : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
83 OptLevel(TM.getOptLevel()),
84 CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
85 TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
86 RegInfo(getHwMode()), TLInfo(TM, *this),
87 InstrItins(getInstrItineraryForCPU(CPUString)) {
89 // Beware of the default constructor of InstrItineraryData: it will
90 // reset all members to 0.
91 assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
92}
93
96 std::optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
97 if (ArchVer)
98 HexagonArchVersion = *ArchVer;
99 else
100 llvm_unreachable("Unrecognized Hexagon processor version");
101
102 UseHVX128BOps = false;
103 UseHVX64BOps = false;
104 UseAudioOps = false;
105 UseLongCalls = false;
106
107 SubtargetFeatures Features(FS);
108
109 // Turn on QFloat if the HVX version is v68+.
110 // The function ParseSubtargetFeatures will set feature bits and initialize
111 // subtarget's variables all in one, so there isn't a good way to preprocess
112 // the feature string, other than by tinkering with it directly.
113 auto IsQFloatFS = [](StringRef F) {
114 return F == "+hvx-qfloat" || F == "-hvx-qfloat";
115 };
116 if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
117 auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
118 for (StringRef F : llvm::reverse(Features.getFeatures())) {
119 if (F.starts_with("+hvxv"))
120 return F;
121 }
122 for (StringRef F : llvm::reverse(Features.getFeatures())) {
123 if (F == "-hvx")
124 return StringRef();
125 if (F.starts_with("+hvx") || F == "-hvx")
126 return F.take_front(4); // Return "+hvx" or "-hvx".
127 }
128 return StringRef();
129 };
130
131 bool AddQFloat = false;
132 StringRef HvxVer = getHvxVersion(FS);
133 if (HvxVer.starts_with("+hvxv")) {
134 int Ver = 0;
135 if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
136 AddQFloat = true;
137 } else if (HvxVer == "+hvx") {
138 if (hasV68Ops())
139 AddQFloat = true;
140 }
141
142 if (AddQFloat)
143 Features.AddFeature("+hvx-qfloat");
144 }
145
146 std::string FeatureString = Features.getString();
147 ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);
148
149 if (useHVXV68Ops())
150 UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;
151
152 if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
154 dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");
155
156 if (OverrideLongCalls.getPosition())
157 UseLongCalls = OverrideLongCalls;
158
160
161 if (isTinyCore()) {
162 // Tiny core has a single thread, so back-to-back scheduling is enabled by
163 // default.
164 if (!EnableBSBSched.getPosition())
165 UseBSBScheduling = false;
166 }
167
168 FeatureBitset FeatureBits = getFeatureBits();
170 setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
171 setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));
172
173 return *this;
174}
175
176bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
177 if (!useHVXOps())
178 return false;
179 if (Ty.isVector())
180 Ty = Ty.getVectorElementType();
181 if (IncludeBool && Ty == MVT::i1)
182 return true;
183 ArrayRef<MVT> ElemTypes = getHVXElementTypes();
184 return llvm::is_contained(ElemTypes, Ty);
185}
186
187bool HexagonSubtarget::isHVXVectorType(EVT VecTy, bool IncludeBool) const {
188 if (!VecTy.isSimple())
189 return false;
190 if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
191 return false;
192 MVT ElemTy = VecTy.getSimpleVT().getVectorElementType();
193 if (!IncludeBool && ElemTy == MVT::i1)
194 return false;
195
196 unsigned HwLen = getVectorLength();
197 unsigned NumElems = VecTy.getVectorNumElements();
198 ArrayRef<MVT> ElemTypes = getHVXElementTypes();
199
200 if (IncludeBool && ElemTy == MVT::i1) {
201 // Boolean HVX vector types are formed from regular HVX vector types
202 // by replacing the element type with i1.
203 for (MVT T : ElemTypes)
204 if (NumElems * T.getSizeInBits() == 8 * HwLen)
205 return true;
206 return false;
207 }
208
209 unsigned VecWidth = VecTy.getSizeInBits();
210 if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
211 return false;
212 return llvm::is_contained(ElemTypes, ElemTy);
213}
214
215bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
216 if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
217 return false;
218 // Avoid types like <2 x i32*>.
219 Type *ScalTy = VecTy->getScalarType();
220 if (!ScalTy->isIntegerTy() &&
221 !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
222 return false;
223 // The given type may be something like <17 x i32>, which is not MVT,
224 // but can be represented as (non-simple) EVT.
225 EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/false);
226 if (!Ty.getVectorElementType().isSimple())
227 return false;
228
229 auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
230 if (isHVXVectorType(SimpleTy, IncludeBool))
231 return true;
232 auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
234 };
235
236 // Round up EVT to have power-of-2 elements, and keep checking if it
237 // qualifies for HVX, dividing it in half after each step.
238 MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
239 unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
240 while (VecLen > 1) {
241 MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
242 if (SimpleTy.isValid() && isHvxTy(SimpleTy))
243 return true;
244 VecLen /= 2;
245 }
246
247 return false;
248}
249
251 for (SUnit &SU : DAG->SUnits) {
252 if (!SU.isInstr())
253 continue;
255 for (auto &D : SU.Preds)
256 if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
257 Erase.push_back(D);
258 for (auto &E : Erase)
259 SU.removePred(E);
260 }
261}
262
264 for (SUnit &SU : DAG->SUnits) {
265 // Update the latency of chain edges between v60 vector load or store
266 // instructions to be 1. These instruction cannot be scheduled in the
267 // same packet.
268 MachineInstr &MI1 = *SU.getInstr();
269 auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
270 bool IsStoreMI1 = MI1.mayStore();
271 bool IsLoadMI1 = MI1.mayLoad();
272 if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
273 continue;
274 for (SDep &SI : SU.Succs) {
275 if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
276 continue;
277 MachineInstr &MI2 = *SI.getSUnit()->getInstr();
278 if (!QII->isHVXVec(MI2))
279 continue;
280 if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
281 SI.setLatency(1);
282 SU.setHeightDirty();
283 // Change the dependence in the opposite direction too.
284 for (SDep &PI : SI.getSUnit()->Preds) {
285 if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
286 continue;
287 PI.setLatency(1);
288 SI.getSUnit()->setDepthDirty();
289 }
290 }
291 }
292 }
293}
294
295// Check if a call and subsequent A2_tfrpi instructions should maintain
296// scheduling affinity. We are looking for the TFRI to be consumed in
297// the next instruction. This should help reduce the instances of
298// double register pairs being allocated and scheduled before a call
299// when not used until after the call. This situation is exacerbated
300// by the fact that we allocate the pair from the callee saves list,
301// leading to excess spills and restores.
302bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
303 const HexagonInstrInfo &HII, const SUnit &Inst1,
304 const SUnit &Inst2) const {
305 if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
306 return false;
307
308 // TypeXTYPE are 64 bit operations.
309 unsigned Type = HII.getType(*Inst2.getInstr());
312}
313
315 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
316 SUnit* LastSequentialCall = nullptr;
317 // Map from virtual register to physical register from the copy.
318 DenseMap<unsigned, unsigned> VRegHoldingReg;
319 // Map from the physical register to the instruction that uses virtual
320 // register. This is used to create the barrier edge.
321 DenseMap<unsigned, SUnit *> LastVRegUse;
322 auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
323 auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
324
325 // Currently we only catch the situation when compare gets scheduled
326 // before preceding call.
327 for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
328 // Remember the call.
329 if (DAG->SUnits[su].getInstr()->isCall())
330 LastSequentialCall = &DAG->SUnits[su];
331 // Look for a compare that defines a predicate.
332 else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
333 DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
334 // Look for call and tfri* instructions.
335 else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
336 shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
337 DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
338 // Prevent redundant register copies due to reads and writes of physical
339 // registers. The original motivation for this was the code generated
340 // between two calls, which are caused both the return value and the
341 // argument for the next call being in %r0.
342 // Example:
343 // 1: <call1>
344 // 2: %vreg = COPY %r0
345 // 3: <use of %vreg>
346 // 4: %r0 = ...
347 // 5: <call2>
348 // The scheduler would often swap 3 and 4, so an additional register is
349 // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
350 // this.
351 // The code below checks for all the physical registers, not just R0/D0/V0.
352 else if (SchedRetvalOptimization) {
353 const MachineInstr *MI = DAG->SUnits[su].getInstr();
354 if (MI->isCopy() && MI->getOperand(1).getReg().isPhysical()) {
355 // %vregX = COPY %r0
356 VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
357 LastVRegUse.erase(MI->getOperand(1).getReg());
358 } else {
359 for (const MachineOperand &MO : MI->operands()) {
360 if (!MO.isReg())
361 continue;
362 if (MO.isUse() && !MI->isCopy() &&
363 VRegHoldingReg.count(MO.getReg())) {
364 // <use of %vregX>
365 LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
366 } else if (MO.isDef() && MO.getReg().isPhysical()) {
367 for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
368 ++AI) {
369 if (LastVRegUse.count(*AI) &&
370 LastVRegUse[*AI] != &DAG->SUnits[su])
371 // %r0 = ...
372 DAG->addEdge(&DAG->SUnits[su], SDep(LastVRegUse[*AI], SDep::Barrier));
373 LastVRegUse.erase(*AI);
374 }
375 }
376 }
377 }
378 }
379 }
380}
381
384 return;
385
386 const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);
387
388 // Create artificial edges between loads that could likely cause a bank
389 // conflict. Since such loads would normally not have any dependency
390 // between them, we cannot rely on existing edges.
391 for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
392 SUnit &S0 = DAG->SUnits[i];
393 MachineInstr &L0 = *S0.getInstr();
394 if (!L0.mayLoad() || L0.mayStore() ||
396 continue;
397 int64_t Offset0;
398 LocationSize Size0 = 0;
399 MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
400 // Is the access size is longer than the L1 cache line, skip the check.
401 if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() ||
402 Size0.getValue() >= 32)
403 continue;
404 // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
405 for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
406 SUnit &S1 = DAG->SUnits[j];
407 MachineInstr &L1 = *S1.getInstr();
408 if (!L1.mayLoad() || L1.mayStore() ||
410 continue;
411 int64_t Offset1;
412 LocationSize Size1 = 0;
413 MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
414 if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size0.hasValue() ||
415 Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg())
416 continue;
417 // Check bits 3 and 4 of the offset: if they differ, a bank conflict
418 // is unlikely.
419 if (((Offset0 ^ Offset1) & 0x18) != 0)
420 continue;
421 // Bits 3 and 4 are the same, add an artificial edge and set extra
422 // latency.
423 SDep A(&S0, SDep::Artificial);
424 A.setLatency(1);
425 S1.addPred(A, true);
426 }
427 }
428}
429
430/// Enable use of alias analysis during code generation (during MI
431/// scheduling, DAGCombine, etc.).
434 return true;
435 return false;
436}
437
438/// Perform target specific adjustments to the latency of a schedule
439/// dependency.
441 SUnit *Dst, int DstOpIdx,
442 SDep &Dep) const {
443 if (!Src->isInstr() || !Dst->isInstr())
444 return;
445
446 MachineInstr *SrcInst = Src->getInstr();
447 MachineInstr *DstInst = Dst->getInstr();
448 const HexagonInstrInfo *QII = getInstrInfo();
449
450 // Instructions with .new operands have zero latency.
451 SmallSet<SUnit *, 4> ExclSrc;
452 SmallSet<SUnit *, 4> ExclDst;
453 if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
454 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
455 Dep.setLatency(0);
456 return;
457 }
458
459 // Set the latency for a copy to zero since we hope that is will get
460 // removed.
461 if (DstInst->isCopy())
462 Dep.setLatency(0);
463
464 // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
465 // the correct latency.
466 // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency
467 // only if the latencies on all the uses are equal, otherwise set it to
468 // default.
469 if ((DstInst->isRegSequence() || DstInst->isCopy())) {
470 Register DReg = DstInst->getOperand(0).getReg();
471 std::optional<unsigned> DLatency;
472 for (const auto &DDep : Dst->Succs) {
473 MachineInstr *DDst = DDep.getSUnit()->getInstr();
474 int UseIdx = -1;
475 for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
476 const MachineOperand &MO = DDst->getOperand(OpNum);
477 if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
478 UseIdx = OpNum;
479 break;
480 }
481 }
482
483 if (UseIdx == -1)
484 continue;
485
486 std::optional<unsigned> Latency =
487 InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);
488
489 // Set DLatency for the first time.
490 if (!DLatency)
491 DLatency = Latency;
492
493 // For multiple uses, if the Latency is different across uses, reset
494 // DLatency.
495 if (DLatency != Latency) {
496 DLatency = std::nullopt;
497 break;
498 }
499 }
500 Dep.setLatency(DLatency ? *DLatency : 0);
501 }
502
503 // Try to schedule uses near definitions to generate .cur.
504 ExclSrc.clear();
505 ExclDst.clear();
506 if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
507 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
508 Dep.setLatency(0);
509 return;
510 }
511 int Latency = Dep.getLatency();
512 bool IsArtificial = Dep.isArtificial();
513 Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
514 Dep.setLatency(Latency);
515}
516
518 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
519 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
520 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
521 Mutations.push_back(std::make_unique<BankConflictMutation>());
522}
523
525 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
526 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
527 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
528}
529
530// Pin the vtable to this file.
531void HexagonSubtarget::anchor() {}
532
534 if (DisableHexagonMISched.getNumOccurrences())
535 return !DisableHexagonMISched;
536 return true;
537}
538
541}
542
543int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
544 MachineInstr &DstInst, bool IsArtificial,
545 int Latency) const {
546 if (IsArtificial)
547 return 1;
548 if (!hasV60Ops())
549 return Latency;
550
551 auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
552 // BSB scheduling.
553 if (QII.isHVXVec(SrcInst) || useBSBScheduling())
554 Latency = (Latency + 1) >> 1;
555 return Latency;
556}
557
558void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
559 MachineInstr *SrcI = Src->getInstr();
560 for (auto &I : Src->Succs) {
561 if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
562 continue;
563 Register DepR = I.getReg();
564 int DefIdx = -1;
565 for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
566 const MachineOperand &MO = SrcI->getOperand(OpNum);
567 bool IsSameOrSubReg = false;
568 if (MO.isReg()) {
569 Register MOReg = MO.getReg();
570 if (DepR.isVirtual()) {
571 IsSameOrSubReg = (MOReg == DepR);
572 } else {
573 IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
574 }
575 if (MO.isDef() && IsSameOrSubReg)
576 DefIdx = OpNum;
577 }
578 }
579 assert(DefIdx >= 0 && "Def Reg not found in Src MI");
580 MachineInstr *DstI = Dst->getInstr();
581 SDep T = I;
582 for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
583 const MachineOperand &MO = DstI->getOperand(OpNum);
584 if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
585 std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
586 &InstrItins, *SrcI, DefIdx, *DstI, OpNum);
587
588 // For some instructions (ex: COPY), we might end up with < 0 latency
589 // as they don't have any Itinerary class associated with them.
590 if (!Latency)
591 Latency = 0;
592 bool IsArtificial = I.isArtificial();
593 Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
594 I.setLatency(*Latency);
595 }
596 }
597
598 // Update the latency of opposite edge too.
599 T.setSUnit(Src);
600 auto F = find(Dst->Preds, T);
601 assert(F != Dst->Preds.end());
602 F->setLatency(I.getLatency());
603 }
604}
605
606/// Change the latency between the two SUnits.
607void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
608 const {
609 for (auto &I : Src->Succs) {
610 if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
611 continue;
612 SDep T = I;
613 I.setLatency(Lat);
614
615 // Update the latency of opposite edge too.
616 T.setSUnit(Src);
617 auto F = find(Dst->Preds, T);
618 assert(F != Dst->Preds.end());
619 F->setLatency(Lat);
620 }
621}
622
623/// If the SUnit has a zero latency edge, return the other SUnit.
625 for (auto &I : Deps)
626 if (I.isAssignedRegDep() && I.getLatency() == 0 &&
627 !I.getSUnit()->getInstr()->isPseudo())
628 return I.getSUnit();
629 return nullptr;
630}
631
632// Return true if these are the best two instructions to schedule
633// together with a zero latency. Only one dependence should have a zero
634// latency. If there are multiple choices, choose the best, and change
635// the others, if needed.
636bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
638 SmallSet<SUnit*, 4> &ExclDst) const {
639 MachineInstr &SrcInst = *Src->getInstr();
640 MachineInstr &DstInst = *Dst->getInstr();
641
642 // Ignore Boundary SU nodes as these have null instructions.
643 if (Dst->isBoundaryNode())
644 return false;
645
646 if (SrcInst.isPHI() || DstInst.isPHI())
647 return false;
648
649 if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
650 !TII->canExecuteInBundle(SrcInst, DstInst))
651 return false;
652
653 // The architecture doesn't allow three dependent instructions in the same
654 // packet. So, if the destination has a zero latency successor, then it's
655 // not a candidate for a zero latency predecessor.
656 if (getZeroLatency(Dst, Dst->Succs) != nullptr)
657 return false;
658
659 // Check if the Dst instruction is the best candidate first.
660 SUnit *Best = nullptr;
661 SUnit *DstBest = nullptr;
662 SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
663 if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
664 // Check that Src doesn't have a better candidate.
665 DstBest = getZeroLatency(Src, Src->Succs);
666 if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
667 Best = Dst;
668 }
669 if (Best != Dst)
670 return false;
671
672 // The caller frequently adds the same dependence twice. If so, then
673 // return true for this case too.
674 if ((Src == SrcBest && Dst == DstBest ) ||
675 (SrcBest == nullptr && Dst == DstBest) ||
676 (Src == SrcBest && Dst == nullptr))
677 return true;
678
679 // Reassign the latency for the previous bests, which requires setting
680 // the dependence edge in both directions.
681 if (SrcBest != nullptr) {
682 if (!hasV60Ops())
683 changeLatency(SrcBest, Dst, 1);
684 else
685 restoreLatency(SrcBest, Dst);
686 }
687 if (DstBest != nullptr) {
688 if (!hasV60Ops())
689 changeLatency(Src, DstBest, 1);
690 else
691 restoreLatency(Src, DstBest);
692 }
693
694 // Attempt to find another opprotunity for zero latency in a different
695 // dependence.
696 if (SrcBest && DstBest)
697 // If there is an edge from SrcBest to DstBst, then try to change that
698 // to 0 now.
699 changeLatency(SrcBest, DstBest, 0);
700 else if (DstBest) {
701 // Check if the previous best destination instruction has a new zero
702 // latency dependence opportunity.
703 ExclSrc.insert(Src);
704 for (auto &I : DstBest->Preds)
705 if (ExclSrc.count(I.getSUnit()) == 0 &&
706 isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
707 changeLatency(I.getSUnit(), DstBest, 0);
708 } else if (SrcBest) {
709 // Check if previous best source instruction has a new zero latency
710 // dependence opportunity.
711 ExclDst.insert(Dst);
712 for (auto &I : SrcBest->Succs)
713 if (ExclDst.count(I.getSUnit()) == 0 &&
714 isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
715 changeLatency(SrcBest, I.getSUnit(), 0);
716 }
717
718 return true;
719}
720
722 return 32;
723}
724
726 return 32;
727}
728
731}
732
734 struct Scalar {
735 unsigned Opcode;
736 Intrinsic::ID IntId;
737 };
738 struct Hvx {
739 unsigned Opcode;
740 Intrinsic::ID Int64Id, Int128Id;
741 };
742
743 static Scalar ScalarInts[] = {
744#define GET_SCALAR_INTRINSICS
746#undef GET_SCALAR_INTRINSICS
747 };
748
749 static Hvx HvxInts[] = {
750#define GET_HVX_INTRINSICS
752#undef GET_HVX_INTRINSICS
753 };
754
755 const auto CmpOpcode = [](auto A, auto B) { return A.Opcode < B.Opcode; };
756 [[maybe_unused]] static bool SortedScalar =
757 (llvm::sort(ScalarInts, CmpOpcode), true);
758 [[maybe_unused]] static bool SortedHvx =
759 (llvm::sort(HvxInts, CmpOpcode), true);
760
761 auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
762 auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));
763
764 auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
765 if (FoundScalar != ES && FoundScalar->Opcode == Opc)
766 return FoundScalar->IntId;
767
768 auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
769 if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
770 unsigned HwLen = getVectorLength();
771 if (HwLen == 64)
772 return FoundHvx->Int64Id;
773 if (HwLen == 128)
774 return FoundHvx->Int128Id;
775 }
776
777 std::string error = "Invalid opcode (" + std::to_string(Opc) + ")";
778 llvm_unreachable(error.c_str());
779 return 0;
780}
static const LLT S1
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
static cl::opt< bool > DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::desc("Disable Hexagon MI Scheduling"))
static cl::opt< bool > EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true), cl::desc("Enable the scheduler to generate .cur"))
static cl::opt< bool > EnableCheckBankConflict("hexagon-check-bank-conflict", cl::Hidden, cl::init(true), cl::desc("Enable checking for cache bank conflicts"))
static cl::opt< bool > EnableSubregLiveness("hexagon-subreg-liveness", cl::Hidden, cl::init(true), cl::desc("Enable subregister liveness tracking for Hexagon"))
static cl::opt< bool > OverrideLongCalls("hexagon-long-calls", cl::Hidden, cl::desc("If present, forces/disables the use of long calls"))
static cl::opt< bool > SchedPredsCloser("sched-preds-closer", cl::Hidden, cl::init(true))
static cl::opt< bool > SchedRetvalOptimization("sched-retval-optimization", cl::Hidden, cl::init(true))
static cl::opt< bool > EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden, cl::init(false))
static cl::opt< bool > EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::init(true))
static SUnit * getZeroLatency(SUnit *N, SmallVector< SDep, 4 > &Deps)
If the SUnit has a zero latency edge, return the other SUnit.
static cl::opt< bool > EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden, cl::desc("Consider calls to be predicable"))
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
#define error(X)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Container class for subtarget features.
constexpr FeatureBitset & reset(unsigned I)
unsigned getAddrMode(const MachineInstr &MI) const
bool canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const
Can these instructions execute at the same time in a bundle.
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
getOperandLatency - Compute and return the use operand latency of a given pair of def and use.
bool isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const
MachineOperand * getBaseAndOffset(const MachineInstr &MI, int64_t &Offset, LocationSize &AccessSize) const
uint64_t getType(const MachineInstr &MI) const
Hexagon::ArchEnum HexagonArchVersion
const HexagonInstrInfo * getInstrInfo() const override
const HexagonRegisterInfo * getRegisterInfo() const override
void getSMSMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
const HexagonTargetLowering * getTargetLowering() const override
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Perform target specific adjustments to the latency of a schedule dependency.
bool UseBSBScheduling
True if the target should use Back-Skip-Back scheduling.
unsigned getL1PrefetchDistance() const
ArrayRef< MVT > getHVXElementTypes() const
bool useHVXFloatingPoint() const
bool enableSubRegLiveness() const override
CodeGenOptLevel OptLevel
unsigned getVectorLength() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
ParseSubtargetFeatures - Parses features string setting specified subtarget options.
unsigned getL1CacheLineSize() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
Intrinsic::ID getIntrinsicId(unsigned Opc) const
HexagonSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
bool enableMachineScheduler() const override
bool isHVXElementType(MVT Ty, bool IncludeBool=false) const
bool useAA() const override
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
const InstrItinerary * Itineraries
Array of itineraries selected.
bool hasValue() const
TypeSize getValue() const
MCRegAliasIterator enumerates all registers aliasing Reg.
Machine Value Type.
bool isVector() const
Return true if this is a vector value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isValid() const
Return true if this is a valid simple valuetype.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
bool isCopy() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:549
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isRegSequence() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Scheduling dependency.
Definition: ScheduleDAG.h:49
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
Definition: ScheduleDAG.h:362
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
A ScheduleDAG for scheduling lists of MachineInstr.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:495
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:605
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
std::string getString() const
Returns features as a string.
void AddFeature(StringRef String, bool Enable=true)
Adds Features.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS)
FeatureBitset completeHVXFeatures(const FeatureBitset &FB)
std::optional< Hexagon::ArchEnum > getCpu(StringRef CPU)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:361
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1930
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
cl::opt< bool > HexagonDisableDuplex
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
Extended Value Type.
Definition: ValueTypes.h:34
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:628
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override