LLVM 20.0.0git
HexagonSubtarget.cpp
Go to the documentation of this file.
1//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Hexagon specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#include "HexagonSubtarget.h"
14#include "HexagonInstrInfo.h"
15#include "HexagonRegisterInfo.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallSet.h"
20#include "llvm/ADT/StringRef.h"
26#include "llvm/IR/IntrinsicsHexagon.h"
30#include <algorithm>
31#include <cassert>
32#include <map>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "hexagon-subtarget"
38
39#define GET_SUBTARGETINFO_CTOR
40#define GET_SUBTARGETINFO_TARGET_DESC
41#include "HexagonGenSubtargetInfo.inc"
42
43static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
44 cl::init(true));
45
46static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
47 cl::init(false));
48
49static cl::opt<bool>
50 EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
51 cl::desc("Enable the scheduler to generate .cur"));
52
53static cl::opt<bool>
54 DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
55 cl::desc("Disable Hexagon MI Scheduling"));
56
58 "hexagon-long-calls", cl::Hidden,
59 cl::desc("If present, forces/disables the use of long calls"));
60
61static cl::opt<bool>
62 EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
63 cl::desc("Consider calls to be predicable"));
64
65static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
66 cl::init(true));
67
68static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
69 cl::Hidden, cl::init(true));
70
72 "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
73 cl::desc("Enable checking for cache bank conflicts"));
74
76 StringRef FS, const TargetMachine &TM)
77 : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
78 OptLevel(TM.getOptLevel()),
79 CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
80 TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
81 RegInfo(getHwMode()), TLInfo(TM, *this),
82 InstrItins(getInstrItineraryForCPU(CPUString)) {
84 // Beware of the default constructor of InstrItineraryData: it will
85 // reset all members to 0.
86 assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
87}
88
91 std::optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
92 if (ArchVer)
93 HexagonArchVersion = *ArchVer;
94 else
95 llvm_unreachable("Unrecognized Hexagon processor version");
96
97 UseHVX128BOps = false;
98 UseHVX64BOps = false;
99 UseAudioOps = false;
100 UseLongCalls = false;
101
102 SubtargetFeatures Features(FS);
103
104 // Turn on QFloat if the HVX version is v68+.
105 // The function ParseSubtargetFeatures will set feature bits and initialize
106 // subtarget's variables all in one, so there isn't a good way to preprocess
107 // the feature string, other than by tinkering with it directly.
108 auto IsQFloatFS = [](StringRef F) {
109 return F == "+hvx-qfloat" || F == "-hvx-qfloat";
110 };
111 if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
112 auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
113 for (StringRef F : llvm::reverse(Features.getFeatures())) {
114 if (F.starts_with("+hvxv"))
115 return F;
116 }
117 for (StringRef F : llvm::reverse(Features.getFeatures())) {
118 if (F == "-hvx")
119 return StringRef();
120 if (F.starts_with("+hvx") || F == "-hvx")
121 return F.take_front(4); // Return "+hvx" or "-hvx".
122 }
123 return StringRef();
124 };
125
126 bool AddQFloat = false;
127 StringRef HvxVer = getHvxVersion(FS);
128 if (HvxVer.starts_with("+hvxv")) {
129 int Ver = 0;
130 if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
131 AddQFloat = true;
132 } else if (HvxVer == "+hvx") {
133 if (hasV68Ops())
134 AddQFloat = true;
135 }
136
137 if (AddQFloat)
138 Features.AddFeature("+hvx-qfloat");
139 }
140
141 std::string FeatureString = Features.getString();
142 ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);
143
144 if (useHVXV68Ops())
145 UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;
146
147 if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
149 dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");
150
151 if (OverrideLongCalls.getPosition())
152 UseLongCalls = OverrideLongCalls;
153
155
156 if (isTinyCore()) {
157 // Tiny core has a single thread, so back-to-back scheduling is enabled by
158 // default.
159 if (!EnableBSBSched.getPosition())
160 UseBSBScheduling = false;
161 }
162
163 FeatureBitset FeatureBits = getFeatureBits();
165 setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
166 setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));
167
168 return *this;
169}
170
171bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
172 if (!useHVXOps())
173 return false;
174 if (Ty.isVector())
175 Ty = Ty.getVectorElementType();
176 if (IncludeBool && Ty == MVT::i1)
177 return true;
178 ArrayRef<MVT> ElemTypes = getHVXElementTypes();
179 return llvm::is_contained(ElemTypes, Ty);
180}
181
182bool HexagonSubtarget::isHVXVectorType(EVT VecTy, bool IncludeBool) const {
183 if (!VecTy.isSimple())
184 return false;
185 if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
186 return false;
187 MVT ElemTy = VecTy.getSimpleVT().getVectorElementType();
188 if (!IncludeBool && ElemTy == MVT::i1)
189 return false;
190
191 unsigned HwLen = getVectorLength();
192 unsigned NumElems = VecTy.getVectorNumElements();
193 ArrayRef<MVT> ElemTypes = getHVXElementTypes();
194
195 if (IncludeBool && ElemTy == MVT::i1) {
196 // Boolean HVX vector types are formed from regular HVX vector types
197 // by replacing the element type with i1.
198 for (MVT T : ElemTypes)
199 if (NumElems * T.getSizeInBits() == 8 * HwLen)
200 return true;
201 return false;
202 }
203
204 unsigned VecWidth = VecTy.getSizeInBits();
205 if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
206 return false;
207 return llvm::is_contained(ElemTypes, ElemTy);
208}
209
210bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
211 if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
212 return false;
213 // Avoid types like <2 x i32*>.
214 Type *ScalTy = VecTy->getScalarType();
215 if (!ScalTy->isIntegerTy() &&
216 !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
217 return false;
218 // The given type may be something like <17 x i32>, which is not MVT,
219 // but can be represented as (non-simple) EVT.
220 EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/false);
221 if (!Ty.getVectorElementType().isSimple())
222 return false;
223
224 auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
225 if (isHVXVectorType(SimpleTy, IncludeBool))
226 return true;
227 auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
229 };
230
231 // Round up EVT to have power-of-2 elements, and keep checking if it
232 // qualifies for HVX, dividing it in half after each step.
233 MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
234 unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
235 while (VecLen > 1) {
236 MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
237 if (SimpleTy.isValid() && isHvxTy(SimpleTy))
238 return true;
239 VecLen /= 2;
240 }
241
242 return false;
243}
244
246 for (SUnit &SU : DAG->SUnits) {
247 if (!SU.isInstr())
248 continue;
250 for (auto &D : SU.Preds)
251 if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
252 Erase.push_back(D);
253 for (auto &E : Erase)
254 SU.removePred(E);
255 }
256}
257
259 for (SUnit &SU : DAG->SUnits) {
260 // Update the latency of chain edges between v60 vector load or store
261 // instructions to be 1. These instruction cannot be scheduled in the
262 // same packet.
263 MachineInstr &MI1 = *SU.getInstr();
264 auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
265 bool IsStoreMI1 = MI1.mayStore();
266 bool IsLoadMI1 = MI1.mayLoad();
267 if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
268 continue;
269 for (SDep &SI : SU.Succs) {
270 if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
271 continue;
272 MachineInstr &MI2 = *SI.getSUnit()->getInstr();
273 if (!QII->isHVXVec(MI2))
274 continue;
275 if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
276 SI.setLatency(1);
277 SU.setHeightDirty();
278 // Change the dependence in the opposite direction too.
279 for (SDep &PI : SI.getSUnit()->Preds) {
280 if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
281 continue;
282 PI.setLatency(1);
283 SI.getSUnit()->setDepthDirty();
284 }
285 }
286 }
287 }
288}
289
290// Check if a call and subsequent A2_tfrpi instructions should maintain
291// scheduling affinity. We are looking for the TFRI to be consumed in
292// the next instruction. This should help reduce the instances of
293// double register pairs being allocated and scheduled before a call
294// when not used until after the call. This situation is exacerbated
295// by the fact that we allocate the pair from the callee saves list,
296// leading to excess spills and restores.
297bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
298 const HexagonInstrInfo &HII, const SUnit &Inst1,
299 const SUnit &Inst2) const {
300 if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
301 return false;
302
303 // TypeXTYPE are 64 bit operations.
304 unsigned Type = HII.getType(*Inst2.getInstr());
307}
308
310 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
311 SUnit* LastSequentialCall = nullptr;
312 // Map from virtual register to physical register from the copy.
313 DenseMap<unsigned, unsigned> VRegHoldingReg;
314 // Map from the physical register to the instruction that uses virtual
315 // register. This is used to create the barrier edge.
316 DenseMap<unsigned, SUnit *> LastVRegUse;
317 auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
318 auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
319
320 // Currently we only catch the situation when compare gets scheduled
321 // before preceding call.
322 for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
323 // Remember the call.
324 if (DAG->SUnits[su].getInstr()->isCall())
325 LastSequentialCall = &DAG->SUnits[su];
326 // Look for a compare that defines a predicate.
327 else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
328 DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
329 // Look for call and tfri* instructions.
330 else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
331 shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
332 DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
333 // Prevent redundant register copies due to reads and writes of physical
334 // registers. The original motivation for this was the code generated
335 // between two calls, which is caused by both the return value and the
336 // argument for the next call being in %r0.
337 // Example:
338 // 1: <call1>
339 // 2: %vreg = COPY %r0
340 // 3: <use of %vreg>
341 // 4: %r0 = ...
342 // 5: <call2>
343 // The scheduler would often swap 3 and 4, so an additional register is
344 // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
345 // this.
346 // The code below checks for all the physical registers, not just R0/D0/V0.
347 else if (SchedRetvalOptimization) {
348 const MachineInstr *MI = DAG->SUnits[su].getInstr();
349 if (MI->isCopy() && MI->getOperand(1).getReg().isPhysical()) {
350 // %vregX = COPY %r0
351 VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
352 LastVRegUse.erase(MI->getOperand(1).getReg());
353 } else {
354 for (const MachineOperand &MO : MI->operands()) {
355 if (!MO.isReg())
356 continue;
357 if (MO.isUse() && !MI->isCopy() &&
358 VRegHoldingReg.count(MO.getReg())) {
359 // <use of %vregX>
360 LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
361 } else if (MO.isDef() && MO.getReg().isPhysical()) {
362 for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
363 ++AI) {
364 if (LastVRegUse.count(*AI) &&
365 LastVRegUse[*AI] != &DAG->SUnits[su])
366 // %r0 = ...
367 DAG->addEdge(&DAG->SUnits[su], SDep(LastVRegUse[*AI], SDep::Barrier));
368 LastVRegUse.erase(*AI);
369 }
370 }
371 }
372 }
373 }
374 }
375}
376
379 return;
380
381 const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);
382
383 // Create artificial edges between loads that could likely cause a bank
384 // conflict. Since such loads would normally not have any dependency
385 // between them, we cannot rely on existing edges.
386 for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
387 SUnit &S0 = DAG->SUnits[i];
388 MachineInstr &L0 = *S0.getInstr();
389 if (!L0.mayLoad() || L0.mayStore() ||
391 continue;
392 int64_t Offset0;
393 LocationSize Size0 = 0;
394 MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
395 // If the access size is longer than the L1 cache line, skip the check.
396 if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() ||
397 Size0.getValue() >= 32)
398 continue;
399 // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
400 for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
401 SUnit &S1 = DAG->SUnits[j];
402 MachineInstr &L1 = *S1.getInstr();
403 if (!L1.mayLoad() || L1.mayStore() ||
405 continue;
406 int64_t Offset1;
407 LocationSize Size1 = 0;
408 MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
409 if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size0.hasValue() ||
410 Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg())
411 continue;
412 // Check bits 3 and 4 of the offset: if they differ, a bank conflict
413 // is unlikely.
414 if (((Offset0 ^ Offset1) & 0x18) != 0)
415 continue;
416 // Bits 3 and 4 are the same, add an artificial edge and set extra
417 // latency.
418 SDep A(&S0, SDep::Artificial);
419 A.setLatency(1);
420 S1.addPred(A, true);
421 }
422 }
423}
424
425/// Enable use of alias analysis during code generation (during MI
426/// scheduling, DAGCombine, etc.).
429 return true;
430 return false;
431}
432
433/// Perform target specific adjustments to the latency of a schedule
434/// dependency.
436 SUnit *Src, int SrcOpIdx, SUnit *Dst, int DstOpIdx, SDep &Dep,
437 const TargetSchedModel *SchedModel) const {
438 if (!Src->isInstr() || !Dst->isInstr())
439 return;
440
441 MachineInstr *SrcInst = Src->getInstr();
442 MachineInstr *DstInst = Dst->getInstr();
443 const HexagonInstrInfo *QII = getInstrInfo();
444
445 // Instructions with .new operands have zero latency.
446 SmallSet<SUnit *, 4> ExclSrc;
447 SmallSet<SUnit *, 4> ExclDst;
448 if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
449 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
450 Dep.setLatency(0);
451 return;
452 }
453
454 // Set the latency for a copy to zero since we hope that it will get
455 // removed.
456 if (DstInst->isCopy())
457 Dep.setLatency(0);
458
459 // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
460 // the correct latency.
461 // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency
462 // only if the latencies on all the uses are equal, otherwise set it to
463 // default.
464 if ((DstInst->isRegSequence() || DstInst->isCopy())) {
465 Register DReg = DstInst->getOperand(0).getReg();
466 std::optional<unsigned> DLatency;
467 for (const auto &DDep : Dst->Succs) {
468 MachineInstr *DDst = DDep.getSUnit()->getInstr();
469 int UseIdx = -1;
470 for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
471 const MachineOperand &MO = DDst->getOperand(OpNum);
472 if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
473 UseIdx = OpNum;
474 break;
475 }
476 }
477
478 if (UseIdx == -1)
479 continue;
480
481 std::optional<unsigned> Latency =
482 InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);
483
484 // Set DLatency for the first time.
485 if (!DLatency)
486 DLatency = Latency;
487
488 // For multiple uses, if the Latency is different across uses, reset
489 // DLatency.
490 if (DLatency != Latency) {
491 DLatency = std::nullopt;
492 break;
493 }
494 }
495 Dep.setLatency(DLatency.value_or(0));
496 }
497
498 // Try to schedule uses near definitions to generate .cur.
499 ExclSrc.clear();
500 ExclDst.clear();
501 if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
502 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
503 Dep.setLatency(0);
504 return;
505 }
506 int Latency = Dep.getLatency();
507 bool IsArtificial = Dep.isArtificial();
508 Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
509 Dep.setLatency(Latency);
510}
511
513 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
514 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
515 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
516 Mutations.push_back(std::make_unique<BankConflictMutation>());
517}
518
520 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
521 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
522 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
523}
524
525// Pin the vtable to this file.
526void HexagonSubtarget::anchor() {}
527
529 if (DisableHexagonMISched.getNumOccurrences())
530 return !DisableHexagonMISched;
531 return true;
532}
533
536}
537
538int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
539 MachineInstr &DstInst, bool IsArtificial,
540 int Latency) const {
541 if (IsArtificial)
542 return 1;
543 if (!hasV60Ops())
544 return Latency;
545
546 auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
547 // BSB scheduling.
548 if (QII.isHVXVec(SrcInst) || useBSBScheduling())
549 Latency = (Latency + 1) >> 1;
550 return Latency;
551}
552
553void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
554 MachineInstr *SrcI = Src->getInstr();
555 for (auto &I : Src->Succs) {
556 if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
557 continue;
558 Register DepR = I.getReg();
559 int DefIdx = -1;
560 for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
561 const MachineOperand &MO = SrcI->getOperand(OpNum);
562 bool IsSameOrSubReg = false;
563 if (MO.isReg()) {
564 Register MOReg = MO.getReg();
565 if (DepR.isVirtual()) {
566 IsSameOrSubReg = (MOReg == DepR);
567 } else {
568 IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
569 }
570 if (MO.isDef() && IsSameOrSubReg)
571 DefIdx = OpNum;
572 }
573 }
574 assert(DefIdx >= 0 && "Def Reg not found in Src MI");
575 MachineInstr *DstI = Dst->getInstr();
576 SDep T = I;
577 for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
578 const MachineOperand &MO = DstI->getOperand(OpNum);
579 if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
580 std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
581 &InstrItins, *SrcI, DefIdx, *DstI, OpNum);
582
583 // For some instructions (ex: COPY), we might end up with < 0 latency
584 // as they don't have any Itinerary class associated with them.
585 if (!Latency)
586 Latency = 0;
587 bool IsArtificial = I.isArtificial();
588 Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
589 I.setLatency(*Latency);
590 }
591 }
592
593 // Update the latency of opposite edge too.
594 T.setSUnit(Src);
595 auto F = find(Dst->Preds, T);
596 assert(F != Dst->Preds.end());
597 F->setLatency(I.getLatency());
598 }
599}
600
601/// Change the latency between the two SUnits.
602void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
603 const {
604 for (auto &I : Src->Succs) {
605 if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
606 continue;
607 SDep T = I;
608 I.setLatency(Lat);
609
610 // Update the latency of opposite edge too.
611 T.setSUnit(Src);
612 auto F = find(Dst->Preds, T);
613 assert(F != Dst->Preds.end());
614 F->setLatency(Lat);
615 }
616}
617
618/// If the SUnit has a zero latency edge, return the other SUnit.
620 for (auto &I : Deps)
621 if (I.isAssignedRegDep() && I.getLatency() == 0 &&
622 !I.getSUnit()->getInstr()->isPseudo())
623 return I.getSUnit();
624 return nullptr;
625}
626
627// Return true if these are the best two instructions to schedule
628// together with a zero latency. Only one dependence should have a zero
629// latency. If there are multiple choices, choose the best, and change
630// the others, if needed.
631bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
633 SmallSet<SUnit*, 4> &ExclDst) const {
634 MachineInstr &SrcInst = *Src->getInstr();
635 MachineInstr &DstInst = *Dst->getInstr();
636
637 // Ignore Boundary SU nodes as these have null instructions.
638 if (Dst->isBoundaryNode())
639 return false;
640
641 if (SrcInst.isPHI() || DstInst.isPHI())
642 return false;
643
644 if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
645 !TII->canExecuteInBundle(SrcInst, DstInst))
646 return false;
647
648 // The architecture doesn't allow three dependent instructions in the same
649 // packet. So, if the destination has a zero latency successor, then it's
650 // not a candidate for a zero latency predecessor.
651 if (getZeroLatency(Dst, Dst->Succs) != nullptr)
652 return false;
653
654 // Check if the Dst instruction is the best candidate first.
655 SUnit *Best = nullptr;
656 SUnit *DstBest = nullptr;
657 SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
658 if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
659 // Check that Src doesn't have a better candidate.
660 DstBest = getZeroLatency(Src, Src->Succs);
661 if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
662 Best = Dst;
663 }
664 if (Best != Dst)
665 return false;
666
667 // The caller frequently adds the same dependence twice. If so, then
668 // return true for this case too.
669 if ((Src == SrcBest && Dst == DstBest ) ||
670 (SrcBest == nullptr && Dst == DstBest) ||
671 (Src == SrcBest && Dst == nullptr))
672 return true;
673
674 // Reassign the latency for the previous bests, which requires setting
675 // the dependence edge in both directions.
676 if (SrcBest != nullptr) {
677 if (!hasV60Ops())
678 changeLatency(SrcBest, Dst, 1);
679 else
680 restoreLatency(SrcBest, Dst);
681 }
682 if (DstBest != nullptr) {
683 if (!hasV60Ops())
684 changeLatency(Src, DstBest, 1);
685 else
686 restoreLatency(Src, DstBest);
687 }
688
689 // Attempt to find another opportunity for zero latency in a different
690 // dependence.
691 if (SrcBest && DstBest)
692 // If there is an edge from SrcBest to DstBest, then try to change that
693 // to 0 now.
694 changeLatency(SrcBest, DstBest, 0);
695 else if (DstBest) {
696 // Check if the previous best destination instruction has a new zero
697 // latency dependence opportunity.
698 ExclSrc.insert(Src);
699 for (auto &I : DstBest->Preds)
700 if (ExclSrc.count(I.getSUnit()) == 0 &&
701 isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
702 changeLatency(I.getSUnit(), DstBest, 0);
703 } else if (SrcBest) {
704 // Check if previous best source instruction has a new zero latency
705 // dependence opportunity.
706 ExclDst.insert(Dst);
707 for (auto &I : SrcBest->Succs)
708 if (ExclDst.count(I.getSUnit()) == 0 &&
709 isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
710 changeLatency(SrcBest, I.getSUnit(), 0);
711 }
712
713 return true;
714}
715
717 return 32;
718}
719
721 return 32;
722}
723
724bool HexagonSubtarget::enableSubRegLiveness() const { return true; }
725
727 struct Scalar {
728 unsigned Opcode;
729 Intrinsic::ID IntId;
730 };
731 struct Hvx {
732 unsigned Opcode;
733 Intrinsic::ID Int64Id, Int128Id;
734 };
735
736 static Scalar ScalarInts[] = {
737#define GET_SCALAR_INTRINSICS
739#undef GET_SCALAR_INTRINSICS
740 };
741
742 static Hvx HvxInts[] = {
743#define GET_HVX_INTRINSICS
745#undef GET_HVX_INTRINSICS
746 };
747
748 const auto CmpOpcode = [](auto A, auto B) { return A.Opcode < B.Opcode; };
749 [[maybe_unused]] static bool SortedScalar =
750 (llvm::sort(ScalarInts, CmpOpcode), true);
751 [[maybe_unused]] static bool SortedHvx =
752 (llvm::sort(HvxInts, CmpOpcode), true);
753
754 auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
755 auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));
756
757 auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
758 if (FoundScalar != ES && FoundScalar->Opcode == Opc)
759 return FoundScalar->IntId;
760
761 auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
762 if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
763 unsigned HwLen = getVectorLength();
764 if (HwLen == 64)
765 return FoundHvx->Int64Id;
766 if (HwLen == 128)
767 return FoundHvx->Int128Id;
768 }
769
770 std::string error = "Invalid opcode (" + std::to_string(Opc) + ")";
771 llvm_unreachable(error.c_str());
772 return 0;
773}
static const LLT S1
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(...)
Definition: Debug.h:106
const HexagonInstrInfo * TII
static cl::opt< bool > DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::desc("Disable Hexagon MI Scheduling"))
static cl::opt< bool > EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true), cl::desc("Enable the scheduler to generate .cur"))
static cl::opt< bool > EnableCheckBankConflict("hexagon-check-bank-conflict", cl::Hidden, cl::init(true), cl::desc("Enable checking for cache bank conflicts"))
static cl::opt< bool > OverrideLongCalls("hexagon-long-calls", cl::Hidden, cl::desc("If present, forces/disables the use of long calls"))
static cl::opt< bool > SchedPredsCloser("sched-preds-closer", cl::Hidden, cl::init(true))
static cl::opt< bool > SchedRetvalOptimization("sched-retval-optimization", cl::Hidden, cl::init(true))
static cl::opt< bool > EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden, cl::init(false))
static cl::opt< bool > EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::init(true))
static SUnit * getZeroLatency(SUnit *N, SmallVector< SDep, 4 > &Deps)
If the SUnit has a zero latency edge, return the other SUnit.
static cl::opt< bool > EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden, cl::desc("Consider calls to be predicable"))
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
#define error(X)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
Container class for subtarget features.
constexpr FeatureBitset & reset(unsigned I)
unsigned getAddrMode(const MachineInstr &MI) const
bool canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const
Can these instructions execute at the same time in a bundle.
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
getOperandLatency - Compute and return the use operand latency of a given pair of def and use.
bool isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const
MachineOperand * getBaseAndOffset(const MachineInstr &MI, int64_t &Offset, LocationSize &AccessSize) const
uint64_t getType(const MachineInstr &MI) const
Hexagon::ArchEnum HexagonArchVersion
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
Perform target specific adjustments to the latency of a schedule dependency.
const HexagonInstrInfo * getInstrInfo() const override
const HexagonRegisterInfo * getRegisterInfo() const override
void getSMSMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
const HexagonTargetLowering * getTargetLowering() const override
bool UseBSBScheduling
True if the target should use Back-Skip-Back scheduling.
unsigned getL1PrefetchDistance() const
ArrayRef< MVT > getHVXElementTypes() const
bool useHVXFloatingPoint() const
bool enableSubRegLiveness() const override
CodeGenOptLevel OptLevel
unsigned getVectorLength() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
ParseSubtargetFeatures - Parses features string setting specified subtarget options.
unsigned getL1CacheLineSize() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
Intrinsic::ID getIntrinsicId(unsigned Opc) const
HexagonSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
bool enableMachineScheduler() const override
bool isHVXElementType(MVT Ty, bool IncludeBool=false) const
bool useAA() const override
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
const InstrItinerary * Itineraries
Array of itineraries selected.
bool hasValue() const
TypeSize getValue() const
MCRegAliasIterator enumerates all registers aliasing Reg.
Machine Value Type.
bool isVector() const
Return true if this is a vector value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isValid() const
Return true if this is a valid simple valuetype.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
bool isCopy() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:578
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isRegSequence() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Scheduling dependency.
Definition: ScheduleDAG.h:49
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
Definition: ScheduleDAG.h:378
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:270
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:263
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390
A ScheduleDAG for scheduling lists of MachineInstr.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
void clear()
Definition: SmallSet.h:204
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:499
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:609
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
std::string getString() const
Returns features as a string.
void AddFeature(StringRef String, bool Enable=true)
Adds Features.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS)
FeatureBitset completeHVXFeatures(const FeatureBitset &FB)
std::optional< Hexagon::ArchEnum > getCpu(StringRef CPU)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
cl::opt< bool > HexagonDisableDuplex
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
Extended Value Type.
Definition: ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override