23#define DEBUG_TYPE "llvm-mca-instrbuilder"
35 : STI(sti), MCII(mcii),
MRI(mri), MCIA(mcia), IM(
im), FirstCallInst(
true),
36 FirstReturnInst(
true) {
49 using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
65 APInt Buffers(NumProcResources, 0);
67 bool AllInOrderResources =
true;
68 bool AnyDispatchHazards =
false;
75 <<
"Ignoring invalid write of zero cycles on processor resource "
78 <<
" (write index #" <<
I <<
")\n";
85 AllInOrderResources =
false;
100 ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
104 sort(Worklist, [](
const ResourcePlusCycles &
A,
const ResourcePlusCycles &
B) {
107 if (popcntA < popcntB)
109 if (popcntA > popcntB)
111 return A.first <
B.first;
116 uint64_t UnitsFromResourceGroups = 0;
120 ID.HasPartiallyOverlappingGroups =
false;
122 for (
unsigned I = 0,
E = Worklist.
size();
I <
E; ++
I) {
123 ResourcePlusCycles &
A = Worklist[
I];
124 if (!
A.second.size()) {
130 ID.Resources.emplace_back(
A);
134 UsedResourceUnits |=
A.first;
138 if (UnitsFromResourceGroups & NormalizedMask)
139 ID.HasPartiallyOverlappingGroups =
true;
141 UnitsFromResourceGroups |= NormalizedMask;
142 UsedResourceGroups |= (
A.first ^ NormalizedMask);
145 for (
unsigned J =
I + 1; J <
E; ++J) {
146 ResourcePlusCycles &
B = Worklist[J];
147 if ((NormalizedMask &
B.first) == NormalizedMask) {
148 B.second.CS.subtract(
A.second.size() - SuperResources[
A.first]);
172 for (ResourcePlusCycles &RPC :
ID.Resources) {
178 RPC.second.setReserved();
179 RPC.second.NumUnits = MaxResourceUnits;
185 for (
const std::pair<uint64_t, unsigned> &SR : SuperResources) {
186 for (
unsigned I = 1,
E = NumProcResources;
I <
E; ++
I) {
192 if (Mask != SR.first && ((Mask & SR.first) == SR.first))
198 ID.UsedProcResUnits = UsedResourceUnits;
199 ID.UsedProcResGroups = UsedResourceGroups;
202 for (
const std::pair<uint64_t, ResourceUsage> &R :
ID.Resources)
204 <<
"Reserved=" << R.second.isReserved() <<
", "
205 <<
"#Units=" << R.second.NumUnits <<
", "
206 <<
"cy=" << R.second.size() <<
'\n';
209 uint64_t Current = BufferIDs & (-BufferIDs);
211 BufferIDs ^= Current;
213 dbgs() <<
"\t\t Used Units=" <<
format_hex(
ID.UsedProcResUnits, 16) <<
'\n';
216 dbgs() <<
"\t\tHasPartiallyOverlappingGroups="
217 <<
ID.HasPartiallyOverlappingGroups <<
'\n';
227 ID.MaxLatency = 100U;
233 ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(
Latency);
239 unsigned NumExplicitDefs = MCDesc.
getNumDefs();
246 if (NumExplicitDefs) {
247 return make_error<InstructionError<MCInst>>(
248 "Expected more register operand definitions.", MCI);
255 std::string Message =
256 "expected a register operand for an optional definition. Instruction "
257 "has not been correctly analyzed.";
258 return make_error<InstructionError<MCInst>>(Message, MCI);
265void InstrBuilder::populateWrites(InstrDesc &
ID,
const MCInst &MCI,
266 unsigned SchedClassID) {
314 unsigned NumExplicitDefs = MCDesc.
getNumDefs();
317 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
322 ID.Writes.resize(TotalDefs + NumVariadicOps);
326 unsigned CurrentDef = 0;
329 for (; i < MCI.
getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
334 if (MCDesc.
operands()[CurrentDef].isOptionalDef()) {
335 OptionalDefIdx = CurrentDef++;
339 WriteDescriptor &Write =
ID.Writes[CurrentDef];
341 if (CurrentDef < NumWriteLatencyEntries) {
342 const MCWriteLatencyEntry &WLE =
346 WLE.Cycles < 0 ?
ID.MaxLatency :
static_cast<unsigned>(WLE.Cycles);
347 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
351 Write.SClassOrWriteResourceID = 0;
353 Write.IsOptionalDef =
false;
355 dbgs() <<
"\t\t[Def] OpIdx=" <<
Write.OpIndex
356 <<
", Latency=" <<
Write.Latency
357 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
362 assert(CurrentDef == NumExplicitDefs &&
363 "Expected more register operand definitions.");
364 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
365 unsigned Index = NumExplicitDefs + CurrentDef;
367 Write.OpIndex = ~CurrentDef;
369 if (
Index < NumWriteLatencyEntries) {
370 const MCWriteLatencyEntry &WLE =
374 WLE.Cycles < 0 ?
ID.MaxLatency :
static_cast<unsigned>(WLE.Cycles);
375 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
379 Write.SClassOrWriteResourceID = 0;
382 Write.IsOptionalDef =
false;
383 assert(
Write.RegisterID != 0 &&
"Expected a valid phys register!");
385 dbgs() <<
"\t\t[Def][I] OpIdx=" << ~Write.OpIndex
387 <<
", Latency=" <<
Write.Latency
388 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
393 WriteDescriptor &
Write =
ID.Writes[NumExplicitDefs + NumImplicitDefs];
394 Write.OpIndex = OptionalDefIdx;
397 Write.SClassOrWriteResourceID = 0;
398 Write.IsOptionalDef =
true;
400 dbgs() <<
"\t\t[Def][O] OpIdx=" <<
Write.OpIndex
401 <<
", Latency=" <<
Write.Latency
402 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
410 CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.
hasOptionalDef();
412 I < NumVariadicOps && !AssumeUsesOnly; ++
I, ++
OpIndex) {
417 WriteDescriptor &
Write =
ID.Writes[CurrentDef];
421 Write.SClassOrWriteResourceID = 0;
422 Write.IsOptionalDef =
false;
425 dbgs() <<
"\t\t[Def][V] OpIdx=" <<
Write.OpIndex
426 <<
", Latency=" <<
Write.Latency
427 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
431 ID.Writes.resize(CurrentDef);
434void InstrBuilder::populateReads(InstrDesc &
ID,
const MCInst &MCI,
435 unsigned SchedClassID) {
436 const MCInstrDesc &MCDesc = MCII.
get(MCI.getOpcode());
437 unsigned NumExplicitUses = MCDesc.
getNumOperands() - MCDesc.getNumDefs();
438 unsigned NumImplicitUses = MCDesc.implicit_uses().size();
440 if (MCDesc.hasOptionalDef())
442 unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
443 unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
444 ID.Reads.resize(TotalUses);
445 unsigned CurrentUse = 0;
446 for (
unsigned I = 0,
OpIndex = MCDesc.getNumDefs();
I < NumExplicitUses;
448 const MCOperand &
Op = MCI.getOperand(
OpIndex);
452 ReadDescriptor &
Read =
ID.Reads[CurrentUse];
455 Read.SchedClassID = SchedClassID;
458 <<
", UseIndex=" <<
Read.UseIndex <<
'\n');
463 for (
unsigned I = 0;
I < NumImplicitUses; ++
I) {
464 ReadDescriptor &
Read =
ID.Reads[CurrentUse +
I];
466 Read.UseIndex = NumExplicitUses +
I;
467 Read.RegisterID = MCDesc.implicit_uses()[
I];
468 Read.SchedClassID = SchedClassID;
470 <<
", UseIndex=" <<
Read.UseIndex <<
", RegisterID="
474 CurrentUse += NumImplicitUses;
476 bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
477 for (
unsigned I = 0,
OpIndex = MCDesc.getNumOperands();
478 I < NumVariadicOps && !AssumeDefsOnly; ++
I, ++
OpIndex) {
479 const MCOperand &
Op = MCI.getOperand(
OpIndex);
483 ReadDescriptor &
Read =
ID.Reads[CurrentUse];
485 Read.UseIndex = NumExplicitUses + NumImplicitUses +
I;
486 Read.SchedClassID = SchedClassID;
489 <<
", UseIndex=" <<
Read.UseIndex <<
'\n');
492 ID.Reads.resize(CurrentUse);
495Error InstrBuilder::verifyInstrDesc(
const InstrDesc &
ID,
496 const MCInst &MCI)
const {
497 if (
ID.NumMicroOps != 0)
498 return ErrorSuccess();
500 bool UsesBuffers =
ID.UsedBuffers;
501 bool UsesResources = !
ID.Resources.empty();
502 if (!UsesBuffers && !UsesResources)
503 return ErrorSuccess();
507 StringRef Message =
"found an inconsistent instruction that decodes to zero "
508 "opcodes and that consumes scheduler resources.";
509 return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
512Expected<const InstrDesc &>
513InstrBuilder::createInstrDescImpl(
const MCInst &MCI,
514 const SmallVector<Instrument *> &IVec) {
516 "Itineraries are not yet supported!");
519 unsigned short Opcode = MCI.getOpcode();
520 const MCInstrDesc &MCDesc = MCII.
get(Opcode);
526 bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
530 unsigned CPUID = SM.getProcessorID();
531 while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
536 return make_error<InstructionError<MCInst>>(
537 "unable to resolve scheduling class for write variant.", MCI);
542 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
544 return make_error<InstructionError<MCInst>>(
545 "found an unsupported instruction in the input assembly sequence.",
550 LLVM_DEBUG(
dbgs() <<
"\t\tSchedClassID=" << SchedClassID <<
'\n');
554 std::unique_ptr<InstrDesc>
ID = std::make_unique<InstrDesc>();
555 ID->NumMicroOps = SCDesc.NumMicroOps;
556 ID->SchedClassID = SchedClassID;
558 if (MCDesc.isCall() && FirstCallInst) {
562 <<
"Assume a latency of 100cy.\n";
563 FirstCallInst =
false;
566 if (MCDesc.isReturn() && FirstReturnInst) {
568 <<
" assembly sequence.\n";
570 FirstReturnInst =
false;
577 return std::move(Err);
579 populateWrites(*
ID, MCI, SchedClassID);
580 populateReads(*
ID, MCI, SchedClassID);
586 if (Error Err = verifyInstrDesc(*
ID, MCI))
587 return std::move(Err);
590 bool IsVariadic = MCDesc.isVariadic();
591 if ((
ID->IsRecyclable = !IsVariadic && !IsVariant)) {
592 auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
593 Descriptors[DKey] = std::move(
ID);
594 return *Descriptors[DKey];
597 auto VDKey = std::make_pair(&MCI, SchedClassID);
598 VariantDescriptors[VDKey] = std::move(
ID);
599 return *VariantDescriptors[VDKey];
602Expected<const InstrDesc &>
603InstrBuilder::getOrCreateInstrDesc(
const MCInst &MCI,
604 const SmallVector<Instrument *> &IVec) {
608 auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
609 if (Descriptors.find_as(DKey) != Descriptors.end())
610 return *Descriptors[DKey];
614 auto VDKey = std::make_pair(&MCI, SchedClassID);
615 if (VariantDescriptors.contains(VDKey))
616 return *VariantDescriptors[VDKey];
618 return createInstrDescImpl(MCI, IVec);
621STATISTIC(NumVariantInst,
"Number of MCInsts that doesn't have static Desc");
631 std::unique_ptr<Instruction> CreatedIS;
632 bool IsInstRecycled =
false;
637 if (
D.IsRecyclable && InstRecycleCB) {
638 if (
auto *
I = InstRecycleCB(
D)) {
641 IsInstRecycled =
true;
644 if (!IsInstRecycled) {
645 CreatedIS = std::make_unique<Instruction>(
D, MCI.
getOpcode());
646 NewIS = CreatedIS.get();
663 bool IsZeroIdiom =
false;
664 bool IsDepBreaking =
false;
667 IsZeroIdiom = MCIA->
isZeroIdiom(MCI, Mask, ProcID);
696 if (IsInstRecycled && Idx < NewIS->getUses().
size()) {
700 NewIS->
getUses().emplace_back(RD, RegID);
717 if (Mask.getBitWidth() > RD.
UseIndex) {
725 if (IsInstRecycled && Idx < NewIS->getUses().
size())
729 if (
D.Writes.empty()) {
731 return llvm::make_error<RecycledInstErr>(NewIS);
733 return std::move(CreatedIS);
738 APInt WriteMask(
D.Writes.size(), 0);
746 unsigned WriteIndex = 0;
757 assert(RegID &&
"Expected a valid register ID!");
758 if (IsInstRecycled && Idx < NewIS->getDefs().
size()) {
761 WriteMask[WriteIndex],
764 NewIS->
getDefs().emplace_back(WD, RegID,
765 WriteMask[WriteIndex],
771 if (IsInstRecycled && Idx < NewIS->getDefs().
size())
775 return llvm::make_error<RecycledInstErr>(NewIS);
777 return std::move(CreatedIS);
unsigned const MachineRegisterInfo * MRI
This file implements a class to represent arbitrary precision integral constant values and operations on them.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
A builder class for instructions that are statically analyzed by llvm-mca.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
This class represents an Operation in the Expression.
Subclass of Error for the sole purpose of identifying the success path in the type system.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
const MCOperand & getOperand(unsigned i) const
virtual bool isOptimizableRegisterMove(const MCInst &MI, unsigned CPUID) const
Returns true if MI is a candidate for move elimination.
virtual bool isDependencyBreaking(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking instruction for the subtarget associated with CPUID.
virtual bool isZeroIdiom(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking zero-idiom for the given subtarget.
virtual bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst, APInt &Writes) const
Returns true if at least one of the register writes performed by Inst implicitly clears the upper portion of all super-registers.
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g. ARM instructions which can set condition code if 's' bit is set.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
bool variadicOpsAreDefs() const
Return true if variadic operands of this instruction are definitions.
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by other flags.
bool isCall() const
Return true if the instruction is a call.
Interface to description of machine instruction set.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Instances of this class represent operands of the MCInst class.
unsigned getReg() const
Returns the register number.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc objects that represent all of the machine registers that the target has.
const char * getName(MCRegister RegNo) const
Return the human-readable symbolic target-specific name for the specified physical register.
Generic base class for all target subtargets.
virtual unsigned resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI, const MCInstrInfo *MCII, unsigned CPUID) const
Resolve a variant scheduling class for the given MCInst and CPU.
const MCWriteLatencyEntry * getWriteLatencyEntry(const MCSchedClassDesc *SC, unsigned DefIdx) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
static raw_ostream & note()
Convenience method for printing "note: " to stderr.
Expected< std::unique_ptr< Instruction > > createInstruction(const MCInst &MCI, const SmallVector< Instrument * > &IVec)
void setEndGroup(bool newVal)
void setRetireOOO(bool newVal)
SmallVectorImpl< WriteState > & getDefs()
void setBeginGroup(bool newVal)
SmallVectorImpl< ReadState > & getUses()
void setHasSideEffects(bool newVal)
void setMayStore(bool newVal)
void setOptimizableMove()
void setMayLoad(bool newVal)
An instruction propagated through the simulated instruction pipeline.
This class allows targets to optionally customize the logic that resolves scheduling class IDs.
virtual unsigned getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI, const SmallVector< Instrument * > &IVec) const
Given an MCInst and a vector of Instrument, a target can return a SchedClassID.
Tracks register operand latency in cycles.
void setIndependentFromDef()
Tracks uses of a register definition (e.g. register write).
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI)
static void initializeUsedResources(InstrDesc &ID, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI, ArrayRef< uint64_t > ProcResourceMasks)
void computeProcResourceMasks(const MCSchedModel &SM, MutableArrayRef< uint64_t > Masks)
Populates vector Masks with processor resource masks.
unsigned getResourceStateIndex(uint64_t Mask)
static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI)
This is an optimization pass for GlobalISel generic memory operations.
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FormattedNumber format_hex(uint64_t N, unsigned Width, bool Upper=false)
format_hex - Output N as a fixed width hexadecimal.
DWARFExpression::Operation Op
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
static const unsigned short InvalidNumMicroOps
uint16_t NumWriteLatencyEntries
uint16_t NumWriteProcResEntries
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned getProcessorID() const
unsigned getNumProcResourceKinds() const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
uint16_t ReleaseAtCycle
Cycle at which the resource will be released by an instruction, relatively to the cycle in which the instruction is issued (assuming no stalls inbetween).
An instruction descriptor.
A register read descriptor.
bool isImplicitRead() const
Helper used by class InstrDesc to describe how hardware resources are used.
A register write descriptor.
bool isImplicitWrite() const