23#define DEBUG_TYPE "llvm-mca-instrbuilder"
35 : STI(sti), MCII(mcii),
MRI(mri), MCIA(mcia), IM(
im), FirstCallInst(
true),
36 FirstReturnInst(
true) {
49 using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
65 APInt Buffers(NumProcResources, 0);
67 bool AllInOrderResources =
true;
68 bool AnyDispatchHazards =
false;
75 <<
"Ignoring invalid write of zero cycles on processor resource "
78 <<
" (write index #" <<
I <<
")\n";
85 AllInOrderResources =
false;
100 ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
104 sort(Worklist, [](
const ResourcePlusCycles &
A,
const ResourcePlusCycles &
B) {
107 if (popcntA < popcntB)
109 if (popcntA > popcntB)
111 return A.first <
B.first;
116 uint64_t UnitsFromResourceGroups = 0;
120 ID.HasPartiallyOverlappingGroups =
false;
122 for (
unsigned I = 0,
E = Worklist.
size();
I <
E; ++
I) {
123 ResourcePlusCycles &
A = Worklist[
I];
124 if (!
A.second.size()) {
130 ID.Resources.emplace_back(
A);
134 UsedResourceUnits |=
A.first;
138 if (UnitsFromResourceGroups & NormalizedMask)
139 ID.HasPartiallyOverlappingGroups =
true;
141 UnitsFromResourceGroups |= NormalizedMask;
142 UsedResourceGroups |= (
A.first ^ NormalizedMask);
145 for (
unsigned J =
I + 1; J <
E; ++J) {
146 ResourcePlusCycles &
B = Worklist[J];
147 if ((NormalizedMask &
B.first) == NormalizedMask) {
148 B.second.CS.subtract(
A.second.size() - SuperResources[
A.first]);
172 for (ResourcePlusCycles &RPC :
ID.Resources) {
178 RPC.second.setReserved();
179 RPC.second.NumUnits = MaxResourceUnits;
185 for (
const std::pair<uint64_t, unsigned> &SR : SuperResources) {
186 for (
unsigned I = 1,
E = NumProcResources;
I <
E; ++
I) {
192 if (Mask != SR.first && ((Mask & SR.first) == SR.first))
198 ID.UsedProcResUnits = UsedResourceUnits;
199 ID.UsedProcResGroups = UsedResourceGroups;
202 for (
const std::pair<uint64_t, ResourceUsage> &R :
ID.Resources)
204 <<
"Reserved=" << R.second.isReserved() <<
", "
205 <<
"#Units=" << R.second.NumUnits <<
", "
206 <<
"cy=" << R.second.size() <<
'\n';
209 uint64_t Current = BufferIDs & (-BufferIDs);
211 BufferIDs ^= Current;
213 dbgs() <<
"\t\t Used Units=" <<
format_hex(
ID.UsedProcResUnits, 16) <<
'\n';
216 dbgs() <<
"\t\tHasPartiallyOverlappingGroups="
217 <<
ID.HasPartiallyOverlappingGroups <<
'\n';
227 ID.MaxLatency = 100U;
233 ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(
Latency);
239 unsigned NumExplicitDefs = MCDesc.
getNumDefs();
246 if (NumExplicitDefs) {
247 return make_error<InstructionError<MCInst>>(
248 "Expected more register operand definitions.", MCI);
255 std::string Message =
256 "expected a register operand for an optional definition. Instruction "
257 "has not been correctly analyzed.";
258 return make_error<InstructionError<MCInst>>(Message, MCI);
265void InstrBuilder::populateWrites(InstrDesc &
ID,
const MCInst &MCI,
266 unsigned SchedClassID) {
314 unsigned NumExplicitDefs = MCDesc.
getNumDefs();
317 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
322 ID.Writes.resize(TotalDefs + NumVariadicOps);
326 unsigned CurrentDef = 0;
329 for (; i < MCI.
getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
334 if (MCDesc.
operands()[CurrentDef].isOptionalDef()) {
335 OptionalDefIdx = CurrentDef++;
343 WriteDescriptor &
Write =
ID.Writes[CurrentDef];
345 if (CurrentDef < NumWriteLatencyEntries) {
346 const MCWriteLatencyEntry &WLE =
350 WLE.Cycles < 0 ?
ID.MaxLatency :
static_cast<unsigned>(WLE.Cycles);
351 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
355 Write.SClassOrWriteResourceID = 0;
357 Write.IsOptionalDef =
false;
359 dbgs() <<
"\t\t[Def] OpIdx=" <<
Write.OpIndex
360 <<
", Latency=" <<
Write.Latency
361 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
366 assert(CurrentDef == NumExplicitDefs &&
367 "Expected more register operand definitions.");
368 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
369 unsigned Index = NumExplicitDefs + CurrentDef;
371 Write.OpIndex = ~CurrentDef;
373 if (
Index < NumWriteLatencyEntries) {
374 const MCWriteLatencyEntry &WLE =
378 WLE.Cycles < 0 ?
ID.MaxLatency :
static_cast<unsigned>(WLE.Cycles);
379 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
383 Write.SClassOrWriteResourceID = 0;
386 Write.IsOptionalDef =
false;
387 assert(
Write.RegisterID != 0 &&
"Expected a valid phys register!");
389 dbgs() <<
"\t\t[Def][I] OpIdx=" << ~Write.OpIndex
391 <<
", Latency=" <<
Write.Latency
392 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
397 WriteDescriptor &
Write =
ID.Writes[NumExplicitDefs + NumImplicitDefs];
398 Write.OpIndex = OptionalDefIdx;
401 Write.SClassOrWriteResourceID = 0;
402 Write.IsOptionalDef =
true;
404 dbgs() <<
"\t\t[Def][O] OpIdx=" <<
Write.OpIndex
405 <<
", Latency=" <<
Write.Latency
406 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
414 CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.
hasOptionalDef();
416 I < NumVariadicOps && !AssumeUsesOnly; ++
I, ++
OpIndex) {
423 WriteDescriptor &
Write =
ID.Writes[CurrentDef];
427 Write.SClassOrWriteResourceID = 0;
428 Write.IsOptionalDef =
false;
431 dbgs() <<
"\t\t[Def][V] OpIdx=" <<
Write.OpIndex
432 <<
", Latency=" <<
Write.Latency
433 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
437 ID.Writes.resize(CurrentDef);
440void InstrBuilder::populateReads(InstrDesc &
ID,
const MCInst &MCI,
441 unsigned SchedClassID) {
442 const MCInstrDesc &MCDesc = MCII.
get(MCI.getOpcode());
443 unsigned NumExplicitUses = MCDesc.
getNumOperands() - MCDesc.getNumDefs();
444 unsigned NumImplicitUses = MCDesc.implicit_uses().size();
446 if (MCDesc.hasOptionalDef())
448 unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
449 unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
450 ID.Reads.resize(TotalUses);
451 unsigned CurrentUse = 0;
452 for (
unsigned I = 0,
OpIndex = MCDesc.getNumDefs();
I < NumExplicitUses;
454 const MCOperand &
Op = MCI.getOperand(
OpIndex);
460 ReadDescriptor &
Read =
ID.Reads[CurrentUse];
463 Read.SchedClassID = SchedClassID;
466 <<
", UseIndex=" <<
Read.UseIndex <<
'\n');
471 for (
unsigned I = 0;
I < NumImplicitUses; ++
I) {
472 ReadDescriptor &
Read =
ID.Reads[CurrentUse +
I];
474 Read.UseIndex = NumExplicitUses +
I;
475 Read.RegisterID = MCDesc.implicit_uses()[
I];
478 Read.SchedClassID = SchedClassID;
480 <<
", UseIndex=" <<
Read.UseIndex <<
", RegisterID="
484 CurrentUse += NumImplicitUses;
486 bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
487 for (
unsigned I = 0,
OpIndex = MCDesc.getNumOperands();
488 I < NumVariadicOps && !AssumeDefsOnly; ++
I, ++
OpIndex) {
489 const MCOperand &
Op = MCI.getOperand(
OpIndex);
493 ReadDescriptor &
Read =
ID.Reads[CurrentUse];
495 Read.UseIndex = NumExplicitUses + NumImplicitUses +
I;
496 Read.SchedClassID = SchedClassID;
499 <<
", UseIndex=" <<
Read.UseIndex <<
'\n');
502 ID.Reads.resize(CurrentUse);
505Error InstrBuilder::verifyInstrDesc(
const InstrDesc &
ID,
506 const MCInst &MCI)
const {
507 if (
ID.NumMicroOps != 0)
508 return ErrorSuccess();
510 bool UsesBuffers =
ID.UsedBuffers;
511 bool UsesResources = !
ID.Resources.empty();
512 if (!UsesBuffers && !UsesResources)
513 return ErrorSuccess();
517 StringRef Message =
"found an inconsistent instruction that decodes to zero "
518 "opcodes and that consumes scheduler resources.";
519 return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
522Expected<const InstrDesc &>
523InstrBuilder::createInstrDescImpl(
const MCInst &MCI,
524 const SmallVector<Instrument *> &IVec) {
526 "Itineraries are not yet supported!");
529 unsigned short Opcode = MCI.getOpcode();
530 const MCInstrDesc &MCDesc = MCII.
get(Opcode);
536 bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
540 unsigned CPUID = SM.getProcessorID();
541 while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
546 return make_error<InstructionError<MCInst>>(
547 "unable to resolve scheduling class for write variant.", MCI);
552 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
554 return make_error<InstructionError<MCInst>>(
555 "found an unsupported instruction in the input assembly sequence", MCI);
559 LLVM_DEBUG(
dbgs() <<
"\t\tSchedClassID=" << SchedClassID <<
'\n');
563 std::unique_ptr<InstrDesc>
ID = std::make_unique<InstrDesc>();
564 ID->NumMicroOps = SCDesc.NumMicroOps;
565 ID->SchedClassID = SchedClassID;
567 if (MCDesc.isCall() && FirstCallInst) {
571 <<
"Assume a latency of 100cy.\n";
572 FirstCallInst =
false;
575 if (MCDesc.isReturn() && FirstReturnInst) {
577 <<
" assembly sequence.\n";
579 FirstReturnInst =
false;
586 return std::move(Err);
588 populateWrites(*
ID, MCI, SchedClassID);
589 populateReads(*
ID, MCI, SchedClassID);
595 if (Error Err = verifyInstrDesc(*
ID, MCI))
596 return std::move(Err);
599 bool IsVariadic = MCDesc.isVariadic();
600 if ((
ID->IsRecyclable = !IsVariadic && !IsVariant)) {
601 auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
602 Descriptors[DKey] = std::move(
ID);
603 return *Descriptors[DKey];
606 auto VDKey = std::make_pair(&MCI, SchedClassID);
607 VariantDescriptors[VDKey] = std::move(
ID);
608 return *VariantDescriptors[VDKey];
611Expected<const InstrDesc &>
612InstrBuilder::getOrCreateInstrDesc(
const MCInst &MCI,
613 const SmallVector<Instrument *> &IVec) {
617 auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
618 if (Descriptors.find_as(DKey) != Descriptors.end())
619 return *Descriptors[DKey];
623 auto VDKey = std::make_pair(&MCI, SchedClassID);
624 if (VariantDescriptors.contains(VDKey))
625 return *VariantDescriptors[VDKey];
627 return createInstrDescImpl(MCI, IVec);
630STATISTIC(NumVariantInst,
"Number of MCInsts that doesn't have static Desc");
640 std::unique_ptr<Instruction> CreatedIS;
641 bool IsInstRecycled =
false;
646 if (
D.IsRecyclable && InstRecycleCB) {
647 if (
auto *
I = InstRecycleCB(
D)) {
650 IsInstRecycled =
true;
653 if (!IsInstRecycled) {
654 CreatedIS = std::make_unique<Instruction>(
D, MCI.
getOpcode());
655 NewIS = CreatedIS.get();
672 bool IsZeroIdiom =
false;
673 bool IsDepBreaking =
false;
676 IsZeroIdiom = MCIA->
isZeroIdiom(MCI, Mask, ProcID);
705 if (IsInstRecycled && Idx < NewIS->getUses().
size()) {
709 NewIS->
getUses().emplace_back(RD, RegID);
726 if (Mask.getBitWidth() > RD.
UseIndex) {
734 if (IsInstRecycled && Idx < NewIS->getUses().
size())
738 if (
D.Writes.empty()) {
740 return llvm::make_error<RecycledInstErr>(NewIS);
742 return std::move(CreatedIS);
747 APInt WriteMask(
D.Writes.size(), 0);
755 unsigned WriteIndex = 0;
767 assert(RegID &&
"Expected a valid register ID!");
768 if (IsInstRecycled && Idx < NewIS->getDefs().
size()) {
771 WriteMask[WriteIndex],
774 NewIS->
getDefs().emplace_back(WD, RegID,
775 WriteMask[WriteIndex],
781 if (IsInstRecycled && Idx < NewIS->getDefs().
size())
785 return llvm::make_error<RecycledInstErr>(NewIS);
787 return std::move(CreatedIS);
unsigned const MachineRegisterInfo * MRI
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
A builder class for instructions that are statically analyzed by llvm-mca.
while(!ToSimplify.empty())
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
void setBit(unsigned BitPosition)
Set the bit whose position is given as "bitPosition" to 1.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents an Operation in the Expression.
Subclass of Error for the sole purpose of identifying the success path in the type system.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
const MCOperand & getOperand(unsigned i) const
virtual bool isOptimizableRegisterMove(const MCInst &MI, unsigned CPUID) const
Returns true if MI is a candidate for move elimination.
virtual bool isDependencyBreaking(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking instruction for the subtarget associated with CPUID .
virtual bool isZeroIdiom(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking zero-idiom for the given subtarget.
virtual bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst, APInt &Writes) const
Returns true if at least one of the register writes performed by Inst implicitly clears the upper portion of all super-registers.
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
bool hasOptionalDef() const
Set if this instruction has an optional definition.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
bool variadicOpsAreDefs() const
Return true if variadic operands of this instruction are definitions.
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by other flags.
bool isCall() const
Return true if the instruction is a call.
Interface to description of machine instruction set.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Instances of this class represent operands of the MCInst class.
unsigned getReg() const
Returns the register number.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
const char * getName(MCRegister RegNo) const
Return the human-readable symbolic target-specific name for the specified physical register.
bool isConstant(MCRegister RegNo) const
Returns true if the given register is constant.
Generic base class for all target subtargets.
virtual unsigned resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI, const MCInstrInfo *MCII, unsigned CPUID) const
Resolve a variant scheduling class for the given MCInst and CPU.
const MCWriteLatencyEntry * getWriteLatencyEntry(const MCSchedClassDesc *SC, unsigned DefIdx) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first processor resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
static raw_ostream & note()
Convenience method for printing "note: " to stderr.
Expected< std::unique_ptr< Instruction > > createInstruction(const MCInst &MCI, const SmallVector< Instrument * > &IVec)
void setEndGroup(bool newVal)
void setRetireOOO(bool newVal)
SmallVectorImpl< WriteState > & getDefs()
void setBeginGroup(bool newVal)
SmallVectorImpl< ReadState > & getUses()
void setHasSideEffects(bool newVal)
void setMayStore(bool newVal)
void setOptimizableMove()
void setMayLoad(bool newVal)
An instruction propagated through the simulated instruction pipeline.
This class allows targets to optionally customize the logic that resolves scheduling class IDs.
virtual unsigned getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI, const SmallVector< Instrument * > &IVec) const
Given an MCInst and a vector of Instrument, a target can return a SchedClassID.
Tracks register operand latency in cycles.
void setIndependentFromDef()
Tracks uses of a register definition (e.g. a register write).
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI)
static void initializeUsedResources(InstrDesc &ID, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI, ArrayRef< uint64_t > ProcResourceMasks)
void computeProcResourceMasks(const MCSchedModel &SM, MutableArrayRef< uint64_t > Masks)
Populates vector Masks with processor resource masks.
unsigned getResourceStateIndex(uint64_t Mask)
static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI)
This is an optimization pass for GlobalISel generic memory operations.
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FormattedNumber format_hex(uint64_t N, unsigned Width, bool Upper=false)
format_hex - Output N as a fixed width hexadecimal.
DWARFExpression::Operation Op
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
static const unsigned short InvalidNumMicroOps
uint16_t NumWriteLatencyEntries
uint16_t NumWriteProcResEntries
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned getProcessorID() const
unsigned getNumProcResourceKinds() const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
uint16_t ReleaseAtCycle
Cycle at which the resource will be released by an instruction, relatively to the cycle in which the ...
An instruction descriptor.
A register read descriptor.
bool isImplicitRead() const
Helper used by class InstrDesc to describe how hardware resources are used.
A register write descriptor.
bool isImplicitWrite() const