22 #define DEBUG_TYPE "llvm-mca"
31 : STI(sti), MCII(mcii),
MRI(mri), MCIA(mcia), FirstCallInst(
true),
32 FirstReturnInst(
true) {
45 using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
61 APInt Buffers(NumProcResources, 0);
63 bool AllInOrderResources =
true;
64 bool AnyDispatchHazards =
false;
71 <<
"Ignoring invalid write of zero cycles on processor resource "
74 <<
" (write index #" <<
I <<
")\n";
81 AllInOrderResources =
false;
92 SuperResources[Super] += PRE->
Cycles;
96 ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
100 sort(Worklist, [](
const ResourcePlusCycles &A,
const ResourcePlusCycles &
B) {
103 if (popcntA < popcntB)
105 if (popcntA > popcntB)
107 return A.first <
B.first;
112 auto GroupIt =
find_if(Worklist, [](
const ResourcePlusCycles &Elt) {
115 unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
116 uint64_t ImpliedUsesOfResourceUnits = 0;
119 for (
unsigned I = 0,
E = Worklist.size();
I <
E; ++
I) {
120 ResourcePlusCycles &A = Worklist[
I];
121 if (!A.second.size()) {
127 ID.Resources.emplace_back(A);
130 UsedResourceUnits |= A.first;
134 UsedResourceGroups |= (A.first ^ NormalizedMask);
136 uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
137 if ((NormalizedMask != AvailableMask) &&
141 ImpliedUsesOfResourceUnits |= AvailableMask;
142 UsedResourceUnits |= AvailableMask;
146 for (
unsigned J =
I + 1; J <
E; ++J) {
147 ResourcePlusCycles &
B = Worklist[J];
148 if ((NormalizedMask &
B.first) == NormalizedMask) {
149 B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
159 while (ImpliedUsesOfResourceUnits) {
160 ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
161 ImpliedUsesOfResourceUnits = 0;
162 for (
unsigned I = FirstGroupIdx,
E = Worklist.size();
I <
E; ++
I) {
163 ResourcePlusCycles &A = Worklist[
I];
164 if (!A.second.size())
171 uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
172 if ((NormalizedMask != AvailableMask) &&
176 UsedResourceUnits |= AvailableMask;
177 ImpliedUsesOfResourceUnits |= AvailableMask;
198 for (ResourcePlusCycles &RPC :
ID.Resources) {
204 RPC.second.setReserved();
205 RPC.second.NumUnits = MaxResourceUnits;
211 for (
const std::pair<uint64_t, unsigned> &SR : SuperResources) {
212 for (
unsigned I = 1,
E = NumProcResources;
I <
E; ++
I) {
218 if (
Mask != SR.first && ((
Mask & SR.first) == SR.first))
224 ID.UsedProcResUnits = UsedResourceUnits;
225 ID.UsedProcResGroups = UsedResourceGroups;
228 for (
const std::pair<uint64_t, ResourceUsage> &R :
ID.Resources)
230 <<
"Reserved=" << R.second.isReserved() <<
", "
231 <<
"#Units=" << R.second.NumUnits <<
", "
232 <<
"cy=" << R.second.size() <<
'\n';
235 uint64_t Current = BufferIDs & (-BufferIDs);
237 BufferIDs ^= Current;
239 dbgs() <<
"\t\t Used Units=" <<
format_hex(
ID.UsedProcResUnits, 16) <<
'\n';
240 dbgs() <<
"\t\tImplicitly Used Units="
241 <<
format_hex(
ID.ImplicitlyUsedProcResUnits, 16) <<
'\n';
253 ID.MaxLatency = 100U;
259 ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(
Latency);
265 unsigned NumExplicitDefs = MCDesc.
getNumDefs();
272 if (NumExplicitDefs) {
273 return make_error<InstructionError<MCInst>>(
274 "Expected more register operand definitions.", MCI);
281 std::string Message =
282 "expected a register operand for an optional definition. Instruction "
283 "has not been correctly analyzed.";
284 return make_error<InstructionError<MCInst>>(Message, MCI);
// Fills ID.Writes with the write (register definition) descriptors for MCI.
// NOTE(review): this excerpt omits many of the original lines (the number at
// the start of a line is its original line number), so the comments below only
// describe statements that are visible here.
291 void InstrBuilder::populateWrites(InstrDesc &
ID,
const MCInst &MCI,
292 unsigned SchedClassID) {
340 unsigned NumExplicitDefs = MCDesc.
getNumDefs();
343 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
// Size the vector for every definition plus every variadic operand; it is
// trimmed to the number of descriptors actually used by the final resize().
348 ID.Writes.resize(TotalDefs + NumVariadicOps);
352 unsigned CurrentDef = 0;
// Records the current index as the optional definition and steps past it.
// NOTE(review): the condition guarding this statement is not visible here.
361 OptionalDefIdx = CurrentDef++;
365 WriteDescriptor &Write =
ID.Writes[CurrentDef];
367 if (CurrentDef < NumWriteLatencyEntries) {
368 const MCWriteLatencyEntry &WLE =
// A negative cycle count in the latency entry falls back to ID.MaxLatency.
372 WLE.Cycles < 0 ?
ID.MaxLatency :
static_cast<unsigned>(WLE.Cycles);
373 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
// NOTE(review): presumably the fallback branch for definitions without a
// write-latency entry; the branch header is elided in this excerpt.
377 Write.SClassOrWriteResourceID = 0;
379 Write.IsOptionalDef =
false;
381 dbgs() <<
"\t\t[Def] OpIdx=" <<
Write.OpIndex
382 <<
", Latency=" <<
Write.Latency
383 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
// Sanity check: at this point CurrentDef must equal NumExplicitDefs.
388 assert(CurrentDef == NumExplicitDefs &&
389 "Expected more register operand definitions.");
// Implicit definitions: CurrentDef is reused as the implicit-def counter.
390 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
// Index is the position after the explicit defs; it is the value compared
// against NumWriteLatencyEntries below.
391 unsigned Index = NumExplicitDefs + CurrentDef;
// OpIndex stores the bitwise complement of the implicit-def counter; the
// debug print below applies '~' again to show the original counter value.
393 Write.OpIndex = ~CurrentDef;
395 if (Index < NumWriteLatencyEntries) {
396 const MCWriteLatencyEntry &WLE =
// Same fallback as for explicit defs: negative cycles -> ID.MaxLatency.
400 WLE.Cycles < 0 ?
ID.MaxLatency :
static_cast<unsigned>(WLE.Cycles);
401 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
405 Write.SClassOrWriteResourceID = 0;
408 Write.IsOptionalDef =
false;
409 assert(
Write.RegisterID != 0 &&
"Expected a valid phys register!");
411 dbgs() <<
"\t\t[Def][I] OpIdx=" << ~
Write.OpIndex
413 <<
", Latency=" <<
Write.Latency
414 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
// The optional definition's descriptor is placed right after all explicit and
// implicit definitions, and is the only one flagged IsOptionalDef = true.
419 WriteDescriptor &
Write =
ID.Writes[NumExplicitDefs + NumImplicitDefs];
420 Write.OpIndex = OptionalDefIdx;
423 Write.SClassOrWriteResourceID = 0;
424 Write.IsOptionalDef =
true;
426 dbgs() <<
"\t\t[Def][O] OpIdx=" <<
Write.OpIndex
427 <<
", Latency=" <<
Write.Latency
428 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
// Variadic writes start after the explicit/implicit defs, plus one extra slot
// when hasOptionalDef() is true (presumably a bool, contributing 0 or 1).
436 CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.
hasOptionalDef();
// The variadic loop does not run at all when AssumeUsesOnly is set.
438 I < NumVariadicOps && !AssumeUsesOnly; ++
I, ++
OpIndex) {
443 WriteDescriptor &
Write =
ID.Writes[CurrentDef];
447 Write.SClassOrWriteResourceID = 0;
448 Write.IsOptionalDef =
false;
451 dbgs() <<
"\t\t[Def][V] OpIdx=" <<
Write.OpIndex
452 <<
", Latency=" <<
Write.Latency
453 <<
", WriteResourceID=" <<
Write.SClassOrWriteResourceID <<
'\n';
// Drop the descriptors that were reserved up front but never filled in.
457 ID.Writes.resize(CurrentDef);
// Fills ID.Reads with the read (register use) descriptors for MCI: explicit
// uses first, then implicit uses, then variadic operands.
// NOTE(review): this excerpt omits some original lines (the number at the
// start of a line is its original line number); comments cover only what is
// visible.
460 void InstrBuilder::populateReads(InstrDesc &
ID,
const MCInst &MCI,
461 unsigned SchedClassID) {
462 const MCInstrDesc &MCDesc = MCII.
get(MCI.getOpcode());
// Explicit uses are the declared operands that are not definitions.
463 unsigned NumExplicitUses = MCDesc.
getNumOperands() - MCDesc.getNumDefs();
464 unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
// NOTE(review): the statement guarded by this condition is elided here.
466 if (MCDesc.hasOptionalDef())
// Operands carried by MCI beyond those declared in MCDesc are variadic.
468 unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
469 unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
// Upper bound; trimmed to the number of reads actually recorded at the end.
470 ID.Reads.resize(TotalUses);
471 unsigned CurrentUse = 0;
// Explicit uses: operand indices start right after the definitions.
472 for (
unsigned I = 0,
OpIndex = MCDesc.getNumDefs();
I < NumExplicitUses;
474 const MCOperand &
Op = MCI.getOperand(
OpIndex);
478 ReadDescriptor &
Read =
ID.Reads[CurrentUse];
481 Read.SchedClassID = SchedClassID;
484 <<
", UseIndex=" <<
Read.UseIndex <<
'\n');
// Implicit uses: stored at CurrentUse + I, with UseIndex continuing after the
// explicit uses and RegisterID taken from MCDesc.getImplicitUses().
489 for (
unsigned I = 0;
I < NumImplicitUses; ++
I) {
490 ReadDescriptor &
Read =
ID.Reads[CurrentUse +
I];
492 Read.UseIndex = NumExplicitUses +
I;
493 Read.RegisterID = MCDesc.getImplicitUses()[
I];
494 Read.SchedClassID = SchedClassID;
496 <<
", UseIndex=" <<
Read.UseIndex <<
", RegisterID="
// Account for the implicit uses written by the loop above.
500 CurrentUse += NumImplicitUses;
// Variadic operands are only treated as reads when they are not all defs; the
// loop below does not run at all when AssumeDefsOnly is set.
502 bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
503 for (
unsigned I = 0,
OpIndex = MCDesc.getNumOperands();
504 I < NumVariadicOps && !AssumeDefsOnly; ++
I, ++
OpIndex) {
505 const MCOperand &
Op = MCI.getOperand(
OpIndex);
509 ReadDescriptor &
Read =
ID.Reads[CurrentUse];
// UseIndex of a variadic read continues after explicit and implicit uses.
511 Read.UseIndex = NumExplicitUses + NumImplicitUses +
I;
512 Read.SchedClassID = SchedClassID;
515 <<
", UseIndex=" <<
Read.UseIndex <<
'\n');
// Drop the descriptors that were reserved up front but never filled in.
518 ID.Reads.resize(CurrentUse);
// Consistency check on a freshly built descriptor.
// Returns success when the instruction has a non-zero micro-op count, or when
// it has zero micro-ops and uses neither buffers nor processor resources.
// Otherwise returns an InstructionError<MCInst> carrying MCI.
// NOTE(review): a few original lines between the visible statements are elided
// in this excerpt.
521 Error InstrBuilder::verifyInstrDesc(
const InstrDesc &
ID,
522 const MCInst &MCI)
const {
523 if (
ID.NumMicroOps != 0)
524 return ErrorSuccess();
// ID.UsedBuffers is converted to bool: true if it is non-zero.
526 bool UsesBuffers =
ID.UsedBuffers;
527 bool UsesResources = !
ID.Resources.empty();
528 if (!UsesBuffers && !UsesResources)
529 return ErrorSuccess();
// Zero micro-ops but scheduler resources are consumed: report as inconsistent.
533 StringRef Message =
"found an inconsistent instruction that decodes to zero "
534 "opcodes and that consumes scheduler resources.";
535 return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
// Builds a new InstrDesc for MCI and returns a reference to the stored
// descriptor, or an InstructionError<MCInst> on failure.
// NOTE(review): this excerpt omits many original lines (loop bodies, the
// statements that store the descriptor, parts of the diagnostics); comments
// describe only what is visible.
538 Expected<const InstrDesc &>
539 InstrBuilder::createInstrDescImpl(
const MCInst &MCI) {
541 "Itineraries are not yet supported!");
544 unsigned short Opcode = MCI.getOpcode();
545 const MCInstrDesc &MCDesc = MCII.
get(Opcode);
549 unsigned SchedClassID = MCDesc.getSchedClass();
554 unsigned CPUID = SM.getProcessorID();
// Keep iterating while the current (non-zero) scheduling class is a variant.
// NOTE(review): the loop body is elided here.
555 while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
560 return make_error<InstructionError<MCInst>>(
561 "unable to resolve scheduling class for write variant.", MCI);
566 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
568 return make_error<InstructionError<MCInst>>(
569 "found an unsupported instruction in the input assembly sequence.",
574 LLVM_DEBUG(
dbgs() <<
"\t\tSchedClassID=" << SchedClassID <<
'\n');
// The descriptor is owned by a unique_ptr while it is being populated.
578 std::unique_ptr<InstrDesc>
ID = std::make_unique<InstrDesc>();
579 ID->NumMicroOps = SCDesc.NumMicroOps;
580 ID->SchedClassID = SchedClassID;
// Only the first call instruction enters this block: FirstCallInst is cleared
// inside it.
582 if (MCDesc.isCall() && FirstCallInst) {
586 <<
"Assume a latency of 100cy.\n";
587 FirstCallInst =
false;
// Same one-shot pattern for the first return instruction.
590 if (MCDesc.isReturn() && FirstReturnInst) {
592 <<
" assembly sequence.\n";
594 FirstReturnInst =
false;
603 populateWrites(*
ID, MCI, SchedClassID);
604 populateReads(*
ID, MCI, SchedClassID);
// Validate the populated descriptor; the error-handling statement is elided.
610 if (Error Err = verifyInstrDesc(*
ID, MCI))
// Non-variadic, non-variant descriptors are returned from Descriptors keyed by
// opcode; everything else is returned from VariantDescriptors keyed by the
// address of MCI.
// NOTE(review): the statements that insert into these maps are elided here.
614 bool IsVariadic = MCDesc.isVariadic();
615 if (!IsVariadic && !IsVariant) {
617 return *Descriptors[MCI.getOpcode()];
621 return *VariantDescriptors[&MCI];
// Returns the descriptor for MCI, creating it on first use.
// Lookup order: Descriptors (keyed by opcode), then VariantDescriptors (keyed
// by the address of MCI); if neither has an entry, createInstrDescImpl(MCI)
// is called.
// NOTE(review): each hit performs a second lookup through operator[] after the
// find; storing the iterator would avoid it.
624 Expected<const InstrDesc &>
625 InstrBuilder::getOrCreateInstrDesc(
const MCInst &MCI) {
626 if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
627 return *Descriptors[MCI.getOpcode()];
629 if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
630 return *VariantDescriptors[&MCI];
632 return createInstrDescImpl(MCI);
635 Expected<std::unique_ptr<Instruction>>
641 std::unique_ptr<Instruction> NewIS =
642 std::make_unique<Instruction>(
D, MCI.
getOpcode());
648 NewIS->setMayLoad(MCDesc.
mayLoad());
649 NewIS->setMayStore(MCDesc.
mayStore());
652 NewIS->setEndGroup(SCDesc.
EndGroup);
658 bool IsZeroIdiom =
false;
659 bool IsDepBreaking =
false;
666 NewIS->setOptimizableMove();
689 NewIS->getUses().emplace_back(RD, RegID);
714 if (
D.Writes.empty())
719 APInt WriteMask(
D.Writes.size(), 0);
727 unsigned WriteIndex = 0;
737 assert(RegID &&
"Expected a valid register ID!");
738 NewIS->getDefs().emplace_back(WD, RegID,
739 WriteMask[WriteIndex],