//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im, unsigned cl)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true), CallLatency(cl) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}
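
// Example (hypothetical model with two units P0 and P1 and one group P01; the
// real masks depend on the target's scheduling model): computeProcResourceMasks
// gives every resource unit its own bit, and gives every group a distinct
// leading bit ORed with the bits of its member units:
//
//   P0  (unit)  -> 0x01
//   P1  (unit)  -> 0x02
//   P01 (group) -> 0x04 | 0x01 | 0x02 = 0x07
//
// bit_floor(0x07) == 0x04 recovers the group's own identifying bit, which is
// why the code below treats the leading 1 of a group mask as the group ID.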

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;
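  // For example, on Haswell the HWFPDivider unit declares HWPort0 as its
  // "Super" resource. A write that consumes HWFPDivider cycles records those
  // cycles here under HWPort0's mask, so that the subtraction below can keep
  // the group cycles consistent with what ExpandProcResource() emitted.
  // (The Haswell names are illustrative of the relationship being tracked.)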

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->ReleaseAtCycle;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });
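
  // With the hypothetical P0/P1/P01 masks sketched above, this comparator
  // would order the worklist as P0 (0x01, popcount 1), P1 (0x02, popcount 1),
  // and then the group P01 (0x07, popcount 3), so cycles consumed on the
  // units are accounted for before the groups that contain them.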
114
115 uint64_t UsedResourceUnits = 0;
116 uint64_t UsedResourceGroups = 0;
117 uint64_t UnitsFromResourceGroups = 0;
118
119 // Remove cycles contributed by smaller resources, and check if there
120 // are partially overlapping resource groups.
121 ID.HasPartiallyOverlappingGroups = false;
122
123 for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
124 ResourcePlusCycles &A = Worklist[I];
125 if (!A.second.size()) {
126 assert(llvm::popcount(A.first) > 1 && "Expected a group!");
127 UsedResourceGroups |= llvm::bit_floor(A.first);
128 continue;
129 }
130
131 ID.Resources.emplace_back(A);
132 uint64_t NormalizedMask = A.first;
133
134 if (llvm::popcount(A.first) == 1) {
135 UsedResourceUnits |= A.first;
136 } else {
137 // Remove the leading 1 from the resource group mask.
138 NormalizedMask ^= llvm::bit_floor(NormalizedMask);
139 if (UnitsFromResourceGroups & NormalizedMask)
140 ID.HasPartiallyOverlappingGroups = true;
141
142 UnitsFromResourceGroups |= NormalizedMask;
143 UsedResourceGroups |= (A.first ^ NormalizedMask);
144 }
145
146 for (unsigned J = I + 1; J < E; ++J) {
147 ResourcePlusCycles &B = Worklist[J];
148 if ((NormalizedMask & B.first) == NormalizedMask) {
149 B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
150 if (llvm::popcount(B.first) > 1)
151 B.second.NumUnits++;
152 }
153 }
154 }
155
156 // A SchedWrite may specify a number of cycles in which a resource group
157 // is reserved. For example (on target x86; cpu Haswell):
158 //
159 // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
160 // let ReleaseAtCycles = [2, 2, 3];
161 // }
162 //
163 // This means:
164 // Resource units HWPort0 and HWPort1 are both used for 2cy.
165 // Resource group HWPort01 is the union of HWPort0 and HWPort1.
166 // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
167 // will not be usable for 2 entire cycles from instruction issue.
168 //
169 // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
170 // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
171 // extra delay on top of the 2 cycles latency.
172 // During those extra cycles, HWPort01 is not usable by other instructions.
173 for (ResourcePlusCycles &RPC : ID.Resources) {
174 if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
175 // Remove the leading 1 from the resource group mask.
176 uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
177 uint64_t MaxResourceUnits = llvm::popcount(Mask);
178 if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
179 RPC.second.setReserved();
180 RPC.second.NumUnits = MaxResourceUnits;
181 }
182 }
183 }
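
  // Tracing the Haswell example above through the loop just ended
  // (illustrative numbers): HWPort01 enters the worklist with 3cy and
  // NumUnits == 1. The earlier subtraction loop subtracts the cycles already
  // charged to HWPort0 and to HWPort1 and bumps NumUnits once for each, to 3.
  // That exceeds the group's 2 actual units, so the group ends up marked
  // reserved for its remaining cycles.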

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI,
                              unsigned CallLatency) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency.
    ID.MaxLatency = CallLatency;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume the MaxLatency set for
  // calls.
  ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);
}
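
// For example, with llvm-mca's default --call-latency of 100, every call (and
// every instruction whose scheduling class reports an unknown latency) is
// modeled as a 100cy operation. The 100 here is only the tool's default;
// CallLatency is whatever value was passed to the InstrBuilder constructor.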

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                            @  <MCOperand Reg:59>
  //                            @  <MCOperand Imm:0>     (!!)
  //                            @  <MCOperand Reg:67>
  //                            @  <MCOperand Imm:0>
  //                            @  <MCOperand Imm:14>
  //                            @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means, register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register or constant register
  // operands. The first NumExplicitDefs register operands are expected to be
  // register definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }
    if (MRI.isConstant(Op.getReg())) {
      CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    if (MRI.isConstant(Read.RegisterID))
      continue;
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

hash_code hashMCOperand(const MCOperand &MCO) {
  hash_code TypeHash = hash_combine(MCO.isReg(), MCO.isImm(), MCO.isSFPImm(),
                                    MCO.isDFPImm(), MCO.isExpr(), MCO.isInst());
  if (MCO.isReg())
    return hash_combine(TypeHash, MCO.getReg());

  return TypeHash;
}

hash_code hashMCInst(const MCInst &MCI) {
  hash_code InstructionHash = hash_combine(MCI.getOpcode(), MCI.getFlags());
  for (unsigned I = 0; I < MCI.getNumOperands(); ++I) {
    InstructionHash =
        hash_combine(InstructionHash, hashMCOperand(MCI.getOperand(I)));
  }
  return InstructionHash;
}
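
// A small sketch of the property this hash provides (the opcode number is
// hypothetical; MCInst and hashMCInst are the real entities above): two
// MCInsts with the same opcode, flags, and operand values hash to the same
// key, so a previously resolved variant can be found again in
// VariantDescriptors instead of being rebuilt:
//
//   MCInst A, B;
//   A.setOpcode(42);                    // hypothetical opcode
//   B.setOpcode(42);
//   assert(hashMCInst(A) == hashMCInst(B));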

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // into CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<unsigned> InstrBuilder::getVariantSchedClassID(const MCInst &MCI,
                                                        unsigned SchedClassID) {
  const MCSchedModel &SM = STI.getSchedModel();
  unsigned CPUID = SM.getProcessorID();
  while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
    SchedClassID =
        STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

  if (!SchedClassID) {
    return make_error<InstructionError<MCInst>>(
        "unable to resolve scheduling class for write variant.", MCI);
  }

  return SchedClassID;
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<Instrument *> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to resolve variant scheduling classes.
  if (IsVariant) {
    Expected<unsigned> VariantSchedClassIDOrErr =
        getVariantSchedClassID(MCI, SchedClassID);
    if (!VariantSchedClassIDOrErr) {
      return VariantSchedClassIDOrErr.takeError();
    }

    SchedClassID = *VariantSchedClassIDOrErr;
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence", MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of " << CallLatency << "cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    Descriptors[DKey] = std::move(ID);
    return *Descriptors[DKey];
  }

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  assert(
      !VariantDescriptors.contains(VDKey) &&
      "Expected VariantDescriptors to not already have a value for this key.");
  VariantDescriptors[VDKey] = std::move(ID);
  return *VariantDescriptors[VDKey];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<Instrument *> &IVec) {
  // Cache lookup using the SchedClassID from instrumentation.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  Expected<unsigned> VariantSchedClassIDOrErr =
      getVariantSchedClassID(MCI, SchedClassID);
  if (!VariantSchedClassIDOrErr) {
    return VariantSchedClassIDOrErr.takeError();
  }

  SchedClassID = *VariantSchedClassIDOrErr;

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  if (VariantDescriptors.contains(VDKey))
    return *VariantDescriptors[VDKey];

  return createInstrDescImpl(MCI, IVec);
}
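
// Lookup order, summarized: (1) the Descriptors cache, keyed by
// (opcode, SchedClassID); (2) the VariantDescriptors cache, keyed by
// (hashMCInst(MCI), resolved SchedClassID); (3) createInstrDescImpl, which
// builds a new descriptor and stores it in whichever of the two caches
// matches its IsRecyclable bit.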

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<Instrument *> &IVec) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg or a write
    // to a constant register.
    if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
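
// A minimal usage sketch (hypothetical driver; STI, MCII, MRI, MCIA, IM,
// Insts, and Instruments stand in for state a real client already owns, and
// error handling is elided):
//
//   mca::InstrBuilder IB(STI, MCII, MRI, &MCIA, IM, /*CallLatency=*/100);
//   for (const MCInst &Inst : Insts) {
//     Expected<std::unique_ptr<mca::Instruction>> InstOrErr =
//         IB.createInstruction(Inst, Instruments);
//     if (!InstOrErr)
//       return InstOrErr.takeError();
//   }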
} // namespace mca
} // namespace llvm
Definition: Instruction.h:159