LLVM 19.0.0git
InstrBuilder.cpp
Go to the documentation of this file.
//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//
13
#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"
24
25namespace llvm {
26namespace mca {
27
28char RecycledInstErr::ID = 0;
29
30InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
31 const llvm::MCInstrInfo &mcii,
32 const llvm::MCRegisterInfo &mri,
33 const llvm::MCInstrAnalysis *mcia,
35 : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
36 FirstReturnInst(true) {
37 const MCSchedModel &SM = STI.getSchedModel();
38 ProcResourceMasks.resize(SM.getNumProcResourceKinds());
39 computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
40}
41
43 const MCSchedClassDesc &SCDesc,
44 const MCSubtargetInfo &STI,
45 ArrayRef<uint64_t> ProcResourceMasks) {
46 const MCSchedModel &SM = STI.getSchedModel();
47
48 // Populate resources consumed.
49 using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
51
52 // Track cycles contributed by resources that are in a "Super" relationship.
53 // This is required if we want to correctly match the behavior of method
54 // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
55 // of "consumed" processor resources and resource cycles, the logic in
56 // ExpandProcResource() doesn't update the number of resource cycles
57 // contributed by a "Super" resource to a group.
58 // We need to take this into account when we find that a processor resource is
59 // part of a group, and it is also used as the "Super" of other resources.
60 // This map stores the number of cycles contributed by sub-resources that are
61 // part of a "Super" resource. The key value is the "Super" resource mask ID.
62 DenseMap<uint64_t, unsigned> SuperResources;
63
64 unsigned NumProcResources = SM.getNumProcResourceKinds();
65 APInt Buffers(NumProcResources, 0);
66
67 bool AllInOrderResources = true;
68 bool AnyDispatchHazards = false;
69 for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
70 const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
72 if (!PRE->ReleaseAtCycle) {
73#ifndef NDEBUG
75 << "Ignoring invalid write of zero cycles on processor resource "
76 << PR.Name << "\n";
77 WithColor::note() << "found in scheduling class " << SCDesc.Name
78 << " (write index #" << I << ")\n";
79#endif
80 continue;
81 }
82
83 uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
84 if (PR.BufferSize < 0) {
85 AllInOrderResources = false;
86 } else {
87 Buffers.setBit(getResourceStateIndex(Mask));
88 AnyDispatchHazards |= (PR.BufferSize == 0);
89 AllInOrderResources &= (PR.BufferSize <= 1);
90 }
91
92 CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
93 Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
94 if (PR.SuperIdx) {
95 uint64_t Super = ProcResourceMasks[PR.SuperIdx];
96 SuperResources[Super] += PRE->ReleaseAtCycle;
97 }
98 }
99
100 ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
101
102 // Sort elements by mask popcount, so that we prioritize resource units over
103 // resource groups, and smaller groups over larger groups.
104 sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
105 unsigned popcntA = llvm::popcount(A.first);
106 unsigned popcntB = llvm::popcount(B.first);
107 if (popcntA < popcntB)
108 return true;
109 if (popcntA > popcntB)
110 return false;
111 return A.first < B.first;
112 });
113
114 uint64_t UsedResourceUnits = 0;
115 uint64_t UsedResourceGroups = 0;
116 uint64_t UnitsFromResourceGroups = 0;
117
118 // Remove cycles contributed by smaller resources, and check if there
119 // are partially overlapping resource groups.
120 ID.HasPartiallyOverlappingGroups = false;
121
122 for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
123 ResourcePlusCycles &A = Worklist[I];
124 if (!A.second.size()) {
125 assert(llvm::popcount(A.first) > 1 && "Expected a group!");
126 UsedResourceGroups |= llvm::bit_floor(A.first);
127 continue;
128 }
129
130 ID.Resources.emplace_back(A);
131 uint64_t NormalizedMask = A.first;
132
133 if (llvm::popcount(A.first) == 1) {
134 UsedResourceUnits |= A.first;
135 } else {
136 // Remove the leading 1 from the resource group mask.
137 NormalizedMask ^= llvm::bit_floor(NormalizedMask);
138 if (UnitsFromResourceGroups & NormalizedMask)
139 ID.HasPartiallyOverlappingGroups = true;
140
141 UnitsFromResourceGroups |= NormalizedMask;
142 UsedResourceGroups |= (A.first ^ NormalizedMask);
143 }
144
145 for (unsigned J = I + 1; J < E; ++J) {
146 ResourcePlusCycles &B = Worklist[J];
147 if ((NormalizedMask & B.first) == NormalizedMask) {
148 B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
149 if (llvm::popcount(B.first) > 1)
150 B.second.NumUnits++;
151 }
152 }
153 }
154
155 // A SchedWrite may specify a number of cycles in which a resource group
156 // is reserved. For example (on target x86; cpu Haswell):
157 //
158 // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
159 // let ReleaseAtCycles = [2, 2, 3];
160 // }
161 //
162 // This means:
163 // Resource units HWPort0 and HWPort1 are both used for 2cy.
164 // Resource group HWPort01 is the union of HWPort0 and HWPort1.
165 // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
166 // will not be usable for 2 entire cycles from instruction issue.
167 //
168 // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
169 // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
170 // extra delay on top of the 2 cycles latency.
171 // During those extra cycles, HWPort01 is not usable by other instructions.
172 for (ResourcePlusCycles &RPC : ID.Resources) {
173 if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
174 // Remove the leading 1 from the resource group mask.
175 uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
176 uint64_t MaxResourceUnits = llvm::popcount(Mask);
177 if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
178 RPC.second.setReserved();
179 RPC.second.NumUnits = MaxResourceUnits;
180 }
181 }
182 }
183
184 // Identify extra buffers that are consumed through super resources.
185 for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
186 for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
187 const MCProcResourceDesc &PR = *SM.getProcResource(I);
188 if (PR.BufferSize == -1)
189 continue;
190
191 uint64_t Mask = ProcResourceMasks[I];
192 if (Mask != SR.first && ((Mask & SR.first) == SR.first))
193 Buffers.setBit(getResourceStateIndex(Mask));
194 }
195 }
196
197 ID.UsedBuffers = Buffers.getZExtValue();
198 ID.UsedProcResUnits = UsedResourceUnits;
199 ID.UsedProcResGroups = UsedResourceGroups;
200
201 LLVM_DEBUG({
202 for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
203 dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
204 << "Reserved=" << R.second.isReserved() << ", "
205 << "#Units=" << R.second.NumUnits << ", "
206 << "cy=" << R.second.size() << '\n';
207 uint64_t BufferIDs = ID.UsedBuffers;
208 while (BufferIDs) {
209 uint64_t Current = BufferIDs & (-BufferIDs);
210 dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
211 BufferIDs ^= Current;
212 }
213 dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
214 dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
215 << '\n';
216 dbgs() << "\t\tHasPartiallyOverlappingGroups="
217 << ID.HasPartiallyOverlappingGroups << '\n';
218 });
219}
220
221static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
222 const MCSchedClassDesc &SCDesc,
223 const MCSubtargetInfo &STI) {
224 if (MCDesc.isCall()) {
225 // We cannot estimate how long this call will take.
226 // Artificially set an arbitrarily high latency (100cy).
227 ID.MaxLatency = 100U;
228 return;
229 }
230
232 // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
233 ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
234}
235
236static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
237 // Count register definitions, and skip non register operands in the process.
238 unsigned I, E;
239 unsigned NumExplicitDefs = MCDesc.getNumDefs();
240 for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
241 const MCOperand &Op = MCI.getOperand(I);
242 if (Op.isReg())
243 --NumExplicitDefs;
244 }
245
246 if (NumExplicitDefs) {
247 return make_error<InstructionError<MCInst>>(
248 "Expected more register operand definitions.", MCI);
249 }
250
251 if (MCDesc.hasOptionalDef()) {
252 // Always assume that the optional definition is the last operand.
253 const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
254 if (I == MCI.getNumOperands() || !Op.isReg()) {
255 std::string Message =
256 "expected a register operand for an optional definition. Instruction "
257 "has not been correctly analyzed.";
258 return make_error<InstructionError<MCInst>>(Message, MCI);
259 }
260 }
261
262 return ErrorSuccess();
263}
264
265void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
266 unsigned SchedClassID) {
267 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
268 const MCSchedModel &SM = STI.getSchedModel();
269 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
270
271 // Assumptions made by this algorithm:
272 // 1. The number of explicit and implicit register definitions in a MCInst
273 // matches the number of explicit and implicit definitions according to
274 // the opcode descriptor (MCInstrDesc).
275 // 2. Uses start at index #(MCDesc.getNumDefs()).
276 // 3. There can only be a single optional register definition, an it is
277 // either the last operand of the sequence (excluding extra operands
278 // contributed by variadic opcodes) or one of the explicit register
279 // definitions. The latter occurs for some Thumb1 instructions.
280 //
281 // These assumptions work quite well for most out-of-order in-tree targets
282 // like x86. This is mainly because the vast majority of instructions is
283 // expanded to MCInst using a straightforward lowering logic that preserves
284 // the ordering of the operands.
285 //
286 // About assumption 1.
287 // The algorithm allows non-register operands between register operand
288 // definitions. This helps to handle some special ARM instructions with
289 // implicit operand increment (-mtriple=armv7):
290 //
291 // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed
292 // @ <MCOperand Reg:59>
293 // @ <MCOperand Imm:0> (!!)
294 // @ <MCOperand Reg:67>
295 // @ <MCOperand Imm:0>
296 // @ <MCOperand Imm:14>
297 // @ <MCOperand Reg:0>>
298 //
299 // MCDesc reports:
300 // 6 explicit operands.
301 // 1 optional definition
302 // 2 explicit definitions (!!)
303 //
304 // The presence of an 'Imm' operand between the two register definitions
305 // breaks the assumption that "register definitions are always at the
306 // beginning of the operand sequence".
307 //
308 // To workaround this issue, this algorithm ignores (i.e. skips) any
309 // non-register operands between register definitions. The optional
310 // definition is still at index #(NumOperands-1).
311 //
312 // According to assumption 2. register reads start at #(NumExplicitDefs-1).
313 // That means, register R1 from the example is both read and written.
314 unsigned NumExplicitDefs = MCDesc.getNumDefs();
315 unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
316 unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
317 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
318 if (MCDesc.hasOptionalDef())
319 TotalDefs++;
320
321 unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
322 ID.Writes.resize(TotalDefs + NumVariadicOps);
323 // Iterate over the operands list, and skip non-register or constant register
324 // operands. The first NumExplicitDefs register operands are expected to be
325 // register definitions.
326 unsigned CurrentDef = 0;
327 unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
328 unsigned i = 0;
329 for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
330 const MCOperand &Op = MCI.getOperand(i);
331 if (!Op.isReg())
332 continue;
333
334 if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
335 OptionalDefIdx = CurrentDef++;
336 continue;
337 }
338 if (MRI.isConstant(Op.getReg())) {
339 CurrentDef++;
340 continue;
341 }
342
343 WriteDescriptor &Write = ID.Writes[CurrentDef];
344 Write.OpIndex = i;
345 if (CurrentDef < NumWriteLatencyEntries) {
346 const MCWriteLatencyEntry &WLE =
347 *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
348 // Conservatively default to MaxLatency.
349 Write.Latency =
350 WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
351 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
352 } else {
353 // Assign a default latency for this write.
354 Write.Latency = ID.MaxLatency;
355 Write.SClassOrWriteResourceID = 0;
356 }
357 Write.IsOptionalDef = false;
358 LLVM_DEBUG({
359 dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
360 << ", Latency=" << Write.Latency
361 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
362 });
363 CurrentDef++;
364 }
365
366 assert(CurrentDef == NumExplicitDefs &&
367 "Expected more register operand definitions.");
368 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
369 unsigned Index = NumExplicitDefs + CurrentDef;
370 WriteDescriptor &Write = ID.Writes[Index];
371 Write.OpIndex = ~CurrentDef;
372 Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
373 if (Index < NumWriteLatencyEntries) {
374 const MCWriteLatencyEntry &WLE =
375 *STI.getWriteLatencyEntry(&SCDesc, Index);
376 // Conservatively default to MaxLatency.
377 Write.Latency =
378 WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
379 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
380 } else {
381 // Assign a default latency for this write.
382 Write.Latency = ID.MaxLatency;
383 Write.SClassOrWriteResourceID = 0;
384 }
385
386 Write.IsOptionalDef = false;
387 assert(Write.RegisterID != 0 && "Expected a valid phys register!");
388 LLVM_DEBUG({
389 dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
390 << ", PhysReg=" << MRI.getName(Write.RegisterID)
391 << ", Latency=" << Write.Latency
392 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
393 });
394 }
395
396 if (MCDesc.hasOptionalDef()) {
397 WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
398 Write.OpIndex = OptionalDefIdx;
399 // Assign a default latency for this write.
400 Write.Latency = ID.MaxLatency;
401 Write.SClassOrWriteResourceID = 0;
402 Write.IsOptionalDef = true;
403 LLVM_DEBUG({
404 dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
405 << ", Latency=" << Write.Latency
406 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
407 });
408 }
409
410 if (!NumVariadicOps)
411 return;
412
413 bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
414 CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
415 for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
416 I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
417 const MCOperand &Op = MCI.getOperand(OpIndex);
418 if (!Op.isReg())
419 continue;
420 if (MRI.isConstant(Op.getReg()))
421 continue;
422
423 WriteDescriptor &Write = ID.Writes[CurrentDef];
424 Write.OpIndex = OpIndex;
425 // Assign a default latency for this write.
426 Write.Latency = ID.MaxLatency;
427 Write.SClassOrWriteResourceID = 0;
428 Write.IsOptionalDef = false;
429 ++CurrentDef;
430 LLVM_DEBUG({
431 dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
432 << ", Latency=" << Write.Latency
433 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
434 });
435 }
436
437 ID.Writes.resize(CurrentDef);
438}
439
440void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
441 unsigned SchedClassID) {
442 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
443 unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
444 unsigned NumImplicitUses = MCDesc.implicit_uses().size();
445 // Remove the optional definition.
446 if (MCDesc.hasOptionalDef())
447 --NumExplicitUses;
448 unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
449 unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
450 ID.Reads.resize(TotalUses);
451 unsigned CurrentUse = 0;
452 for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
453 ++I, ++OpIndex) {
454 const MCOperand &Op = MCI.getOperand(OpIndex);
455 if (!Op.isReg())
456 continue;
457 if (MRI.isConstant(Op.getReg()))
458 continue;
459
460 ReadDescriptor &Read = ID.Reads[CurrentUse];
461 Read.OpIndex = OpIndex;
462 Read.UseIndex = I;
463 Read.SchedClassID = SchedClassID;
464 ++CurrentUse;
465 LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
466 << ", UseIndex=" << Read.UseIndex << '\n');
467 }
468
469 // For the purpose of ReadAdvance, implicit uses come directly after explicit
470 // uses. The "UseIndex" must be updated according to that implicit layout.
471 for (unsigned I = 0; I < NumImplicitUses; ++I) {
472 ReadDescriptor &Read = ID.Reads[CurrentUse + I];
473 Read.OpIndex = ~I;
474 Read.UseIndex = NumExplicitUses + I;
475 Read.RegisterID = MCDesc.implicit_uses()[I];
476 if (MRI.isConstant(Read.RegisterID))
477 continue;
478 Read.SchedClassID = SchedClassID;
479 LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
480 << ", UseIndex=" << Read.UseIndex << ", RegisterID="
481 << MRI.getName(Read.RegisterID) << '\n');
482 }
483
484 CurrentUse += NumImplicitUses;
485
486 bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
487 for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
488 I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
489 const MCOperand &Op = MCI.getOperand(OpIndex);
490 if (!Op.isReg())
491 continue;
492
493 ReadDescriptor &Read = ID.Reads[CurrentUse];
494 Read.OpIndex = OpIndex;
495 Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
496 Read.SchedClassID = SchedClassID;
497 ++CurrentUse;
498 LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
499 << ", UseIndex=" << Read.UseIndex << '\n');
500 }
501
502 ID.Reads.resize(CurrentUse);
503}
504
505Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
506 const MCInst &MCI) const {
507 if (ID.NumMicroOps != 0)
508 return ErrorSuccess();
509
510 bool UsesBuffers = ID.UsedBuffers;
511 bool UsesResources = !ID.Resources.empty();
512 if (!UsesBuffers && !UsesResources)
513 return ErrorSuccess();
514
515 // FIXME: see PR44797. We should revisit these checks and possibly move them
516 // in CodeGenSchedule.cpp.
517 StringRef Message = "found an inconsistent instruction that decodes to zero "
518 "opcodes and that consumes scheduler resources.";
519 return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
520}
521
522Expected<const InstrDesc &>
523InstrBuilder::createInstrDescImpl(const MCInst &MCI,
524 const SmallVector<Instrument *> &IVec) {
526 "Itineraries are not yet supported!");
527
528 // Obtain the instruction descriptor from the opcode.
529 unsigned short Opcode = MCI.getOpcode();
530 const MCInstrDesc &MCDesc = MCII.get(Opcode);
531 const MCSchedModel &SM = STI.getSchedModel();
532
533 // Then obtain the scheduling class information from the instruction.
534 // Allow InstrumentManager to override and use a different SchedClassID
535 unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
536 bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
537
538 // Try to solve variant scheduling classes.
539 if (IsVariant) {
540 unsigned CPUID = SM.getProcessorID();
541 while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
542 SchedClassID =
543 STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
544
545 if (!SchedClassID) {
546 return make_error<InstructionError<MCInst>>(
547 "unable to resolve scheduling class for write variant.", MCI);
548 }
549 }
550
551 // Check if this instruction is supported. Otherwise, report an error.
552 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
553 if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
554 return make_error<InstructionError<MCInst>>(
555 "found an unsupported instruction in the input assembly sequence", MCI);
556 }
557
558 LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
559 LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
560 LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');
561
562 // Create a new empty descriptor.
563 std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
564 ID->NumMicroOps = SCDesc.NumMicroOps;
565 ID->SchedClassID = SchedClassID;
566
567 if (MCDesc.isCall() && FirstCallInst) {
568 // We don't correctly model calls.
569 WithColor::warning() << "found a call in the input assembly sequence.\n";
570 WithColor::note() << "call instructions are not correctly modeled. "
571 << "Assume a latency of 100cy.\n";
572 FirstCallInst = false;
573 }
574
575 if (MCDesc.isReturn() && FirstReturnInst) {
576 WithColor::warning() << "found a return instruction in the input"
577 << " assembly sequence.\n";
578 WithColor::note() << "program counter updates are ignored.\n";
579 FirstReturnInst = false;
580 }
581
582 initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
583 computeMaxLatency(*ID, MCDesc, SCDesc, STI);
584
585 if (Error Err = verifyOperands(MCDesc, MCI))
586 return std::move(Err);
587
588 populateWrites(*ID, MCI, SchedClassID);
589 populateReads(*ID, MCI, SchedClassID);
590
591 LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
592 LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
593
594 // Validation check on the instruction descriptor.
595 if (Error Err = verifyInstrDesc(*ID, MCI))
596 return std::move(Err);
597
598 // Now add the new descriptor.
599 bool IsVariadic = MCDesc.isVariadic();
600 if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
601 auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
602 Descriptors[DKey] = std::move(ID);
603 return *Descriptors[DKey];
604 }
605
606 auto VDKey = std::make_pair(&MCI, SchedClassID);
607 VariantDescriptors[VDKey] = std::move(ID);
608 return *VariantDescriptors[VDKey];
609}
610
611Expected<const InstrDesc &>
612InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
613 const SmallVector<Instrument *> &IVec) {
614 // Cache lookup using SchedClassID from Instrumentation
615 unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
616
617 auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
618 if (Descriptors.find_as(DKey) != Descriptors.end())
619 return *Descriptors[DKey];
620
621 unsigned CPUID = STI.getSchedModel().getProcessorID();
622 SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
623 auto VDKey = std::make_pair(&MCI, SchedClassID);
624 if (VariantDescriptors.contains(VDKey))
625 return *VariantDescriptors[VDKey];
626
627 return createInstrDescImpl(MCI, IVec);
628}
629
630STATISTIC(NumVariantInst, "Number of MCInsts that doesn't have static Desc");
631
634 const SmallVector<Instrument *> &IVec) {
635 Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
636 if (!DescOrErr)
637 return DescOrErr.takeError();
638 const InstrDesc &D = *DescOrErr;
639 Instruction *NewIS = nullptr;
640 std::unique_ptr<Instruction> CreatedIS;
641 bool IsInstRecycled = false;
642
643 if (!D.IsRecyclable)
644 ++NumVariantInst;
645
646 if (D.IsRecyclable && InstRecycleCB) {
647 if (auto *I = InstRecycleCB(D)) {
648 NewIS = I;
649 NewIS->reset();
650 IsInstRecycled = true;
651 }
652 }
653 if (!IsInstRecycled) {
654 CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
655 NewIS = CreatedIS.get();
656 }
657
658 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
659 const MCSchedClassDesc &SCDesc =
660 *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);
661
662 NewIS->setMayLoad(MCDesc.mayLoad());
663 NewIS->setMayStore(MCDesc.mayStore());
665 NewIS->setBeginGroup(SCDesc.BeginGroup);
666 NewIS->setEndGroup(SCDesc.EndGroup);
667 NewIS->setRetireOOO(SCDesc.RetireOOO);
668
669 // Check if this is a dependency breaking instruction.
670 APInt Mask;
671
672 bool IsZeroIdiom = false;
673 bool IsDepBreaking = false;
674 if (MCIA) {
675 unsigned ProcID = STI.getSchedModel().getProcessorID();
676 IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
677 IsDepBreaking =
678 IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
679 if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
680 NewIS->setOptimizableMove();
681 }
682
683 // Initialize Reads first.
684 MCPhysReg RegID = 0;
685 size_t Idx = 0U;
686 for (const ReadDescriptor &RD : D.Reads) {
687 if (!RD.isImplicitRead()) {
688 // explicit read.
689 const MCOperand &Op = MCI.getOperand(RD.OpIndex);
690 // Skip non-register operands.
691 if (!Op.isReg())
692 continue;
693 RegID = Op.getReg();
694 } else {
695 // Implicit read.
696 RegID = RD.RegisterID;
697 }
698
699 // Skip invalid register operands.
700 if (!RegID)
701 continue;
702
703 // Okay, this is a register operand. Create a ReadState for it.
704 ReadState *RS = nullptr;
705 if (IsInstRecycled && Idx < NewIS->getUses().size()) {
706 NewIS->getUses()[Idx] = ReadState(RD, RegID);
707 RS = &NewIS->getUses()[Idx++];
708 } else {
709 NewIS->getUses().emplace_back(RD, RegID);
710 RS = &NewIS->getUses().back();
711 ++Idx;
712 }
713
714 if (IsDepBreaking) {
715 // A mask of all zeroes means: explicit input operands are not
716 // independent.
717 if (Mask.isZero()) {
718 if (!RD.isImplicitRead())
720 } else {
721 // Check if this register operand is independent according to `Mask`.
722 // Note that Mask may not have enough bits to describe all explicit and
723 // implicit input operands. If this register operand doesn't have a
724 // corresponding bit in Mask, then conservatively assume that it is
725 // dependent.
726 if (Mask.getBitWidth() > RD.UseIndex) {
727 // Okay. This map describe register use `RD.UseIndex`.
728 if (Mask[RD.UseIndex])
730 }
731 }
732 }
733 }
734 if (IsInstRecycled && Idx < NewIS->getUses().size())
735 NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);
736
737 // Early exit if there are no writes.
738 if (D.Writes.empty()) {
739 if (IsInstRecycled)
740 return llvm::make_error<RecycledInstErr>(NewIS);
741 else
742 return std::move(CreatedIS);
743 }
744
745 // Track register writes that implicitly clear the upper portion of the
746 // underlying super-registers using an APInt.
747 APInt WriteMask(D.Writes.size(), 0);
748
749 // Now query the MCInstrAnalysis object to obtain information about which
750 // register writes implicitly clear the upper portion of a super-register.
751 if (MCIA)
752 MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
753
754 // Initialize writes.
755 unsigned WriteIndex = 0;
756 Idx = 0U;
757 for (const WriteDescriptor &WD : D.Writes) {
758 RegID = WD.isImplicitWrite() ? WD.RegisterID
759 : MCI.getOperand(WD.OpIndex).getReg();
760 // Check if this is a optional definition that references NoReg or a write
761 // to a constant register.
762 if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
763 ++WriteIndex;
764 continue;
765 }
766
767 assert(RegID && "Expected a valid register ID!");
768 if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
769 NewIS->getDefs()[Idx++] =
770 WriteState(WD, RegID,
771 /* ClearsSuperRegs */ WriteMask[WriteIndex],
772 /* WritesZero */ IsZeroIdiom);
773 } else {
774 NewIS->getDefs().emplace_back(WD, RegID,
775 /* ClearsSuperRegs */ WriteMask[WriteIndex],
776 /* WritesZero */ IsZeroIdiom);
777 ++Idx;
778 }
779 ++WriteIndex;
780 }
781 if (IsInstRecycled && Idx < NewIS->getDefs().size())
782 NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);
783
784 if (IsInstRecycled)
785 return llvm::make_error<RecycledInstErr>(NewIS);
786 else
787 return std::move(CreatedIS);
788}
} // namespace mca
} // namespace llvm
unsigned const MachineRegisterInfo * MRI
This file implements a class to represent arbitrary precision integral constant values and operations...
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
#define im(i)
A builder class for instructions that are statically analyzed by llvm-mca.
#define I(x, y, z)
Definition: MD5.cpp:58
while(!ToSimplify.empty())
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned OpIndex
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
This class represents an Operation in the Expression.
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition: Error.h:332
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:474
Error takeError()
Take ownership of the stored error.
Definition: Error.h:601
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
unsigned getNumOperands() const
Definition: MCInst.h:208
unsigned getOpcode() const
Definition: MCInst.h:198
const MCOperand & getOperand(unsigned i) const
Definition: MCInst.h:206
virtual bool isOptimizableRegisterMove(const MCInst &MI, unsigned CPUID) const
Returns true if MI is a candidate for move elimination.
virtual bool isDependencyBreaking(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking instruction for the subtarget associated with CPUID .
virtual bool isZeroIdiom(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking zero-idiom for the given subtarget.
virtual bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst, APInt &Writes) const
Returns true if at least one of the register writes performed by.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:444
bool mayLoad() const
Return true if this instruction could possibly read memory.
Definition: MCInstrDesc.h:438
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:265
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
bool variadicOpsAreDefs() const
Return true if variadic operands of this instruction are definitions.
Definition: MCInstrDesc.h:418
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by other flags.
Definition: MCInstrDesc.h:463
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:288
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
unsigned getReg() const
Returns the register number.
Definition: MCInst.h:69
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
const char * getName(MCRegister RegNo) const
Return the human-readable symbolic target-specific name for the specified physical register.
bool isConstant(MCRegister RegNo) const
Returns true if the given register is constant.
Generic base class for all target subtargets.
virtual unsigned resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI, const MCInstrInfo *MCII, unsigned CPUID) const
Resolve a variant scheduling class for the given MCInst and CPU.
const MCWriteLatencyEntry * getWriteLatencyEntry(const MCSchedClassDesc *SC, unsigned DefIdx) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
size_t size() const
Definition: SmallVector.h:91
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
static raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
Definition: WithColor.cpp:85
static raw_ostream & note()
Convenience method for printing "note: " to stderr.
Definition: WithColor.cpp:87
A sequence of cycles.
Definition: Instruction.h:389
Expected< std::unique_ptr< Instruction > > createInstruction(const MCInst &MCI, const SmallVector< Instrument * > &IVec)
void setEndGroup(bool newVal)
Definition: Instruction.h:585
void setRetireOOO(bool newVal)
Definition: Instruction.h:586
SmallVectorImpl< WriteState > & getDefs()
Definition: Instruction.h:535
void setBeginGroup(bool newVal)
Definition: Instruction.h:584
SmallVectorImpl< ReadState > & getUses()
Definition: Instruction.h:537
void setHasSideEffects(bool newVal)
Definition: Instruction.h:583
void setMayStore(bool newVal)
Definition: Instruction.h:582
void setMayLoad(bool newVal)
Definition: Instruction.h:581
An instruction propagated through the simulated instruction pipeline.
Definition: Instruction.h:600
This class allows targets to optionally customize the logic that resolves scheduling class IDs.
virtual unsigned getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI, const SmallVector< Instrument * > &IVec) const
Given an MCInst and a vector of Instrument, a target can return a SchedClassID.
Tracks register operand latency in cycles.
Definition: Instruction.h:326
void setIndependentFromDef()
Definition: Instruction.h:372
Tracks uses of a register definition (e.g.
Definition: Instruction.h:197
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI)
static void initializeUsedResources(InstrDesc &ID, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI, ArrayRef< uint64_t > ProcResourceMasks)
void computeProcResourceMasks(const MCSchedModel &SM, MutableArrayRef< uint64_t > Masks)
Populates vector Masks with processor resource masks.
Definition: Support.cpp:40
unsigned getResourceStateIndex(uint64_t Mask)
Definition: Support.h:100
static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FormattedNumber format_hex(uint64_t N, unsigned Width, bool Upper=false)
format_hex - Output N as a fixed width hexadecimal.
Definition: Format.h:187
DWARFExpression::Operation Op
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:31
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118
static const unsigned short InvalidNumMicroOps
Definition: MCSchedule.h:119
uint16_t NumWriteLatencyEntries
Definition: MCSchedule.h:132
uint16_t NumWriteProcResEntries
Definition: MCSchedule.h:130
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:253
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:360
unsigned getProcessorID() const
Definition: MCSchedule.h:331
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:349
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:334
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
Definition: MCSchedule.cpp:42
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:353
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
uint16_t ReleaseAtCycle
Cycle at which the resource will be released by an instruction, relatively to the cycle in which the ...
Definition: MCSchedule.h:68
An instruction descriptor.
Definition: Instruction.h:447
A register read descriptor.
Definition: Instruction.h:163
bool isImplicitRead() const
Definition: Instruction.h:177
Helper used by class InstrDesc to describe how hardware resources are used.
Definition: Instruction.h:436
A register write descriptor.
Definition: Instruction.h:135
bool isImplicitWrite() const
Definition: Instruction.h:159