//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im, unsigned cl)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true), CallLatency(cl) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

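// Populate the resource usage information of descriptor ID from scheduling
// class SCDesc: which processor resource units and groups are consumed, for
// how many cycles, which buffered resources are used, and whether the
// instruction must issue immediately after being dispatched.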
static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class "
                        << SM.getSchedClassName(ID.SchedClassID)
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->ReleaseAtCycle;
    }
  }

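  // The instruction must issue in the same cycle in which it is dispatched
  // when it only consumes unbuffered (in-order) resources and at least one of
  // those resources has a dispatch hazard (i.e. BufferSize is zero).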
  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  uint64_t UnitsFromResourceGroups = 0;

  // Remove cycles contributed by smaller resources, and check if there
  // are partially overlapping resource groups.
  ID.HasPartiallyOverlappingGroups = false;

  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= llvm::bit_floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (llvm::popcount(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= llvm::bit_floor(NormalizedMask);
      if (UnitsFromResourceGroups & NormalizedMask)
        ID.HasPartiallyOverlappingGroups = true;

      UnitsFromResourceGroups |= NormalizedMask;
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

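    // Subtract the cycles consumed by this resource (minus the cycles already
    // contributed to it by sub-resources in a "Super" relationship) from every
    // later worklist entry whose mask fully contains it, and count it as one
    // of the units consumed from those groups.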
    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (llvm::popcount(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ReleaseAtCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
      uint64_t MaxResourceUnits = llvm::popcount(Mask);
      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI, unsigned CallLatency,
                              bool IsCall) {
  if (IsCall) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency.
    ID.MaxLatency = CallLatency;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume the MaxLatency set for
  // calls.
  ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                            @  <MCOperand Reg:59>
  //                            @  <MCOperand Imm:0>     (!!)
  //                            @  <MCOperand Reg:67>
  //                            @  <MCOperand Imm:0>
  //                            @  <MCOperand Imm:14>
  //                            @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register or constant register
  // operands. The first NumExplicitDefs register operands are expected to be
  // register definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

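  // Drop the unused slots: operands skipped above (e.g. non-register variadic
  // operands) leave fewer writes than the worst-case size reserved earlier.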
  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

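// Hash an operand on its kind; register operands additionally hash the
// register number.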
hash_code hashMCOperand(const MCOperand &MCO) {
  hash_code TypeHash = hash_combine(MCO.isReg(), MCO.isImm(), MCO.isSFPImm(),
                                    MCO.isDFPImm(), MCO.isExpr(), MCO.isInst());
  if (MCO.isReg())
    return hash_combine(TypeHash, MCO.getReg());

  return TypeHash;
}

hash_code hashMCInst(const MCInst &MCI) {
  hash_code InstructionHash = hash_combine(MCI.getOpcode(), MCI.getFlags());
  for (unsigned I = 0; I < MCI.getNumOperands(); ++I) {
    InstructionHash =
        hash_combine(InstructionHash, hashMCOperand(MCI.getOperand(I)));
  }
  return InstructionHash;
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // into CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<unsigned> InstrBuilder::getVariantSchedClassID(const MCInst &MCI,
                                                        unsigned SchedClassID) {
  const MCSchedModel &SM = STI.getSchedModel();
  unsigned CPUID = SM.getProcessorID();
  while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
    SchedClassID =
        STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

  if (!SchedClassID) {
    return make_error<InstructionError<MCInst>>(
        "unable to resolve scheduling class for write variant.", MCI);
  }

  return SchedClassID;
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<Instrument *> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to resolve variant scheduling classes.
  if (IsVariant) {
    Expected<unsigned> VariantSchedClassIDOrErr =
        getVariantSchedClassID(MCI, SchedClassID);
    if (!VariantSchedClassIDOrErr) {
      return VariantSchedClassIDOrErr.takeError();
    }

    SchedClassID = *VariantSchedClassIDOrErr;
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence", MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  bool IsCall = MCIA->isCall(MCI);
  if (IsCall && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of " << CallLatency << "cy.\n";
    FirstCallInst = false;
  }

  if (MCIA->isReturn(MCI) && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, SCDesc, STI, CallLatency, IsCall);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.

  if (IM.canCustomize(IVec)) {
    IM.customize(IVec, *ID);
    return *CustomDescriptors.emplace_back(std::move(ID));
  }

  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    return *(Descriptors[DKey] = std::move(ID));
  }

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  assert(
      !VariantDescriptors.contains(VDKey) &&
      "Expected VariantDescriptors to not already have a value for this key.");
  return *(VariantDescriptors[VDKey] = std::move(ID));
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<Instrument *> &IVec) {
  // Cache lookup using SchedClassID from Instrumentation.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  Expected<unsigned> VariantSchedClassIDOrErr =
      getVariantSchedClassID(MCI, SchedClassID);
  if (!VariantSchedClassIDOrErr) {
    return VariantSchedClassIDOrErr.takeError();
  }

  SchedClassID = *VariantSchedClassIDOrErr;

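  // Variant instructions are cached by the pair (MCInst hash, resolved
  // scheduling class ID).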
  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  auto It = VariantDescriptors.find(VDKey);
  if (It != VariantDescriptors.end())
    return *It->second;

  return createInstrDescImpl(MCI, IVec);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<Instrument *> &IVec) {
  Expected<const InstrDesc &> DescOrErr = IM.canCustomize(IVec)
                                              ? createInstrDescImpl(MCI, IVec)
                                              : getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

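  // If the descriptor is recyclable and the client registered a recycle
  // callback, try to reuse a previously released Instruction rather than
  // allocating a new one.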
  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      // Skip constant register operands.
      if (MRI.isConstant(Op.getReg()))
        continue;
      RegID = Op.getReg().id();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg().id();
    // Check if this is an optional definition that references NoReg or a write
    // to a constant register.
    if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm