LLVM 22.0.0git
IR2Vec.cpp
Go to the documentation of this file.
1//===- IR2Vec.cpp - Implementation of IR2Vec -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM
4// Exceptions. See the LICENSE file for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the IR2Vec algorithm.
11///
12//===----------------------------------------------------------------------===//
13
15
17#include "llvm/ADT/Sequence.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/IR/CFG.h"
20#include "llvm/IR/Module.h"
21#include "llvm/IR/PassManager.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/Errc.h"
24#include "llvm/Support/Error.h"
26#include "llvm/Support/Format.h"
28
29using namespace llvm;
30using namespace ir2vec;
31
32#define DEBUG_TYPE "ir2vec"
33
34STATISTIC(VocabMissCounter,
35 "Number of lookups to entities not present in the vocabulary");
36
37namespace llvm {
38namespace ir2vec {
40
41// FIXME: Use a default vocab when not specified
43 VocabFile("ir2vec-vocab-path", cl::Optional,
44 cl::desc("Path to the vocabulary file for IR2Vec"), cl::init(""),
46cl::opt<float> OpcWeight("ir2vec-opc-weight", cl::Optional, cl::init(1.0),
47 cl::desc("Weight for opcode embeddings"),
49cl::opt<float> TypeWeight("ir2vec-type-weight", cl::Optional, cl::init(0.5),
50 cl::desc("Weight for type embeddings"),
52cl::opt<float> ArgWeight("ir2vec-arg-weight", cl::Optional, cl::init(0.2),
53 cl::desc("Weight for argument embeddings"),
56 "ir2vec-kind", cl::Optional,
58 "Generate symbolic embeddings"),
60 "Generate flow-aware embeddings")),
61 cl::init(IR2VecKind::Symbolic), cl::desc("IR2Vec embedding kind"),
63
64} // namespace ir2vec
65} // namespace llvm
66
68
//===----------------------------------------------------------------------===//
// Local helper functions
//===----------------------------------------------------------------------===//
72namespace llvm::json {
73inline bool fromJSON(const llvm::json::Value &E, Embedding &Out,
75 std::vector<double> TempOut;
76 if (!llvm::json::fromJSON(E, TempOut, P))
77 return false;
78 Out = Embedding(std::move(TempOut));
79 return true;
80}
81} // namespace llvm::json
82
//===----------------------------------------------------------------------===//
// Embedding
//===----------------------------------------------------------------------===//
87 assert(this->size() == RHS.size() && "Vectors must have the same dimension");
88 std::transform(this->begin(), this->end(), RHS.begin(), this->begin(),
89 std::plus<double>());
90 return *this;
91}
92
94 Embedding Result(*this);
95 Result += RHS;
96 return Result;
97}
98
100 assert(this->size() == RHS.size() && "Vectors must have the same dimension");
101 std::transform(this->begin(), this->end(), RHS.begin(), this->begin(),
102 std::minus<double>());
103 return *this;
104}
105
107 Embedding Result(*this);
108 Result -= RHS;
109 return Result;
110}
111
113 std::transform(this->begin(), this->end(), this->begin(),
114 [Factor](double Elem) { return Elem * Factor; });
115 return *this;
116}
117
118Embedding Embedding::operator*(double Factor) const {
119 Embedding Result(*this);
120 Result *= Factor;
121 return Result;
122}
123
124Embedding &Embedding::scaleAndAdd(const Embedding &Src, float Factor) {
125 assert(this->size() == Src.size() && "Vectors must have the same dimension");
126 for (size_t Itr = 0; Itr < this->size(); ++Itr)
127 (*this)[Itr] += Src[Itr] * Factor;
128 return *this;
129}
130
132 double Tolerance) const {
133 assert(this->size() == RHS.size() && "Vectors must have the same dimension");
134 for (size_t Itr = 0; Itr < this->size(); ++Itr)
135 if (std::abs((*this)[Itr] - RHS[Itr]) > Tolerance) {
136 LLVM_DEBUG(errs() << "Embedding mismatch at index " << Itr << ": "
137 << (*this)[Itr] << " vs " << RHS[Itr]
138 << "; Tolerance: " << Tolerance << "\n");
139 return false;
140 }
141 return true;
142}
143
145 OS << " [";
146 for (const auto &Elem : Data)
147 OS << " " << format("%.2f", Elem) << " ";
148 OS << "]\n";
149}
150
//===----------------------------------------------------------------------===//
// Embedder and its subclasses
//===----------------------------------------------------------------------===//
154
156 : F(F), Vocab(Vocab), Dimension(Vocab.getDimension()),
158 FuncVector(Embedding(Dimension, 0)) {}
159
160std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F,
161 const Vocabulary &Vocab) {
162 switch (Mode) {
164 return std::make_unique<SymbolicEmbedder>(F, Vocab);
166 return std::make_unique<FlowAwareEmbedder>(F, Vocab);
167 }
168 return nullptr;
169}
170
172 if (InstVecMap.empty())
174 return InstVecMap;
175}
176
178 if (BBVecMap.empty())
180 return BBVecMap;
181}
182
184 auto It = BBVecMap.find(&BB);
185 if (It != BBVecMap.end())
186 return It->second;
188 return BBVecMap[&BB];
189}
190
192 // Currently, we always (re)compute the embeddings for the function.
193 // This is cheaper than caching the vector.
195 return FuncVector;
196}
197
199 if (F.isDeclaration())
200 return;
201
202 // Consider only the basic blocks that are reachable from entry
203 for (const BasicBlock *BB : depth_first(&F)) {
205 FuncVector += BBVecMap[BB];
206 }
207}
208
210 Embedding BBVector(Dimension, 0);
211
212 // We consider only the non-debug and non-pseudo instructions
213 for (const auto &I : BB.instructionsWithoutDebug()) {
214 Embedding ArgEmb(Dimension, 0);
215 for (const auto &Op : I.operands())
216 ArgEmb += Vocab[*Op];
217 auto InstVector =
218 Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
219 InstVecMap[&I] = InstVector;
220 BBVector += InstVector;
221 }
222 BBVecMap[&BB] = BBVector;
223}
224
226 Embedding BBVector(Dimension, 0);
227
228 // We consider only the non-debug and non-pseudo instructions
229 for (const auto &I : BB.instructionsWithoutDebug()) {
230 // TODO: Handle call instructions differently.
231 // For now, we treat them like other instructions
232 Embedding ArgEmb(Dimension, 0);
233 for (const auto &Op : I.operands()) {
234 // If the operand is defined elsewhere, we use its embedding
235 if (const auto *DefInst = dyn_cast<Instruction>(Op)) {
236 auto DefIt = InstVecMap.find(DefInst);
237 assert(DefIt != InstVecMap.end() &&
238 "Instruction should have been processed before its operands");
239 ArgEmb += DefIt->second;
240 continue;
241 }
242 // If the operand is not defined by an instruction, we use the vocabulary
243 else {
244 LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: "
245 << *Op << "=" << Vocab[*Op][0] << "\n");
246 ArgEmb += Vocab[*Op];
247 }
248 }
249 // Create the instruction vector by combining opcode, type, and arguments
250 // embeddings
251 auto InstVector =
252 Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
253 InstVecMap[&I] = InstVector;
254 BBVector += InstVector;
255 }
256 BBVecMap[&BB] = BBVector;
257}
258
//===----------------------------------------------------------------------===//
// Vocabulary
//===----------------------------------------------------------------------===//
262
263Vocabulary::Vocabulary(VocabVector &&Vocab)
264 : Vocab(std::move(Vocab)), Valid(true) {}
265
267 return Vocab.size() == NumCanonicalEntries && Valid;
268}
269
270unsigned Vocabulary::getDimension() const {
271 assert(Valid && "IR2Vec Vocabulary is invalid");
272 return Vocab[0].size();
273}
274
275unsigned Vocabulary::getSlotIndex(unsigned Opcode) {
276 assert(Opcode >= 1 && Opcode <= MaxOpcodes && "Invalid opcode");
277 return Opcode - 1; // Convert to zero-based index
278}
279
281 assert(static_cast<unsigned>(TypeID) < MaxTypeIDs && "Invalid type ID");
282 return MaxOpcodes + static_cast<unsigned>(getCanonicalTypeID(TypeID));
283}
284
286 unsigned Index = static_cast<unsigned>(getOperandKind(&Op));
287 assert(Index < MaxOperandKinds && "Invalid OperandKind");
288 return MaxOpcodes + MaxCanonicalTypeIDs + Index;
289}
290
291const Embedding &Vocabulary::operator[](unsigned Opcode) const {
292 return Vocab[getSlotIndex(Opcode)];
293}
294
296 return Vocab[getSlotIndex(TypeID)];
297}
298
300 return Vocab[getSlotIndex(Arg)];
301}
302
304 assert(Opcode >= 1 && Opcode <= MaxOpcodes && "Invalid opcode");
305#define HANDLE_INST(NUM, OPCODE, CLASS) \
306 if (Opcode == NUM) { \
307 return #OPCODE; \
308 }
309#include "llvm/IR/Instruction.def"
310#undef HANDLE_INST
311 return "UnknownOpcode";
312}
313
314StringRef Vocabulary::getVocabKeyForCanonicalTypeID(CanonicalTypeID CType) {
315 unsigned Index = static_cast<unsigned>(CType);
316 assert(Index < MaxCanonicalTypeIDs && "Invalid CanonicalTypeID");
317 return CanonicalTypeNames[Index];
318}
319
321Vocabulary::getCanonicalTypeID(Type::TypeID TypeID) {
322 unsigned Index = static_cast<unsigned>(TypeID);
323 assert(Index < MaxTypeIDs && "Invalid TypeID");
324 return TypeIDMapping[Index];
325}
326
328 return getVocabKeyForCanonicalTypeID(getCanonicalTypeID(TypeID));
329}
330
332 unsigned Index = static_cast<unsigned>(Kind);
333 assert(Index < MaxOperandKinds && "Invalid OperandKind");
334 return OperandKindNames[Index];
335}
336
337// Helper function to classify an operand into OperandKind
339 if (isa<Function>(Op))
341 if (isa<PointerType>(Op->getType()))
343 if (isa<Constant>(Op))
346}
347
349 assert(Pos < NumCanonicalEntries && "Position out of bounds in vocabulary");
350 // Opcode
351 if (Pos < MaxOpcodes)
352 return getVocabKeyForOpcode(Pos + 1);
353 // Type
354 if (Pos < MaxOpcodes + MaxCanonicalTypeIDs)
355 return getVocabKeyForCanonicalTypeID(
356 static_cast<CanonicalTypeID>(Pos - MaxOpcodes));
357 // Operand
359 static_cast<OperandKind>(Pos - MaxOpcodes - MaxCanonicalTypeIDs));
360}
361
362// For now, assume vocabulary is stable unless explicitly invalidated.
365 auto PAC = PA.getChecker<IR2VecVocabAnalysis>();
366 return !(PAC.preservedWhenStateless());
367}
368
369Vocabulary::VocabVector Vocabulary::createDummyVocabForTest(unsigned Dim) {
370 VocabVector DummyVocab;
371 DummyVocab.reserve(NumCanonicalEntries);
372 float DummyVal = 0.1f;
373 // Create a dummy vocabulary with entries for all opcodes, types, and
374 // operands
375 for ([[maybe_unused]] unsigned _ :
376 seq(0u, Vocabulary::MaxOpcodes + Vocabulary::MaxCanonicalTypeIDs +
378 DummyVocab.push_back(Embedding(Dim, DummyVal));
379 DummyVal += 0.1f;
380 }
381 return DummyVocab;
382}
383
//===----------------------------------------------------------------------===//
// IR2VecVocabAnalysis
//===----------------------------------------------------------------------===//
387
388Error IR2VecVocabAnalysis::parseVocabSection(
389 StringRef Key, const json::Value &ParsedVocabValue, VocabMap &TargetVocab,
390 unsigned &Dim) {
391 json::Path::Root Path("");
392 const json::Object *RootObj = ParsedVocabValue.getAsObject();
393 if (!RootObj)
395 "JSON root is not an object");
396
397 const json::Value *SectionValue = RootObj->get(Key);
398 if (!SectionValue)
400 "Missing '" + std::string(Key) +
401 "' section in vocabulary file");
402 if (!json::fromJSON(*SectionValue, TargetVocab, Path))
404 "Unable to parse '" + std::string(Key) +
405 "' section from vocabulary");
406
407 Dim = TargetVocab.begin()->second.size();
408 if (Dim == 0)
410 "Dimension of '" + std::string(Key) +
411 "' section of the vocabulary is zero");
412
413 if (!std::all_of(TargetVocab.begin(), TargetVocab.end(),
414 [Dim](const std::pair<StringRef, Embedding> &Entry) {
415 return Entry.second.size() == Dim;
416 }))
417 return createStringError(
419 "All vectors in the '" + std::string(Key) +
420 "' section of the vocabulary are not of the same dimension");
421
422 return Error::success();
423}
424
425// FIXME: Make this optional. We can avoid file reads
426// by auto-generating a default vocabulary during the build time.
427Error IR2VecVocabAnalysis::readVocabulary() {
428 auto BufOrError = MemoryBuffer::getFileOrSTDIN(VocabFile, /*IsText=*/true);
429 if (!BufOrError)
430 return createFileError(VocabFile, BufOrError.getError());
431
432 auto Content = BufOrError.get()->getBuffer();
433
434 Expected<json::Value> ParsedVocabValue = json::parse(Content);
435 if (!ParsedVocabValue)
436 return ParsedVocabValue.takeError();
437
438 unsigned OpcodeDim = 0, TypeDim = 0, ArgDim = 0;
439 if (auto Err =
440 parseVocabSection("Opcodes", *ParsedVocabValue, OpcVocab, OpcodeDim))
441 return Err;
442
443 if (auto Err =
444 parseVocabSection("Types", *ParsedVocabValue, TypeVocab, TypeDim))
445 return Err;
446
447 if (auto Err =
448 parseVocabSection("Arguments", *ParsedVocabValue, ArgVocab, ArgDim))
449 return Err;
450
451 if (!(OpcodeDim == TypeDim && TypeDim == ArgDim))
453 "Vocabulary sections have different dimensions");
454
455 return Error::success();
456}
457
458void IR2VecVocabAnalysis::generateNumMappedVocab() {
459
460 // Helper for handling missing entities in the vocabulary.
461 // Currently, we use a zero vector. In the future, we will throw an error to
462 // ensure that *all* known entities are present in the vocabulary.
463 auto handleMissingEntity = [](const std::string &Val) {
464 LLVM_DEBUG(errs() << Val
465 << " is not in vocabulary, using zero vector; This "
466 "would result in an error in future.\n");
467 ++VocabMissCounter;
468 };
469
470 unsigned Dim = OpcVocab.begin()->second.size();
471 assert(Dim > 0 && "Vocabulary dimension must be greater than zero");
472
473 // Handle Opcodes
474 std::vector<Embedding> NumericOpcodeEmbeddings(Vocabulary::MaxOpcodes,
475 Embedding(Dim, 0));
476 NumericOpcodeEmbeddings.reserve(Vocabulary::MaxOpcodes);
477 for (unsigned Opcode : seq(0u, Vocabulary::MaxOpcodes)) {
478 StringRef VocabKey = Vocabulary::getVocabKeyForOpcode(Opcode + 1);
479 auto It = OpcVocab.find(VocabKey.str());
480 if (It != OpcVocab.end())
481 NumericOpcodeEmbeddings[Opcode] = It->second;
482 else
483 handleMissingEntity(VocabKey.str());
484 }
485 Vocab.insert(Vocab.end(), NumericOpcodeEmbeddings.begin(),
486 NumericOpcodeEmbeddings.end());
487
488 // Handle Types - only canonical types are present in vocabulary
489 std::vector<Embedding> NumericTypeEmbeddings(Vocabulary::MaxCanonicalTypeIDs,
490 Embedding(Dim, 0));
491 NumericTypeEmbeddings.reserve(Vocabulary::MaxCanonicalTypeIDs);
492 for (unsigned CTypeID : seq(0u, Vocabulary::MaxCanonicalTypeIDs)) {
493 StringRef VocabKey = Vocabulary::getVocabKeyForCanonicalTypeID(
494 static_cast<Vocabulary::CanonicalTypeID>(CTypeID));
495 if (auto It = TypeVocab.find(VocabKey.str()); It != TypeVocab.end()) {
496 NumericTypeEmbeddings[CTypeID] = It->second;
497 continue;
498 }
499 handleMissingEntity(VocabKey.str());
500 }
501 Vocab.insert(Vocab.end(), NumericTypeEmbeddings.begin(),
502 NumericTypeEmbeddings.end());
503
504 // Handle Arguments/Operands
505 std::vector<Embedding> NumericArgEmbeddings(Vocabulary::MaxOperandKinds,
506 Embedding(Dim, 0));
507 NumericArgEmbeddings.reserve(Vocabulary::MaxOperandKinds);
508 for (unsigned OpKind : seq(0u, Vocabulary::MaxOperandKinds)) {
511 auto It = ArgVocab.find(VocabKey.str());
512 if (It != ArgVocab.end()) {
513 NumericArgEmbeddings[OpKind] = It->second;
514 continue;
515 }
516 handleMissingEntity(VocabKey.str());
517 }
518 Vocab.insert(Vocab.end(), NumericArgEmbeddings.begin(),
519 NumericArgEmbeddings.end());
520}
521
523 : Vocab(Vocab) {}
524
526 : Vocab(std::move(Vocab)) {}
527
528void IR2VecVocabAnalysis::emitError(Error Err, LLVMContext &Ctx) {
529 handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
530 Ctx.emitError("Error reading vocabulary: " + EI.message());
531 });
532}
533
536 auto Ctx = &M.getContext();
537 // If vocabulary is already populated by the constructor, use it.
538 if (!Vocab.empty())
539 return Vocabulary(std::move(Vocab));
540
541 // Otherwise, try to read from the vocabulary file.
542 if (VocabFile.empty()) {
543 // FIXME: Use default vocabulary
544 Ctx->emitError("IR2Vec vocabulary file path not specified; You may need to "
545 "set it using --ir2vec-vocab-path");
546 return Vocabulary(); // Return invalid result
547 }
548 if (auto Err = readVocabulary()) {
549 emitError(std::move(Err), *Ctx);
550 return Vocabulary();
551 }
552
553 // Scale the vocabulary sections based on the provided weights
554 auto scaleVocabSection = [](VocabMap &Vocab, double Weight) {
555 for (auto &Entry : Vocab)
556 Entry.second *= Weight;
557 };
558 scaleVocabSection(OpcVocab, OpcWeight);
559 scaleVocabSection(TypeVocab, TypeWeight);
560 scaleVocabSection(ArgVocab, ArgWeight);
561
562 // Generate the numeric lookup vocabulary
563 generateNumMappedVocab();
564
565 return Vocabulary(std::move(Vocab));
566}
567
//===----------------------------------------------------------------------===//
// Printer Passes
//===----------------------------------------------------------------------===//
571
575 assert(Vocabulary.isValid() && "IR2Vec Vocabulary is invalid");
576
577 for (Function &F : M) {
579 if (!Emb) {
580 OS << "Error creating IR2Vec embeddings \n";
581 continue;
582 }
583
584 OS << "IR2Vec embeddings for function " << F.getName() << ":\n";
585 OS << "Function vector: ";
586 Emb->getFunctionVector().print(OS);
587
588 OS << "Basic block vectors:\n";
589 const auto &BBMap = Emb->getBBVecMap();
590 for (const BasicBlock &BB : F) {
591 auto It = BBMap.find(&BB);
592 if (It != BBMap.end()) {
593 OS << "Basic block: " << BB.getName() << ":\n";
594 It->second.print(OS);
595 }
596 }
597
598 OS << "Instruction vectors:\n";
599 const auto &InstMap = Emb->getInstVecMap();
600 for (const BasicBlock &BB : F) {
601 for (const Instruction &I : BB) {
602 auto It = InstMap.find(&I);
603 if (It != InstMap.end()) {
604 OS << "Instruction: ";
605 I.print(OS);
606 It->second.print(OS);
607 }
608 }
609 }
610 }
611 return PreservedAnalyses::all();
612}
613
616 auto IR2VecVocabulary = MAM.getResult<IR2VecVocabAnalysis>(M);
617 assert(IR2VecVocabulary.isValid() && "IR2Vec Vocabulary is invalid");
618
619 // Print each entry
620 unsigned Pos = 0;
621 for (const auto &Entry : IR2VecVocabulary) {
622 OS << "Key: " << IR2VecVocabulary.getStringKey(Pos++) << ": ";
623 Entry.print(OS);
624 }
625 return PreservedAnalyses::all();
626}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:687
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
T Content
#define _
This file defines the IR2Vec vocabulary analysis(IR2VecVocabAnalysis), the core ir2vec::Embedder inte...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
ModuleAnalysisManager MAM
raw_pwrite_stream & OS
Provides some synthesis utilities to produce sequences of values.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Value * RHS
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:294
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:206
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
bool empty() const
Definition: DenseMap.h:119
iterator end()
Definition: DenseMap.h:87
Base class for error info classes.
Definition: Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:52
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
static ErrorSuccess success()
Create a success value.
Definition: Error.h:336
Tagged union holding either a T or a Error.
Definition: Error.h:485
Error takeError()
Take ownership of the stored error.
Definition: Error.h:612
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:316
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition: IR2Vec.cpp:572
This analysis provides the vocabulary for IR2Vec.
Definition: IR2Vec.h:415
LLVM_ABI Result run(Module &M, ModuleAnalysisManager &MAM)
Definition: IR2Vec.cpp:535
static LLVM_ABI AnalysisKey Key
Definition: IR2Vec.h:428
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition: IR2Vec.cpp:614
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileOrSTDIN(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, or open stdin if the Filename is "-".
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:275
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:233
TypeID
Definitions of all of the base types for the Type system.
Definition: Type.h:54
LLVM Value Representation.
Definition: Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI const Embedding & getBBVector(const BasicBlock &BB) const
Returns the embedding for a given basic block in the function F if it has been computed.
Definition: IR2Vec.cpp:183
static LLVM_ABI std::unique_ptr< Embedder > create(IR2VecKind Mode, const Function &F, const Vocabulary &Vocab)
Factory method to create an Embedder object.
Definition: IR2Vec.cpp:160
BBEmbeddingsMap BBVecMap
Definition: IR2Vec.h:347
LLVM_ABI const BBEmbeddingsMap & getBBVecMap() const
Returns a map containing basic block and the corresponding embeddings for the function F if it has be...
Definition: IR2Vec.cpp:177
const Vocabulary & Vocab
Definition: IR2Vec.h:335
void computeEmbeddings() const
Function to compute embeddings.
Definition: IR2Vec.cpp:198
LLVM_ABI const InstEmbeddingsMap & getInstVecMap() const
Returns a map containing instructions and the corresponding embeddings for the function F if it has b...
Definition: IR2Vec.cpp:171
const unsigned Dimension
Dimension of the vector representation; captured from the input vocabulary.
Definition: IR2Vec.h:338
LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab)
Definition: IR2Vec.cpp:155
Embedding FuncVector
Definition: IR2Vec.h:346
LLVM_ABI const Embedding & getFunctionVector() const
Computes and returns the embedding for the current function.
Definition: IR2Vec.cpp:191
InstEmbeddingsMap InstVecMap
Definition: IR2Vec.h:348
const Function & F
Definition: IR2Vec.h:334
Class for storing and accessing the IR2Vec vocabulary.
Definition: IR2Vec.h:157
static LLVM_ABI unsigned getSlotIndex(unsigned Opcode)
Functions to return the slot index or position of a given Opcode, TypeID, or OperandKind in the vocab...
Definition: IR2Vec.cpp:275
LLVM_ABI bool invalidate(Module &M, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &Inv) const
Definition: IR2Vec.cpp:363
LLVM_ABI const ir2vec::Embedding & operator[](unsigned Opcode) const
Accessors to get the embedding for a given entity.
Definition: IR2Vec.cpp:291
static LLVM_ABI OperandKind getOperandKind(const Value *Op)
Function to classify an operand into OperandKind.
Definition: IR2Vec.cpp:338
LLVM_ABI bool isValid() const
Definition: IR2Vec.cpp:266
static LLVM_ABI StringRef getStringKey(unsigned Pos)
Returns the string key for a given index position in the vocabulary.
Definition: IR2Vec.cpp:348
static constexpr unsigned MaxCanonicalTypeIDs
Definition: IR2Vec.h:201
static LLVM_ABI VocabVector createDummyVocabForTest(unsigned Dim=1)
Create a dummy vocabulary for testing purposes.
Definition: IR2Vec.cpp:369
static constexpr unsigned MaxOperandKinds
Definition: IR2Vec.h:203
OperandKind
Operand kinds supported by IR2Vec Vocabulary.
Definition: IR2Vec.h:187
static constexpr unsigned MaxTypeIDs
Definition: IR2Vec.h:200
static LLVM_ABI StringRef getVocabKeyForTypeID(Type::TypeID TypeID)
Function to get vocabulary key for a given TypeID.
Definition: IR2Vec.cpp:327
static LLVM_ABI StringRef getVocabKeyForOpcode(unsigned Opcode)
Function to get vocabulary key for a given Opcode.
Definition: IR2Vec.cpp:303
LLVM_ABI unsigned getDimension() const
Definition: IR2Vec.cpp:270
CanonicalTypeID
Canonical type IDs supported by IR2Vec Vocabulary.
Definition: IR2Vec.h:170
static LLVM_ABI StringRef getVocabKeyForOperandKind(OperandKind Kind)
Function to get vocabulary key for a given OperandKind.
Definition: IR2Vec.cpp:331
An Object is a JSON object, which maps strings to heterogenous JSON values.
Definition: JSON.h:98
LLVM_ABI Value * get(StringRef K)
Definition: JSON.cpp:30
The root is the trivial Path to the root value.
Definition: JSON.h:711
A "cursor" marking a position within a Value.
Definition: JSON.h:664
A Value is an JSON value of unknown type.
Definition: JSON.h:288
const json::Object * getAsObject() const
Definition: JSON.h:462
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:712
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
static cl::opt< std::string > VocabFile("ir2vec-vocab-path", cl::Optional, cl::desc("Path to the vocabulary file for IR2Vec"), cl::init(""), cl::cat(IR2VecCategory))
LLVM_ABI cl::opt< float > ArgWeight
LLVM_ABI cl::opt< float > OpcWeight
LLVM_ABI cl::opt< float > TypeWeight
LLVM_ABI cl::opt< IR2VecKind > IR2VecEmbeddingKind
llvm::cl::OptionCategory IR2VecCategory
LLVM_ABI llvm::Expected< Value > parse(llvm::StringRef JSON)
Parses the provided JSON source, or returns a ParseError.
Definition: JSON.cpp:684
bool fromJSON(const Value &E, std::string &Out, Path P)
Definition: JSON.h:740
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition: Error.h:1399
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:990
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1305
@ illegal_byte_sequence
IR2VecKind
IR2Vec computes two kinds of embeddings: Symbolic and Flow-aware.
Definition: IR2Vec.h:63
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:126
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
DWARFExpression::Operation Op
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1886
iterator_range< df_iterator< T > > depth_first(const T &G)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:29
Embedding is a datatype that wraps std::vector<double>.
Definition: IR2Vec.h:79
iterator begin()
Definition: IR2Vec.h:108
LLVM_ABI bool approximatelyEquals(const Embedding &RHS, double Tolerance=1e-4) const
Returns true if the embedding is approximately equal to the RHS embedding within the specified tolera...
Definition: IR2Vec.cpp:131
LLVM_ABI Embedding & operator+=(const Embedding &RHS)
Arithmetic operators.
Definition: IR2Vec.cpp:86
LLVM_ABI Embedding operator-(const Embedding &RHS) const
Definition: IR2Vec.cpp:106
LLVM_ABI Embedding & operator-=(const Embedding &RHS)
Definition: IR2Vec.cpp:99
LLVM_ABI Embedding operator*(double Factor) const
Definition: IR2Vec.cpp:118
size_t size() const
Definition: IR2Vec.h:92
LLVM_ABI Embedding & operator*=(double Factor)
Definition: IR2Vec.cpp:112
LLVM_ABI Embedding operator+(const Embedding &RHS) const
Definition: IR2Vec.cpp:93
LLVM_ABI Embedding & scaleAndAdd(const Embedding &Src, float Factor)
Adds Src Embedding scaled by Factor with the called Embedding.
Definition: IR2Vec.cpp:124
LLVM_ABI void print(raw_ostream &OS) const
Definition: IR2Vec.cpp:144