LLVM  14.0.0git
InstrOrderFile.cpp
Go to the documentation of this file.
1 //===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include <fstream>
#include <map>
#include <mutex>
#include <set>
#include <sstream>
36 
37 using namespace llvm;
38 #define DEBUG_TYPE "instrorderfile"
39 
41  "orderfile-write-mapping", cl::init(""),
42  cl::desc(
43  "Dump functions and their MD5 hash to deobfuscate profile data"),
44  cl::Hidden);
45 
46 namespace {
47 
48 // We need a global bitmap to tell if a function is executed. We also
49 // need a global variable to save the order of functions. We can use a
50 // fixed-size buffer that saves the MD5 hash of the function. We need
51 // a global variable to save the index into the buffer.
52 
53 std::mutex MappingMutex;
54 
55 struct InstrOrderFile {
56 private:
57  GlobalVariable *OrderFileBuffer;
58  GlobalVariable *BufferIdx;
59  GlobalVariable *BitMap;
60  ArrayType *BufferTy;
61  ArrayType *MapTy;
62 
63 public:
64  InstrOrderFile() {}
65 
66  void createOrderFileData(Module &M) {
67  LLVMContext &Ctx = M.getContext();
68  int NumFunctions = 0;
69  for (Function &F : M) {
70  if (!F.isDeclaration())
71  NumFunctions++;
72  }
73 
74  BufferTy =
75  ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE);
76  Type *IdxTy = Type::getInt32Ty(Ctx);
77  MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions);
78 
79  // Create the global variables.
80  std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR;
81  OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage,
83  Triple TT = Triple(M.getTargetTriple());
84  OrderFileBuffer->setSection(
85  getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat()));
86 
87  std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR;
88  BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage,
89  Constant::getNullValue(IdxTy), IndexName);
90 
91  std::string BitMapName = "bitmap_0";
92  BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage,
93  Constant::getNullValue(MapTy), BitMapName);
94  }
95 
96  // Generate the code sequence in the entry block of each function to
97  // update the buffer.
98  void generateCodeSequence(Module &M, Function &F, int FuncId) {
99  if (!ClOrderFileWriteMapping.empty()) {
100  std::lock_guard<std::mutex> LogLock(MappingMutex);
101  std::error_code EC;
104  if (EC) {
105  report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
106  " to save mapping file for order file instrumentation\n");
107  } else {
108  std::stringstream stream;
109  stream << std::hex << MD5Hash(F.getName());
110  std::string singleLine = "MD5 " + stream.str() + " " +
111  std::string(F.getName()) + '\n';
112  OS << singleLine;
113  }
114  }
115 
116  BasicBlock *OrigEntry = &F.getEntryBlock();
117 
118  LLVMContext &Ctx = M.getContext();
120  IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
121 
122  // Create a new entry block for instrumentation. We will check the bitmap
123  // in this basic block.
124  BasicBlock *NewEntry =
125  BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry);
126  IRBuilder<> entryB(NewEntry);
127  // Create a basic block for updating the circular buffer.
128  BasicBlock *UpdateOrderFileBB =
129  BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry);
130  IRBuilder<> updateB(UpdateOrderFileBB);
131 
132  // Check the bitmap, if it is already 1, do nothing.
133  // Otherwise, set the bit, grab the index, update the buffer.
134  Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0),
136  Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, "");
137  LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, "");
138  entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr);
139  Value *IsNotExecuted =
140  entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0));
141  entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry);
142 
143  // Fill up UpdateOrderFileBB: grab the index, update the buffer!
144  Value *IdxVal = updateB.CreateAtomicRMW(
147  // We need to wrap around the index to fit it inside the buffer.
148  Value *WrappedIdx = updateB.CreateAnd(
149  IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK));
150  Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx};
151  Value *BufferAddr =
152  updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, "");
153  updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())),
154  BufferAddr);
155  updateB.CreateBr(OrigEntry);
156  }
157 
158  bool run(Module &M) {
159  createOrderFileData(M);
160 
161  int FuncId = 0;
162  for (Function &F : M) {
163  if (F.isDeclaration())
164  continue;
165  generateCodeSequence(M, F, FuncId);
166  ++FuncId;
167  }
168 
169  return true;
170  }
171 
172 }; // End of InstrOrderFile struct
173 
174 class InstrOrderFileLegacyPass : public ModulePass {
175 public:
176  static char ID;
177 
178  InstrOrderFileLegacyPass() : ModulePass(ID) {
181  }
182 
183  bool runOnModule(Module &M) override;
184 };
185 
186 } // End anonymous namespace
187 
188 bool InstrOrderFileLegacyPass::runOnModule(Module &M) {
189  if (skipModule(M))
190  return false;
191 
192  return InstrOrderFile().run(M);
193 }
194 
197  if (InstrOrderFile().run(M))
198  return PreservedAnalyses::none();
199  return PreservedAnalyses::all();
200 }
201 
202 INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile",
203  "Instrumentation for Order File", false, false)
204 INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile",
205  "Instrumentation for Order File", false, false)
206 
207 char InstrOrderFileLegacyPass::ID = 0;
208 
210  return new InstrOrderFileLegacyPass();
211 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
Instrumentation.h
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
instrorderfile
instrorderfile
Definition: InstrOrderFile.cpp:204
llvm::ARM::PredBlockMask::TT
@ TT
FileSystem.h
Metadata.h
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
Definition: Pass.h:238
llvm::Function
Definition: Function.h:61
Pass.h
Statistic.h
Path.h
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:201
PassRegistry.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
Instruction.h
CommandLine.h
llvm::InstrOrderFilePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: InstrOrderFile.cpp:196
GlobalValue.h
ClOrderFileWriteMapping
static cl::opt< std::string > ClOrderFileWriteMapping("orderfile-write-mapping", cl::init(""), cl::desc("Dump functions and their MD5 hash to deobfuscate profile data"), cl::Hidden)
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
Definition: PassRegistry.cpp:31
Constants.h
llvm::GlobalObject::setSection
void setSection(StringRef S)
Change the section for this global.
Definition: Globals.cpp:212
false
Definition: StackSlotColoring.cpp:142
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
InstrProf.h
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:742
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::cl::opt
Definition: CommandLine.h:1434
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
FuncId
Profile::FuncID FuncId
Definition: Profile.cpp:321
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile", "Instrumentation for Order File", false, false) INITIALIZE_PASS_END(InstrOrderFileLegacyPass
IRBuilder.h
llvm::codeview::CompileSym2Flags::EC
@ EC
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:602
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::raw_fd_ostream
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:443
llvm::AMDGPU::HSAMD::Kernel::Key::SymbolName
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
Definition: AMDGPUMetadata.h:381
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:83
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:204
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
Function.h
llvm::initializeInstrOrderFileLegacyPassPass
void initializeInstrOrderFileLegacyPassPass(PassRegistry &)
llvm::getInstrProfSectionName
std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
Return the name of the profile section corresponding to IPSK.
Definition: InstrProf.cpp:175
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
Instructions.h
llvm::sys::fs::OF_Append
@ OF_Append
The file should be opened in append mode.
Definition: FileSystem.h:773
llvm::createInstrOrderFilePass
ModulePass * createInstrOrderFilePass()
Definition: InstrOrderFile.cpp:209
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
InstrOrderFile.h
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::GlobalValue::LinkOnceODRLinkage
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:51
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
File
Instrumentation for Order File
Definition: InstrOrderFile.cpp:205
llvm::MD5Hash
uint64_t MD5Hash(StringRef Str)
Helper to compute and return lower 64 bits of the given string's MD5 hash.
Definition: MD5.h:122
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37