LLVM 20.0.0git
RandomIRBuilder.cpp
Go to the documentation of this file.
1//===-- RandomIRBuilder.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/STLExtras.h"
13#include "llvm/IR/BasicBlock.h"
14#include "llvm/IR/Constants.h"
15#include "llvm/IR/DataLayout.h"
16#include "llvm/IR/Dominators.h"
17#include "llvm/IR/Function.h"
20#include "llvm/IR/Module.h"
21
22using namespace llvm;
23using namespace fuzzerop;
24
25/// Return a vector of Blocks that dominates this block, excluding current
26/// block.
27static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
28 std::vector<BasicBlock *> ret;
29 DominatorTree DT(*BB->getParent());
30 DomTreeNode *Node = DT.getNode(BB);
31 // It's possible that an orphan block is not in the dom tree. In that case we
32 // just return nothing.
33 if (!Node)
34 return ret;
35 Node = Node->getIDom();
36 while (Node && Node->getBlock()) {
37 ret.push_back(Node->getBlock());
38 // Get parent block.
39 Node = Node->getIDom();
40 }
41 return ret;
42}
43
44/// Return a vector of Blocks that is dominated by this block, excluding current
45/// block
46static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
47 DominatorTree DT(*BB->getParent());
48 std::vector<BasicBlock *> ret;
49 DomTreeNode *Parent = DT.getNode(BB);
50 // It's possible that an orphan block is not in the dom tree. In that case we
51 // just return nothing.
52 if (!Parent)
53 return ret;
54 for (DomTreeNode *Child : Parent->children())
55 ret.push_back(Child->getBlock());
56 uint64_t Idx = 0;
57 while (Idx < ret.size()) {
58 DomTreeNode *Node = DT[ret[Idx]];
59 Idx++;
60 for (DomTreeNode *Child : Node->children())
61 ret.push_back(Child->getBlock());
62 }
63 return ret;
64}
65
67 Value *Init) {
68 /// TODO: For all Allocas, maybe allocate an array.
69 BasicBlock *EntryBB = &F->getEntryBlock();
70 const DataLayout &DL = F->getDataLayout();
71 AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
72 EntryBB->getFirstInsertionPt());
73 if (Init)
74 new StoreInst(Init, Alloca, std::next(Alloca->getIterator()));
75 return Alloca;
76}
77
78std::pair<GlobalVariable *, bool>
81 auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
82 // Can't directly compare GV's type, as it would be a pointer to the actual
83 // type.
84 return Pred.matches(Srcs, PoisonValue::get(GV->getValueType()));
85 };
86 bool DidCreate = false;
88 for (GlobalVariable &GV : M->globals()) {
89 GlobalVars.push_back(&GV);
90 }
91 auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred));
92 RS.sample(nullptr, 1);
93 GlobalVariable *GV = RS.getSelection();
94 if (!GV) {
95 DidCreate = true;
96 using LinkageTypes = GlobalVariable::LinkageTypes;
97 auto TRS = makeSampler<Constant *>(Rand);
98 TRS.sample(Pred.generate(Srcs, KnownTypes));
99 Constant *Init = TRS.getSelection();
100 Type *Ty = Init->getType();
101 GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
102 "G", nullptr,
104 M->getDataLayout().getDefaultGlobalsAddressSpace());
105 }
106 return {GV, DidCreate};
107}
108
111 return findOrCreateSource(BB, Insts, {}, anyType());
112}
113
117 SourcePred Pred,
118 bool allowConstant) {
119 auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); };
121 for (uint64_t i = 0; i < EndOfValueSource; i++)
122 SrcTys.push_back(i);
123 std::shuffle(SrcTys.begin(), SrcTys.end(), Rand);
124 for (uint64_t SrcTy : SrcTys) {
125 switch (SrcTy) {
127 auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
128 if (!RS.isEmpty()) {
129 return RS.getSelection();
130 }
131 break;
132 }
133 case FunctionArgument: {
134 Function *F = BB.getParent();
136 for (uint64_t i = 0; i < F->arg_size(); i++) {
137 Args.push_back(F->getArg(i));
138 }
139 auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred));
140 if (!RS.isEmpty()) {
141 return RS.getSelection();
142 }
143 break;
144 }
145 case InstInDominator: {
146 auto Dominators = getDominators(&BB);
147 std::shuffle(Dominators.begin(), Dominators.end(), Rand);
148 for (BasicBlock *Dom : Dominators) {
150 for (Instruction &I : *Dom) {
151 Instructions.push_back(&I);
152 }
153 auto RS =
154 makeSampler(Rand, make_filter_range(Instructions, MatchesPred));
155 // Also consider choosing no source, meaning we want a new one.
156 if (!RS.isEmpty()) {
157 return RS.getSelection();
158 }
159 }
160 break;
161 }
163 Module *M = BB.getParent()->getParent();
164 auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
165 Type *Ty = GV->getValueType();
166 LoadInst *LoadGV = nullptr;
167 if (BB.getTerminator()) {
168 LoadGV = new LoadInst(Ty, GV, "LGV", BB.getFirstInsertionPt());
169 } else {
170 LoadGV = new LoadInst(Ty, GV, "LGV", &BB);
171 }
172 // Because we might be generating new values, we have to check if it
173 // matches again.
174 if (DidCreate) {
175 if (Pred.matches(Srcs, LoadGV)) {
176 return LoadGV;
177 }
178 LoadGV->eraseFromParent();
179 // If no one is using this GlobalVariable, delete it too.
180 if (GV->use_empty()) {
181 GV->eraseFromParent();
182 }
183 }
184 break;
185 }
186 case NewConstOrStack: {
187 return newSource(BB, Insts, Srcs, Pred, allowConstant);
188 }
189 default:
190 case EndOfValueSource: {
191 llvm_unreachable("EndOfValueSource executed");
192 }
193 }
194 }
195 llvm_unreachable("Can't find a source");
196}
197
199 ArrayRef<Value *> Srcs, SourcePred Pred,
200 bool allowConstant) {
201 // Generate some constants to choose from.
202 auto RS = makeSampler<Value *>(Rand);
203 RS.sample(Pred.generate(Srcs, KnownTypes));
204
205 // If we can find a pointer to load from, use it half the time.
206 Value *Ptr = findPointer(BB, Insts);
207 if (Ptr) {
208 // Create load from the chosen pointer
209 auto IP = BB.getFirstInsertionPt();
210 if (auto *I = dyn_cast<Instruction>(Ptr)) {
211 IP = ++I->getIterator();
212 assert(IP != BB.end() && "guaranteed by the findPointer");
213 }
214 // Pick the type independently.
215 Type *AccessTy = RS.getSelection()->getType();
216 auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", IP);
217
218 // Only sample this load if it really matches the descriptor
219 if (Pred.matches(Srcs, NewLoad))
220 RS.sample(NewLoad, RS.totalWeight());
221 else
222 NewLoad->eraseFromParent();
223 }
224
225 Value *newSrc = RS.getSelection();
226 // Generate a stack alloca and store the constant to it if constant is not
227 // allowed, our hope is that later mutations can generate some values and
228 // store to this placeholder.
229 if (!allowConstant && isa<Constant>(newSrc)) {
230 Type *Ty = newSrc->getType();
231 Function *F = BB.getParent();
232 AllocaInst *Alloca = createStackMemory(F, Ty, newSrc);
233 if (BB.getTerminator()) {
234 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L",
235 BB.getTerminator()->getIterator());
236 } else {
237 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
238 }
239 }
240 return newSrc;
241}
242
243static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
244 const Value *Replacement) {
245 unsigned int OperandNo = Operand.getOperandNo();
246 if (Operand->getType() != Replacement->getType())
247 return false;
248 switch (I->getOpcode()) {
249 case Instruction::GetElementPtr:
250 case Instruction::ExtractElement:
251 case Instruction::ExtractValue:
252 // TODO: We could potentially validate these, but for now just leave indices
253 // alone.
254 if (OperandNo >= 1)
255 return false;
256 break;
257 case Instruction::InsertValue:
258 case Instruction::InsertElement:
259 case Instruction::ShuffleVector:
260 if (OperandNo >= 2)
261 return false;
262 break;
263 // For Br/Switch, we only try to modify the 1st Operand (condition).
264 // Modify other operands, like switch case may accidently change case from
265 // ConstantInt to a register, which is illegal.
266 case Instruction::Switch:
267 case Instruction::Br:
268 if (OperandNo >= 1)
269 return false;
270 break;
271 case Instruction::Call:
272 case Instruction::Invoke:
273 case Instruction::CallBr: {
274 const Function *Callee = cast<CallBase>(I)->getCalledFunction();
275 // If it's an indirect call, give up.
276 if (!Callee)
277 return false;
278 // If callee is not an intrinsic, operand 0 is the function to be called.
279 // Since we cannot assume that the replacement is a function pointer,
280 // we give up.
281 if (!Callee->getIntrinsicID() && OperandNo == 0)
282 return false;
283 return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg);
284 }
285 default:
286 break;
287 }
288 return true;
289}
290
293 Value *V) {
295 for (uint64_t i = 0; i < EndOfValueSink; i++)
296 SinkTys.push_back(i);
297 std::shuffle(SinkTys.begin(), SinkTys.end(), Rand);
298 auto findSinkAndConnect =
299 [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
300 auto RS = makeSampler<Use *>(Rand);
301 for (auto &I : Instructions) {
302 for (Use &U : I->operands())
303 if (isCompatibleReplacement(I, U, V))
304 RS.sample(&U, 1);
305 }
306 if (!RS.isEmpty()) {
307 Use *Sink = RS.getSelection();
308 User *U = Sink->getUser();
309 unsigned OpNo = Sink->getOperandNo();
310 U->setOperand(OpNo, V);
311 return cast<Instruction>(U);
312 }
313 return nullptr;
314 };
315 Instruction *Sink = nullptr;
316 for (uint64_t SinkTy : SinkTys) {
317 switch (SinkTy) {
319 Sink = findSinkAndConnect(Insts);
320 if (Sink)
321 return Sink;
322 break;
323 case PointersInDominator: {
324 auto Dominators = getDominators(&BB);
325 std::shuffle(Dominators.begin(), Dominators.end(), Rand);
326 for (BasicBlock *Dom : Dominators) {
327 for (Instruction &I : *Dom) {
328 if (isa<PointerType>(I.getType()))
329 return new StoreInst(V, &I, Insts.back()->getIterator());
330 }
331 }
332 break;
333 }
334 case InstInDominatee: {
335 auto Dominatees = getDominatees(&BB);
336 std::shuffle(Dominatees.begin(), Dominatees.end(), Rand);
337 for (BasicBlock *Dominee : Dominatees) {
338 std::vector<Instruction *> Instructions;
339 for (Instruction &I : *Dominee)
340 Instructions.push_back(&I);
341 Sink = findSinkAndConnect(Instructions);
342 if (Sink) {
343 return Sink;
344 }
345 }
346 break;
347 }
348 case NewStore:
349 /// TODO: allocate a new stack memory.
350 return newSink(BB, Insts, V);
352 Module *M = BB.getParent()->getParent();
353 auto [GV, DidCreate] =
355 return new StoreInst(V, GV, Insts.back()->getIterator());
356 }
357 case EndOfValueSink:
358 default:
359 llvm_unreachable("EndOfValueSink executed");
360 }
361 }
362 llvm_unreachable("Can't find a sink");
363}
364
366 ArrayRef<Instruction *> Insts, Value *V) {
367 Value *Ptr = findPointer(BB, Insts);
368 if (!Ptr) {
369 if (uniform(Rand, 0, 1)) {
370 Type *Ty = V->getType();
372 } else {
373 Ptr = PoisonValue::get(PointerType::get(V->getType(), 0));
374 }
375 }
376
377 return new StoreInst(V, Ptr, Insts.back()->getIterator());
378}
379
382 auto IsMatchingPtr = [](Instruction *Inst) {
383 // Invoke instructions sometimes produce valid pointers but currently
384 // we can't insert loads or stores from them
385 if (Inst->isTerminator())
386 return false;
387
388 return Inst->getType()->isPointerTy();
389 };
390 if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr)))
391 return RS.getSelection();
392 return nullptr;
393}
394
396 uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1);
397 return KnownTypes[TyIdx];
398}
399
401 uint64_t ArgNum) {
402 Type *RetType = randomType();
403
405 for (uint64_t i = 0; i < ArgNum; i++) {
406 Args.push_back(randomType());
407 }
408
410 /*isVarArg=*/false),
412 return F;
413}
416 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
417}
418
420 uint64_t ArgNum) {
421 Function *F = this->createFunctionDeclaration(M, ArgNum);
422
423 // TODO: Some arguments and a return value would probably be more
424 // interesting.
425 LLVMContext &Context = M.getContext();
426 const DataLayout &DL = M.getDataLayout();
427 BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
428 Type *RetTy = F->getReturnType();
429 if (RetTy != Type::getVoidTy(Context)) {
430 Instruction *RetAlloca =
431 new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
432 Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
433 ReturnInst::Create(Context, RetLoad, BB);
434 } else {
435 ReturnInst::Create(Context, BB);
436 }
437
438 return F;
439}
442 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
443}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isCompatibleReplacement(const Instruction *I, const Use &Operand, const Value *Replacement)
static std::vector< BasicBlock * > getDominators(BasicBlock *BB)
Return a vector of Blocks that dominates this block, excluding current block.
static std::vector< BasicBlock * > getDominatees(BasicBlock *BB)
Return a vector of Blocks that is dominated by this block, excluding current block.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
an instruction to allocate memory on the stack
Definition: Instructions.h:63
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:177
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
iterator_range< iterator > children()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static Type * getVoidTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
A matcher/generator for finding suitable values for the next source in an operation's partially compl...
Definition: OpDescriptor.h:43
bool matches(ArrayRef< Value * > Cur, const Value *New)
Returns true if New is compatible for the argument after Cur.
Definition: OpDescriptor.h:77
std::vector< Constant * > generate(ArrayRef< Value * > Cur, ArrayRef< Type * > BaseTypes)
Generates a list of potential values for the argument after Cur.
Definition: OpDescriptor.h:82
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static SourcePred onlyType(Type *Only)
Definition: OpDescriptor.h:95
static SourcePred anyType()
Definition: OpDescriptor.h:105
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
ReservoirSampler< ElT, GenT > makeSampler(GenT &RandGen, RangeT &&Items)
Definition: Random.h:75
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:573
T uniform(GenT &Gen, T Min, T Max)
Return a uniformly distributed random value between Min and Max.
Definition: Random.h:21
Function * createFunctionDeclaration(Module &M, uint64_t ArgNum)
SmallVector< Type *, 16 > KnownTypes
std::pair< GlobalVariable *, bool > findOrCreateGlobalVariable(Module *M, ArrayRef< Value * > Srcs, fuzzerop::SourcePred Pred)
Find or create a global variable.
Value * findOrCreateSource(BasicBlock &BB, ArrayRef< Instruction * > Insts)
Find a "source" for some operation, which will be used in one of the operation's operands.
AllocaInst * createStackMemory(Function *F, Type *Ty, Value *Init=nullptr)
Create a stack memory at the head of the function, store Init to the memory if provided.
Instruction * newSink(BasicBlock &BB, ArrayRef< Instruction * > Insts, Value *V)
Create a user for V in BB.
Function * createFunctionDefinition(Module &M, uint64_t ArgNum)
Value * newSource(BasicBlock &BB, ArrayRef< Instruction * > Insts, ArrayRef< Value * > Srcs, fuzzerop::SourcePred Pred, bool allowConstant=true)
Create some Value suitable as a source for some operation.
Instruction * connectToSink(BasicBlock &BB, ArrayRef< Instruction * > Insts, Value *V)
Find a viable user for V in Insts, which should all be contained in BB.
@ SinkToInstInCurBlock
TODO: Also consider pointers in function argument.
Value * findPointer(BasicBlock &BB, ArrayRef< Instruction * > Insts)
Type * randomType()
Return a uniformly choosen type from AllowedTypes.