LLVM 20.0.0git
IRMutator.cpp
Go to the documentation of this file.
1//===-- IRMutator.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SmallSet.h"
18#include "llvm/IR/BasicBlock.h"
19#include "llvm/IR/FMF.h"
20#include "llvm/IR/Function.h"
23#include "llvm/IR/Module.h"
24#include "llvm/IR/Operator.h"
26#include "llvm/IR/Verifier.h"
31#include <map>
32#include <optional>
33
34using namespace llvm;
35
37 auto RS = makeSampler<Function *>(IB.Rand);
38 for (Function &F : M)
39 if (!F.isDeclaration())
40 RS.sample(&F, /*Weight=*/1);
41
42 while (RS.totalWeight() < IB.MinFunctionNum) {
43 Function *F = IB.createFunctionDefinition(M);
44 RS.sample(F, /*Weight=*/1);
45 }
46 mutate(*RS.getSelection(), IB);
47}
48
51 [](BasicBlock *BB) { return !BB->isEHPad(); });
53 mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
54}
55
57 mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
58}
59
61 return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
62}
63
64void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
65 std::vector<Type *> Types;
66 for (const auto &Getter : AllowedTypes)
67 Types.push_back(Getter(M.getContext()));
68 RandomIRBuilder IB(Seed, Types);
69
70 size_t CurSize = IRMutator::getModuleSize(M);
71 auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
72 for (const auto &Strategy : Strategies)
73 RS.sample(Strategy.get(),
74 Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
75 if (RS.totalWeight() == 0)
76 return;
77 auto Strategy = RS.getSelection();
78
79 Strategy->mutate(M, IB);
80}
81
84 FPM.addPass(DCEPass());
86 FAM.registerPass([&] { return TargetLibraryAnalysis(); });
88 FPM.run(F, FAM);
89}
90
94}
95
96std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
97 std::vector<fuzzerop::OpDescriptor> Ops;
104 return Ops;
105}
106
107std::optional<fuzzerop::OpDescriptor>
108InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
109 auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
110 return Op.SourcePreds[0].matches({}, Src);
111 };
112 auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
113 if (RS.isEmpty())
114 return std::nullopt;
115 return *RS;
116}
117
120 auto End = BB.getTerminatingMustTailCall() ? std::prev(BB.end()) : BB.end();
121 return make_range(BB.getFirstInsertionPt(), End);
122}
123
126 for (Instruction &I : getInsertionRange(BB))
127 Insts.push_back(&I);
128 if (Insts.size() < 1)
129 return;
130
131 // Choose an insertion point for our new instruction.
132 size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
133
134 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
135 auto InstsAfter = ArrayRef(Insts).slice(IP);
136
137 // Choose a source, which will be used to constrain the operation selection.
139 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
140
141 // Choose an operation that's constrained to be valid for the type of the
142 // source, collect any other sources it needs, and then build it.
143 auto OpDesc = chooseOperation(Srcs[0], IB);
144 // Bail if no operation was found
145 if (!OpDesc)
146 return;
147
148 for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
149 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
150
151 if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP]->getIterator())) {
152 // Find a sink and wire up the results of the operation.
153 IB.connectToSink(BB, InstsAfter, Op);
154 }
155}
156
157uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
158 uint64_t CurrentWeight) {
159 // If we have less than 200 bytes, panic and try to always delete.
160 if (CurrentSize > MaxSize - 200)
161 return CurrentWeight ? CurrentWeight * 100 : 1;
162 // Draw a line starting from when we only have 1k left and increasing linearly
163 // to double the current weight.
164 int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
165 (static_cast<int64_t>(MaxSize) -
166 static_cast<int64_t>(CurrentSize) - 1000) /
167 1000;
168 // Clamp negative weights to zero.
169 if (Line < 0)
170 return 0;
171 return Line;
172}
173
175 auto RS = makeSampler<Instruction *>(IB.Rand);
176 for (Instruction &Inst : instructions(F)) {
177 // TODO: We can't handle these instructions.
178 if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
179 isa<PHINode>(Inst))
180 continue;
181
182 RS.sample(&Inst, /*Weight=*/1);
183 }
184 if (RS.isEmpty())
185 return;
186
187 // Delete the instruction.
188 mutate(*RS.getSelection(), IB);
189 // Clean up any dead code that's left over after removing the instruction.
191}
192
194 assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
195
196 if (Inst.getType()->isVoidTy()) {
197 // Instructions with void type (ie, store) have no uses to worry about. Just
198 // erase it and move on.
199 Inst.eraseFromParent();
200 return;
201 }
202
203 // Otherwise we need to find some other value with the right type to keep the
204 // users happy.
205 auto Pred = fuzzerop::onlyType(Inst.getType());
206 auto RS = makeSampler<Value *>(IB.Rand);
208 BasicBlock *BB = Inst.getParent();
209 for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
210 ++I) {
211 if (Pred.matches({}, &*I))
212 RS.sample(&*I, /*Weight=*/1);
213 InstsBefore.push_back(&*I);
214 }
215 if (!RS)
216 RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
217
218 Inst.replaceAllUsesWith(RS.getSelection());
219 Inst.eraseFromParent();
220}
221
223 RandomIRBuilder &IB) {
224 SmallVector<std::function<void()>, 8> Modifications;
225 CmpInst *CI = nullptr;
226 GetElementPtrInst *GEP = nullptr;
227 switch (Inst.getOpcode()) {
228 default:
229 break;
230 // Add nsw, nuw flag
231 case Instruction::Add:
232 case Instruction::Mul:
233 case Instruction::Sub:
234 case Instruction::Shl:
235 Modifications.push_back(
236 [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
237 Modifications.push_back(
238 [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
239 break;
240 case Instruction::ICmp:
241 CI = cast<ICmpInst>(&Inst);
242 for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
244 Modifications.push_back(
245 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
246 }
247 break;
248 // Add inbound flag.
249 case Instruction::GetElementPtr:
250 GEP = cast<GetElementPtrInst>(&Inst);
251 Modifications.push_back(
252 [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
253 break;
254 // Add exact flag.
255 case Instruction::UDiv:
256 case Instruction::SDiv:
257 case Instruction::LShr:
258 case Instruction::AShr:
259 Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
260 break;
261
262 case Instruction::FCmp:
263 CI = cast<FCmpInst>(&Inst);
264 for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
266 Modifications.push_back(
267 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
268 }
269 break;
270 }
271
272 // Add fast math flag if possible.
273 if (isa<FPMathOperator>(&Inst)) {
274 // Try setting everything unless they are already on.
275 Modifications.push_back(
276 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
277 // Try unsetting everything unless they are already off.
278 Modifications.push_back(
279 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
280 // Individual setting by flipping the bit
281 Modifications.push_back(
282 [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
283 Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
284 Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
285 Modifications.push_back(
286 [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
287 Modifications.push_back(
288 [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
289 Modifications.push_back(
290 [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
291 Modifications.push_back(
292 [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
293 }
294
295 // Randomly switch operands of instructions
296 std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
297 switch (Inst.getOpcode()) {
298 case Instruction::SDiv:
299 case Instruction::UDiv:
300 case Instruction::SRem:
301 case Instruction::URem:
302 case Instruction::FDiv:
303 case Instruction::FRem: {
304 // Verify that the after shuffle the second operand is not
305 // constant 0.
306 Value *Operand = Inst.getOperand(0);
307 if (Constant *C = dyn_cast<Constant>(Operand)) {
308 if (!C->isZeroValue()) {
309 ShuffleItems = {0, 1};
310 }
311 }
312 break;
313 }
314 case Instruction::Select:
315 ShuffleItems = {1, 2};
316 break;
317 case Instruction::Add:
318 case Instruction::Sub:
319 case Instruction::Mul:
320 case Instruction::Shl:
321 case Instruction::LShr:
322 case Instruction::AShr:
323 case Instruction::And:
324 case Instruction::Or:
325 case Instruction::Xor:
326 case Instruction::FAdd:
327 case Instruction::FSub:
328 case Instruction::FMul:
329 case Instruction::ICmp:
330 case Instruction::FCmp:
331 case Instruction::ShuffleVector:
332 ShuffleItems = {0, 1};
333 break;
334 }
335 if (ShuffleItems != NoneItem) {
336 Modifications.push_back([&Inst, &ShuffleItems]() {
337 Value *Op0 = Inst.getOperand(ShuffleItems.first);
338 Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
339 Inst.setOperand(ShuffleItems.second, Op0);
340 });
341 }
342
343 auto RS = makeSampler(IB.Rand, Modifications);
344 if (RS)
345 RS.getSelection()();
346}
347
348/// Return a case value that is not already taken to make sure we don't have two
349/// cases with same value.
351 uint64_t MaxValue, RandomIRBuilder &IB) {
352 uint64_t tmp;
353 do {
354 tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
355 } while (CasesTaken.count(tmp) != 0);
356 CasesTaken.insert(tmp);
357 return tmp;
358}
359
361 Module *M = BB.getParent()->getParent();
362 // If nullptr is selected, we will create a new function declaration.
363 SmallVector<Function *, 32> Functions({nullptr});
364 for (Function &F : M->functions()) {
365 Functions.push_back(&F);
366 }
367
368 auto RS = makeSampler(IB.Rand, Functions);
369 Function *F = RS.getSelection();
370 // Some functions accept metadata type or token type as arguments.
371 // We don't call those functions for now.
372 // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
373 // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
374 auto IsUnsupportedTy = [](Type *T) {
375 return T->isMetadataTy() || T->isTokenTy();
376 };
377 if (!F || IsUnsupportedTy(F->getReturnType()) ||
378 any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
379 F = IB.createFunctionDeclaration(*M);
380 }
381
382 FunctionType *FTy = F->getFunctionType();
384 if (!F->arg_empty()) {
385 for (Type *ArgTy : FTy->params()) {
386 SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
387 }
388 }
389 bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
390 auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
391 BasicBlock::iterator InsertPt) {
392 StringRef Name = isRetVoid ? nullptr : "C";
393 CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, InsertPt);
394 // Don't return this call inst if it return void as it can't be sinked.
395 return isRetVoid ? nullptr : Call;
396 };
397
399 for (Instruction &I : getInsertionRange(BB))
400 Insts.push_back(&I);
401 if (Insts.size() < 1)
402 return;
403
404 // Choose an insertion point for our new call instruction.
405 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
406
407 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
408 auto InstsAfter = ArrayRef(Insts).slice(IP);
409
410 // Choose a source, which will be used to constrain the operation selection.
412
413 for (const auto &Pred : ArrayRef(SourcePreds)) {
414 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
415 }
416
417 if (Value *Op = BuilderFunc(Srcs, Insts[IP]->getIterator())) {
418 // Find a sink and wire up the results of the operation.
419 IB.connectToSink(BB, InstsAfter, Op);
420 }
421}
422
425 for (Instruction &I : getInsertionRange(BB))
426 Insts.push_back(&I);
427 if (Insts.size() < 1)
428 return;
429
430 // Choose a point where we split the block.
431 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
432 auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);
433
434 // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
435 // directly jumps to `Sink`. Here, we have to create a new terminator for
436 // `Source`.
437 BasicBlock *Block = Insts[IP]->getParent();
438 BasicBlock *Source = Block;
439 BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
440
441 Function *F = BB.getParent();
442 LLVMContext &C = F->getParent()->getContext();
443 // A coin decides if it is branch or switch
444 if (uniform<uint64_t>(IB.Rand, 0, 1)) {
445 // Branch
446 BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
447 BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
448 Value *Cond =
449 IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
451 BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
452 // Remove the old terminator.
453 ReplaceInstWithInst(Source->getTerminator(), Branch);
454 // Connect these blocks to `Sink`
455 connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
456 } else {
457 // Switch
458 // Determine Integer type, it IS possible we use a boolean to switch.
459 auto RS =
460 makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
461 return Ty->isIntegerTy();
462 }));
463 assert(RS && "There is no integer type in all allowed types, is the "
464 "setting correct?");
465 Type *Ty = RS.getSelection();
466 IntegerType *IntTy = cast<IntegerType>(Ty);
467
468 uint64_t BitSize = IntTy->getBitWidth();
469 uint64_t MaxCaseVal =
470 (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
471 // Create Switch inst in Block
472 Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
473 fuzzerop::onlyType(IntTy), false);
474 BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
475 uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
476 NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
477 SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
478 // Remove the old terminator.
479 ReplaceInstWithInst(Source->getTerminator(), Switch);
480
481 // Create blocks, for each block assign a case value.
482 SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
483 SmallSet<uint64_t, 4> CasesTaken;
484 for (uint64_t i = 0; i < NumCases; i++) {
485 uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
486 BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
487 ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
488 Switch->addCase(OnValue, CaseBlock);
489 Blocks.push_back(CaseBlock);
490 }
491
492 // Connect these blocks to `Sink`
493 connectBlocksToSink(Blocks, Sink, IB);
494 }
495}
496
497/// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
498/// even have terminator.
499void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
500 BasicBlock *Sink,
501 RandomIRBuilder &IB) {
502 uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
503 for (uint64_t i = 0; i < Blocks.size(); i++) {
504 // We have at least one block that directly goes to sink.
505 CFGToSink ToSink = (i == DirectSinkIdx)
506 ? CFGToSink::DirectSink
507 : static_cast<CFGToSink>(uniform<uint64_t>(
508 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
509 BasicBlock *BB = Blocks[i];
510 Function *F = BB->getParent();
511 LLVMContext &C = F->getParent()->getContext();
512 switch (ToSink) {
513 case CFGToSink::Return: {
514 Type *RetTy = F->getReturnType();
515 Value *RetValue = nullptr;
516 if (!RetTy->isVoidTy())
517 RetValue =
518 IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
519 ReturnInst::Create(C, RetValue, BB);
520 break;
521 }
522 case CFGToSink::DirectSink: {
523 BranchInst::Create(Sink, BB);
524 break;
525 }
526 case CFGToSink::SinkOrSelfLoop: {
527 SmallVector<BasicBlock *, 2> Branches({Sink, BB});
528 // A coin decides which block is true branch.
529 uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
530 Value *Cond = IB.findOrCreateSource(
531 *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
532 BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
533 break;
534 }
535 case CFGToSink::EndOfCFGToLink:
536 llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
537 }
538 }
539}
540
542 // Can't insert PHI node to entry node.
543 if (&BB == &BB.getParent()->getEntryBlock())
544 return;
545 Type *Ty = IB.randomType();
546 PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", BB.begin());
547
548 // Use a map to make sure the same incoming basic block has the same value.
549 DenseMap<BasicBlock *, Value *> IncomingValues;
550 for (BasicBlock *Pred : predecessors(&BB)) {
551 Value *Src = IncomingValues[Pred];
552 // If `Pred` is not in the map yet, we'll get a nullptr.
553 if (!Src) {
555 for (auto I = Pred->begin(); I != Pred->end(); ++I)
556 Insts.push_back(&*I);
557 // There is no need to inform IB what previously used values are if we are
558 // using `onlyType`
559 Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
560 IncomingValues[Pred] = Src;
561 }
562 PHI->addIncoming(Src, Pred);
563 }
565 for (Instruction &I : getInsertionRange(BB))
566 InstsAfter.push_back(&I);
567 IB.connectToSink(BB, InstsAfter, PHI);
568}
569
571 for (BasicBlock &BB : F) {
572 this->mutate(BB, IB);
573 }
574}
577 for (Instruction &I : getInsertionRange(BB))
578 Insts.push_back(&I);
579 if (Insts.size() < 1)
580 return;
581 // Choose an Instruction to mutate.
582 uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
583 Instruction *Inst = Insts[Idx];
584 // `Idx + 1` so we don't sink to ourselves.
585 auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
586 Type *Ty = Inst->getType();
587 // Don't sink terminators, void function calls, token, etc.
588 if (!Ty->isVoidTy() && !Ty->isTokenTy())
589 // Find a new sink and wire up the results of the operation.
590 IB.connectToSink(BB, InstsAfter, Inst);
591}
592
594 // A deterministic alternative to SmallPtrSet with the same lookup
595 // performance.
596 std::map<size_t, Instruction *> AliveInsts;
597 std::map<Instruction *, size_t> AliveInstsLookup;
598 size_t InsertIdx = 0;
599 for (auto &I : make_early_inc_range(make_range(
601 // First gather all instructions that can be shuffled. Don't take
602 // terminator.
603 AliveInsts.insert({InsertIdx, &I});
604 AliveInstsLookup.insert({&I, InsertIdx++});
605 // Then remove these instructions from the block
606 I.removeFromParent();
607 }
608
609 // Shuffle these instructions using topological sort.
610 // Returns false if all current instruction's dependencies in this block have
611 // been shuffled. If so, this instruction can be shuffled too.
612 auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
613 for (Value *O : AliveInsts[Index]->operands()) {
614 Instruction *P = dyn_cast<Instruction>(O);
615 if (P && AliveInstsLookup.count(P))
616 return true;
617 }
618 return false;
619 };
620 // Get all alive instructions that depend on the current instruction.
621 // Takes Instruction* instead of index because the instruction is already
622 // shuffled.
623 auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
625 for (Value *U : I->users()) {
626 Instruction *P = dyn_cast<Instruction>(U);
627 if (P && AliveInstsLookup.count(P))
628 Children.insert(AliveInstsLookup[P]);
629 }
630 return Children;
631 };
632 SmallSet<size_t, 8> RootIndices;
634 for (const auto &[Index, Inst] : AliveInsts) {
635 if (!hasAliveParent(Index))
636 RootIndices.insert(Index);
637 }
638 // Topological sort by randomly selecting a node without a parent, or root.
639 while (!RootIndices.empty()) {
640 auto RS = makeSampler<size_t>(IB.Rand);
641 for (size_t RootIdx : RootIndices)
642 RS.sample(RootIdx, 1);
643 size_t RootIdx = RS.getSelection();
644
645 RootIndices.erase(RootIdx);
646 Instruction *Root = AliveInsts[RootIdx];
647 AliveInsts.erase(RootIdx);
648 AliveInstsLookup.erase(Root);
649 Insts.push_back(Root);
650
651 for (size_t Child : getAliveChildren(Root)) {
652 if (!hasAliveParent(Child)) {
653 RootIndices.insert(Child);
654 }
655 }
656 }
657
658 Instruction *Terminator = BB.getTerminator();
659 // Then put instructions back.
660 for (Instruction *I : Insts) {
661 I->insertBefore(Terminator);
662 }
663}
664
665std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
666 LLVMContext &Context) {
667
668 if (Size <= 1)
669 // We get bogus data given an empty corpus - just create a new module.
670 return std::make_unique<Module>("M", Context);
671
672 auto Buffer = MemoryBuffer::getMemBuffer(
673 StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
674 /*RequiresNullTerminator=*/false);
675
676 SMDiagnostic Err;
677 auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
678 if (Error E = M.takeError()) {
679 errs() << toString(std::move(E)) << "\n";
680 return nullptr;
681 }
682 return std::move(M.get());
683}
684
685size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
686 std::string Buf;
687 {
690 }
691 if (Buf.size() > MaxSize)
692 return 0;
693 memcpy(Dest, Buf.data(), Buf.size());
694 return Buf.size();
695}
696
697std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
698 LLVMContext &Context) {
699 auto M = parseModule(Data, Size, Context);
700 if (!M || verifyModule(*M, &errs()))
701 return nullptr;
702
703 return M;
704}
Rewrite undef for PHI
Expand Atomic instructions
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
static void eliminateDeadCode(Function &F)
Definition: IRMutator.cpp:82
static iterator_range< BasicBlock::iterator > getInsertionRange(BasicBlock &BB)
Definition: IRMutator.cpp:119
static uint64_t getUniqueCaseValue(SmallSet< uint64_t, 4 > &CasesTaken, uint64_t MaxValue, RandomIRBuilder &IB)
Return a case value that is not already taken to make sure we don't have two cases with same value.
Definition: IRMutator.cpp:350
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static ManagedStatic< cl::opt< uint64_t >, CreateSeed > Seed
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallSet class.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:467
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:675
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
const CallInst * getTerminatingMustTailCall() const
Returns the call instruction marked 'musttail' prior to the terminating return instruction of this ba...
Definition: BasicBlock.cpp:300
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:747
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:850
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FIRST_ICMP_PREDICATE
Definition: InstrTypes.h:788
@ FIRST_FCMP_PREDICATE
Definition: InstrTypes.h:775
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
This is an important base class in LLVM.
Definition: Constant.h:42
Basic Dead Code Elimination pass.
Definition: DCE.h:23
This class represents an Operation in the Expression.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
bool none() const
Definition: FMF.h:58
bool all() const
Definition: FMF.h:59
Class to represent function types.
Definition: DerivedTypes.h:103
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
virtual void mutate(Module &M, RandomIRBuilder &IB)
Definition: IRMutator.cpp:36
void mutateModule(Module &M, int Seed, size_t MaxSize)
Mutate given module.
Definition: IRMutator.cpp:64
static size_t getModuleSize(const Module &M)
Calculate the size of module as the number of objects in it, i.e.
Definition: IRMutator.cpp:60
static std::vector< fuzzerop::OpDescriptor > getDefaultOps()
Definition: IRMutator.cpp:96
void mutate(Function &F, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:91
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:423
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:360
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:541
uint64_t getWeight(size_t CurrentSize, size_t MaxSize, uint64_t CurrentWeight) override
Provide a weight to bias towards choosing this strategy for a mutation.
Definition: IRMutator.cpp:157
void mutate(Function &F, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:174
void mutate(Instruction &Inst, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:222
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
bool hasNoNaNs() const LLVM_READONLY
Determine whether the no-NaNs flag is set.
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoInfs() const LLVM_READONLY
Determine whether the no-infs flag is set.
void setHasNoSignedZeros(bool B)
Set or clear the no-signed-zeros flag on this instruction, which must be an operator which supports t...
bool hasNoSignedZeros() const LLVM_READONLY
Determine whether the no-signed-zeros flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void setHasAllowContract(bool B)
Set or clear the allow-contract flag on this instruction, which must be an operator which supports th...
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void setHasNoNaNs(bool B)
Set or clear the no-nans flag on this instruction, which must be an operator which supports this flag...
void setHasApproxFunc(bool B)
Set or clear the approximate-math-functions flag on this instruction, which must be an operator which...
void setHasAllowReassoc(bool B)
Set or clear the reassociation flag on this instruction, which must be an operator which supports thi...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
bool isTerminator() const
Definition: Instruction.h:277
bool hasAllowReciprocal() const LLVM_READONLY
Determine whether the allow-reciprocal flag is set.
void setHasNoInfs(bool B)
Set or clear the no-infs flag on this instruction, which must be an operator which supports this flag...
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
bool hasApproxFunc() const LLVM_READONLY
Determine whether the approximate-math-functions flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
void setHasAllowReciprocal(bool B)
Set or clear the allow-reciprocal flag on this instruction, which must be an operator which supports ...
bool hasAllowContract() const LLVM_READONLY
Determine whether the allow-contract flag is set.
void setFast(bool B)
Set or clear all fast-math-flags on this instruction, which must be an operator which supports this f...
bool hasAllowReassoc() const LLVM_READONLY
Determine whether the allow-reassociation flag is set.
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
Definition: PassManager.h:195
PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, ExtraArgTs... ExtraArgs)
Run all of the passes in this manager over the given unit of IR.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:281
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:593
void mutate(Function &F, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:570
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool empty() const
Definition: SmallSet.h:159
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
Analysis pass providing the TargetLibraryInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt1Ty(LLVMContext &C)
static Type * getVoidTy(LLVMContext &C)
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:221
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A range adaptor for a pair of iterators.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
static SourcePred onlyType(Type *Only)
Definition: OpDescriptor.h:95
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
void describeFuzzerIntOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Getters for the default sets of operations, per general category.
Definition: Operations.cpp:18
Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
void WriteBitcodeToFile(const Module &M, raw_ostream &Out, bool ShouldPreserveUseListOrder=false, const ModuleSummaryIndex *Index=nullptr, bool GenerateHash=false, ModuleHash *ModHash=nullptr)
Write the specified module to the specified raw output stream.
std::unique_ptr< Module > parseAndVerify(const uint8_t *Data, size_t Size, LLVMContext &Context)
Try to parse module and verify it.
Definition: IRMutator.cpp:697
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::unique_ptr< Module > parseModule(const uint8_t *Data, size_t Size, LLVMContext &Context)
Fuzzer friendly interface for the llvm bitcode parser.
Definition: IRMutator.cpp:665
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void describeFuzzerAggregateOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:88
size_t writeModule(const Module &M, uint8_t *Dest, size_t MaxSize)
Fuzzer friendly interface for the llvm bitcode printer.
Definition: IRMutator.cpp:685
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
ReservoirSampler< ElT, GenT > makeSampler(GenT &RandGen, RangeT &&Items)
Definition: Random.h:75
void describeFuzzerVectorOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:94
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:572
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
DWARFExpression::Operation Op
void describeFuzzerFloatOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:45
void describeFuzzerControlFlowOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:75
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
void describeFuzzerPointerOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:84
unsigned pred_size(const MachineBasicBlock *BB)
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:7162
A description of some operation we can build while fuzzing IR.
Definition: OpDescriptor.h:89