LLVM  4.0.0
FuzzerCorpus.h
Go to the documentation of this file.
1 //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // fuzzer::InputCorpus
10 //===----------------------------------------------------------------------===//
11 
12 #ifndef LLVM_FUZZER_CORPUS
13 #define LLVM_FUZZER_CORPUS
14 
15 #include "FuzzerDefs.h"
16 #include "FuzzerIO.h"
17 #include "FuzzerRandom.h"
18 #include "FuzzerSHA1.h"
19 #include "FuzzerTracePC.h"
20 #include <algorithm>
21 #include <numeric>
22 #include <random>
23 #include <unordered_set>
24 
25 namespace fuzzer {
26 
27 struct InputInfo {
28  Unit U; // The actual input data.
29  uint8_t Sha1[kSHA1NumBytes]; // Checksum.
30  // Number of features that this input has and no smaller input has.
31  size_t NumFeatures = 0;
32  size_t Tmp = 0; // Used by ValidateFeatureSet.
33  // Stats.
36  bool MayDeleteFile = false;
37 };
38 
39 class InputCorpus {
40  public:
41  static const size_t kFeatureSetSize = 1 << 16;
42  InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) {
43  memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature));
44  memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature));
45  }
47  for (auto II : Inputs)
48  delete II;
49  }
50  size_t size() const { return Inputs.size(); }
51  size_t SizeInBytes() const {
52  size_t Res = 0;
53  for (auto II : Inputs)
54  Res += II->U.size();
55  return Res;
56  }
57  size_t NumActiveUnits() const {
58  size_t Res = 0;
59  for (auto II : Inputs)
60  Res += !II->U.empty();
61  return Res;
62  }
63  size_t MaxInputSize() const {
64  size_t Res = 0;
65  for (auto II : Inputs)
66  Res = std::max(Res, II->U.size());
67  return Res;
68  }
69  bool empty() const { return Inputs.empty(); }
70  const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
71  void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile = false) {
72  assert(!U.empty());
73  uint8_t Hash[kSHA1NumBytes];
74  if (FeatureDebug)
75  Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
76  ComputeSHA1(U.data(), U.size(), Hash);
77  Hashes.insert(Sha1ToString(Hash));
78  Inputs.push_back(new InputInfo());
79  InputInfo &II = *Inputs.back();
80  II.U = U;
82  II.MayDeleteFile = MayDeleteFile;
83  memcpy(II.Sha1, Hash, kSHA1NumBytes);
84  UpdateCorpusDistribution();
85  ValidateFeatureSet();
86  }
87 
88  bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
89  bool HasUnit(const std::string &H) { return Hashes.count(H); }
91  InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)];
92  assert(!II.U.empty());
93  return II;
94  };
95 
96  // Returns an index of random unit from the corpus to mutate.
97  // Hypothesis: units added to the corpus last are more likely to be
98  // interesting. This function gives more weight to the more recent units.
99  size_t ChooseUnitIdxToMutate(Random &Rand) {
100  size_t Idx = static_cast<size_t>(CorpusDistribution(Rand.Get_mt19937()));
101  assert(Idx < Inputs.size());
102  return Idx;
103  }
104 
105  void PrintStats() {
106  for (size_t i = 0; i < Inputs.size(); i++) {
107  const auto &II = *Inputs[i];
108  Printf(" [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i,
109  Sha1ToString(II.Sha1).c_str(), II.U.size(),
110  II.NumExecutedMutations, II.NumSuccessfullMutations);
111  }
112  }
113 
115  for (size_t i = 0; i < kFeatureSetSize; i++) {
116  if(size_t Sz = GetFeature(i))
117  Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz);
118  }
119  Printf("\n\t");
120  for (size_t i = 0; i < Inputs.size(); i++)
121  if (size_t N = Inputs[i]->NumFeatures)
122  Printf(" %zd=>%zd ", i, N);
123  Printf("\n");
124  }
125 
126  void DeleteInput(size_t Idx) {
127  InputInfo &II = *Inputs[Idx];
128  if (!OutputCorpus.empty() && II.MayDeleteFile)
129  RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1)));
130  Unit().swap(II.U);
131  if (FeatureDebug)
132  Printf("EVICTED %zd\n", Idx);
133  }
134 
135  bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
136  assert(NewSize);
137  Idx = Idx % kFeatureSetSize;
138  uint32_t OldSize = GetFeature(Idx);
139  if (OldSize == 0 || (Shrink && OldSize > NewSize)) {
140  if (OldSize > 0) {
141  size_t OldIdx = SmallestElementPerFeature[Idx];
142  InputInfo &II = *Inputs[OldIdx];
143  assert(II.NumFeatures > 0);
144  II.NumFeatures--;
145  if (II.NumFeatures == 0)
146  DeleteInput(OldIdx);
147  }
148  if (FeatureDebug)
149  Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize);
150  SmallestElementPerFeature[Idx] = Inputs.size();
151  InputSizesPerFeature[Idx] = NewSize;
152  CountingFeatures = true;
153  return true;
154  }
155  return false;
156  }
157 
158  size_t NumFeatures() const {
159  size_t Res = 0;
160  for (size_t i = 0; i < kFeatureSetSize; i++)
161  Res += GetFeature(i) != 0;
162  return Res;
163  }
164 
166  assert(Inputs.empty());
167  memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature));
168  memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature));
169  }
170 
171 private:
172 
173  static const bool FeatureDebug = false;
174 
175  size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; }
176 
177  void ValidateFeatureSet() {
178  if (!CountingFeatures) return;
179  if (FeatureDebug)
180  PrintFeatureSet();
181  for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++)
182  if (GetFeature(Idx))
183  Inputs[SmallestElementPerFeature[Idx]]->Tmp++;
184  for (auto II: Inputs) {
185  if (II->Tmp != II->NumFeatures)
186  Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures);
187  assert(II->Tmp == II->NumFeatures);
188  II->Tmp = 0;
189  }
190  }
191 
192  // Updates the probability distribution for the units in the corpus.
193  // Must be called whenever the corpus or unit weights are changed.
194  void UpdateCorpusDistribution() {
195  size_t N = Inputs.size();
196  Intervals.resize(N + 1);
197  Weights.resize(N);
198  std::iota(Intervals.begin(), Intervals.end(), 0);
199  if (CountingFeatures)
200  for (size_t i = 0; i < N; i++)
201  Weights[i] = Inputs[i]->NumFeatures * (i + 1);
202  else
203  std::iota(Weights.begin(), Weights.end(), 1);
204  CorpusDistribution = std::piecewise_constant_distribution<double>(
205  Intervals.begin(), Intervals.end(), Weights.begin());
206  }
207  std::piecewise_constant_distribution<double> CorpusDistribution;
208 
209  std::vector<double> Intervals;
210  std::vector<double> Weights;
211 
212  std::unordered_set<std::string> Hashes;
213  std::vector<InputInfo*> Inputs;
214 
215  bool CountingFeatures = false;
216  uint32_t InputSizesPerFeature[kFeatureSetSize];
217  uint32_t SmallestElementPerFeature[kFeatureSetSize];
218 
219  std::string OutputCorpus;
220 };
221 
222 } // namespace fuzzer
223 
224 #endif // LLVM_FUZZER_CORPUS
size_t i
size_t ChooseUnitIdxToMutate(Random &Rand)
Definition: FuzzerCorpus.h:99
InputCorpus(const std::string &OutputCorpus)
Definition: FuzzerCorpus.h:42
const Unit & operator[](size_t Idx) const
Definition: FuzzerCorpus.h:70
void Printf(const char *Fmt,...)
Definition: FuzzerIO.cpp:109
void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out)
Definition: FuzzerSHA1.cpp:202
static const size_t kFeatureSetSize
Definition: FuzzerCorpus.h:41
#define H(x, y, z)
Definition: MD5.cpp:53
bool empty() const
Definition: FuzzerCorpus.h:69
std::string DirPlusFile(const std::string &DirPath, const std::string &FileName)
Definition: FuzzerIO.cpp:87
uint8_t Sha1[kSHA1NumBytes]
Definition: FuzzerCorpus.h:29
size_t size() const
Definition: FuzzerCorpus.h:50
void DeleteInput(size_t Idx)
Definition: FuzzerCorpus.h:126
size_t NumActiveUnits() const
Definition: FuzzerCorpus.h:57
std::mt19937 & Get_mt19937()
Definition: FuzzerRandom.h:29
bool HasUnit(const std::string &H)
Definition: FuzzerCorpus.h:89
void RemoveFile(const std::string &Path)
size_t SizeInBytes() const
Definition: FuzzerCorpus.h:51
size_t NumSuccessfullMutations
Definition: FuzzerCorpus.h:35
std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes])
Definition: FuzzerSHA1.cpp:209
#define N
size_t MaxInputSize() const
Definition: FuzzerCorpus.h:63
std::vector< uint8_t > Unit
Definition: FuzzerDefs.h:71
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
size_t NumFeatures() const
Definition: FuzzerCorpus.h:158
InputInfo & ChooseUnitToMutate(Random &Rand)
Definition: FuzzerCorpus.h:90
bool HasUnit(const Unit &U)
Definition: FuzzerCorpus.h:88
std::string Hash(const Unit &U)
Definition: FuzzerSHA1.cpp:216
bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink)
Definition: FuzzerCorpus.h:135
static const int kSHA1NumBytes
Definition: FuzzerSHA1.h:22
size_t NumExecutedMutations
Definition: FuzzerCorpus.h:34
void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile=false)
Definition: FuzzerCorpus.h:71