LLVM  4.0.0
FuzzerTraceState.cpp
Go to the documentation of this file.
1 //===- FuzzerTraceState.cpp - Trace-based fuzzer mutator ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // Data tracing.
10 //===----------------------------------------------------------------------===//
11 
12 #include "FuzzerDictionary.h"
13 #include "FuzzerInternal.h"
14 #include "FuzzerIO.h"
15 #include "FuzzerMutate.h"
16 #include "FuzzerRandom.h"
17 #include "FuzzerTracePC.h"
18 #include <algorithm>
19 #include <cstring>
20 #include <map>
21 #include <set>
22 #include <thread>
23 
24 namespace fuzzer {
25 
26 // For now, very simple: put Size bytes of Data at position Pos.
30 };
31 
32 // Declared as static globals for faster checks inside the hooks.
33 static bool RecordingMemcmp = false;
34 static bool RecordingMemmem = false;
35 static bool DoingMyOwnMemmem = false;
36 
39 
40 class TraceState {
41 public:
43  const Fuzzer *F)
44  : MD(MD), Options(Options), F(F) {}
45 
46  void TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
47  const uint8_t *Data2);
48 
49  int TryToAddDesiredData(const uint8_t *PresentData,
50  const uint8_t *DesiredData, size_t DataSize);
51 
53  if (!Options.UseMemcmp)
54  return;
55  RecordingMemcmp = Options.UseMemcmp;
56  RecordingMemmem = Options.UseMemmem;
57  NumMutations = 0;
58  InterestingWords.clear();
60  }
61 
63  if (!RecordingMemcmp)
64  return;
65  RecordingMemcmp = false;
66  for (size_t i = 0; i < NumMutations; i++) {
67  auto &M = Mutations[i];
68  if (Options.Verbosity >= 2) {
69  AutoDictUnitCounts[M.W]++;
70  AutoDictAdds++;
71  if ((AutoDictAdds & (AutoDictAdds - 1)) == 0) {
72  typedef std::pair<size_t, Word> CU;
73  std::vector<CU> CountedUnits;
74  for (auto &I : AutoDictUnitCounts)
75  CountedUnits.push_back(std::make_pair(I.second, I.first));
76  std::sort(CountedUnits.begin(), CountedUnits.end(),
77  [](const CU &a, const CU &b) { return a.first > b.first; });
78  Printf("AutoDict:\n");
79  for (auto &I : CountedUnits) {
80  Printf(" %zd ", I.first);
81  PrintASCII(I.second.data(), I.second.size());
82  Printf("\n");
83  }
84  }
85  }
86  MD.AddWordToAutoDictionary({M.W, M.Pos});
87  }
88  for (auto &W : InterestingWords)
90  }
91 
92  void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {
93  if (NumMutations >= kMaxMutations) return;
94  auto &M = Mutations[NumMutations++];
95  M.Pos = Pos;
96  M.W.Set(Data, Size);
97  }
98 
99  void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {
100  assert(Size <= sizeof(Data));
101  AddMutation(Pos, Size, reinterpret_cast<uint8_t*>(&Data));
102  }
103 
104  void AddInterestingWord(const uint8_t *Data, size_t Size) {
105  if (!RecordingMemmem || !F->InFuzzingThread()) return;
106  if (Size <= 1) return;
107  Size = std::min(Size, Word::GetMaxSize());
108  Word W(Data, Size);
109  InterestingWords.insert(W);
110  }
111 
112  private:
// Returns true when Data, reinterpreted as a signed 64-bit integer, has
// nothing but sign extension above its low 16 bits, i.e. every bit from
// bit 16 upwards is 0, or every such bit is 1.
bool IsTwoByteData(uint64_t Data) {
  const int64_t HighPart = static_cast<int64_t>(Data) >> 16;
  if (HighPart == 0) return true;
  return HighPart == -1;
}
118 
119  // We don't want to create too many trace-based mutations as it is both
120  // expensive and useless. So after some number of mutations is collected,
121  // start rejecting some of them. The more there are mutations the more we
122  // reject.
123  bool WantToHandleOneMoreMutation() {
124  const size_t FirstN = 64;
125  // Gladly handle first N mutations.
126  if (NumMutations <= FirstN) return true;
127  size_t Diff = NumMutations - FirstN;
128  size_t DiffLog = sizeof(long) * 8 - __builtin_clzl((long)Diff);
129  assert(DiffLog > 0 && DiffLog < 64);
130  bool WantThisOne = MD.GetRand()(1 << DiffLog) == 0; // 1 out of DiffLog.
131  return WantThisOne;
132  }
133 
134  static const size_t kMaxMutations = 1 << 16;
135  size_t NumMutations;
136  TraceBasedMutation Mutations[kMaxMutations];
137  // TODO: std::set is too inefficient, need to have a custom DS here.
138  std::set<Word> InterestingWords;
139  MutationDispatcher &MD;
140  const FuzzingOptions Options;
141  const Fuzzer *F;
142  std::map<Word, size_t> AutoDictUnitCounts;
143  size_t AutoDictAdds = 0;
144 };
145 
146 int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
147  const uint8_t *DesiredData,
148  size_t DataSize) {
149  if (NumMutations >= kMaxMutations || !WantToHandleOneMoreMutation()) return 0;
150  ScopedDoingMyOwnMemmem scoped_doing_my_own_memmem;
151  const uint8_t *UnitData;
152  auto UnitSize = F->GetCurrentUnitInFuzzingThead(&UnitData);
153  int Res = 0;
154  const uint8_t *Beg = UnitData;
155  const uint8_t *End = Beg + UnitSize;
156  for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
157  Cur = (uint8_t *)SearchMemory(Cur, End - Cur, PresentData, DataSize);
158  if (!Cur)
159  break;
160  size_t Pos = Cur - Beg;
161  assert(Pos < UnitSize);
162  AddMutation(Pos, DataSize, DesiredData);
163  Res++;
164  }
165  return Res;
166 }
167 
168 void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
169  const uint8_t *Data2) {
170  if (!RecordingMemcmp || !F->InFuzzingThread()) return;
171  CmpSize = std::min(CmpSize, Word::GetMaxSize());
172  int Added2 = TryToAddDesiredData(Data1, Data2, CmpSize);
173  int Added1 = TryToAddDesiredData(Data2, Data1, CmpSize);
174  if ((Added1 || Added2) && Options.Verbosity >= 3) {
175  Printf("MemCmp Added %d%d: ", Added1, Added2);
176  if (Added1) PrintASCII(Data1, CmpSize);
177  if (Added2) PrintASCII(Data2, CmpSize);
178  Printf("\n");
179  }
180 }
181 
182 static TraceState *TS;
183 
184 void Fuzzer::StartTraceRecording() {
185  if (!TS) return;
187 }
188 
189 void Fuzzer::StopTraceRecording() {
190  if (!TS) return;
192 }
193 
195  if (!Options.UseMemcmp) return;
196  TS = new TraceState(MD, Options, this);
197 }
198 
// Length of the C string S, looking at no more than MaxLen characters:
// returns the index of the first NUL byte, or MaxLen if none is found
// within the first MaxLen bytes.
static size_t InternalStrnlen(const char *S, size_t MaxLen) {
  size_t Remaining = MaxLen;
  const char *P = S;
  while (Remaining != 0 && *P != '\0') {
    ++P;
    --Remaining;
  }
  return MaxLen - Remaining;
}
204 
205 } // namespace fuzzer
206 
207 using fuzzer::TS;
209 
210 extern "C" {
211 
212 // We may need to avoid defining weak hooks to stay compatible with older clang.
213 #ifndef LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
214 # define LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS 1
215 #endif
216 
217 #if LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
218 void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
219  const void *s2, size_t n, int result) {
220  fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n);
221  if (!RecordingMemcmp) return;
222  if (result == 0) return; // No reason to mutate.
223  if (n <= 1) return; // Not interesting.
224  TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
225  reinterpret_cast<const uint8_t *>(s2));
226 }
227 
228 void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
229  const char *s2, size_t n, int result) {
230  fuzzer::TPC.AddValueForStrcmp(caller_pc, s1, s2, n);
231  if (!RecordingMemcmp) return;
232  if (result == 0) return; // No reason to mutate.
233  size_t Len1 = fuzzer::InternalStrnlen(s1, n);
234  size_t Len2 = fuzzer::InternalStrnlen(s2, n);
235  n = std::min(n, Len1);
236  n = std::min(n, Len2);
237  if (n <= 1) return; // Not interesting.
238  TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
239  reinterpret_cast<const uint8_t *>(s2));
240 }
241 
242 void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
243  const char *s2, int result) {
244  fuzzer::TPC.AddValueForStrcmp(caller_pc, s1, s2, 64);
245  if (!RecordingMemcmp) return;
246  if (result == 0) return; // No reason to mutate.
247  size_t Len1 = strlen(s1);
248  size_t Len2 = strlen(s2);
249  size_t N = std::min(Len1, Len2);
250  if (N <= 1) return; // Not interesting.
251  TS->TraceMemcmpCallback(N, reinterpret_cast<const uint8_t *>(s1),
252  reinterpret_cast<const uint8_t *>(s2));
253 }
254 
255 void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1,
256  const char *s2, size_t n, int result) {
257  return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result);
258 }
259 void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1,
260  const char *s2, int result) {
261  return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result);
262 }
263 void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1,
264  const char *s2, char *result) {
265  TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
266 }
267 void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1,
268  const char *s2, char *result) {
269  TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
270 }
271 void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1,
272  const void *s2, size_t len2, void *result) {
273  if (fuzzer::DoingMyOwnMemmem) return;
274  TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), len2);
275 }
276 
277 #endif // LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
278 } // extern "C"
MachineLoop * L
void AddValueForStrcmp(void *caller_pc, const char *s1, const char *s2, size_t n)
void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1, const void *s2, size_t len2, void *result)
size_t i
static bool DoingMyOwnMemmem
void AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2, size_t n)
bool InFuzzingThread() const
void AddInterestingWord(const uint8_t *Data, size_t Size)
void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2, size_t n, int result)
void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1, const char *s2, size_t n, int result)
size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const
Definition: FuzzerLoop.cpp:526
static bool RecordingMemmem
void AddWordToAutoDictionary(DictionaryEntry DE)
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data)
TracePC TPC
void Printf(const char *Fmt,...)
Definition: FuzzerIO.cpp:109
int TryToAddDesiredData(const uint8_t *PresentData, const uint8_t *DesiredData, size_t DataSize)
static const unsigned End
TraceState(MutationDispatcher &MD, const FuzzingOptions &Options, const Fuzzer *F)
void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2, size_t n, int result)
static size_t GetMaxSize()
void TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1, const uint8_t *Data2)
const void * SearchMemory(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen)
void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1, const char *s2, int result)
void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data)
static size_t InternalStrnlen(const char *S, size_t MaxLen)
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static TraceState * TS
void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1, const char *s2, char *result)
void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1, const char *s2, char *result)
void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2, int result)
static bool RecordingMemcmp
static void PrintASCII(const Word &W, const char *PrintAfter)