LLVM  3.7.0
FuzzerTraceState.cpp
Go to the documentation of this file.
1 //===- FuzzerTraceState.cpp - Trace-based fuzzer mutator ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // This file implements a mutation algorithm based on instruction traces and
10 // on taint analysis feedback from DFSan.
11 //
12 // Instruction traces are special hooks inserted by the compiler around
13 // interesting instructions. Currently supported traces:
14 // * __sanitizer_cov_trace_cmp -- inserted before every ICMP instruction,
15 // receives the type, size and arguments of ICMP.
16 //
17 // Every time a traced event is intercepted we analyse the data involved
18 // in the event and suggest a mutation for future executions.
19 // For example if 4 bytes of data that derive from input bytes {4,5,6,7}
20 // are compared with a constant 12345,
21 // we try to insert 12345, 12344, 12346 into bytes
22 // {4,5,6,7} of the next fuzzed inputs.
23 //
24 // The fuzzer can work only with the traces, or with both traces and DFSan.
25 //
26 // DataFlowSanitizer (DFSan) is a tool for
27 // generalised dynamic data flow (taint) analysis:
28 // http://clang.llvm.org/docs/DataFlowSanitizer.html .
29 //
30 // The approach with DFSan-based fuzzing has some similarity to
31 // "Taint-based Directed Whitebox Fuzzing"
32 // by Vijay Ganesh & Tim Leek & Martin Rinard:
33 // http://dspace.mit.edu/openaccess-disseminate/1721.1/59320,
34 // but it uses a full blown LLVM IR taint analysis and separate instrumentation
35 // to analyze all of the "attack points" at once.
36 //
37 // Workflow with DFSan:
38 // * lib/Fuzzer/Fuzzer*.cpp is compiled w/o any instrumentation.
39 // * The code under test is compiled with DFSan *and* with instruction traces.
40 // * Every call to HOOK(a,b) is replaced by DFSan with
41 // __dfsw_HOOK(a, b, label(a), label(b)) so that __dfsw_HOOK
42 // gets all the taint labels for the arguments.
43 // * At the Fuzzer startup we assign a unique DFSan label
44 // to every byte of the input string (Fuzzer::CurrentUnit) so that for any
45 // chunk of data we know which input bytes it has derived from.
46 // * The __dfsw_* functions (implemented in this file) record the
47 // parameters (i.e. the application data and the corresponding taint labels)
48 // in a global state.
49 // * Fuzzer::ApplyTraceBasedMutation() tries to use the data recorded
50 // by __dfsw_* hooks to guide the fuzzing towards new application states.
51 //
52 // Parts of this code will not function when DFSan is not linked in.
53 // Instead of using ifdefs and thus requiring a separate build of lib/Fuzzer
54 // we redeclare the dfsan_* interface functions as weak and check if they
55 // are nullptr before calling.
56 // If this approach proves to be useful we may add attribute(weak) to the
57 // dfsan declarations in dfsan_interface.h
58 //
59 // This module is in the "proof of concept" stage.
60 // It is capable of solving only the simplest puzzles
61 // like test/dfsan/DFSanSimpleCmpTest.cpp.
62 //===----------------------------------------------------------------------===//
63 
64 /* Example of manual usage (-fsanitize=dataflow is optional):
65 (
66  cd $LLVM/lib/Fuzzer/
67  clang -fPIC -c -g -O2 -std=c++11 Fuzzer*.cpp
68  clang++ -O0 -std=c++11 -fsanitize-coverage=edge,trace-cmp \
69  -fsanitize=dataflow \
70  test/dfsan/DFSanSimpleCmpTest.cpp Fuzzer*.o
71  ./a.out
72 )
73 */
74 
75 #include "FuzzerInternal.h"
76 #include <sanitizer/dfsan_interface.h>
77 
78 #include <algorithm>
79 #include <cstring>
80 #include <unordered_map>
81 
82 extern "C" {
83 __attribute__((weak))
84 dfsan_label dfsan_create_label(const char *desc, void *userdata);
85 __attribute__((weak))
86 void dfsan_set_label(dfsan_label label, void *addr, size_t size);
87 __attribute__((weak))
88 void dfsan_add_label(dfsan_label label, void *addr, size_t size);
89 __attribute__((weak))
90 const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label);
91 __attribute__((weak))
92 dfsan_label dfsan_read_label(const void *addr, size_t size);
93 } // extern "C"
94 
95 namespace fuzzer {
96 
97 static bool ReallyHaveDFSan() {
98  return &dfsan_create_label != nullptr;
99 }
100 
101 // These values are copied from include/llvm/IR/InstrTypes.h.
102 // We do not include the LLVM headers here to remain independent.
103 // If these values ever change, an assertion in ComputeCmp will fail.
enum Predicate {
  // Equality predicates.
  ICMP_EQ = 32,  ///< equal
  ICMP_NE = 33,  ///< not equal

  // Unsigned ordering predicates.
  ICMP_UGT = 34,  ///< unsigned greater than
  ICMP_UGE = 35,  ///< unsigned greater or equal
  ICMP_ULT = 36,  ///< unsigned less than
  ICMP_ULE = 37,  ///< unsigned less or equal

  // Signed ordering predicates.
  ICMP_SGT = 38,  ///< signed greater than
  ICMP_SGE = 39,  ///< signed greater or equal
  ICMP_SLT = 40,  ///< signed less than
  ICMP_SLE = 41,  ///< signed less or equal
};
116 
117 template <class U, class S>
118 bool ComputeCmp(size_t CmpType, U Arg1, U Arg2) {
119  switch(CmpType) {
120  case ICMP_EQ : return Arg1 == Arg2;
121  case ICMP_NE : return Arg1 != Arg2;
122  case ICMP_UGT: return Arg1 > Arg2;
123  case ICMP_UGE: return Arg1 >= Arg2;
124  case ICMP_ULT: return Arg1 < Arg2;
125  case ICMP_ULE: return Arg1 <= Arg2;
126  case ICMP_SGT: return (S)Arg1 > (S)Arg2;
127  case ICMP_SGE: return (S)Arg1 >= (S)Arg2;
128  case ICMP_SLT: return (S)Arg1 < (S)Arg2;
129  case ICMP_SLE: return (S)Arg1 <= (S)Arg2;
130  default: assert(0 && "unsupported CmpType");
131  }
132  return false;
133 }
134 
135 static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1,
136  uint64_t Arg2) {
137  if (CmpSize == 8) return ComputeCmp<uint64_t, int64_t>(CmpType, Arg1, Arg2);
138  if (CmpSize == 4) return ComputeCmp<uint32_t, int32_t>(CmpType, Arg1, Arg2);
139  if (CmpSize == 2) return ComputeCmp<uint16_t, int16_t>(CmpType, Arg1, Arg2);
140  if (CmpSize == 1) return ComputeCmp<uint8_t, int8_t>(CmpType, Arg1, Arg2);
141  assert(0 && "unsupported type size");
142  return true;
143 }
144 
145 // As a simplification we use the range of input bytes instead of a set of input
146 // bytes.
147 struct LabelRange {
148  uint16_t Beg, End; // Range is [Beg, End), thus Beg==End is an empty range.
149 
150  LabelRange(uint16_t Beg = 0, uint16_t End = 0) : Beg(Beg), End(End) {}
151 
153  if (LR1.Beg == LR1.End) return LR2;
154  if (LR2.Beg == LR2.End) return LR1;
155  return {std::min(LR1.Beg, LR2.Beg), std::max(LR1.End, LR2.End)};
156  }
158  return *this = Join(*this, LR);
159  }
160  static LabelRange Singleton(const dfsan_label_info *LI) {
161  uint16_t Idx = (uint16_t)(uintptr_t)LI->userdata;
162  assert(Idx > 0);
163  return {(uint16_t)(Idx - 1), Idx};
164  }
165 };
166 
// For now, very simple: put Size bytes of Data at position Pos.
struct TraceBasedMutation {
  size_t Pos;     // Offset into the unit at which the bytes are written.
  size_t Size;    // Number of bytes of Data to write.
  uint64_t Data;  // The bytes to write, taken from this value's storage.
};
173 
174 class TraceState {
175  public:
176  TraceState(const Fuzzer::FuzzingOptions &Options, const Unit &CurrentUnit)
177  : Options(Options), CurrentUnit(CurrentUnit) {}
178 
179  LabelRange GetLabelRange(dfsan_label L);
180  void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
181  uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
182  dfsan_label L2);
183  void TraceCmpCallback(size_t CmpSize, size_t CmpType, uint64_t Arg1,
184  uint64_t Arg2);
185  int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
186  size_t DataSize);
187 
189  if (!Options.UseTraces) return;
190  RecordingTraces = true;
191  Mutations.clear();
192  }
193 
195  RecordingTraces = false;
196  std::random_shuffle(Mutations.begin(), Mutations.end());
197  return Mutations.size();
198  }
199 
200  void ApplyTraceBasedMutation(size_t Idx, fuzzer::Unit *U);
201 
202  private:
203  bool IsTwoByteData(uint64_t Data) {
204  int64_t Signed = static_cast<int64_t>(Data);
205  Signed >>= 16;
206  return Signed == 0 || Signed == -1L;
207  }
208  bool RecordingTraces = false;
209  std::vector<TraceBasedMutation> Mutations;
210  LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {};
211  const Fuzzer::FuzzingOptions &Options;
212  const Unit &CurrentUnit;
213 };
214 
215 LabelRange TraceState::GetLabelRange(dfsan_label L) {
216  LabelRange &LR = LabelRanges[L];
217  if (LR.Beg < LR.End || L == 0)
218  return LR;
219  const dfsan_label_info *LI = dfsan_get_label_info(L);
220  if (LI->l1 || LI->l2)
221  return LR = LabelRange::Join(GetLabelRange(LI->l1), GetLabelRange(LI->l2));
222  return LR = LabelRange::Singleton(LI);
223 }
224 
225 void TraceState::ApplyTraceBasedMutation(size_t Idx, fuzzer::Unit *U) {
226  assert(Idx < Mutations.size());
227  auto &M = Mutations[Idx];
228  if (Options.Verbosity >= 3)
229  Printf("TBM %zd %zd %zd\n", M.Pos, M.Size, M.Data);
230  if (M.Pos + M.Size > U->size()) return;
231  memcpy(U->data() + M.Pos, &M.Data, M.Size);
232 }
233 
234 void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
235  uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
236  dfsan_label L2) {
237  assert(ReallyHaveDFSan());
238  if (!RecordingTraces) return;
239  if (L1 == 0 && L2 == 0)
240  return; // Not actionable.
241  if (L1 != 0 && L2 != 0)
242  return; // Probably still actionable.
243  bool Res = ComputeCmp(CmpSize, CmpType, Arg1, Arg2);
244  uint64_t Data = L1 ? Arg2 : Arg1;
245  LabelRange LR = L1 ? GetLabelRange(L1) : GetLabelRange(L2);
246 
247  for (size_t Pos = LR.Beg; Pos + CmpSize <= LR.End; Pos++) {
248  Mutations.push_back({Pos, CmpSize, Data});
249  Mutations.push_back({Pos, CmpSize, Data + 1});
250  Mutations.push_back({Pos, CmpSize, Data - 1});
251  }
252 
253  if (CmpSize > LR.End - LR.Beg)
254  Mutations.push_back({LR.Beg, (unsigned)(LR.End - LR.Beg), Data});
255 
256 
257  if (Options.Verbosity >= 3)
258  Printf("DFSAN: PC %lx S %zd T %zd A1 %llx A2 %llx R %d L1 %d L2 %d MU %zd\n",
259  PC, CmpSize, CmpType, Arg1, Arg2, Res, L1, L2, Mutations.size());
260 }
261 
262 int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
263  size_t DataSize) {
264  int Res = 0;
265  const uint8_t *Beg = CurrentUnit.data();
266  const uint8_t *End = Beg + CurrentUnit.size();
267  for (const uint8_t *Cur = Beg; Cur < End; Cur += DataSize) {
268  Cur = (uint8_t *)memmem(Cur, End - Cur, &PresentData, DataSize);
269  if (!Cur)
270  break;
271  size_t Pos = Cur - Beg;
272  assert(Pos < CurrentUnit.size());
273  Mutations.push_back({Pos, DataSize, DesiredData});
274  Mutations.push_back({Pos, DataSize, DesiredData + 1});
275  Mutations.push_back({Pos, DataSize, DesiredData - 1});
276  Cur += DataSize;
277  Res++;
278  }
279  return Res;
280 }
281 
282 void TraceState::TraceCmpCallback(size_t CmpSize, size_t CmpType, uint64_t Arg1,
283  uint64_t Arg2) {
284  if (!RecordingTraces) return;
285  int Added = 0;
286  if (Options.Verbosity >= 3)
287  Printf("TraceCmp: %zd %zd\n", Arg1, Arg2);
288  Added += TryToAddDesiredData(Arg1, Arg2, CmpSize);
289  Added += TryToAddDesiredData(Arg2, Arg1, CmpSize);
290  if (!Added && CmpSize == 4 && IsTwoByteData(Arg1) && IsTwoByteData(Arg2)) {
291  Added += TryToAddDesiredData(Arg1, Arg2, 2);
292  Added += TryToAddDesiredData(Arg2, Arg1, 2);
293  }
294 }
295 
296 static TraceState *TS;
297 
298 void Fuzzer::StartTraceRecording() {
299  if (!TS) return;
300  TS->StartTraceRecording();
301 }
302 
303 size_t Fuzzer::StopTraceRecording() {
304  if (!TS) return 0;
305  return TS->StopTraceRecording();
306 }
307 
308 void Fuzzer::ApplyTraceBasedMutation(size_t Idx, Unit *U) {
309  assert(TS);
310  TS->ApplyTraceBasedMutation(Idx, U);
311 }
312 
313 void Fuzzer::InitializeTraceState() {
314  if (!Options.UseTraces) return;
315  TS = new TraceState(Options, CurrentUnit);
316  CurrentUnit.resize(Options.MaxLen);
317  // The rest really requires DFSan.
318  if (!ReallyHaveDFSan()) return;
319  for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) {
320  dfsan_label L = dfsan_create_label("input", (void*)(i + 1));
321  // We assume that no one else has called dfsan_create_label before.
322  assert(L == i + 1);
323  dfsan_set_label(L, &CurrentUnit[i], 1);
324  }
325 }
326 
327 } // namespace fuzzer
328 
329 using fuzzer::TS;
330 
331 extern "C" {
332 void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
333  uint64_t Arg2, dfsan_label L0,
334  dfsan_label L1, dfsan_label L2) {
335  if (!TS) return;
336  assert(L0 == 0);
337  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
338  uint64_t CmpSize = (SizeAndType >> 32) / 8;
339  uint64_t Type = (SizeAndType << 32) >> 32;
340  TS->DFSanCmpCallback(PC, CmpSize, Type, Arg1, Arg2, L1, L2);
341 }
342 
343 void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
344  size_t n, dfsan_label s1_label,
345  dfsan_label s2_label, dfsan_label n_label) {
346  if (!TS) return;
347  uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
348  uint64_t S1 = 0, S2 = 0;
349  // Simplification: handle only first 8 bytes.
350  memcpy(&S1, s1, std::min(n, sizeof(S1)));
351  memcpy(&S2, s2, std::min(n, sizeof(S2)));
352  dfsan_label L1 = dfsan_read_label(s1, n);
353  dfsan_label L2 = dfsan_read_label(s2, n);
354  TS->DFSanCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2, L1, L2);
355 }
356 
357 void __sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
358  uint64_t Arg2) {
359  if (!TS) return;
360  uint64_t CmpSize = (SizeAndType >> 32) / 8;
361  uint64_t Type = (SizeAndType << 32) >> 32;
362  TS->TraceCmpCallback(CmpSize, Type, Arg1, Arg2);
363 }
364 
365 } // extern "C"
LabelRange & Join(LabelRange LR)
unsigned greater than
unsigned less than
LabelRange(uint16_t Beg=0, uint16_t End=0)
TraceState(const Fuzzer::FuzzingOptions &Options, const Unit &CurrentUnit)
static bool ReallyHaveDFSan()
static LabelRange Join(LabelRange LR1, LabelRange LR2)
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
unsigned greater or equal
void TraceCmpCallback(size_t CmpSize, size_t CmpType, uint64_t Arg1, uint64_t Arg2)
signed greater than
void __sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1, uint64_t Arg2)
void Printf(const char *Fmt,...)
Definition: FuzzerIO.cpp:87
void ApplyTraceBasedMutation(size_t Idx, fuzzer::Unit *U)
static LabelRange Singleton(const dfsan_label_info *LI)
signed less than
void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, uint64_t Arg1, uint64_t Arg2, dfsan_label L1, dfsan_label L2)
unsigned less or equal
void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1, uint64_t Arg2, dfsan_label L0, dfsan_label L1, dfsan_label L2)
void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label)
void size_t size
__attribute__((weak)) dfsan_label dfsan_create_label(const char *desc
std::vector< uint8_t > Unit
signed less or equal
static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1, uint64_t Arg2)
aarch64 promote const
static TraceState * TS
signed greater or equal
void * addr
void * userdata