clang  9.0.0
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
26 #include <climits>
27 #include <initializer_list>
28 #include <utility>
29 
30 using namespace clang;
31 using namespace ento;
32 using namespace taint;
33 
34 namespace {
35 class GenericTaintChecker
36  : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
37 public:
38  static void *getTag() {
39  static int Tag;
40  return &Tag;
41  }
42 
43  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
44 
45  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
46 
47  void printState(raw_ostream &Out, ProgramStateRef State,
48  const char *NL, const char *Sep) const override;
49 
50 private:
51  static const unsigned InvalidArgIndex = UINT_MAX;
52  /// Denotes the return vale.
53  static const unsigned ReturnValueIndex = UINT_MAX - 1;
54 
55  mutable std::unique_ptr<BugType> BT;
56  void initBugType() const {
57  if (!BT)
58  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
59  }
60 
61  /// Catch taint related bugs. Check if tainted data is passed to a
62  /// system call etc.
63  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
64 
65  /// Add taint sources on a pre-visit.
66  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
67 
68  /// Propagate taint generated at pre-visit.
69  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
70 
71  /// Check if the region the expression evaluates to is the standard input,
72  /// and thus, is tainted.
73  static bool isStdin(const Expr *E, CheckerContext &C);
74 
75  /// Given a pointer argument, return the value it points to.
76  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
77 
78  /// Check for CWE-134: Uncontrolled Format String.
79  static const char MsgUncontrolledFormatString[];
80  bool checkUncontrolledFormatString(const CallExpr *CE,
81  CheckerContext &C) const;
82 
83  /// Check for:
84  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
85  /// CWE-78, "Failure to Sanitize Data into an OS Command"
86  static const char MsgSanitizeSystemArgs[];
87  bool checkSystemCall(const CallExpr *CE, StringRef Name,
88  CheckerContext &C) const;
89 
90  /// Check if tainted data is used as a buffer size ins strn.. functions,
91  /// and allocators.
92  static const char MsgTaintedBufferSize[];
93  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
94  CheckerContext &C) const;
95 
96  /// Generate a report if the expression is tainted or points to tainted data.
97  bool generateReportIfTainted(const Expr *E, const char Msg[],
98  CheckerContext &C) const;
99 
100  using ArgVector = SmallVector<unsigned, 2>;
101 
102  /// A struct used to specify taint propagation rules for a function.
103  ///
104  /// If any of the possible taint source arguments is tainted, all of the
105  /// destination arguments should also be tainted. Use InvalidArgIndex in the
106  /// src list to specify that all of the arguments can introduce taint. Use
107  /// InvalidArgIndex in the dst arguments to signify that all the non-const
108  /// pointer and reference arguments might be tainted on return. If
109  /// ReturnValueIndex is added to the dst list, the return value will be
110  /// tainted.
111  struct TaintPropagationRule {
112  enum class VariadicType { None, Src, Dst };
113 
114  using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
115  CheckerContext &C);
116 
117  /// List of arguments which can be taint sources and should be checked.
118  ArgVector SrcArgs;
119  /// List of arguments which should be tainted on function return.
120  ArgVector DstArgs;
121  /// Index for the first variadic parameter if exist.
122  unsigned VariadicIndex;
123  /// Show when a function has variadic parameters. If it has, it marks all
124  /// of them as source or destination.
125  VariadicType VarType;
126  /// Special function for tainted source determination. If defined, it can
127  /// override the default behavior.
128  PropagationFuncType PropagationFunc;
129 
130  TaintPropagationRule()
131  : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
132  PropagationFunc(nullptr) {}
133 
134  TaintPropagationRule(std::initializer_list<unsigned> &&Src,
135  std::initializer_list<unsigned> &&Dst,
136  VariadicType Var = VariadicType::None,
137  unsigned VarIndex = InvalidArgIndex,
138  PropagationFuncType Func = nullptr)
139  : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
140  VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
141 
142  /// Get the propagation rule for a given function.
143  static TaintPropagationRule
144  getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
145  CheckerContext &C);
146 
147  void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
148  void addDstArg(unsigned A) { DstArgs.push_back(A); }
149 
150  bool isNull() const {
151  return SrcArgs.empty() && DstArgs.empty() &&
152  VariadicType::None == VarType;
153  }
154 
155  bool isDestinationArgument(unsigned ArgNum) const {
156  return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
157  }
158 
159  static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
160  CheckerContext &C) {
161  if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
162  return true;
163 
164  if (!E->getType().getTypePtr()->isPointerType())
165  return false;
166 
167  Optional<SVal> V = getPointedToSVal(C, E);
168  return (V && isTainted(State, *V));
169  }
170 
171  /// Pre-process a function which propagates taint according to the
172  /// taint rule.
173  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
174 
175  // Functions for custom taintedness propagation.
176  static bool postSocket(bool IsTainted, const CallExpr *CE,
177  CheckerContext &C);
178  };
179 };
180 
181 const unsigned GenericTaintChecker::ReturnValueIndex;
182 const unsigned GenericTaintChecker::InvalidArgIndex;
183 
184 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
185  "Untrusted data is used as a format string "
186  "(CWE-134: Uncontrolled Format String)";
187 
188 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
189  "Untrusted data is passed to a system call "
190  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
191 
192 const char GenericTaintChecker::MsgTaintedBufferSize[] =
193  "Untrusted data is used to specify the buffer size "
194  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
195  "for character data and the null terminator)";
196 
197 } // end of anonymous namespace
198 
199 /// A set used to pass information from the pre-visit of a call to its
200 /// post-visit. The elements are unsigned integers: either
201 /// ReturnValueIndex, or the index of a pointer/reference argument whose
202 /// pointed-to data should be tainted on return.
203 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
204 
205 GenericTaintChecker::TaintPropagationRule
206 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
207  const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
208  // TODO: Currently, we might lose precision here: we always mark a return
209  // value as tainted even if it's just a pointer, pointing to tainted data.
210 
211  // Check for exact name match for functions without builtin substitutes.
212  TaintPropagationRule Rule =
213  llvm::StringSwitch<TaintPropagationRule>(Name)
214  // Source functions
215  // TODO: Add support for vfscanf & family.
216  .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
217  .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
218  .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
219  .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
220  .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
221  .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
222  .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
223  .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
224  .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
225  .Case("socket",
226  TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
227  InvalidArgIndex,
228  &TaintPropagationRule::postSocket))
229  .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
230  // Propagating functions
231  .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
232  .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
233  .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
234  .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
235  .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
236  .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
237  .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
238  .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
239  .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
240  .Case("getdelim", TaintPropagationRule({3}, {0}))
241  .Case("getline", TaintPropagationRule({2}, {0}))
242  .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
243  .Case("pread",
244  TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
245  .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
246  .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
247  .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
248  .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
249  .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
250  .Default(TaintPropagationRule());
251 
252  if (!Rule.isNull())
253  return Rule;
254 
255  // Check if it's one of the memory setting/copying functions.
256  // This check is specialized but faster then calling isCLibraryFunction.
257  unsigned BId = 0;
258  if ((BId = FDecl->getMemoryFunctionKind()))
259  switch (BId) {
260  case Builtin::BImemcpy:
261  case Builtin::BImemmove:
262  case Builtin::BIstrncpy:
263  case Builtin::BIstrncat:
264  return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
265  case Builtin::BIstrlcpy:
266  case Builtin::BIstrlcat:
267  return TaintPropagationRule({1, 2}, {0});
268  case Builtin::BIstrndup:
269  return TaintPropagationRule({0, 1}, {ReturnValueIndex});
270 
271  default:
272  break;
273  };
274 
275  // Process all other functions which could be defined as builtins.
276  if (Rule.isNull()) {
277  if (C.isCLibraryFunction(FDecl, "snprintf"))
278  return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
279  3);
280  else if (C.isCLibraryFunction(FDecl, "sprintf"))
281  return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
282  2);
283  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
284  C.isCLibraryFunction(FDecl, "stpcpy") ||
285  C.isCLibraryFunction(FDecl, "strcat"))
286  return TaintPropagationRule({1}, {0, ReturnValueIndex});
287  else if (C.isCLibraryFunction(FDecl, "bcopy"))
288  return TaintPropagationRule({0, 2}, {1});
289  else if (C.isCLibraryFunction(FDecl, "strdup") ||
290  C.isCLibraryFunction(FDecl, "strdupa"))
291  return TaintPropagationRule({0}, {ReturnValueIndex});
292  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
293  return TaintPropagationRule({0}, {ReturnValueIndex});
294  }
295 
296  // Skipping the following functions, since they might be used for cleansing
297  // or smart memory copy:
298  // - memccpy - copying until hitting a special character.
299 
300  return TaintPropagationRule();
301 }
302 
303 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
304  CheckerContext &C) const {
305  // Check for taintedness related errors first: system call, uncontrolled
306  // format string, tainted buffer size.
307  if (checkPre(CE, C))
308  return;
309 
310  // Marks the function's arguments and/or return value tainted if it present in
311  // the list.
312  addSourcesPre(CE, C);
313 }
314 
315 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
316  CheckerContext &C) const {
317  // Set the marked values as tainted. The return value only accessible from
318  // checkPostStmt.
319  propagateFromPre(CE, C);
320 }
321 
322 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
323  const char *NL, const char *Sep) const {
324  printTaint(State, Out, NL, Sep);
325 }
326 
327 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
328  CheckerContext &C) const {
329  ProgramStateRef State = nullptr;
330  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
331  if (!FDecl || FDecl->getKind() != Decl::Function)
332  return;
333 
334  StringRef Name = C.getCalleeName(FDecl);
335  if (Name.empty())
336  return;
337 
338  // First, try generating a propagation rule for this function.
339  TaintPropagationRule Rule =
340  TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
341  if (!Rule.isNull()) {
342  State = Rule.process(CE, C);
343  if (!State)
344  return;
345  C.addTransition(State);
346  return;
347  }
348 
349  if (!State)
350  return;
351  C.addTransition(State);
352 }
353 
354 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
355  CheckerContext &C) const {
356  ProgramStateRef State = C.getState();
357 
358  // Depending on what was tainted at pre-visit, we determined a set of
359  // arguments which should be tainted after the function returns. These are
360  // stored in the state as TaintArgsOnPostVisit set.
361  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
362  if (TaintArgs.isEmpty())
363  return false;
364 
365  for (unsigned ArgNum : TaintArgs) {
366  // Special handling for the tainted return value.
367  if (ArgNum == ReturnValueIndex) {
368  State = addTaint(State, CE, C.getLocationContext());
369  continue;
370  }
371 
372  // The arguments are pointer arguments. The data they are pointing at is
373  // tainted after the call.
374  if (CE->getNumArgs() < (ArgNum + 1))
375  return false;
376  const Expr *Arg = CE->getArg(ArgNum);
377  Optional<SVal> V = getPointedToSVal(C, Arg);
378  if (V)
379  State = addTaint(State, *V);
380  }
381 
382  // Clear up the taint info from the state.
383  State = State->remove<TaintArgsOnPostVisit>();
384 
385  if (State != C.getState()) {
386  C.addTransition(State);
387  return true;
388  }
389  return false;
390 }
391 
392 bool GenericTaintChecker::checkPre(const CallExpr *CE,
393  CheckerContext &C) const {
394 
395  if (checkUncontrolledFormatString(CE, C))
396  return true;
397 
398  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
399  if (!FDecl || FDecl->getKind() != Decl::Function)
400  return false;
401 
402  StringRef Name = C.getCalleeName(FDecl);
403  if (Name.empty())
404  return false;
405 
406  if (checkSystemCall(CE, Name, C))
407  return true;
408 
409  if (checkTaintedBufferSize(CE, FDecl, C))
410  return true;
411 
412  return false;
413 }
414 
415 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
416  const Expr *Arg) {
417  ProgramStateRef State = C.getState();
418  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
419  if (AddrVal.isUnknownOrUndef())
420  return None;
421 
422  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
423  if (!AddrLoc)
424  return None;
425 
426  QualType ArgTy = Arg->getType().getCanonicalType();
427  if (!ArgTy->isPointerType())
428  return None;
429 
430  QualType ValTy = ArgTy->getPointeeType();
431 
432  // Do not dereference void pointers. Treat them as byte pointers instead.
433  // FIXME: we might want to consider more than just the first byte.
434  if (ValTy->isVoidType())
435  ValTy = C.getASTContext().CharTy;
436 
437  return State->getSVal(*AddrLoc, ValTy);
438 }
439 
441 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
442  CheckerContext &C) const {
443  ProgramStateRef State = C.getState();
444 
445  // Check for taint in arguments.
446  bool IsTainted = true;
447  for (unsigned ArgNum : SrcArgs) {
448  if (ArgNum >= CE->getNumArgs())
449  return State;
450  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
451  break;
452  }
453 
454  // Check for taint in variadic arguments.
455  if (!IsTainted && VariadicType::Src == VarType) {
456  // Check if any of the arguments is tainted
457  for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
458  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
459  break;
460  }
461  }
462 
463  if (PropagationFunc)
464  IsTainted = PropagationFunc(IsTainted, CE, C);
465 
466  if (!IsTainted)
467  return State;
468 
469  // Mark the arguments which should be tainted after the function returns.
470  for (unsigned ArgNum : DstArgs) {
471  // Should mark the return value?
472  if (ArgNum == ReturnValueIndex) {
473  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
474  continue;
475  }
476 
477  // Mark the given argument.
478  assert(ArgNum < CE->getNumArgs());
479  State = State->add<TaintArgsOnPostVisit>(ArgNum);
480  }
481 
482  // Mark all variadic arguments tainted if present.
483  if (VariadicType::Dst == VarType) {
484  // For all pointer and references that were passed in:
485  // If they are not pointing to const data, mark data as tainted.
486  // TODO: So far we are just going one level down; ideally we'd need to
487  // recurse here.
488  for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
489  const Expr *Arg = CE->getArg(i);
490  // Process pointer argument.
491  const Type *ArgTy = Arg->getType().getTypePtr();
492  QualType PType = ArgTy->getPointeeType();
493  if ((!PType.isNull() && !PType.isConstQualified()) ||
494  (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
495  State = State->add<TaintArgsOnPostVisit>(i);
496  }
497  }
498 
499  return State;
500 }
501 
502 // If argument 0(protocol domain) is network, the return value should get taint.
503 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
504  const CallExpr *CE,
505  CheckerContext &C) {
506  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
507  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
508  // White list the internal communication protocols.
509  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
510  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
511  return false;
512 
513  return true;
514 }
515 
516 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
517  ProgramStateRef State = C.getState();
518  SVal Val = C.getSVal(E);
519 
520  // stdin is a pointer, so it would be a region.
521  const MemRegion *MemReg = Val.getAsRegion();
522 
523  // The region should be symbolic, we do not know it's value.
524  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
525  if (!SymReg)
526  return false;
527 
528  // Get it's symbol and find the declaration region it's pointing to.
529  const SymbolRegionValue *Sm =
530  dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
531  if (!Sm)
532  return false;
533  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
534  if (!DeclReg)
535  return false;
536 
537  // This region corresponds to a declaration, find out if it's a global/extern
538  // variable named stdin with the proper type.
539  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
540  D = D->getCanonicalDecl();
541  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
542  const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
543  if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
544  C.getASTContext().getFILEType().getCanonicalType())
545  return true;
546  }
547  }
548  return false;
549 }
550 
551 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
552  const CheckerContext &C,
553  unsigned int &ArgNum) {
554  // Find if the function contains a format string argument.
555  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
556  // vsnprintf, syslog, custom annotated functions.
557  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
558  if (!FDecl)
559  return false;
560  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
561  ArgNum = Format->getFormatIdx() - 1;
562  if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
563  return true;
564  }
565 
566  // Or if a function is named setproctitle (this is a heuristic).
567  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
568  ArgNum = 0;
569  return true;
570  }
571 
572  return false;
573 }
574 
575 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
576  const char Msg[],
577  CheckerContext &C) const {
578  assert(E);
579 
580  // Check for taint.
581  ProgramStateRef State = C.getState();
582  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
583  SVal TaintedSVal;
584  if (PointedToSVal && isTainted(State, *PointedToSVal))
585  TaintedSVal = *PointedToSVal;
586  else if (isTainted(State, E, C.getLocationContext()))
587  TaintedSVal = C.getSVal(E);
588  else
589  return false;
590 
591  // Generate diagnostic.
592  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
593  initBugType();
594  auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
595  report->addRange(E->getSourceRange());
596  report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
597  C.emitReport(std::move(report));
598  return true;
599  }
600  return false;
601 }
602 
603 bool GenericTaintChecker::checkUncontrolledFormatString(
604  const CallExpr *CE, CheckerContext &C) const {
605  // Check if the function contains a format string argument.
606  unsigned int ArgNum = 0;
607  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
608  return false;
609 
610  // If either the format string content or the pointer itself are tainted,
611  // warn.
612  return generateReportIfTainted(CE->getArg(ArgNum),
613  MsgUncontrolledFormatString, C);
614 }
615 
616 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
617  CheckerContext &C) const {
618  // TODO: It might make sense to run this check on demand. In some cases,
619  // we should check if the environment has been cleansed here. We also might
620  // need to know if the user was reset before these calls(seteuid).
621  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
622  .Case("system", 0)
623  .Case("popen", 0)
624  .Case("execl", 0)
625  .Case("execle", 0)
626  .Case("execlp", 0)
627  .Case("execv", 0)
628  .Case("execvp", 0)
629  .Case("execvP", 0)
630  .Case("execve", 0)
631  .Case("dlopen", 0)
632  .Default(UINT_MAX);
633 
634  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
635  return false;
636 
637  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
638 }
639 
640 // TODO: Should this check be a part of the CString checker?
641 // If yes, should taint be a global setting?
642 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
643  const FunctionDecl *FDecl,
644  CheckerContext &C) const {
645  // If the function has a buffer size argument, set ArgNum.
646  unsigned ArgNum = InvalidArgIndex;
647  unsigned BId = 0;
648  if ((BId = FDecl->getMemoryFunctionKind()))
649  switch (BId) {
650  case Builtin::BImemcpy:
651  case Builtin::BImemmove:
652  case Builtin::BIstrncpy:
653  ArgNum = 2;
654  break;
655  case Builtin::BIstrndup:
656  ArgNum = 1;
657  break;
658  default:
659  break;
660  };
661 
662  if (ArgNum == InvalidArgIndex) {
663  if (C.isCLibraryFunction(FDecl, "malloc") ||
664  C.isCLibraryFunction(FDecl, "calloc") ||
665  C.isCLibraryFunction(FDecl, "alloca"))
666  ArgNum = 0;
667  else if (C.isCLibraryFunction(FDecl, "memccpy"))
668  ArgNum = 3;
669  else if (C.isCLibraryFunction(FDecl, "realloc"))
670  ArgNum = 1;
671  else if (C.isCLibraryFunction(FDecl, "bcopy"))
672  ArgNum = 2;
673  }
674 
675  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
676  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
677 }
678 
679 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
680  mgr.registerChecker<GenericTaintChecker>();
681 }
682 
683 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
684  return true;
685 }
Represents a function declaration or definition.
Definition: Decl.h:1748
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3755
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2569
A (possibly-)qualified type.
Definition: Type.h:643
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2673
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2660
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
Definition: Type.h:1433
constexpr XRayInstrMask Function
Definition: XRayInstr.h:38
long i
Definition: xmmintrin.h:1456
LineState State
Definition: Format.h:2274
bool isReferenceType() const
Definition: Type.h:6396
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6142
#define UINT_MAX
Definition: limits.h:56
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
This represents one expression.
Definition: Expr.h:108
#define V(N, I)
Definition: ASTContext.h:2907
#define bool
Definition: stdbool.h:15
QualType getType() const
Definition: Expr.h:137
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:708
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:6201
QualType getCanonicalType() const
Definition: Type.h:6181
Encodes a location in the source.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
constexpr XRayInstrMask None
Definition: XRayInstr.h:37
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:215
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
Kind getKind() const
Definition: DeclBase.h:432
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:524
bool isVoidType() const
Definition: Type.h:6643
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:251
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2516
bool isPointerType() const
Definition: Type.h:6384
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:2938