clang  8.0.0
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
12 // The taint information produced by it might be useful to other checkers. For
13 // example, checkers should report errors which involve tainted data more
14 // aggressively, even if the involved symbols are under constrained.
15 //
16 //===----------------------------------------------------------------------===//
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
25 #include <climits>
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class GenericTaintChecker
32  : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
33 public:
34  static void *getTag() {
35  static int Tag;
36  return &Tag;
37  }
38 
39  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
40 
41  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
42 
43 private:
44  static const unsigned InvalidArgIndex = UINT_MAX;
45  /// Denotes the return vale.
46  static const unsigned ReturnValueIndex = UINT_MAX - 1;
47 
48  mutable std::unique_ptr<BugType> BT;
49  inline void initBugType() const {
50  if (!BT)
51  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
52  }
53 
54  /// Catch taint related bugs. Check if tainted data is passed to a
55  /// system call etc.
56  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
57 
58  /// Add taint sources on a pre-visit.
59  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
60 
61  /// Propagate taint generated at pre-visit.
62  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
63 
64  /// Add taint sources on a post visit.
65  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
66 
67  /// Check if the region the expression evaluates to is the standard input,
68  /// and thus, is tainted.
69  static bool isStdin(const Expr *E, CheckerContext &C);
70 
71  /// Given a pointer argument, return the value it points to.
72  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
73 
74  /// Functions defining the attack surface.
75  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(
76  const CallExpr *, CheckerContext &C) const;
77  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
78  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
79  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
80 
81  /// Taint the scanned input if the file is tainted.
82  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
83 
84  /// Check for CWE-134: Uncontrolled Format String.
85  static const char MsgUncontrolledFormatString[];
86  bool checkUncontrolledFormatString(const CallExpr *CE,
87  CheckerContext &C) const;
88 
89  /// Check for:
90  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
91  /// CWE-78, "Failure to Sanitize Data into an OS Command"
92  static const char MsgSanitizeSystemArgs[];
93  bool checkSystemCall(const CallExpr *CE, StringRef Name,
94  CheckerContext &C) const;
95 
96  /// Check if tainted data is used as a buffer size ins strn.. functions,
97  /// and allocators.
98  static const char MsgTaintedBufferSize[];
99  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
100  CheckerContext &C) const;
101 
102  /// Generate a report if the expression is tainted or points to tainted data.
103  bool generateReportIfTainted(const Expr *E, const char Msg[],
104  CheckerContext &C) const;
105 
106  typedef SmallVector<unsigned, 2> ArgVector;
107 
108  /// A struct used to specify taint propagation rules for a function.
109  ///
110  /// If any of the possible taint source arguments is tainted, all of the
111  /// destination arguments should also be tainted. Use InvalidArgIndex in the
112  /// src list to specify that all of the arguments can introduce taint. Use
113  /// InvalidArgIndex in the dst arguments to signify that all the non-const
114  /// pointer and reference arguments might be tainted on return. If
115  /// ReturnValueIndex is added to the dst list, the return value will be
116  /// tainted.
117  struct TaintPropagationRule {
118  /// List of arguments which can be taint sources and should be checked.
119  ArgVector SrcArgs;
120  /// List of arguments which should be tainted on function return.
121  ArgVector DstArgs;
122  // TODO: Check if using other data structures would be more optimal.
123 
124  TaintPropagationRule() {}
125 
126  TaintPropagationRule(unsigned SArg, unsigned DArg, bool TaintRet = false) {
127  SrcArgs.push_back(SArg);
128  DstArgs.push_back(DArg);
129  if (TaintRet)
130  DstArgs.push_back(ReturnValueIndex);
131  }
132 
133  TaintPropagationRule(unsigned SArg1, unsigned SArg2, unsigned DArg,
134  bool TaintRet = false) {
135  SrcArgs.push_back(SArg1);
136  SrcArgs.push_back(SArg2);
137  DstArgs.push_back(DArg);
138  if (TaintRet)
139  DstArgs.push_back(ReturnValueIndex);
140  }
141 
142  /// Get the propagation rule for a given function.
143  static TaintPropagationRule
144  getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
145  CheckerContext &C);
146 
147  inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
148  inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
149 
150  inline bool isNull() const { return SrcArgs.empty(); }
151 
152  inline bool isDestinationArgument(unsigned ArgNum) const {
153  return (std::find(DstArgs.begin(), DstArgs.end(), ArgNum) !=
154  DstArgs.end());
155  }
156 
157  static inline bool isTaintedOrPointsToTainted(const Expr *E,
159  CheckerContext &C) {
160  if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
161  return true;
162 
163  if (!E->getType().getTypePtr()->isPointerType())
164  return false;
165 
166  Optional<SVal> V = getPointedToSVal(C, E);
167  return (V && State->isTainted(*V));
168  }
169 
170  /// Pre-process a function which propagates taint according to the
171  /// taint rule.
172  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
173  };
174 };
175 
176 const unsigned GenericTaintChecker::ReturnValueIndex;
177 const unsigned GenericTaintChecker::InvalidArgIndex;
178 
179 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
180  "Untrusted data is used as a format string "
181  "(CWE-134: Uncontrolled Format String)";
182 
183 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
184  "Untrusted data is passed to a system call "
185  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
186 
187 const char GenericTaintChecker::MsgTaintedBufferSize[] =
188  "Untrusted data is used to specify the buffer size "
189  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
190  "for "
191  "character data and the null terminator)";
192 
193 } // end of anonymous namespace
194 
195 /// A set which is used to pass information from call pre-visit instruction
196 /// to the call post-visit. The values are unsigned integers, which are either
197 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
198 /// points to data, which should be tainted on return.
199 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
200 
201 GenericTaintChecker::TaintPropagationRule
202 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
203  const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
204  // TODO: Currently, we might lose precision here: we always mark a return
205  // value as tainted even if it's just a pointer, pointing to tainted data.
206 
207  // Check for exact name match for functions without builtin substitutes.
208  TaintPropagationRule Rule =
209  llvm::StringSwitch<TaintPropagationRule>(Name)
210  .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
211  .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
212  .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
213  .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
214  .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
215  .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
216  .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
217  .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
218  .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
219  .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
220  .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
221  .Case("read", TaintPropagationRule(0, 2, 1, true))
222  .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
223  .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
224  .Case("fgets", TaintPropagationRule(2, 0, true))
225  .Case("getline", TaintPropagationRule(2, 0))
226  .Case("getdelim", TaintPropagationRule(3, 0))
227  .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
228  .Default(TaintPropagationRule());
229 
230  if (!Rule.isNull())
231  return Rule;
232 
233  // Check if it's one of the memory setting/copying functions.
234  // This check is specialized but faster then calling isCLibraryFunction.
235  unsigned BId = 0;
236  if ((BId = FDecl->getMemoryFunctionKind()))
237  switch (BId) {
238  case Builtin::BImemcpy:
239  case Builtin::BImemmove:
240  case Builtin::BIstrncpy:
241  case Builtin::BIstrncat:
242  return TaintPropagationRule(1, 2, 0, true);
243  case Builtin::BIstrlcpy:
244  case Builtin::BIstrlcat:
245  return TaintPropagationRule(1, 2, 0, false);
246  case Builtin::BIstrndup:
247  return TaintPropagationRule(0, 1, ReturnValueIndex);
248 
249  default:
250  break;
251  };
252 
253  // Process all other functions which could be defined as builtins.
254  if (Rule.isNull()) {
255  if (C.isCLibraryFunction(FDecl, "snprintf") ||
256  C.isCLibraryFunction(FDecl, "sprintf"))
257  return TaintPropagationRule(InvalidArgIndex, 0, true);
258  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
259  C.isCLibraryFunction(FDecl, "stpcpy") ||
260  C.isCLibraryFunction(FDecl, "strcat"))
261  return TaintPropagationRule(1, 0, true);
262  else if (C.isCLibraryFunction(FDecl, "bcopy"))
263  return TaintPropagationRule(0, 2, 1, false);
264  else if (C.isCLibraryFunction(FDecl, "strdup") ||
265  C.isCLibraryFunction(FDecl, "strdupa"))
266  return TaintPropagationRule(0, ReturnValueIndex);
267  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
268  return TaintPropagationRule(0, ReturnValueIndex);
269  }
270 
271  // Skipping the following functions, since they might be used for cleansing
272  // or smart memory copy:
273  // - memccpy - copying until hitting a special character.
274 
275  return TaintPropagationRule();
276 }
277 
278 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
279  CheckerContext &C) const {
280  // Check for errors first.
281  if (checkPre(CE, C))
282  return;
283 
284  // Add taint second.
285  addSourcesPre(CE, C);
286 }
287 
288 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
289  CheckerContext &C) const {
290  if (propagateFromPre(CE, C))
291  return;
292  addSourcesPost(CE, C);
293 }
294 
295 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
296  CheckerContext &C) const {
297  ProgramStateRef State = nullptr;
298  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
299  if (!FDecl || FDecl->getKind() != Decl::Function)
300  return;
301 
302  StringRef Name = C.getCalleeName(FDecl);
303  if (Name.empty())
304  return;
305 
306  // First, try generating a propagation rule for this function.
307  TaintPropagationRule Rule =
308  TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
309  if (!Rule.isNull()) {
310  State = Rule.process(CE, C);
311  if (!State)
312  return;
313  C.addTransition(State);
314  return;
315  }
316 
317  // Otherwise, check if we have custom pre-processing implemented.
318  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
319  .Case("fscanf", &GenericTaintChecker::preFscanf)
320  .Default(nullptr);
321  // Check and evaluate the call.
322  if (evalFunction)
323  State = (this->*evalFunction)(CE, C);
324  if (!State)
325  return;
326  C.addTransition(State);
327 }
328 
329 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
330  CheckerContext &C) const {
331  ProgramStateRef State = C.getState();
332 
333  // Depending on what was tainted at pre-visit, we determined a set of
334  // arguments which should be tainted after the function returns. These are
335  // stored in the state as TaintArgsOnPostVisit set.
336  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
337  if (TaintArgs.isEmpty())
338  return false;
339 
340  for (llvm::ImmutableSet<unsigned>::iterator I = TaintArgs.begin(),
341  E = TaintArgs.end();
342  I != E; ++I) {
343  unsigned ArgNum = *I;
344 
345  // Special handling for the tainted return value.
346  if (ArgNum == ReturnValueIndex) {
347  State = State->addTaint(CE, C.getLocationContext());
348  continue;
349  }
350 
351  // The arguments are pointer arguments. The data they are pointing at is
352  // tainted after the call.
353  if (CE->getNumArgs() < (ArgNum + 1))
354  return false;
355  const Expr *Arg = CE->getArg(ArgNum);
356  Optional<SVal> V = getPointedToSVal(C, Arg);
357  if (V)
358  State = State->addTaint(*V);
359  }
360 
361  // Clear up the taint info from the state.
362  State = State->remove<TaintArgsOnPostVisit>();
363 
364  if (State != C.getState()) {
365  C.addTransition(State);
366  return true;
367  }
368  return false;
369 }
370 
371 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
372  CheckerContext &C) const {
373  // Define the attack surface.
374  // Set the evaluation function by switching on the callee name.
375  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
376  if (!FDecl || FDecl->getKind() != Decl::Function)
377  return;
378 
379  StringRef Name = C.getCalleeName(FDecl);
380  if (Name.empty())
381  return;
382  FnCheck evalFunction =
383  llvm::StringSwitch<FnCheck>(Name)
384  .Case("scanf", &GenericTaintChecker::postScanf)
385  // TODO: Add support for vfscanf & family.
386  .Case("getchar", &GenericTaintChecker::postRetTaint)
387  .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
388  .Case("getenv", &GenericTaintChecker::postRetTaint)
389  .Case("fopen", &GenericTaintChecker::postRetTaint)
390  .Case("fdopen", &GenericTaintChecker::postRetTaint)
391  .Case("freopen", &GenericTaintChecker::postRetTaint)
392  .Case("getch", &GenericTaintChecker::postRetTaint)
393  .Case("wgetch", &GenericTaintChecker::postRetTaint)
394  .Case("socket", &GenericTaintChecker::postSocket)
395  .Default(nullptr);
396 
397  // If the callee isn't defined, it is not of security concern.
398  // Check and evaluate the call.
399  ProgramStateRef State = nullptr;
400  if (evalFunction)
401  State = (this->*evalFunction)(CE, C);
402  if (!State)
403  return;
404 
405  C.addTransition(State);
406 }
407 
408 bool GenericTaintChecker::checkPre(const CallExpr *CE,
409  CheckerContext &C) const {
410 
411  if (checkUncontrolledFormatString(CE, C))
412  return true;
413 
414  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
415  if (!FDecl || FDecl->getKind() != Decl::Function)
416  return false;
417 
418  StringRef Name = C.getCalleeName(FDecl);
419  if (Name.empty())
420  return false;
421 
422  if (checkSystemCall(CE, Name, C))
423  return true;
424 
425  if (checkTaintedBufferSize(CE, FDecl, C))
426  return true;
427 
428  return false;
429 }
430 
431 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
432  const Expr *Arg) {
433  ProgramStateRef State = C.getState();
434  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
435  if (AddrVal.isUnknownOrUndef())
436  return None;
437 
438  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
439  if (!AddrLoc)
440  return None;
441 
442  QualType ArgTy = Arg->getType().getCanonicalType();
443  if (!ArgTy->isPointerType())
444  return None;
445 
446  QualType ValTy = ArgTy->getPointeeType();
447 
448  // Do not dereference void pointers. Treat them as byte pointers instead.
449  // FIXME: we might want to consider more than just the first byte.
450  if (ValTy->isVoidType())
451  ValTy = C.getASTContext().CharTy;
452 
453  return State->getSVal(*AddrLoc, ValTy);
454 }
455 
457 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
458  CheckerContext &C) const {
459  ProgramStateRef State = C.getState();
460 
461  // Check for taint in arguments.
462  bool IsTainted = false;
463  for (ArgVector::const_iterator I = SrcArgs.begin(), E = SrcArgs.end(); I != E;
464  ++I) {
465  unsigned ArgNum = *I;
466 
467  if (ArgNum == InvalidArgIndex) {
468  // Check if any of the arguments is tainted, but skip the
469  // destination arguments.
470  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
471  if (isDestinationArgument(i))
472  continue;
473  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
474  break;
475  }
476  break;
477  }
478 
479  if (CE->getNumArgs() < (ArgNum + 1))
480  return State;
481  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
482  break;
483  }
484  if (!IsTainted)
485  return State;
486 
487  // Mark the arguments which should be tainted after the function returns.
488  for (ArgVector::const_iterator I = DstArgs.begin(), E = DstArgs.end(); I != E;
489  ++I) {
490  unsigned ArgNum = *I;
491 
492  // Should we mark all arguments as tainted?
493  if (ArgNum == InvalidArgIndex) {
494  // For all pointer and references that were passed in:
495  // If they are not pointing to const data, mark data as tainted.
496  // TODO: So far we are just going one level down; ideally we'd need to
497  // recurse here.
498  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
499  const Expr *Arg = CE->getArg(i);
500  // Process pointer argument.
501  const Type *ArgTy = Arg->getType().getTypePtr();
502  QualType PType = ArgTy->getPointeeType();
503  if ((!PType.isNull() && !PType.isConstQualified()) ||
504  (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
505  State = State->add<TaintArgsOnPostVisit>(i);
506  }
507  continue;
508  }
509 
510  // Should mark the return value?
511  if (ArgNum == ReturnValueIndex) {
512  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
513  continue;
514  }
515 
516  // Mark the given argument.
517  assert(ArgNum < CE->getNumArgs());
518  State = State->add<TaintArgsOnPostVisit>(ArgNum);
519  }
520 
521  return State;
522 }
523 
524 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
525 // and arg 1 should get taint.
526 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
527  CheckerContext &C) const {
528  assert(CE->getNumArgs() >= 2);
529  ProgramStateRef State = C.getState();
530 
531  // Check is the file descriptor is tainted.
532  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
533  isStdin(CE->getArg(0), C)) {
534  // All arguments except for the first two should get taint.
535  for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
536  State = State->add<TaintArgsOnPostVisit>(i);
537  return State;
538  }
539 
540  return nullptr;
541 }
542 
543 // If argument 0(protocol domain) is network, the return value should get taint.
544 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
545  CheckerContext &C) const {
546  ProgramStateRef State = C.getState();
547  if (CE->getNumArgs() < 3)
548  return State;
549 
550  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
551  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
552  // White list the internal communication protocols.
553  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
554  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
555  return State;
556  State = State->addTaint(CE, C.getLocationContext());
557  return State;
558 }
559 
560 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
561  CheckerContext &C) const {
562  ProgramStateRef State = C.getState();
563  if (CE->getNumArgs() < 2)
564  return State;
565 
566  // All arguments except for the very first one should get taint.
567  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
568  // The arguments are pointer arguments. The data they are pointing at is
569  // tainted after the call.
570  const Expr *Arg = CE->getArg(i);
571  Optional<SVal> V = getPointedToSVal(C, Arg);
572  if (V)
573  State = State->addTaint(*V);
574  }
575  return State;
576 }
577 
578 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
579  CheckerContext &C) const {
580  return C.getState()->addTaint(CE, C.getLocationContext());
581 }
582 
583 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
584  ProgramStateRef State = C.getState();
585  SVal Val = C.getSVal(E);
586 
587  // stdin is a pointer, so it would be a region.
588  const MemRegion *MemReg = Val.getAsRegion();
589 
590  // The region should be symbolic, we do not know it's value.
591  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
592  if (!SymReg)
593  return false;
594 
595  // Get it's symbol and find the declaration region it's pointing to.
596  const SymbolRegionValue *Sm =
597  dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
598  if (!Sm)
599  return false;
600  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
601  if (!DeclReg)
602  return false;
603 
604  // This region corresponds to a declaration, find out if it's a global/extern
605  // variable named stdin with the proper type.
606  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
607  D = D->getCanonicalDecl();
608  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
609  if (const PointerType *PtrTy =
610  dyn_cast<PointerType>(D->getType().getTypePtr()))
611  if (PtrTy->getPointeeType().getCanonicalType() ==
612  C.getASTContext().getFILEType().getCanonicalType())
613  return true;
614  }
615  return false;
616 }
617 
618 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
619  const CheckerContext &C,
620  unsigned int &ArgNum) {
621  // Find if the function contains a format string argument.
622  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
623  // vsnprintf, syslog, custom annotated functions.
624  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
625  if (!FDecl)
626  return false;
627  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
628  ArgNum = Format->getFormatIdx() - 1;
629  if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
630  return true;
631  }
632 
633  // Or if a function is named setproctitle (this is a heuristic).
634  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
635  ArgNum = 0;
636  return true;
637  }
638 
639  return false;
640 }
641 
642 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
643  const char Msg[],
644  CheckerContext &C) const {
645  assert(E);
646 
647  // Check for taint.
648  ProgramStateRef State = C.getState();
649  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
650  SVal TaintedSVal;
651  if (PointedToSVal && State->isTainted(*PointedToSVal))
652  TaintedSVal = *PointedToSVal;
653  else if (State->isTainted(E, C.getLocationContext()))
654  TaintedSVal = C.getSVal(E);
655  else
656  return false;
657 
658  // Generate diagnostic.
659  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
660  initBugType();
661  auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
662  report->addRange(E->getSourceRange());
663  report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
664  C.emitReport(std::move(report));
665  return true;
666  }
667  return false;
668 }
669 
670 bool GenericTaintChecker::checkUncontrolledFormatString(
671  const CallExpr *CE, CheckerContext &C) const {
672  // Check if the function contains a format string argument.
673  unsigned int ArgNum = 0;
674  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
675  return false;
676 
677  // If either the format string content or the pointer itself are tainted,
678  // warn.
679  return generateReportIfTainted(CE->getArg(ArgNum),
680  MsgUncontrolledFormatString, C);
681 }
682 
683 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
684  CheckerContext &C) const {
685  // TODO: It might make sense to run this check on demand. In some cases,
686  // we should check if the environment has been cleansed here. We also might
687  // need to know if the user was reset before these calls(seteuid).
688  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
689  .Case("system", 0)
690  .Case("popen", 0)
691  .Case("execl", 0)
692  .Case("execle", 0)
693  .Case("execlp", 0)
694  .Case("execv", 0)
695  .Case("execvp", 0)
696  .Case("execvP", 0)
697  .Case("execve", 0)
698  .Case("dlopen", 0)
699  .Default(UINT_MAX);
700 
701  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
702  return false;
703 
704  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
705 }
706 
707 // TODO: Should this check be a part of the CString checker?
708 // If yes, should taint be a global setting?
709 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
710  const FunctionDecl *FDecl,
711  CheckerContext &C) const {
712  // If the function has a buffer size argument, set ArgNum.
713  unsigned ArgNum = InvalidArgIndex;
714  unsigned BId = 0;
715  if ((BId = FDecl->getMemoryFunctionKind()))
716  switch (BId) {
717  case Builtin::BImemcpy:
718  case Builtin::BImemmove:
719  case Builtin::BIstrncpy:
720  ArgNum = 2;
721  break;
722  case Builtin::BIstrndup:
723  ArgNum = 1;
724  break;
725  default:
726  break;
727  };
728 
729  if (ArgNum == InvalidArgIndex) {
730  if (C.isCLibraryFunction(FDecl, "malloc") ||
731  C.isCLibraryFunction(FDecl, "calloc") ||
732  C.isCLibraryFunction(FDecl, "alloca"))
733  ArgNum = 0;
734  else if (C.isCLibraryFunction(FDecl, "memccpy"))
735  ArgNum = 3;
736  else if (C.isCLibraryFunction(FDecl, "realloc"))
737  ArgNum = 1;
738  else if (C.isCLibraryFunction(FDecl, "bcopy"))
739  ArgNum = 2;
740  }
741 
742  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
743  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
744 }
745 
746 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
747  mgr.registerChecker<GenericTaintChecker>();
748 }
Represents a function declaration or definition.
Definition: Decl.h:1738
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3641
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2537
A (possibly-)qualified type.
Definition: Type.h:638
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2553
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2540
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
Definition: Type.h:1407
constexpr XRayInstrMask Function
Definition: XRayInstr.h:39
Represents a variable declaration or definition.
Definition: Decl.h:813
LineState State
bool isReferenceType() const
Definition: Type.h:6308
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6072
#define UINT_MAX
Definition: limits.h:72
This represents one expression.
Definition: Expr.h:106
QualType getType() const
Definition: Expr.h:128
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:703
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:6131
QualType getCanonicalType() const
Definition: Type.h:6111
Encodes a location in the source.
constexpr XRayInstrMask None
Definition: XRayInstr.h:38
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:215
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
Kind getKind() const
Definition: DeclBase.h:421
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:513
bool isVoidType() const
Definition: Type.h:6544
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:276
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2396
bool isPointerType() const
Definition: Type.h:6296
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
IgnoreParens - Ignore parentheses.
Definition: Expr.cpp:2560