27 using namespace clang;
31 class GenericTaintChecker :
public Checker< check::PostStmt<CallExpr>,
32 check::PreStmt<CallExpr> > {
// Returns the address of the function-local static `Tag`. Every call yields
// the same non-null pointer, so the value works as an identity token.
// NOTE(review): presumably this is the tag that identifies the checker's
// program-state data -- confirm against the callers, which are not visible here.
34 static void *getTag() {
static int Tag;
return &Tag; }
// Sentinel argument index, equal to UINT_MAX. TaintPropagationRule::process
// compares both source and destination indices against it and, on a match,
// iterates over the call's arguments instead of looking at a single one.
// checkTaintedBufferSize also uses it as the "no argument selected" value.
41 static const unsigned InvalidArgIndex =
UINT_MAX;
// Sentinel argument index, equal to UINT_MAX - 1, that stands for the call's
// return value rather than a positional argument. Propagation rules push it
// into DstArgs, and process()/propagateFromPre() test for it explicitly.
43 static const unsigned ReturnValueIndex =
UINT_MAX - 1;
// Owning pointer to the BugType used when building reports (dereferenced in
// generateReportIfTainted). Declared `mutable` because the const member
// initBugType() resets it.
45 mutable std::unique_ptr<BugType> BT;
46 inline void initBugType()
const {
48 BT.reset(
new BugType(
this,
"Use of Untrusted Data",
"Untrusted Data"));
82 static const char MsgUncontrolledFormatString[];
83 bool checkUncontrolledFormatString(
const CallExpr *CE,
89 static const char MsgSanitizeSystemArgs[];
90 bool checkSystemCall(
const CallExpr *CE, StringRef Name,
95 static const char MsgTaintedBufferSize[];
96 bool checkTaintedBufferSize(
const CallExpr *CE,
const FunctionDecl *FDecl,
100 bool generateReportIfTainted(
const Expr *E,
const char Msg[],
114 struct TaintPropagationRule {
// Default constructor: adds no source or destination indices. It is the
// value handed to StringSwitch::Default and returned when no rule matches.
// NOTE(review): isNull() reports true for such a rule provided SrcArgs
// default-constructs empty -- the ArgVector type is not visible here; confirm.
121 TaintPropagationRule() {}
123 TaintPropagationRule(
unsigned SArg,
124 unsigned DArg,
bool TaintRet =
false) {
125 SrcArgs.push_back(SArg);
126 DstArgs.push_back(DArg);
128 DstArgs.push_back(ReturnValueIndex);
131 TaintPropagationRule(
unsigned SArg1,
unsigned SArg2,
132 unsigned DArg,
bool TaintRet =
false) {
133 SrcArgs.push_back(SArg1);
134 SrcArgs.push_back(SArg2);
135 DstArgs.push_back(DArg);
137 DstArgs.push_back(ReturnValueIndex);
141 static TaintPropagationRule
// Appends argument index `A` to SrcArgs, the list of indices that process()
// inspects for taint.
146 inline void addSrcArg(
unsigned A) { SrcArgs.push_back(A); }
// Appends argument index `A` to DstArgs, the list of indices that process()
// schedules for tainting. `A` may be the ReturnValueIndex sentinel.
147 inline void addDstArg(
unsigned A) { DstArgs.push_back(A); }
// Returns true when the rule has no source arguments (SrcArgs is empty).
// addSourcesPre() only applies a rule when this returns false.
149 inline bool isNull()
const {
return SrcArgs.empty(); }
151 inline bool isDestinationArgument(
unsigned ArgNum)
const {
152 return (std::find(DstArgs.begin(),
153 DstArgs.end(), ArgNum) != DstArgs.end());
156 static inline bool isTaintedOrPointsToTainted(
const Expr *E,
166 return (V && State->isTainted(*V));
// Out-of-class definitions for the two static const index sentinels declared
// inside GenericTaintChecker. The in-class declarations carry the
// initializers; these lines give the members storage.
// NOTE(review): likely required because the constants are passed to
// push_back(), which would bind them to a reference -- confirm.
176 const unsigned GenericTaintChecker::ReturnValueIndex;
177 const unsigned GenericTaintChecker::InvalidArgIndex;
// Diagnostic text that checkUncontrolledFormatString() passes to
// generateReportIfTainted() for the format-string argument of a call.
179 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
180 "Untrusted data is used as a format string " 181 "(CWE-134: Uncontrolled Format String)";
// Diagnostic text that checkSystemCall() passes to generateReportIfTainted()
// for the selected argument of a call.
183 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
184 "Untrusted data is passed to a system call " 185 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
// Diagnostic text that checkTaintedBufferSize() passes to
// generateReportIfTainted() for the selected size argument of a call.
187 const char GenericTaintChecker::MsgTaintedBufferSize[] =
188 "Untrusted data is used to specify the buffer size " 189 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 190 "character data and the null terminator)";
200 GenericTaintChecker::TaintPropagationRule
201 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
209 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
210 .Case(
"atoi", TaintPropagationRule(0, ReturnValueIndex))
211 .Case(
"atol", TaintPropagationRule(0, ReturnValueIndex))
212 .Case(
"atoll", TaintPropagationRule(0, ReturnValueIndex))
213 .Case(
"getc", TaintPropagationRule(0, ReturnValueIndex))
214 .Case(
"fgetc", TaintPropagationRule(0, ReturnValueIndex))
215 .Case(
"getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
216 .Case(
"getw", TaintPropagationRule(0, ReturnValueIndex))
217 .Case(
"toupper", TaintPropagationRule(0, ReturnValueIndex))
218 .Case(
"tolower", TaintPropagationRule(0, ReturnValueIndex))
219 .Case(
"strchr", TaintPropagationRule(0, ReturnValueIndex))
220 .Case(
"strrchr", TaintPropagationRule(0, ReturnValueIndex))
221 .Case(
"read", TaintPropagationRule(0, 2, 1,
true))
222 .Case(
"pread", TaintPropagationRule(InvalidArgIndex, 1,
true))
223 .Case(
"gets", TaintPropagationRule(InvalidArgIndex, 0,
true))
224 .Case(
"fgets", TaintPropagationRule(2, 0,
true))
225 .Case(
"getline", TaintPropagationRule(2, 0))
226 .Case(
"getdelim", TaintPropagationRule(3, 0))
227 .Case(
"fgetln", TaintPropagationRule(0, ReturnValueIndex))
228 .
Default(TaintPropagationRule());
238 case Builtin::BImemcpy:
239 case Builtin::BImemmove:
240 case Builtin::BIstrncpy:
241 case Builtin::BIstrncat:
242 return TaintPropagationRule(1, 2, 0,
true);
243 case Builtin::BIstrlcpy:
244 case Builtin::BIstrlcat:
245 return TaintPropagationRule(1, 2, 0,
false);
246 case Builtin::BIstrndup:
247 return TaintPropagationRule(0, 1, ReturnValueIndex);
257 return TaintPropagationRule(InvalidArgIndex, 0,
true);
261 return TaintPropagationRule(1, 0,
true);
263 return TaintPropagationRule(0, 2, 1,
false);
266 return TaintPropagationRule(0, ReturnValueIndex);
268 return TaintPropagationRule(0, ReturnValueIndex);
275 return TaintPropagationRule();
278 void GenericTaintChecker::checkPreStmt(
const CallExpr *CE,
285 addSourcesPre(CE, C);
288 void GenericTaintChecker::checkPostStmt(
const CallExpr *CE,
290 if (propagateFromPre(CE, C))
292 addSourcesPost(CE, C);
295 void GenericTaintChecker::addSourcesPre(
const CallExpr *CE,
307 TaintPropagationRule Rule =
308 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
309 if (!Rule.isNull()) {
310 State = Rule.process(CE, C);
318 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
319 .Case(
"fscanf", &GenericTaintChecker::preFscanf)
323 State = (this->*evalFunction)(CE, C);
330 bool GenericTaintChecker::propagateFromPre(
const CallExpr *CE,
337 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
338 if (TaintArgs.isEmpty())
341 for (llvm::ImmutableSet<unsigned>::iterator
342 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
343 unsigned ArgNum = *I;
346 if (ArgNum == ReturnValueIndex) {
358 State = State->addTaint(*V);
362 State = State->remove<TaintArgsOnPostVisit>();
371 void GenericTaintChecker::addSourcesPost(
const CallExpr *CE,
382 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
383 .Case(
"scanf", &GenericTaintChecker::postScanf)
385 .Case(
"getchar", &GenericTaintChecker::postRetTaint)
386 .Case(
"getchar_unlocked", &GenericTaintChecker::postRetTaint)
387 .Case(
"getenv", &GenericTaintChecker::postRetTaint)
388 .Case(
"fopen", &GenericTaintChecker::postRetTaint)
389 .Case(
"fdopen", &GenericTaintChecker::postRetTaint)
390 .Case(
"freopen", &GenericTaintChecker::postRetTaint)
391 .Case(
"getch", &GenericTaintChecker::postRetTaint)
392 .Case(
"wgetch", &GenericTaintChecker::postRetTaint)
393 .Case(
"socket", &GenericTaintChecker::postSocket)
400 State = (this->*evalFunction)(CE, C);
407 bool GenericTaintChecker::checkPre(
const CallExpr *CE,
CheckerContext &C)
const{
409 if (checkUncontrolledFormatString(CE, C))
420 if (checkSystemCall(CE, Name, C))
423 if (checkTaintedBufferSize(CE, FDecl, C))
451 return State->getSVal(*AddrLoc, ValTy);
455 GenericTaintChecker::TaintPropagationRule::process(
const CallExpr *CE,
460 bool IsTainted =
false;
461 for (ArgVector::const_iterator I = SrcArgs.begin(),
462 E = SrcArgs.end(); I != E; ++I) {
463 unsigned ArgNum = *I;
465 if (ArgNum == InvalidArgIndex) {
468 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
469 if (isDestinationArgument(i))
471 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(i),
State, C)))
479 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(ArgNum),
State, C)))
486 for (ArgVector::const_iterator I = DstArgs.begin(),
487 E = DstArgs.end(); I != E; ++I) {
488 unsigned ArgNum = *I;
491 if (ArgNum == InvalidArgIndex) {
496 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
503 State = State->add<TaintArgsOnPostVisit>(i);
509 if (ArgNum == ReturnValueIndex) {
510 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
515 assert(ArgNum < CE->getNumArgs());
516 State = State->add<TaintArgsOnPostVisit>(ArgNum);
532 isStdin(CE->
getArg(0), C)) {
534 for (
unsigned int i = 2; i < CE->
getNumArgs(); ++i)
535 State = State->add<TaintArgsOnPostVisit>(i);
553 if (DomName.equals(
"AF_SYSTEM") || DomName.equals(
"AF_LOCAL") ||
554 DomName.equals(
"AF_UNIX") || DomName.equals(
"AF_RESERVED_36"))
567 for (
unsigned int i = 1; i < CE->
getNumArgs(); ++i) {
573 State = State->addTaint(*V);
591 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
605 if (
const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->
getDecl())) {
606 D = D->getCanonicalDecl();
607 if ((D->getName().find(
"stdin") != StringRef::npos) && D->isExternC())
609 dyn_cast<PointerType>(D->getType().getTypePtr()))
610 if (PtrTy->getPointeeType().getCanonicalType() ==
619 unsigned int &ArgNum) {
627 ArgNum = Format->getFormatIdx() - 1;
628 if ((Format->getType()->getName() ==
"printf") &&
634 if (C.
getCalleeName(CE).find(
"setproctitle") != StringRef::npos) {
642 bool GenericTaintChecker::generateReportIfTainted(
const Expr *E,
651 if (PointedToSVal && State->isTainted(*PointedToSVal))
652 TaintedSVal = *PointedToSVal;
661 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
663 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
670 bool GenericTaintChecker::checkUncontrolledFormatString(
const CallExpr *CE,
673 unsigned int ArgNum = 0;
678 return generateReportIfTainted(CE->
getArg(ArgNum),
679 MsgUncontrolledFormatString, C);
682 bool GenericTaintChecker::checkSystemCall(
const CallExpr *CE,
688 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
704 return generateReportIfTainted(CE->
getArg(ArgNum), MsgSanitizeSystemArgs, C);
709 bool GenericTaintChecker::checkTaintedBufferSize(
const CallExpr *CE,
713 unsigned ArgNum = InvalidArgIndex;
717 case Builtin::BImemcpy:
718 case Builtin::BImemmove:
719 case Builtin::BIstrncpy:
722 case Builtin::BIstrndup:
729 if (ArgNum == InvalidArgIndex) {
742 return ArgNum != InvalidArgIndex && CE->
getNumArgs() > ArgNum &&
743 generateReportIfTainted(CE->
getArg(ArgNum), MsgTaintedBufferSize, C);
Represents a function declaration or definition.
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
MemRegion - The root abstract class for all memory regions.
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph).
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
const ValueDecl * getDecl() const
The base class of the type hierarchy.
constexpr XRayInstrMask Function
Represents a variable declaration or definition.
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
SVal getSVal(const Stmt *S) const
Get the value of arbitrary expressions at this point in the path.
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
StringRef getCalleeName(const FunctionDecl *FunDecl) const
Get the name of the called function (path-sensitive).
bool isReferenceType() const
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
SymbolicRegion - A special, "non-concrete" region.
Expr - This represents one expression.
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
QualType getFILEType() const
Retrieve the C FILE type.
ExplodedNode * generateNonFatalErrorNode(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a transition to a node that will be used to report an error.
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Optional< T > getAs() const
Convert to the specified SVal type, returning None if this SVal is not of the desired type...
CHECKER * registerChecker(AT... Args)
Used to register checkers.
bool isConstQualified() const
Determine whether this type is const-qualified.
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
QualType getCanonicalType() const
const TypedValueRegion * getRegion() const
Encodes a location in the source.
SymbolRef getSymbol() const
const MemRegion * getAsRegion() const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value...
constexpr XRayInstrMask None
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic expression.
A symbol representing the value stored at a MemRegion.
Dataflow Directional Tag Classes.
ASTContext & getASTContext()
StringRef getMacroNameOrSpelling(SourceLocation &Loc)
Depending on whether the location corresponds to a macro, return either the macro name or the token spelling.
const ProgramStateRef & getState() const
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
bool isPointerType() const
const LocationContext * getLocationContext() const
Defines enum values for all the target-independent builtin functions.
bool isUnknownOrUndef() const
Expr * IgnoreParens() LLVM_READONLY
IgnoreParens - Ignore parentheses.