27 using namespace clang;
31 class GenericTaintChecker :
public Checker< check::PostStmt<CallExpr>,
32 check::PreStmt<CallExpr> > {
/// Returns the address of a function-local static variable. Every call yields
/// the same pointer, so the value can serve as a stable identity for this
/// checker. NOTE(review): the consumer of the tag is not visible in this
/// listing -- confirm how it is used.
34 static void *getTag() {
static int Tag;
return &Tag; }
/// Sentinel argument index (UINT_MAX). In the visible code,
/// TaintPropagationRule::process compares source/destination entries against
/// it before looping over every argument of the call, and
/// checkTaintedBufferSize starts from it as "no argument selected".
41 static const unsigned InvalidArgIndex =
UINT_MAX;
/// Pseudo argument index (UINT_MAX - 1) used in destination lists in place of
/// a real argument position to refer to the call's return value; see the
/// `ArgNum == ReturnValueIndex` checks in process and propagateFromPre.
43 static const unsigned ReturnValueIndex =
UINT_MAX - 1;
45 mutable std::unique_ptr<BugType> BT;
46 inline void initBugType()
const {
48 BT.reset(
new BugType(
this,
"Use of Untrusted Data",
"Untrusted Data"));
82 static const char MsgUncontrolledFormatString[];
83 bool checkUncontrolledFormatString(
const CallExpr *CE,
89 static const char MsgSanitizeSystemArgs[];
95 static const char MsgTaintedBufferSize[];
100 bool generateReportIfTainted(
const Expr *
E,
const char Msg[],
105 class TaintBugVisitor
/// Constructs the visitor from the value \p V and stores it in the member of
/// the same name; VisitNode later queries program states for taint on it.
111 TaintBugVisitor(
const SVal V) : V(V) {}
/// Contributes the stored value V -- and nothing else -- to the folding-set
/// identifier \p ID.
112 void Profile(llvm::FoldingSetNodeID &
ID)
const override { ID.Add(V); }
114 std::shared_ptr<PathDiagnosticPiece> VisitNode(
const ExplodedNode *N,
131 struct TaintPropagationRule {
/// Creates a rule without adding any source or destination arguments. Such a
/// rule is what the lookup code returns when no entry matches, and callers
/// test for it with isNull() (assuming SrcArgs default-constructs empty).
138 TaintPropagationRule() {}
140 TaintPropagationRule(
unsigned SArg,
141 unsigned DArg,
bool TaintRet =
false) {
142 SrcArgs.push_back(SArg);
143 DstArgs.push_back(DArg);
145 DstArgs.push_back(ReturnValueIndex);
148 TaintPropagationRule(
unsigned SArg1,
unsigned SArg2,
149 unsigned DArg,
bool TaintRet =
false) {
150 SrcArgs.push_back(SArg1);
151 SrcArgs.push_back(SArg2);
152 DstArgs.push_back(DArg);
154 DstArgs.push_back(ReturnValueIndex);
158 static TaintPropagationRule
/// Appends argument index \p A to the rule's list of source arguments.
163 inline void addSrcArg(
unsigned A) { SrcArgs.push_back(A); }
/// Appends argument index \p A to the rule's list of destination arguments.
164 inline void addDstArg(
unsigned A) { DstArgs.push_back(A); }
/// Returns true when the rule has no source arguments. Destination arguments
/// are not consulted.
166 inline bool isNull()
const {
return SrcArgs.empty(); }
168 inline bool isDestinationArgument(
unsigned ArgNum)
const {
169 return (std::find(DstArgs.begin(),
170 DstArgs.end(), ArgNum) != DstArgs.end());
173 static inline bool isTaintedOrPointsToTainted(
const Expr *
E,
182 Optional<SVal> V = getPointedToSVal(C, E);
183 return (V && State->isTainted(*V));
// Namespace-scope definitions of the two static index constants. They carry no
// initializer here because the values (UINT_MAX - 1 and UINT_MAX) are supplied
// at the in-class declarations.
193 const unsigned GenericTaintChecker::ReturnValueIndex;
194 const unsigned GenericTaintChecker::InvalidArgIndex;
// Report text that checkUncontrolledFormatString passes to
// generateReportIfTainted. The adjacent literals are concatenated by the
// compiler into a single string.
196 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
197 "Untrusted data is used as a format string "
198 "(CWE-134: Uncontrolled Format String)";
// Report text that checkSystemCall passes to generateReportIfTainted. The
// adjacent literals are concatenated by the compiler into a single string.
200 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
201 "Untrusted data is passed to a system call "
202 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
// Report text that checkTaintedBufferSize passes to generateReportIfTainted.
// The adjacent literals are concatenated by the compiler into a single string.
204 const char GenericTaintChecker::MsgTaintedBufferSize[] =
205 "Untrusted data is used to specify the buffer size "
206 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
207 "character data and the null terminator)";
218 GenericTaintChecker::TaintBugVisitor::VisitNode(const
ExplodedNode *N,
222 if (!N->getState()->isTainted(V) || PrevN->getState()->isTainted(V))
235 return std::make_shared<PathDiagnosticEventPiece>(
236 L,
"Taint originated here");
239 GenericTaintChecker::TaintPropagationRule
240 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
248 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(
Name)
249 .Case(
"atoi", TaintPropagationRule(0, ReturnValueIndex))
250 .Case(
"atol", TaintPropagationRule(0, ReturnValueIndex))
251 .Case(
"atoll", TaintPropagationRule(0, ReturnValueIndex))
252 .Case(
"getc", TaintPropagationRule(0, ReturnValueIndex))
253 .Case(
"fgetc", TaintPropagationRule(0, ReturnValueIndex))
254 .Case(
"getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
255 .Case(
"getw", TaintPropagationRule(0, ReturnValueIndex))
256 .Case(
"toupper", TaintPropagationRule(0, ReturnValueIndex))
257 .Case(
"tolower", TaintPropagationRule(0, ReturnValueIndex))
258 .Case(
"strchr", TaintPropagationRule(0, ReturnValueIndex))
259 .Case(
"strrchr", TaintPropagationRule(0, ReturnValueIndex))
260 .Case(
"read", TaintPropagationRule(0, 2, 1,
true))
261 .Case(
"pread", TaintPropagationRule(InvalidArgIndex, 1,
true))
262 .Case(
"gets", TaintPropagationRule(InvalidArgIndex, 0,
true))
263 .Case(
"fgets", TaintPropagationRule(2, 0,
true))
264 .Case(
"getline", TaintPropagationRule(2, 0))
265 .Case(
"getdelim", TaintPropagationRule(3, 0))
266 .Case(
"fgetln", TaintPropagationRule(0, ReturnValueIndex))
267 .
Default(TaintPropagationRule());
277 case Builtin::BImemcpy:
278 case Builtin::BImemmove:
279 case Builtin::BIstrncpy:
280 case Builtin::BIstrncat:
281 return TaintPropagationRule(1, 2, 0,
true);
282 case Builtin::BIstrlcpy:
283 case Builtin::BIstrlcat:
284 return TaintPropagationRule(1, 2, 0,
false);
285 case Builtin::BIstrndup:
286 return TaintPropagationRule(0, 1, ReturnValueIndex);
296 return TaintPropagationRule(InvalidArgIndex, 0,
true);
300 return TaintPropagationRule(1, 0,
true);
302 return TaintPropagationRule(0, 2, 1,
false);
305 return TaintPropagationRule(0, ReturnValueIndex);
307 return TaintPropagationRule(0, ReturnValueIndex);
314 return TaintPropagationRule();
317 void GenericTaintChecker::checkPreStmt(
const CallExpr *CE,
324 addSourcesPre(CE, C);
327 void GenericTaintChecker::checkPostStmt(
const CallExpr *CE,
329 if (propagateFromPre(CE, C))
331 addSourcesPost(CE, C);
334 void GenericTaintChecker::addSourcesPre(
const CallExpr *CE,
338 if (!FDecl || FDecl->
getKind() != Decl::Function)
346 TaintPropagationRule Rule =
347 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
348 if (!Rule.isNull()) {
349 State = Rule.process(CE, C);
357 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(
Name)
358 .Case(
"fscanf", &GenericTaintChecker::preFscanf)
362 State = (this->*evalFunction)(CE, C);
369 bool GenericTaintChecker::propagateFromPre(
const CallExpr *CE,
376 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
377 if (TaintArgs.isEmpty())
381 I = TaintArgs.begin(), E = TaintArgs.end();
I !=
E; ++
I) {
382 unsigned ArgNum = *
I;
385 if (ArgNum == ReturnValueIndex) {
395 Optional<SVal> V = getPointedToSVal(C, Arg);
397 State = State->addTaint(*V);
401 State = State->remove<TaintArgsOnPostVisit>();
410 void GenericTaintChecker::addSourcesPost(
const CallExpr *CE,
415 if (!FDecl || FDecl->
getKind() != Decl::Function)
421 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(
Name)
422 .Case(
"scanf", &GenericTaintChecker::postScanf)
424 .Case(
"getchar", &GenericTaintChecker::postRetTaint)
425 .Case(
"getchar_unlocked", &GenericTaintChecker::postRetTaint)
426 .Case(
"getenv", &GenericTaintChecker::postRetTaint)
427 .Case(
"fopen", &GenericTaintChecker::postRetTaint)
428 .Case(
"fdopen", &GenericTaintChecker::postRetTaint)
429 .Case(
"freopen", &GenericTaintChecker::postRetTaint)
430 .Case(
"getch", &GenericTaintChecker::postRetTaint)
431 .Case(
"wgetch", &GenericTaintChecker::postRetTaint)
432 .Case(
"socket", &GenericTaintChecker::postSocket)
439 State = (this->*evalFunction)(CE, C);
448 if (checkUncontrolledFormatString(CE, C))
452 if (!FDecl || FDecl->
getKind() != Decl::Function)
459 if (checkSystemCall(CE, Name, C))
462 if (checkTaintedBufferSize(CE, FDecl, C))
468 Optional<SVal> GenericTaintChecker::getPointedToSVal(
CheckerContext &C,
475 Optional<Loc> AddrLoc = AddrVal.
getAs<
Loc>();
485 GenericTaintChecker::TaintPropagationRule::process(
const CallExpr *CE,
490 bool IsTainted =
false;
491 for (ArgVector::const_iterator
I = SrcArgs.begin(),
492 E = SrcArgs.end();
I !=
E; ++
I) {
493 unsigned ArgNum = *
I;
495 if (ArgNum == InvalidArgIndex) {
498 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
499 if (isDestinationArgument(i))
501 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(i),
State, C)))
509 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(ArgNum),
State, C)))
516 for (ArgVector::const_iterator
I = DstArgs.begin(),
517 E = DstArgs.end();
I !=
E; ++
I) {
518 unsigned ArgNum = *
I;
521 if (ArgNum == InvalidArgIndex) {
526 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
533 State = State->add<TaintArgsOnPostVisit>(i);
539 if (ArgNum == ReturnValueIndex) {
540 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
545 assert(ArgNum < CE->getNumArgs());
546 State = State->add<TaintArgsOnPostVisit>(ArgNum);
562 isStdin(CE->
getArg(0), C)) {
564 for (
unsigned int i = 2; i < CE->
getNumArgs(); ++i)
565 State = State->add<TaintArgsOnPostVisit>(i);
583 if (DomName.equals(
"AF_SYSTEM") || DomName.equals(
"AF_LOCAL") ||
584 DomName.equals(
"AF_UNIX") || DomName.equals(
"AF_RESERVED_36"))
597 for (
unsigned int i = 1; i < CE->
getNumArgs(); ++i) {
601 Optional<SVal> V = getPointedToSVal(C, Arg);
603 State = State->addTaint(*V);
621 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
635 if (
const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->
getDecl())) {
636 D = D->getCanonicalDecl();
637 if ((D->getName().find(
"stdin") != StringRef::npos) && D->isExternC())
639 dyn_cast<PointerType>(D->getType().getTypePtr()))
648 unsigned int &ArgNum) {
656 ArgNum = Format->getFormatIdx() - 1;
657 if ((Format->getType()->getName() ==
"printf") &&
663 if (C.
getCalleeName(CE).find(
"setproctitle") != StringRef::npos) {
671 bool GenericTaintChecker::generateReportIfTainted(
const Expr *E,
678 Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
680 if (PointedToSVal && State->isTainted(*PointedToSVal))
681 TaintedSVal = *PointedToSVal;
690 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
692 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
699 bool GenericTaintChecker::checkUncontrolledFormatString(
const CallExpr *CE,
702 unsigned int ArgNum = 0;
707 return generateReportIfTainted(CE->
getArg(ArgNum),
708 MsgUncontrolledFormatString, C);
711 bool GenericTaintChecker::checkSystemCall(
const CallExpr *CE,
717 unsigned ArgNum = llvm::StringSwitch<unsigned>(
Name)
733 return generateReportIfTainted(CE->
getArg(ArgNum), MsgSanitizeSystemArgs, C);
738 bool GenericTaintChecker::checkTaintedBufferSize(
const CallExpr *CE,
742 unsigned ArgNum = InvalidArgIndex;
746 case Builtin::BImemcpy:
747 case Builtin::BImemmove:
748 case Builtin::BIstrncpy:
751 case Builtin::BIstrndup:
758 if (ArgNum == InvalidArgIndex) {
771 return ArgNum != InvalidArgIndex && CE->
getNumArgs() > ArgNum &&
772 generateReportIfTainted(CE->
getArg(ArgNum), MsgTaintedBufferSize, C);
FunctionDecl - An instance of this class is created to represent a function declaration or definition...
StringRef getCalleeName(const FunctionDecl *FunDecl) const
Get the name of the called function (path-sensitive).
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
MemRegion - The root abstract class for all memory regions.
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
const Decl * getDecl() const
Stmt - This represents one statement.
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph).
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
VarDecl - An instance of this class is created to represent a variable declaration or definition...
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
FullSourceLoc asLocation() const
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
bool isReferenceType() const
This class provides a convenience implementation for clone() using the Curiously-Recurring Template P...
SymbolRef getSymbol() const
bool isUnknownOrUndef() const
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
detail::InMemoryDirectory::const_iterator I
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
SymbolicRegion - A special, "non-concrete" region.
Expr - This represents one expression.
const ProgramStateRef & getState() const
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
Optional< T > getAs() const
Convert to the specified SVal type, returning None if this SVal is not of the desired type...
QualType getFILEType() const
Retrieve the C FILE type.
const TypedValueRegion * getRegion() const
ExplodedNode * generateNonFatalErrorNode(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a transition to a node that will be used to report an error.
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
static const Stmt * getStmt(const ExplodedNode *N)
Given an exploded node, retrieve the statement that should be used for the diagnostic location...
static PathDiagnosticLocation createBegin(const Decl *D, const SourceManager &SM)
Create a location for the beginning of the declaration.
CHECKER * registerChecker()
Used to register checkers.
Encodes a location in the source.
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
bool isValid() const
Return true if this is a valid SourceLocation object.
SVal - This represents a symbolic expression, which can be either an L-value or an R-value...
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
QualType getPointeeType() const
A symbol representing the value stored at a MemRegion.
ASTContext & getASTContext()
StringRef getMacroNameOrSpelling(SourceLocation &Loc)
Depending on whether the location corresponds to a macro, return either the macro name or the token sp...
detail::InMemoryDirectory::const_iterator E
const MemRegion * getAsRegion() const
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
QualType getCanonicalType() const
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
This class provides an interface through which checkers can create individual bug reports...
bool isConstQualified() const
Determine whether this type is const-qualified.
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
IgnoreParens - Ignore parentheses.
const LocationContext * getLocationContext() const
SVal getSVal(const Stmt *S) const
Get the value of arbitrary expressions at this point in the path.
bool isPointerType() const