27 #include <initializer_list> 30 using namespace clang;
32 using namespace taint;
35 class GenericTaintChecker
36 :
public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
38 static void *getTag() {
/// Post-statement callback for call expressions; it corresponds to the
/// check::PostStmt<CallExpr> entry in this checker's Checker<> base list.
/// The out-of-line definition calls propagateFromPre(CE, C).
43 void checkPostStmt(
const CallExpr *CE, CheckerContext &C)
const;
/// Pre-statement callback for call expressions; it corresponds to the
/// check::PreStmt<CallExpr> entry in this checker's Checker<> base list.
/// The out-of-line definition calls addSourcesPre(CE, C).
/// NOTE(review): confirm against the definition whether other steps run
/// before that call.
45 void checkPreStmt(
const CallExpr *CE, CheckerContext &C)
const;
48 const char *NL,
const char *Sep)
const override;
/// Sentinel argument index meaning "no argument". It is the default
/// VariadicIndex of a TaintPropagationRule and the initial value of ArgNum in
/// checkTaintedBufferSize, which only reports when ArgNum has been changed
/// to something else.
51 static const unsigned InvalidArgIndex =
UINT_MAX;
/// Pseudo argument index that stands for the call's return value. When
/// propagateFromPre finds this index in the TaintArgsOnPostVisit set it
/// taints the call expression itself instead of looking up an argument.
53 static const unsigned ReturnValueIndex =
UINT_MAX - 1;
/// Bug type used for every report this checker emits. It is declared mutable
/// because initBugType(), a const member function, assigns it via reset().
55 mutable std::unique_ptr<BugType> BT;
56 void initBugType()
const {
58 BT.reset(
new BugType(
this,
"Use of Untrusted Data",
"Untrusted Data"));
/// Runs the taint-related error checks on a call before it is evaluated.
/// The definition consults checkUncontrolledFormatString, checkSystemCall
/// and checkTaintedBufferSize, in that order.
/// NOTE(review): presumably returns true when one of them reported — confirm
/// against the definition.
63 bool checkPre(
const CallExpr *CE, CheckerContext &C)
const;
/// Looks up the TaintPropagationRule for the callee of \p CE (by declaration
/// and name) and, when the rule is not null, replaces the state with the
/// result of Rule.process(CE, C) and adds a transition to it.
66 void addSourcesPre(
const CallExpr *CE, CheckerContext &C)
const;
/// Applies the taint recorded in the TaintArgsOnPostVisit set of the current
/// state. For ReturnValueIndex the call expression itself is tainted; for
/// other indices the definition works from getPointedToSVal() of the
/// argument. The set is then removed from the state, and a transition is
/// added only if the resulting state differs from C.getState().
69 bool propagateFromPre(
const CallExpr *CE, CheckerContext &C)
const;
/// Tests whether \p E denotes the C standard input stream. The definition
/// follows the value of \p E through its symbolic region to the underlying
/// declaration and looks for an extern "C" variable whose name contains
/// "stdin" and whose type is a pointer to the AST context's FILE type.
73 static bool isStdin(
const Expr *E, CheckerContext &C);
/// Given an expression that evaluates to an address, returns the value stored
/// at that location, as read from the current state via getSVal().
/// NOTE(review): presumably yields None when the address is unknown,
/// undefined or not a Loc — confirm against the definition.
76 static Optional<SVal> getPointedToSVal(CheckerContext &C,
const Expr *Arg);
/// Diagnostic text for a tainted format string; defined out of line.
79 static const char MsgUncontrolledFormatString[];
/// Checks the format-string argument of \p CE for taint. On the return path
/// visible in the definition the result is that of
/// generateReportIfTainted() for that argument with
/// MsgUncontrolledFormatString.
80 bool checkUncontrolledFormatString(
const CallExpr *CE,
81 CheckerContext &C)
const;
/// Diagnostic text for tainted data reaching a system call; defined out of
/// line.
86 static const char MsgSanitizeSystemArgs[];
/// Checks a call to the function called \p Name for a tainted argument. The
/// definition selects the argument index with a StringSwitch over \p Name and
/// returns the result of generateReportIfTainted() for that argument with
/// MsgSanitizeSystemArgs.
87 bool checkSystemCall(
const CallExpr *CE, StringRef Name,
88 CheckerContext &C)
const;
92 static const char MsgTaintedBufferSize[];
94 CheckerContext &C)
const;
/// Emits a bug report with message \p Msg if \p E is tainted.
/// The definition first checks the value \p E points to and then \p E itself.
/// If either is tainted it generates a non-fatal error node, builds a
/// BugReport on *BT, attaches a TaintBugVisitor for the tainted value and
/// emits the report.
/// NOTE(review): presumably returns true exactly when a tainted value was
/// found — confirm against the definition.
97 bool generateReportIfTainted(
const Expr *E,
const char Msg[],
98 CheckerContext &C)
const;
111 struct TaintPropagationRule {
112 enum class VariadicType {
None, Src, Dst };
114 using PropagationFuncType =
bool (*)(
bool IsTainted,
const CallExpr *,
122 unsigned VariadicIndex;
125 VariadicType VarType;
128 PropagationFuncType PropagationFunc;
/// Builds a rule with no variadic arguments (VariadicIndex is
/// InvalidArgIndex, VarType is VariadicType::None) and no custom propagation
/// function. SrcArgs and DstArgs are not named in the initializer list.
/// getTaintPropagationRule() uses this constructor for its StringSwitch
/// default and for its final fallback return.
130 TaintPropagationRule()
131 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::
None),
132 PropagationFunc(nullptr) {}
134 TaintPropagationRule(std::initializer_list<unsigned> &&Src,
135 std::initializer_list<unsigned> &&Dst,
137 unsigned VarIndex = InvalidArgIndex,
138 PropagationFuncType Func =
nullptr)
139 : SrcArgs(
std::move(Src)), DstArgs(
std::move(Dst)),
140 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
143 static TaintPropagationRule
144 getTaintPropagationRule(
const FunctionDecl *FDecl, StringRef Name,
/// Appends \p A to SrcArgs, the argument indices that process() inspects for
/// existing taint.
147 void addSrcArg(
unsigned A) { SrcArgs.push_back(A); }
/// Appends \p A to DstArgs, the indices that process() adds to the
/// TaintArgsOnPostVisit set (ReturnValueIndex denotes the return value).
148 void addDstArg(
unsigned A) { DstArgs.push_back(A); }
150 bool isNull()
const {
151 return SrcArgs.empty() && DstArgs.empty() &&
155 bool isDestinationArgument(
unsigned ArgNum)
const {
156 return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
161 if (
isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
167 Optional<SVal>
V = getPointedToSVal(C, E);
176 static bool postSocket(
bool IsTainted,
const CallExpr *CE,
// Namespace-scope definitions of the static const members that are declared
// and initialized inside the class. Before C++17 such a definition is needed
// whenever the member is odr-used, e.g. bound to a reference.
181 const unsigned GenericTaintChecker::ReturnValueIndex;
182 const unsigned GenericTaintChecker::InvalidArgIndex;
// Report text that checkUncontrolledFormatString() passes to
// generateReportIfTainted().
184 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
185 "Untrusted data is used as a format string " 186 "(CWE-134: Uncontrolled Format String)";
// Report text that checkSystemCall() passes to generateReportIfTainted().
188 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
189 "Untrusted data is passed to a system call " 190 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
// Report text that checkTaintedBufferSize() passes to
// generateReportIfTainted().
192 const char GenericTaintChecker::MsgTaintedBufferSize[] =
193 "Untrusted data is used to specify the buffer size " 194 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 195 "for character data and the null terminator)";
205 GenericTaintChecker::TaintPropagationRule
206 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
207 const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
212 TaintPropagationRule Rule =
213 llvm::StringSwitch<TaintPropagationRule>(Name)
216 .Case(
"fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
217 .Case(
"fopen", TaintPropagationRule({}, {ReturnValueIndex}))
218 .Case(
"freopen", TaintPropagationRule({}, {ReturnValueIndex}))
219 .Case(
"getch", TaintPropagationRule({}, {ReturnValueIndex}))
220 .Case(
"getchar", TaintPropagationRule({}, {ReturnValueIndex}))
221 .Case(
"getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
222 .Case(
"getenv", TaintPropagationRule({}, {ReturnValueIndex}))
223 .Case(
"gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
224 .Case(
"scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
228 &TaintPropagationRule::postSocket))
229 .Case(
"wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
231 .Case(
"atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
232 .Case(
"atol", TaintPropagationRule({0}, {ReturnValueIndex}))
233 .Case(
"atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
234 .Case(
"fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
235 .Case(
"fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
236 .Case(
"fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
237 .Case(
"fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
238 .Case(
"getc", TaintPropagationRule({0}, {ReturnValueIndex}))
239 .Case(
"getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
240 .Case(
"getdelim", TaintPropagationRule({3}, {0}))
241 .Case(
"getline", TaintPropagationRule({2}, {0}))
242 .Case(
"getw", TaintPropagationRule({0}, {ReturnValueIndex}))
244 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
245 .Case(
"read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
246 .Case(
"strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
247 .Case(
"strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
248 .Case(
"tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
249 .Case(
"toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
250 .Default(TaintPropagationRule());
260 case Builtin::BImemcpy:
261 case Builtin::BImemmove:
262 case Builtin::BIstrncpy:
263 case Builtin::BIstrncat:
264 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
265 case Builtin::BIstrlcpy:
266 case Builtin::BIstrlcat:
267 return TaintPropagationRule({1, 2}, {0});
268 case Builtin::BIstrndup:
269 return TaintPropagationRule({0, 1}, {ReturnValueIndex});
277 if (C.isCLibraryFunction(FDecl,
"snprintf"))
278 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
280 else if (C.isCLibraryFunction(FDecl,
"sprintf"))
281 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
283 else if (C.isCLibraryFunction(FDecl,
"strcpy") ||
284 C.isCLibraryFunction(FDecl,
"stpcpy") ||
285 C.isCLibraryFunction(FDecl,
"strcat"))
286 return TaintPropagationRule({1}, {0, ReturnValueIndex});
287 else if (C.isCLibraryFunction(FDecl,
"bcopy"))
288 return TaintPropagationRule({0, 2}, {1});
289 else if (C.isCLibraryFunction(FDecl,
"strdup") ||
290 C.isCLibraryFunction(FDecl,
"strdupa"))
291 return TaintPropagationRule({0}, {ReturnValueIndex});
292 else if (C.isCLibraryFunction(FDecl,
"wcsdup"))
293 return TaintPropagationRule({0}, {ReturnValueIndex});
300 return TaintPropagationRule();
303 void GenericTaintChecker::checkPreStmt(
const CallExpr *CE,
304 CheckerContext &C)
const {
312 addSourcesPre(CE, C);
315 void GenericTaintChecker::checkPostStmt(
const CallExpr *CE,
316 CheckerContext &C)
const {
319 propagateFromPre(CE, C);
322 void GenericTaintChecker::printState(raw_ostream &Out,
ProgramStateRef State,
323 const char *NL,
const char *Sep)
const {
327 void GenericTaintChecker::addSourcesPre(
const CallExpr *CE,
328 CheckerContext &C)
const {
334 StringRef Name = C.getCalleeName(FDecl);
339 TaintPropagationRule Rule =
340 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
341 if (!Rule.isNull()) {
342 State = Rule.process(CE, C);
345 C.addTransition(State);
351 C.addTransition(State);
354 bool GenericTaintChecker::propagateFromPre(
const CallExpr *CE,
355 CheckerContext &C)
const {
361 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
362 if (TaintArgs.isEmpty())
365 for (
unsigned ArgNum : TaintArgs) {
367 if (ArgNum == ReturnValueIndex) {
368 State =
addTaint(State, CE, C.getLocationContext());
377 Optional<SVal>
V = getPointedToSVal(C, Arg);
383 State = State->remove<TaintArgsOnPostVisit>();
385 if (State != C.getState()) {
386 C.addTransition(State);
392 bool GenericTaintChecker::checkPre(
const CallExpr *CE,
393 CheckerContext &C)
const {
395 if (checkUncontrolledFormatString(CE, C))
402 StringRef Name = C.getCalleeName(FDecl);
406 if (checkSystemCall(CE, Name, C))
409 if (checkTaintedBufferSize(CE, FDecl, C))
415 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
419 if (AddrVal.isUnknownOrUndef())
422 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
435 ValTy = C.getASTContext().CharTy;
437 return State->getSVal(*AddrLoc, ValTy);
441 GenericTaintChecker::TaintPropagationRule::process(
const CallExpr *CE,
442 CheckerContext &C)
const {
446 bool IsTainted =
true;
447 for (
unsigned ArgNum : SrcArgs) {
450 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(ArgNum),
State, C)))
455 if (!IsTainted && VariadicType::Src == VarType) {
457 for (
unsigned int i = VariadicIndex;
i < CE->
getNumArgs(); ++
i) {
458 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(
i),
State, C)))
464 IsTainted = PropagationFunc(IsTainted, CE, C);
470 for (
unsigned ArgNum : DstArgs) {
472 if (ArgNum == ReturnValueIndex) {
473 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
478 assert(ArgNum < CE->getNumArgs());
479 State = State->add<TaintArgsOnPostVisit>(ArgNum);
483 if (VariadicType::Dst == VarType) {
488 for (
unsigned int i = VariadicIndex;
i < CE->
getNumArgs(); ++
i) {
495 State = State->add<TaintArgsOnPostVisit>(
i);
503 bool GenericTaintChecker::TaintPropagationRule::postSocket(
bool ,
507 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
509 if (DomName.equals(
"AF_SYSTEM") || DomName.equals(
"AF_LOCAL") ||
510 DomName.equals(
"AF_UNIX") || DomName.equals(
"AF_RESERVED_36"))
516 bool GenericTaintChecker::isStdin(
const Expr *E, CheckerContext &C) {
518 SVal Val = C.getSVal(E);
521 const MemRegion *MemReg = Val.getAsRegion();
524 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
529 const SymbolRegionValue *Sm =
530 dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
533 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
539 if (
const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
540 D = D->getCanonicalDecl();
541 if ((D->getName().find(
"stdin") != StringRef::npos) && D->isExternC()) {
542 const auto *PtrTy = dyn_cast<
PointerType>(D->getType().getTypePtr());
543 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
544 C.getASTContext().getFILEType().getCanonicalType())
552 const CheckerContext &C,
553 unsigned int &ArgNum) {
561 ArgNum = Format->getFormatIdx() - 1;
562 if ((Format->getType()->getName() ==
"printf") && CE->
getNumArgs() > ArgNum)
567 if (C.getCalleeName(CE).find(
"setproctitle") != StringRef::npos) {
575 bool GenericTaintChecker::generateReportIfTainted(
const Expr *E,
577 CheckerContext &C)
const {
582 Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
584 if (PointedToSVal &&
isTainted(State, *PointedToSVal))
585 TaintedSVal = *PointedToSVal;
586 else if (
isTainted(State, E, C.getLocationContext()))
587 TaintedSVal = C.getSVal(E);
592 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
594 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
596 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
597 C.emitReport(std::move(report));
603 bool GenericTaintChecker::checkUncontrolledFormatString(
604 const CallExpr *CE, CheckerContext &C)
const {
606 unsigned int ArgNum = 0;
612 return generateReportIfTainted(CE->
getArg(ArgNum),
613 MsgUncontrolledFormatString, C);
616 bool GenericTaintChecker::checkSystemCall(
const CallExpr *CE, StringRef Name,
617 CheckerContext &C)
const {
621 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
637 return generateReportIfTainted(CE->
getArg(ArgNum), MsgSanitizeSystemArgs, C);
642 bool GenericTaintChecker::checkTaintedBufferSize(
const CallExpr *CE,
644 CheckerContext &C)
const {
646 unsigned ArgNum = InvalidArgIndex;
650 case Builtin::BImemcpy:
651 case Builtin::BImemmove:
652 case Builtin::BIstrncpy:
655 case Builtin::BIstrndup:
662 if (ArgNum == InvalidArgIndex) {
663 if (C.isCLibraryFunction(FDecl,
"malloc") ||
664 C.isCLibraryFunction(FDecl,
"calloc") ||
665 C.isCLibraryFunction(FDecl,
"alloca"))
667 else if (C.isCLibraryFunction(FDecl,
"memccpy"))
669 else if (C.isCLibraryFunction(FDecl,
"realloc"))
671 else if (C.isCLibraryFunction(FDecl,
"bcopy"))
675 return ArgNum != InvalidArgIndex && CE->
getNumArgs() > ArgNum &&
676 generateReportIfTainted(CE->
getArg(ArgNum), MsgTaintedBufferSize, C);
679 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
680 mgr.registerChecker<GenericTaintChecker>();
683 bool ento::shouldRegisterGenericTaintChecker(
const LangOptions &LO) {
Represents a function declaration or definition.
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
constexpr XRayInstrMask Function
bool isReferenceType() const
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
This represents one expression.
bool isNull() const
Return true if this QualType doesn't point to a type yet.
bool isConstQualified() const
Determine whether this type is const-qualified.
QualType getCanonicalType() const
Encodes a location in the source.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
constexpr XRayInstrMask None
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
bool isPointerType() const
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point...