This file is a part of DataFlowSanitizer, a generalised dynamic data flow analysis.

#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Instrumentation.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

Functions
static StringRef	getGlobalTypeString (const GlobalValue &G)

static Value *	expandFromPrimitiveShadowRecursive (Value *Shadow, SmallVector< unsigned, 4 > &Indices, Type *SubShadowTy, Value *PrimitiveShadow, IRBuilder<> &IRB)

static AtomicOrdering	addAcquireOrdering (AtomicOrdering AO)

Value *	StripPointerGEPsAndCasts (Value *V)

static AtomicOrdering	addReleaseOrdering (AtomicOrdering AO)

static bool	isAMustTailRetVal (Value *RetVal)

Variables
static const Align	ShadowTLSAlignment = Align(2)

static const Align	MinOriginAlignment = Align(4)

static const unsigned	ArgTLSSize = 800

static const unsigned	RetvalTLSSize = 800

static cl::opt< bool >	ClPreserveAlignment ("dfsan-preserve-alignment", cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false))

static cl::list< std::string >	ClABIListFiles ("dfsan-abilist", cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden)

static cl::opt< bool >	ClCombinePointerLabelsOnLoad ("dfsan-combine-pointer-labels-on-load", cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory."), cl::Hidden, cl::init(true))

static cl::opt< bool >	ClCombinePointerLabelsOnStore ("dfsan-combine-pointer-labels-on-store", cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory."), cl::Hidden, cl::init(false))

static cl::opt< bool >	ClCombineOffsetLabelsOnGEP ("dfsan-combine-offset-labels-on-gep", cl::desc("Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic."), cl::Hidden, cl::init(true))

static cl::list< std::string >	ClCombineTaintLookupTables ("dfsan-combine-taint-lookup-table", cl::desc("When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables)."), cl::Hidden)

static cl::opt< bool >	ClDebugNonzeroLabels ("dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label"), cl::Hidden)

static cl::opt< bool >	ClEventCallbacks ("dfsan-event-callbacks", cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false))

static cl::opt< bool >	ClConditionalCallbacks ("dfsan-conditional-callbacks", cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, cl::init(false))

static cl::opt< bool >	ClReachesFunctionCallbacks ("dfsan-reaches-function-callbacks", cl::desc("Insert calls to callback functions on data reaching a function."), cl::Hidden, cl::init(false))

static cl::opt< bool >	ClTrackSelectControlFlow ("dfsan-track-select-control-flow", cl::desc("Propagate labels from condition values of select instructions " "to results."), cl::Hidden, cl::init(true))

static cl::opt< int >	ClInstrumentWithCallThreshold ("dfsan-instrument-with-call-threshold", cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(3500))

static cl::opt< int >	ClTrackOrigins ("dfsan-track-origins", cl::desc("Track origins of labels"), cl::Hidden, cl::init(0))

static cl::opt< bool >	ClIgnorePersonalityRoutine ("dfsan-ignore-personality-routine", cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it."), cl::Hidden, cl::init(false))

const MemoryMapParams	Linux_AArch64_MemoryMapParams

const MemoryMapParams	Linux_X86_64_MemoryMapParams

const MemoryMapParams	Linux_LoongArch64_MemoryMapParams

Detailed Description

This file is a part of DataFlowSanitizer, a generalised dynamic data flow analysis.

Unlike other Sanitizer tools, this tool is not designed to detect a specific class of bugs on its own. Instead, it provides a generic dynamic data flow analysis framework to be used by clients to help detect application-specific issues within their own code.

The analysis is based on automatic propagation of data flow labels (also known as taint labels) through a program as it performs computation.

Argument and return value labels are passed through TLS variables __dfsan_arg_tls and __dfsan_retval_tls.

Each byte of application memory is backed by a shadow memory byte. The shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then laid out as follows:

+--------------------+ 0x800000000000 (top of memory)
|    application 3   |
+--------------------+ 0x700000000000
|      invalid       |
+--------------------+ 0x610000000000
|      origin 1      |
+--------------------+ 0x600000000000
|    application 2   |
+--------------------+ 0x510000000000
|      shadow 1      |
+--------------------+ 0x500000000000
|      invalid       |
+--------------------+ 0x400000000000
|      origin 3      |
+--------------------+ 0x300000000000
|      shadow 3      |
+--------------------+ 0x200000000000
|      origin 2      |
+--------------------+ 0x110000000000
|      invalid       |
+--------------------+ 0x100000000000
|      shadow 2      |
+--------------------+ 0x010000000000
|    application 1   |
+--------------------+ 0x000000000000

MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000

For more information, please refer to the design document: http://clang.llvm.org/docs/DataFlowSanitizerDesign.html

Definition in file DataFlowSanitizer.cpp.

Function Documentation

◆ addAcquireOrdering()

static AtomicOrdering addAcquireOrdering ( AtomicOrdering AO )

static

Definition at line 2352 of file DataFlowSanitizer.cpp.

References llvm_unreachable.

◆ addReleaseOrdering()

static AtomicOrdering addReleaseOrdering ( AtomicOrdering AO )

static

Definition at line 2642 of file DataFlowSanitizer.cpp.

References llvm_unreachable.

◆ expandFromPrimitiveShadowRecursive()

static Value * expandFromPrimitiveShadowRecursive	(	Value *	Shadow,
		SmallVector< unsigned, 4 > &	Indices,
		Type *	SubShadowTy,
		Value *	PrimitiveShadow,
		IRBuilder<> &	IRB
	)

static

Definition at line 948 of file DataFlowSanitizer.cpp.

References llvm::IRBuilderBase::CreateInsertValue(), expandFromPrimitiveShadowRecursive(), Idx, llvm_unreachable, llvm::SmallVectorTemplateBase< T, bool >::pop_back(), and llvm::SmallVectorTemplateBase< T, bool >::push_back().

Referenced by expandFromPrimitiveShadowRecursive().

◆ getGlobalTypeString()

static StringRef getGlobalTypeString ( const GlobalValue & G )

static

Definition at line 264 of file DataFlowSanitizer.cpp.

References G.

◆ isAMustTailRetVal()

static bool isAMustTailRetVal ( Value * RetVal )

static

Definition at line 2969 of file DataFlowSanitizer.cpp.

References I.

◆ StripPointerGEPsAndCasts()

Value * StripPointerGEPsAndCasts ( Value * V )

Definition at line 2369 of file DataFlowSanitizer.cpp.

References GEP, llvm::Operator::getOpcode(), and llvm::SmallPtrSetImpl< PtrType >::insert().

Variable Documentation

◆ ArgTLSSize

const unsigned ArgTLSSize = 800

static

Definition at line 129 of file DataFlowSanitizer.cpp.

◆ ClABIListFiles

cl::list< std::string > ClABIListFiles("dfsan-abilist", cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden)	(	"dfsan-abilist"	,
		cl::desc("File listing native ABI functions and how the pass treats them")	,
		cl::Hidden
	)

static

◆ ClCombineOffsetLabelsOnGEP

cl::opt< bool > ClCombineOffsetLabelsOnGEP("dfsan-combine-offset-labels-on-gep", cl::desc( "Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic."), cl::Hidden, cl::init(true))	(	"dfsan-combine-offset-labels-on-gep"	,
		cl::desc( "Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic.")	,
		cl::Hidden	,
		cl::init(true)
	)

static

◆ ClCombinePointerLabelsOnLoad

cl::opt< bool > ClCombinePointerLabelsOnLoad("dfsan-combine-pointer-labels-on-load", cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory."), cl::Hidden, cl::init(true))	(	"dfsan-combine-pointer-labels-on-load"	,
		cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory.")	,
		cl::Hidden	,
		cl::init(true)
	)

static

◆ ClCombinePointerLabelsOnStore

cl::opt< bool > ClCombinePointerLabelsOnStore("dfsan-combine-pointer-labels-on-store", cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory."), cl::Hidden, cl::init(false))	(	"dfsan-combine-pointer-labels-on-store"	,
		cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory.")	,
		cl::Hidden	,
		cl::init(false)
	)

static

◆ ClCombineTaintLookupTables

cl::list< std::string > ClCombineTaintLookupTables("dfsan-combine-taint-lookup-table", cl::desc( "When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables)."), cl::Hidden)	(	"dfsan-combine-taint-lookup-table"	,
		cl::desc( "When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables).")	,
		cl::Hidden
	)

static

◆ ClConditionalCallbacks

cl::opt< bool > ClConditionalCallbacks("dfsan-conditional-callbacks", cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, cl::init(false))	(	"dfsan-conditional-callbacks"	,
		cl::desc("Insert calls to callback functions on conditionals.")	,
		cl::Hidden	,
		cl::init(false)
	)

static

◆ ClDebugNonzeroLabels

cl::opt< bool > ClDebugNonzeroLabels("dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label"), cl::Hidden)	(	"dfsan-debug-nonzero-labels"	,
		cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label")	,
		cl::Hidden
	)

static

◆ ClEventCallbacks

cl::opt< bool > ClEventCallbacks("dfsan-event-callbacks", cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false))	(	"dfsan-event-callbacks"	,
		cl::desc("Insert calls to __dfsan_*_callback functions on data events.")	,
		cl::Hidden	,
		cl::init(false)
	)

static

◆ ClIgnorePersonalityRoutine

cl::opt< bool > ClIgnorePersonalityRoutine("dfsan-ignore-personality-routine", cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it."), cl::Hidden, cl::init(false))	(	"dfsan-ignore-personality-routine"	,
		cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it.")	,
		cl::Hidden	,
		cl::init(false)
	)

static

◆ ClInstrumentWithCallThreshold

cl::opt< int > ClInstrumentWithCallThreshold("dfsan-instrument-with-call-threshold", cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(3500))	(	"dfsan-instrument-with-call-threshold"	,
		cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks).")	,
		cl::Hidden	,
		cl::init(3500)
	)

static

◆ ClPreserveAlignment

cl::opt< bool > ClPreserveAlignment("dfsan-preserve-alignment", cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false))	(	"dfsan-preserve-alignment"	,
		cl::desc("respect alignment requirements provided by input IR")	,
		cl::Hidden	,
		cl::init(false)
	)

static

◆ ClReachesFunctionCallbacks

cl::opt< bool > ClReachesFunctionCallbacks("dfsan-reaches-function-callbacks", cl::desc("Insert calls to callback functions on data reaching a function."), cl::Hidden, cl::init(false))	(	"dfsan-reaches-function-callbacks"	,
		cl::desc("Insert calls to callback functions on data reaching a function.")	,
		cl::Hidden	,
		cl::init(false)
	)

static

◆ ClTrackOrigins

cl::opt< int > ClTrackOrigins("dfsan-track-origins", cl::desc("Track origins of labels"), cl::Hidden, cl::init(0))	(	"dfsan-track-origins"	,
		cl::desc("Track origins of labels")	,
		cl::Hidden	,
		cl::init(0)
	)

static

◆ ClTrackSelectControlFlow

cl::opt< bool > ClTrackSelectControlFlow("dfsan-track-select-control-flow", cl::desc("Propagate labels from condition values of select instructions " "to results."), cl::Hidden, cl::init(true))	(	"dfsan-track-select-control-flow"	,
		cl::desc("Propagate labels from condition values of select instructions " "to results.")	,
		cl::Hidden	,
		cl::init(true)
	)

static

◆ Linux_AArch64_MemoryMapParams

const MemoryMapParams Linux_AArch64_MemoryMapParams

Initial value:

= {
    0,               
    0x0B00000000000, 
    0,               
    0x0200000000000, 
}

Definition at line 292 of file DataFlowSanitizer.cpp.

◆ Linux_LoongArch64_MemoryMapParams

const MemoryMapParams Linux_LoongArch64_MemoryMapParams

Initial value:

= {
    0,              
    0x500000000000, 
    0,              
    0x100000000000, 
}

Definition at line 309 of file DataFlowSanitizer.cpp.

◆ Linux_X86_64_MemoryMapParams

const MemoryMapParams Linux_X86_64_MemoryMapParams

Initial value:

= {
    0,              
    0x500000000000, 
    0,              
    0x100000000000, 
}

Definition at line 300 of file DataFlowSanitizer.cpp.

◆ MinOriginAlignment

const Align MinOriginAlignment = Align(4)

static

Definition at line 125 of file DataFlowSanitizer.cpp.

◆ RetvalTLSSize

const unsigned RetvalTLSSize = 800

static

Definition at line 130 of file DataFlowSanitizer.cpp.

◆ ShadowTLSAlignment

const Align ShadowTLSAlignment = Align(2)

static

Definition at line 123 of file DataFlowSanitizer.cpp.

Functions

Variables

Detailed Description

Function Documentation

◆ addAcquireOrdering()

◆ addReleaseOrdering()

◆ expandFromPrimitiveShadowRecursive()

◆ getGlobalTypeString()

◆ isAMustTailRetVal()

◆ StripPointerGEPsAndCasts()

Variable Documentation

◆ ArgTLSSize

◆ ClABIListFiles

◆ ClCombineOffsetLabelsOnGEP

◆ ClCombinePointerLabelsOnLoad

◆ ClCombinePointerLabelsOnStore

◆ ClCombineTaintLookupTables

◆ ClConditionalCallbacks

◆ ClDebugNonzeroLabels

◆ ClEventCallbacks

◆ ClIgnorePersonalityRoutine

◆ ClInstrumentWithCallThreshold

◆ ClPreserveAlignment

◆ ClReachesFunctionCallbacks

◆ ClTrackOrigins

◆ ClTrackSelectControlFlow

◆ Linux_AArch64_MemoryMapParams

◆ Linux_LoongArch64_MemoryMapParams

◆ Linux_X86_64_MemoryMapParams

◆ MinOriginAlignment

◆ RetvalTLSSize

◆ ShadowTLSAlignment