LLVM 22.0.0git
DataFlowSanitizer.cpp
Go to the documentation of this file.
1//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
11/// analysis.
12///
13/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
14/// class of bugs on its own. Instead, it provides a generic dynamic data flow
15/// analysis framework to be used by clients to help detect application-specific
16/// issues within their own code.
17///
18/// The analysis is based on automatic propagation of data flow labels (also
19/// known as taint labels) through a program as it performs computation.
20///
21/// Argument and return value labels are passed through TLS variables
22/// __dfsan_arg_tls and __dfsan_retval_tls.
23///
24/// Each byte of application memory is backed by a shadow memory byte. The
25/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
26/// laid out as follows:
27///
28/// +--------------------+ 0x800000000000 (top of memory)
29/// | application 3 |
30/// +--------------------+ 0x700000000000
31/// | invalid |
32/// +--------------------+ 0x610000000000
33/// | origin 1 |
34/// +--------------------+ 0x600000000000
35/// | application 2 |
36/// +--------------------+ 0x510000000000
37/// | shadow 1 |
38/// +--------------------+ 0x500000000000
39/// | invalid |
40/// +--------------------+ 0x400000000000
41/// | origin 3 |
42/// +--------------------+ 0x300000000000
43/// | shadow 3 |
44/// +--------------------+ 0x200000000000
45/// | origin 2 |
46/// +--------------------+ 0x110000000000
47/// | invalid |
48/// +--------------------+ 0x100000000000
49/// | shadow 2 |
50/// +--------------------+ 0x010000000000
51/// | application 1 |
52/// +--------------------+ 0x000000000000
53///
54/// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
55/// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
56///
57/// For more information, please refer to the design document:
58/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
59//
60//===----------------------------------------------------------------------===//
61
63#include "llvm/ADT/DenseMap.h"
64#include "llvm/ADT/DenseSet.h"
68#include "llvm/ADT/StringRef.h"
69#include "llvm/ADT/StringSet.h"
70#include "llvm/ADT/iterator.h"
75#include "llvm/IR/Argument.h"
77#include "llvm/IR/Attributes.h"
78#include "llvm/IR/BasicBlock.h"
79#include "llvm/IR/Constant.h"
80#include "llvm/IR/Constants.h"
81#include "llvm/IR/DataLayout.h"
83#include "llvm/IR/Dominators.h"
84#include "llvm/IR/Function.h"
85#include "llvm/IR/GlobalAlias.h"
86#include "llvm/IR/GlobalValue.h"
88#include "llvm/IR/IRBuilder.h"
89#include "llvm/IR/InstVisitor.h"
90#include "llvm/IR/InstrTypes.h"
91#include "llvm/IR/Instruction.h"
94#include "llvm/IR/MDBuilder.h"
95#include "llvm/IR/Module.h"
96#include "llvm/IR/PassManager.h"
97#include "llvm/IR/Type.h"
98#include "llvm/IR/User.h"
99#include "llvm/IR/Value.h"
101#include "llvm/Support/Casting.h"
110#include <algorithm>
111#include <cassert>
112#include <cstddef>
113#include <cstdint>
114#include <memory>
115#include <set>
116#include <string>
117#include <utility>
118#include <vector>
119
120using namespace llvm;
121
122// This must be consistent with ShadowWidthBits.
124
126
127// The size of TLS variables. These constants must be kept in sync with the ones
128// in dfsan.cpp.
129static const unsigned ArgTLSSize = 800;
130static const unsigned RetvalTLSSize = 800;
131
132// The -dfsan-preserve-alignment flag controls whether this pass assumes that
133// alignment requirements provided by the input IR are correct. For example,
134// if the input IR contains a load with alignment 8, this flag will cause
135// the shadow load to have alignment 16. This flag is disabled by default as
136// we have unfortunately encountered too much code (including Clang itself;
137// see PR14291) which performs misaligned access.
139 "dfsan-preserve-alignment",
140 cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
141 cl::init(false));
142
143// The ABI list files control how shadow parameters are passed. The pass treats
144// every function labelled "uninstrumented" in the ABI list file as conforming
145// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
146// additional annotations for those functions, a call to one of those functions
147// will produce a warning message, as the labelling behaviour of the function is
148// unknown. The other supported annotations for uninstrumented functions are
149// "functional" and "discard", which are described below under
150// DataFlowSanitizer::WrapperKind.
151// Functions will often be labelled with both "uninstrumented" and one of
152// "functional" or "discard". This will leave the function unchanged by this
153// pass, and create a wrapper function that will call the original.
154//
155// Instrumented functions can also be annotated as "force_zero_labels", which
156// will make all shadow and return values set zero labels.
157// Functions should never be labelled with both "force_zero_labels" and
158// "uninstrumented" or any of the unistrumented wrapper kinds.
160 "dfsan-abilist",
161 cl::desc("File listing native ABI functions and how the pass treats them"),
162 cl::Hidden);
163
164// Controls whether the pass includes or ignores the labels of pointers in load
165// instructions.
167 "dfsan-combine-pointer-labels-on-load",
168 cl::desc("Combine the label of the pointer with the label of the data when "
169 "loading from memory."),
170 cl::Hidden, cl::init(true));
171
172// Controls whether the pass includes or ignores the labels of pointers in
173// stores instructions.
175 "dfsan-combine-pointer-labels-on-store",
176 cl::desc("Combine the label of the pointer with the label of the data when "
177 "storing in memory."),
178 cl::Hidden, cl::init(false));
179
180// Controls whether the pass propagates labels of offsets in GEP instructions.
182 "dfsan-combine-offset-labels-on-gep",
183 cl::desc(
184 "Combine the label of the offset with the label of the pointer when "
185 "doing pointer arithmetic."),
186 cl::Hidden, cl::init(true));
187
189 "dfsan-combine-taint-lookup-table",
190 cl::desc(
191 "When dfsan-combine-offset-labels-on-gep and/or "
192 "dfsan-combine-pointer-labels-on-load are false, this flag can "
193 "be used to re-enable combining offset and/or pointer taint when "
194 "loading specific constant global variables (i.e. lookup tables)."),
195 cl::Hidden);
196
198 "dfsan-debug-nonzero-labels",
199 cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
200 "load or return with a nonzero label"),
201 cl::Hidden);
202
203// Experimental feature that inserts callbacks for certain data events.
204// Currently callbacks are only inserted for loads, stores, memory transfers
205// (i.e. memcpy and memmove), and comparisons.
206//
207// If this flag is set to true, the user must provide definitions for the
208// following callback functions:
209// void __dfsan_load_callback(dfsan_label Label, void* addr);
210// void __dfsan_store_callback(dfsan_label Label, void* addr);
211// void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
212// void __dfsan_cmp_callback(dfsan_label CombinedLabel);
214 "dfsan-event-callbacks",
215 cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
216 cl::Hidden, cl::init(false));
217
218// Experimental feature that inserts callbacks for conditionals, including:
219// conditional branch, switch, select.
220// This must be true for dfsan_set_conditional_callback() to have effect.
222 "dfsan-conditional-callbacks",
223 cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
224 cl::init(false));
225
226// Experimental feature that inserts callbacks for data reaching a function,
227// either via function arguments and loads.
228// This must be true for dfsan_set_reaches_function_callback() to have effect.
230 "dfsan-reaches-function-callbacks",
231 cl::desc("Insert calls to callback functions on data reaching a function."),
232 cl::Hidden, cl::init(false));
233
234// Controls whether the pass tracks the control flow of select instructions.
236 "dfsan-track-select-control-flow",
237 cl::desc("Propagate labels from condition values of select instructions "
238 "to results."),
239 cl::Hidden, cl::init(true));
240
241// TODO: This default value follows MSan. DFSan may use a different value.
243 "dfsan-instrument-with-call-threshold",
244 cl::desc("If the function being instrumented requires more than "
245 "this number of origin stores, use callbacks instead of "
246 "inline checks (-1 means never use callbacks)."),
247 cl::Hidden, cl::init(3500));
248
249// Controls how to track origins.
250// * 0: do not track origins.
251// * 1: track origins at memory store operations.
252// * 2: track origins at memory load and store operations.
253// TODO: track callsites.
254static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
255 cl::desc("Track origins of labels"),
256 cl::Hidden, cl::init(0));
257
259 "dfsan-ignore-personality-routine",
260 cl::desc("If a personality routine is marked uninstrumented from the ABI "
261 "list, do not create a wrapper for it."),
262 cl::Hidden, cl::init(false));
263
265 "dfsan-add-global-name-suffix",
266 cl::desc("Whether to add .dfsan suffix to global names"), cl::Hidden,
267 cl::init(true));
268
270 // Types of GlobalVariables are always pointer types.
271 Type *GType = G.getValueType();
272 // For now we support excluding struct types only.
273 if (StructType *SGType = dyn_cast<StructType>(GType)) {
274 if (!SGType->isLiteral())
275 return SGType->getName();
276 }
277 return "<unknown type>";
278}
279
280namespace {
281
282// Memory map parameters used in application-to-shadow address calculation.
283// Offset = (Addr & ~AndMask) ^ XorMask
284// Shadow = ShadowBase + Offset
285// Origin = (OriginBase + Offset) & ~3ULL
286struct MemoryMapParams {
287 uint64_t AndMask;
288 uint64_t XorMask;
289 uint64_t ShadowBase;
290 uint64_t OriginBase;
291};
292
293} // end anonymous namespace
294
295// NOLINTBEGIN(readability-identifier-naming)
296// aarch64 Linux
297const MemoryMapParams Linux_AArch64_MemoryMapParams = {
298 0, // AndMask (not used)
299 0x0B00000000000, // XorMask
300 0, // ShadowBase (not used)
301 0x0200000000000, // OriginBase
302};
303
304// x86_64 Linux
305const MemoryMapParams Linux_X86_64_MemoryMapParams = {
306 0, // AndMask (not used)
307 0x500000000000, // XorMask
308 0, // ShadowBase (not used)
309 0x100000000000, // OriginBase
310};
311// NOLINTEND(readability-identifier-naming)
312
313// loongarch64 Linux
314const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
315 0, // AndMask (not used)
316 0x500000000000, // XorMask
317 0, // ShadowBase (not used)
318 0x100000000000, // OriginBase
319};
320
321namespace {
322
323class DFSanABIList {
324 std::unique_ptr<SpecialCaseList> SCL;
325
326public:
327 DFSanABIList() = default;
328
329 void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
330
331 /// Returns whether either this function or its source file are listed in the
332 /// given category.
333 bool isIn(const Function &F, StringRef Category) const {
334 return isIn(*F.getParent(), Category) ||
335 SCL->inSection("dataflow", "fun", F.getName(), Category);
336 }
337
338 /// Returns whether this global alias is listed in the given category.
339 ///
340 /// If GA aliases a function, the alias's name is matched as a function name
341 /// would be. Similarly, aliases of globals are matched like globals.
342 bool isIn(const GlobalAlias &GA, StringRef Category) const {
343 if (isIn(*GA.getParent(), Category))
344 return true;
345
347 return SCL->inSection("dataflow", "fun", GA.getName(), Category);
348
349 return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
350 SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
351 Category);
352 }
353
354 /// Returns whether this module is listed in the given category.
355 bool isIn(const Module &M, StringRef Category) const {
356 return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
357 }
358};
359
360/// TransformedFunction is used to express the result of transforming one
361/// function type into another. This struct is immutable. It holds metadata
362/// useful for updating calls of the old function to the new type.
363struct TransformedFunction {
364 TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
365 const std::vector<unsigned> &ArgumentIndexMapping)
366 : OriginalType(OriginalType), TransformedType(TransformedType),
367 ArgumentIndexMapping(ArgumentIndexMapping) {}
368
369 // Disallow copies.
370 TransformedFunction(const TransformedFunction &) = delete;
371 TransformedFunction &operator=(const TransformedFunction &) = delete;
372
373 // Allow moves.
374 TransformedFunction(TransformedFunction &&) = default;
375 TransformedFunction &operator=(TransformedFunction &&) = default;
376
377 /// Type of the function before the transformation.
378 FunctionType *OriginalType;
379
380 /// Type of the function after the transformation.
381 FunctionType *TransformedType;
382
383 /// Transforming a function may change the position of arguments. This
384 /// member records the mapping from each argument's old position to its new
385 /// position. Argument positions are zero-indexed. If the transformation
386 /// from F to F' made the first argument of F into the third argument of F',
387 /// then ArgumentIndexMapping[0] will equal 2.
388 std::vector<unsigned> ArgumentIndexMapping;
389};
390
391/// Given function attributes from a call site for the original function,
392/// return function attributes appropriate for a call to the transformed
393/// function.
394AttributeList
395transformFunctionAttributes(const TransformedFunction &TransformedFunction,
396 LLVMContext &Ctx, AttributeList CallSiteAttrs) {
397
398 // Construct a vector of AttributeSet for each function argument.
399 std::vector<llvm::AttributeSet> ArgumentAttributes(
400 TransformedFunction.TransformedType->getNumParams());
401
402 // Copy attributes from the parameter of the original function to the
403 // transformed version. 'ArgumentIndexMapping' holds the mapping from
404 // old argument position to new.
405 for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
406 I < IE; ++I) {
407 unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
408 ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
409 }
410
411 // Copy annotations on varargs arguments.
412 for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
413 IE = CallSiteAttrs.getNumAttrSets();
414 I < IE; ++I) {
415 ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
416 }
417
418 return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
419 CallSiteAttrs.getRetAttrs(),
420 llvm::ArrayRef(ArgumentAttributes));
421}
422
423class DataFlowSanitizer {
424 friend struct DFSanFunction;
425 friend class DFSanVisitor;
426
427 enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };
428
429 enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };
430
431 /// How should calls to uninstrumented functions be handled?
432 enum WrapperKind {
433 /// This function is present in an uninstrumented form but we don't know
434 /// how it should be handled. Print a warning and call the function anyway.
435 /// Don't label the return value.
436 WK_Warning,
437
438 /// This function does not write to (user-accessible) memory, and its return
439 /// value is unlabelled.
440 WK_Discard,
441
442 /// This function does not write to (user-accessible) memory, and the label
443 /// of its return value is the union of the label of its arguments.
444 WK_Functional,
445
446 /// Instead of calling the function, a custom wrapper __dfsw_F is called,
447 /// where F is the name of the function. This function may wrap the
448 /// original function or provide its own implementation. WK_Custom uses an
449 /// extra pointer argument to return the shadow. This allows the wrapped
450 /// form of the function type to be expressed in C.
451 WK_Custom
452 };
453
454 Module *Mod;
455 LLVMContext *Ctx;
456 Type *Int8Ptr;
457 IntegerType *OriginTy;
458 PointerType *OriginPtrTy;
459 ConstantInt *ZeroOrigin;
460 /// The shadow type for all primitive types and vector types.
461 IntegerType *PrimitiveShadowTy;
462 PointerType *PrimitiveShadowPtrTy;
463 IntegerType *IntptrTy;
464 ConstantInt *ZeroPrimitiveShadow;
465 Constant *ArgTLS;
466 ArrayType *ArgOriginTLSTy;
467 Constant *ArgOriginTLS;
468 Constant *RetvalTLS;
469 Constant *RetvalOriginTLS;
470 FunctionType *DFSanUnionLoadFnTy;
471 FunctionType *DFSanLoadLabelAndOriginFnTy;
472 FunctionType *DFSanUnimplementedFnTy;
473 FunctionType *DFSanWrapperExternWeakNullFnTy;
474 FunctionType *DFSanSetLabelFnTy;
475 FunctionType *DFSanNonzeroLabelFnTy;
476 FunctionType *DFSanVarargWrapperFnTy;
477 FunctionType *DFSanConditionalCallbackFnTy;
478 FunctionType *DFSanConditionalCallbackOriginFnTy;
479 FunctionType *DFSanReachesFunctionCallbackFnTy;
480 FunctionType *DFSanReachesFunctionCallbackOriginFnTy;
481 FunctionType *DFSanCmpCallbackFnTy;
482 FunctionType *DFSanLoadStoreCallbackFnTy;
483 FunctionType *DFSanMemTransferCallbackFnTy;
484 FunctionType *DFSanChainOriginFnTy;
485 FunctionType *DFSanChainOriginIfTaintedFnTy;
486 FunctionType *DFSanMemOriginTransferFnTy;
487 FunctionType *DFSanMemShadowOriginTransferFnTy;
488 FunctionType *DFSanMemShadowOriginConditionalExchangeFnTy;
489 FunctionType *DFSanMaybeStoreOriginFnTy;
490 FunctionCallee DFSanUnionLoadFn;
491 FunctionCallee DFSanLoadLabelAndOriginFn;
492 FunctionCallee DFSanUnimplementedFn;
493 FunctionCallee DFSanWrapperExternWeakNullFn;
494 FunctionCallee DFSanSetLabelFn;
495 FunctionCallee DFSanNonzeroLabelFn;
496 FunctionCallee DFSanVarargWrapperFn;
497 FunctionCallee DFSanLoadCallbackFn;
498 FunctionCallee DFSanStoreCallbackFn;
499 FunctionCallee DFSanMemTransferCallbackFn;
500 FunctionCallee DFSanConditionalCallbackFn;
501 FunctionCallee DFSanConditionalCallbackOriginFn;
502 FunctionCallee DFSanReachesFunctionCallbackFn;
503 FunctionCallee DFSanReachesFunctionCallbackOriginFn;
504 FunctionCallee DFSanCmpCallbackFn;
505 FunctionCallee DFSanChainOriginFn;
506 FunctionCallee DFSanChainOriginIfTaintedFn;
507 FunctionCallee DFSanMemOriginTransferFn;
508 FunctionCallee DFSanMemShadowOriginTransferFn;
509 FunctionCallee DFSanMemShadowOriginConditionalExchangeFn;
510 FunctionCallee DFSanMaybeStoreOriginFn;
511 SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
512 MDNode *ColdCallWeights;
513 MDNode *OriginStoreWeights;
514 DFSanABIList ABIList;
515 DenseMap<Value *, Function *> UnwrappedFnMap;
516 AttributeMask ReadOnlyNoneAttrs;
517 StringSet<> CombineTaintLookupTableNames;
518
519 /// Memory map parameters used in calculation mapping application addresses
520 /// to shadow addresses and origin addresses.
521 const MemoryMapParams *MapParams;
522
523 Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
524 Value *getShadowAddress(Value *Addr, BasicBlock::iterator Pos);
525 Value *getShadowAddress(Value *Addr, BasicBlock::iterator Pos,
526 Value *ShadowOffset);
527 std::pair<Value *, Value *> getShadowOriginAddress(Value *Addr,
528 Align InstAlignment,
530 bool isInstrumented(const Function *F);
531 bool isInstrumented(const GlobalAlias *GA);
532 bool isForceZeroLabels(const Function *F);
533 TransformedFunction getCustomFunctionType(FunctionType *T);
534 WrapperKind getWrapperKind(Function *F);
535 void addGlobalNameSuffix(GlobalValue *GV);
536 void buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, Function *F);
537 Function *buildWrapperFunction(Function *F, StringRef NewFName,
539 FunctionType *NewFT);
540 void initializeCallbackFunctions(Module &M);
541 void initializeRuntimeFunctions(Module &M);
542 bool initializeModule(Module &M);
543
544 /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
545 /// from it. Returns the origin's loaded value.
546 Value *loadNextOrigin(BasicBlock::iterator Pos, Align OriginAlign,
547 Value **OriginAddr);
548
549 /// Returns whether the given load byte size is amenable to inlined
550 /// optimization patterns.
551 bool hasLoadSizeForFastPath(uint64_t Size);
552
553 /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
554 bool shouldTrackOrigins();
555
556 /// Returns a zero constant with the shadow type of OrigTy.
557 ///
558 /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2,...}
559 /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
560 /// getZeroShadow(other type) = i16(0)
561 Constant *getZeroShadow(Type *OrigTy);
562 /// Returns a zero constant with the shadow type of V's type.
563 Constant *getZeroShadow(Value *V);
564
565 /// Checks if V is a zero shadow.
566 bool isZeroShadow(Value *V);
567
568 /// Returns the shadow type of OrigTy.
569 ///
570 /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
571 /// getShadowTy([n x T]) = [n x getShadowTy(T)]
572 /// getShadowTy(other type) = i16
573 Type *getShadowTy(Type *OrigTy);
574 /// Returns the shadow type of V's type.
575 Type *getShadowTy(Value *V);
576
577 const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;
578
579public:
580 DataFlowSanitizer(const std::vector<std::string> &ABIListFiles,
581 IntrusiveRefCntPtr<vfs::FileSystem> FS);
582
583 bool runImpl(Module &M,
584 llvm::function_ref<TargetLibraryInfo &(Function &)> GetTLI);
585};
586
587struct DFSanFunction {
588 DataFlowSanitizer &DFS;
589 Function *F;
590 DominatorTree DT;
591 bool IsNativeABI;
592 bool IsForceZeroLabels;
593 TargetLibraryInfo &TLI;
594 AllocaInst *LabelReturnAlloca = nullptr;
595 AllocaInst *OriginReturnAlloca = nullptr;
596 DenseMap<Value *, Value *> ValShadowMap;
597 DenseMap<Value *, Value *> ValOriginMap;
598 DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
599 DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;
600
601 struct PHIFixupElement {
602 PHINode *Phi;
603 PHINode *ShadowPhi;
604 PHINode *OriginPhi;
605 };
606 std::vector<PHIFixupElement> PHIFixups;
607
608 DenseSet<Instruction *> SkipInsts;
609 std::vector<Value *> NonZeroChecks;
610
611 struct CachedShadow {
612 BasicBlock *Block; // The block where Shadow is defined.
613 Value *Shadow;
614 };
615 /// Maps a value to its latest shadow value in terms of domination tree.
616 DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
617 /// Maps a value to its latest collapsed shadow value it was converted to in
618 /// terms of domination tree. When ClDebugNonzeroLabels is on, this cache is
619 /// used at a post process where CFG blocks are split. So it does not cache
620 /// BasicBlock like CachedShadows, but uses domination between values.
621 DenseMap<Value *, Value *> CachedCollapsedShadows;
622 DenseMap<Value *, std::set<Value *>> ShadowElements;
623
624 DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
625 bool IsForceZeroLabels, TargetLibraryInfo &TLI)
626 : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
627 IsForceZeroLabels(IsForceZeroLabels), TLI(TLI) {
628 DT.recalculate(*F);
629 }
630
631 /// Computes the shadow address for a given function argument.
632 ///
633 /// Shadow = ArgTLS+ArgOffset.
634 Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);
635
636 /// Computes the shadow address for a return value.
637 Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);
638
639 /// Computes the origin address for a given function argument.
640 ///
641 /// Origin = ArgOriginTLS[ArgNo].
642 Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);
643
644 /// Computes the origin address for a return value.
645 Value *getRetvalOriginTLS();
646
647 Value *getOrigin(Value *V);
648 void setOrigin(Instruction *I, Value *Origin);
649 /// Generates IR to compute the origin of the last operand with a taint label.
650 Value *combineOperandOrigins(Instruction *Inst);
651 /// Before the instruction Pos, generates IR to compute the last origin with a
652 /// taint label. Labels and origins are from vectors Shadows and Origins
653 /// correspondingly. The generated IR is like
654 /// Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
655 /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
656 /// zeros with other bitwidths.
657 Value *combineOrigins(const std::vector<Value *> &Shadows,
658 const std::vector<Value *> &Origins,
659 BasicBlock::iterator Pos, ConstantInt *Zero = nullptr);
660
661 Value *getShadow(Value *V);
662 void setShadow(Instruction *I, Value *Shadow);
663 /// Generates IR to compute the union of the two given shadows, inserting it
664 /// before Pos. The combined value is with primitive type.
665 Value *combineShadows(Value *V1, Value *V2, BasicBlock::iterator Pos);
666 /// Combines the shadow values of V1 and V2, then converts the combined value
667 /// with primitive type into a shadow value with the original type T.
668 Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
670 Value *combineOperandShadows(Instruction *Inst);
671
672 /// Generates IR to load shadow and origin corresponding to bytes [\p
673 /// Addr, \p Addr + \p Size), where addr has alignment \p
674 /// InstAlignment, and take the union of each of those shadows. The returned
675 /// shadow always has primitive type.
676 ///
677 /// When tracking loads is enabled, the returned origin is a chain at the
678 /// current stack if the returned shadow is tainted.
679 std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
680 Align InstAlignment,
682
683 void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
684 Align InstAlignment, Value *PrimitiveShadow,
685 Value *Origin, BasicBlock::iterator Pos);
686 /// Applies PrimitiveShadow to all primitive subtypes of T, returning
687 /// the expanded shadow value.
688 ///
689 /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
690 /// EFP([n x T], PS) = [n x EFP(T,PS)]
691 /// EFP(other types, PS) = PS
692 Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
694 /// Collapses Shadow into a single primitive shadow value, unioning all
695 /// primitive shadow values in the process. Returns the final primitive
696 /// shadow value.
697 ///
698 /// CTP({V1,V2, ...}) = UNION(CFP(V1,PS),CFP(V2,PS),...)
699 /// CTP([V1,V2,...]) = UNION(CFP(V1,PS),CFP(V2,PS),...)
700 /// CTP(other types, PS) = PS
701 Value *collapseToPrimitiveShadow(Value *Shadow, BasicBlock::iterator Pos);
702
703 void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
705
706 Align getShadowAlign(Align InstAlignment);
707
708 // If ClConditionalCallbacks is enabled, insert a callback after a given
709 // branch instruction using the given conditional expression.
710 void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);
711
712 // If ClReachesFunctionCallbacks is enabled, insert a callback for each
713 // argument and load instruction.
714 void addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, Instruction &I,
715 Value *Data);
716
717 bool isLookupTableConstant(Value *P);
718
719private:
720 /// Collapses the shadow with aggregate type into a single primitive shadow
721 /// value.
722 template <class AggregateType>
723 Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
724 IRBuilder<> &IRB);
725
726 Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);
727
728 /// Returns the shadow value of an argument A.
729 Value *getShadowForTLSArgument(Argument *A);
730
731 /// The fast path of loading shadows.
732 std::pair<Value *, Value *>
733 loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
734 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
736
737 Align getOriginAlign(Align InstAlignment);
738
739 /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
740 /// is __dfsan_load_label_and_origin. This function returns the union of all
741 /// labels and the origin of the first taint label. However this is an
742 /// additional call with many instructions. To ensure common cases are fast,
743 /// checks if it is possible to load labels and origins without using the
744 /// callback function.
745 ///
746 /// When enabling tracking load instructions, we always use
747 /// __dfsan_load_label_and_origin to reduce code size.
748 bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);
749
750 /// Returns a chain at the current stack with previous origin V.
751 Value *updateOrigin(Value *V, IRBuilder<> &IRB);
752
753 /// Returns a chain at the current stack with previous origin V if Shadow is
754 /// tainted.
755 Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);
756
757 /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
758 /// Origin otherwise.
759 Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);
760
761 /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
762 /// Size).
763 void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
764 uint64_t StoreOriginSize, Align Alignment);
765
766 /// Stores Origin in terms of its Shadow value.
767 /// * Do not write origins for zero shadows because we do not trace origins
768 /// for untainted sinks.
769 /// * Use __dfsan_maybe_store_origin if there are too many origin store
770 /// instrumentations.
771 void storeOrigin(BasicBlock::iterator Pos, Value *Addr, uint64_t Size,
772 Value *Shadow, Value *Origin, Value *StoreOriginAddr,
773 Align InstAlignment);
774
775 /// Convert a scalar value to an i1 by comparing with 0.
776 Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");
777
778 bool shouldInstrumentWithCall();
779
780 /// Generates IR to load shadow and origin corresponding to bytes [\p
781 /// Addr, \p Addr + \p Size), where addr has alignment \p
782 /// InstAlignment, and take the union of each of those shadows. The returned
783 /// shadow always has primitive type.
784 std::pair<Value *, Value *>
785 loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
786 Align InstAlignment,
788 int NumOriginStores = 0;
789};
790
// Instruction visitor that performs the per-instruction rewriting for an
// instrumented function. Each visitXXX method propagates taint for one IR
// construct, delegating to the shared per-function state in DFSF.
class DFSanVisitor : public InstVisitor<DFSanVisitor> {
public:
  // Per-function instrumentation state (shadow/origin bookkeeping).
  DFSanFunction &DFSF;

  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}

  const DataLayout &getDataLayout() const {
    return DFSF.F->getDataLayout();
  }

  // Combines shadow values and origins for all of I's operands.
  void visitInstOperands(Instruction &I);

  void visitUnaryOperator(UnaryOperator &UO);
  void visitBinaryOperator(BinaryOperator &BO);
  void visitBitCastInst(BitCastInst &BCI);
  void visitCastInst(CastInst &CI);
  void visitCmpInst(CmpInst &CI);
  void visitLandingPadInst(LandingPadInst &LPI);
  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
  void visitLoadInst(LoadInst &LI);
  void visitStoreInst(StoreInst &SI);
  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
  void visitReturnInst(ReturnInst &RI);
  // Calls to the __atomic_* libatomic entry points are matched by name and
  // handled specially rather than treated as ordinary calls.
  void visitLibAtomicLoad(CallBase &CB);
  void visitLibAtomicStore(CallBase &CB);
  void visitLibAtomicExchange(CallBase &CB);
  void visitLibAtomicCompareExchange(CallBase &CB);
  void visitCallBase(CallBase &CB);
  void visitPHINode(PHINode &PN);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitShuffleVectorInst(ShuffleVectorInst &I);
  void visitExtractValueInst(ExtractValueInst &I);
  void visitInsertValueInst(InsertValueInst &I);
  void visitAllocaInst(AllocaInst &I);
  void visitSelectInst(SelectInst &I);
  void visitMemSetInst(MemSetInst &I);
  void visitMemTransferInst(MemTransferInst &I);
  void visitBranchInst(BranchInst &BR);
  void visitSwitchInst(SwitchInst &SW);

private:
  // Shared implementation for atomicrmw and cmpxchg instrumentation.
  void visitCASOrRMW(Align InstAlignment, Instruction &I);

  // Returns false when this is an invoke of a custom function.
  bool visitWrappedCallBase(Function &F, CallBase &CB);

  // Combines origins for all of I's operands.
  void visitInstOperandOrigins(Instruction &I);

  // Appends the shadow arguments for a call to a custom-ABI function.
  void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  // Appends the origin arguments for a call to a custom-ABI function.
  void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB);
  Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB);
};
852
853bool LibAtomicFunction(const Function &F) {
854 // This is a bit of a hack because TargetLibraryInfo is a function pass.
855 // The DFSan pass would need to be refactored to be function pass oriented
856 // (like MSan is) in order to fit together nicely with TargetLibraryInfo.
857 // We need this check to prevent them from being instrumented, or wrapped.
858 // Match on name and number of arguments.
859 if (!F.hasName() || F.isVarArg())
860 return false;
861 switch (F.arg_size()) {
862 case 4:
863 return F.getName() == "__atomic_load" || F.getName() == "__atomic_store";
864 case 5:
865 return F.getName() == "__atomic_exchange";
866 case 6:
867 return F.getName() == "__atomic_compare_exchange";
868 default:
869 return false;
870 }
871}
872
873} // end anonymous namespace
874
// Constructs the pass: merges the caller-supplied ABI list files with those
// named by -dfsan-abilist, builds the combined special-case list, and records
// which lookup tables should combine taint on reads.
DataFlowSanitizer::DataFlowSanitizer(
    const std::vector<std::string> &ABIListFiles,
  // NOTE(review): ABIListFiles is bound by const reference, so this std::move
  // degenerates to a copy — confirm intent before "fixing".
  std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
  llvm::append_range(AllABIListFiles, ClABIListFiles);
  ABIList.set(SpecialCaseList::createOrDie(AllABIListFiles, *FS));

  CombineTaintLookupTableNames.insert_range(ClCombineTaintLookupTables);
}
884
885TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
886 SmallVector<Type *, 4> ArgTypes;
887
888 // Some parameters of the custom function being constructed are
889 // parameters of T. Record the mapping from parameters of T to
890 // parameters of the custom function, so that parameter attributes
891 // at call sites can be updated.
892 std::vector<unsigned> ArgumentIndexMapping;
893 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
894 Type *ParamType = T->getParamType(I);
895 ArgumentIndexMapping.push_back(ArgTypes.size());
896 ArgTypes.push_back(ParamType);
897 }
898 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
899 ArgTypes.push_back(PrimitiveShadowTy);
900 if (T->isVarArg())
901 ArgTypes.push_back(PrimitiveShadowPtrTy);
902 Type *RetType = T->getReturnType();
903 if (!RetType->isVoidTy())
904 ArgTypes.push_back(PrimitiveShadowPtrTy);
905
906 if (shouldTrackOrigins()) {
907 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
908 ArgTypes.push_back(OriginTy);
909 if (T->isVarArg())
910 ArgTypes.push_back(OriginPtrTy);
911 if (!RetType->isVoidTy())
912 ArgTypes.push_back(OriginPtrTy);
913 }
914
915 return TransformedFunction(
916 T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
917 ArgumentIndexMapping);
918}
919
// Returns true when V is statically known to be a zero shadow. For primitive
// (non-aggregate) shadow types this means V is the integer constant 0;
// non-constant values conservatively report false.
bool DataFlowSanitizer::isZeroShadow(Value *V) {
  Type *T = V->getType();
  if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
      return CI->isZero();
    return false;
  }

}
930
931bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
932 uint64_t ShadowSize = Size * ShadowWidthBytes;
933 return ShadowSize % 8 == 0 || ShadowSize == 4;
934}
935
936bool DataFlowSanitizer::shouldTrackOrigins() {
937 static const bool ShouldTrackOrigins = ClTrackOrigins;
938 return ShouldTrackOrigins;
939}
940
941Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
942 if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
943 return ZeroPrimitiveShadow;
944 Type *ShadowTy = getShadowTy(OrigTy);
945 return ConstantAggregateZero::get(ShadowTy);
946}
947
948Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
949 return getZeroShadow(V->getType());
950}
951
// Recursive helper for expandFromPrimitiveShadow: walks the aggregate shadow
// type SubShadowTy and inserts PrimitiveShadow at every primitive leaf
// position reachable through the current Indices path, returning the rebuilt
// shadow value.
    Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
    Value *PrimitiveShadow, IRBuilder<> &IRB) {
  // Primitive leaf: write the shadow directly at the current index path.
  if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
    return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);

  // Array: recurse into each element with its index appended to the path.
  if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
    for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
      Indices.push_back(Idx);
          Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
      Indices.pop_back();
    }
    return Shadow;
  }

  // Struct: recurse into each field with its index appended to the path.
  if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
    for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
      Indices.push_back(Idx);
          Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
      Indices.pop_back();
    }
    return Shadow;
  }
  llvm_unreachable("Unexpected shadow type");
}
979
980bool DFSanFunction::shouldInstrumentWithCall() {
981 return ClInstrumentWithCallThreshold >= 0 &&
982 NumOriginStores >= ClInstrumentWithCallThreshold;
983}
984
// Expands a primitive shadow into the (possibly aggregate) shadow type of T.
// Primitive targets are returned unchanged, and a zero primitive shadow
// expands to the aggregate zero-shadow constant without emitting IR.
Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
  Type *ShadowTy = DFS.getShadowTy(T);

  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return PrimitiveShadow;

  if (DFS.isZeroShadow(PrimitiveShadow))
    return DFS.getZeroShadow(ShadowTy);

  IRBuilder<> IRB(Pos->getParent(), Pos);
  SmallVector<unsigned, 4> Indices;
  Value *Shadow = UndefValue::get(ShadowTy);
  Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
                                              PrimitiveShadow, IRB);

  // Caches the primitive shadow value that built the shadow value.
  CachedCollapsedShadows[Shadow] = PrimitiveShadow;
  return Shadow;
}
1005
1006template <class AggregateType>
1007Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
1008 IRBuilder<> &IRB) {
1009 if (!AT->getNumElements())
1010 return DFS.ZeroPrimitiveShadow;
1011
1012 Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1013 Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);
1014
1015 for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
1016 Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1017 Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
1018 Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1019 }
1020 return Aggregator;
1021}
1022
1023Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
1024 IRBuilder<> &IRB) {
1025 Type *ShadowTy = Shadow->getType();
1026 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
1027 return Shadow;
1028 if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
1029 return collapseAggregateShadow<>(AT, Shadow, IRB);
1030 if (StructType *ST = dyn_cast<StructType>(ShadowTy))
1031 return collapseAggregateShadow<>(ST, Shadow, IRB);
1032 llvm_unreachable("Unexpected shadow type");
1033}
1034
// Position-based overload: collapses Shadow to a primitive shadow with the
// combining IR inserted before Pos, reusing a previously computed collapse
// when that cached value dominates Pos.
Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
  Type *ShadowTy = Shadow->getType();
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return Shadow;

  // Checks if the cached collapsed shadow value dominates Pos.
  Value *&CS = CachedCollapsedShadows[Shadow];
  if (CS && DT.dominates(CS, Pos))
    return CS;

  IRBuilder<> IRB(Pos->getParent(), Pos);
  Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
  // Caches the converted primitive shadow value.
  CS = PrimitiveShadow;
  return PrimitiveShadow;
}
1052
// Emits a call to the conditional callback — passing the condition's shadow,
// plus its origin when origin tracking is on — immediately before I.
void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
                                                     Value *Condition) {
    return;
  }
  IRBuilder<> IRB(&I);
  Value *CondShadow = getShadow(Condition);
  CallInst *CI;
  if (DFS.shouldTrackOrigins()) {
    Value *CondOrigin = getOrigin(Condition);
    CI = IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
                        {CondShadow, CondOrigin});
  } else {
    CI = IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
  }
  // The callback declares its shadow parameter ZExt; mirror that on the call.
  CI->addParamAttr(0, Attribute::ZExt);
}
1070
// Emits a call to the reaches-function callback with Data's collapsed shadow
// (plus its origin when tracking is on), the source file and line of I when
// debug info is available (line 0 and the module's source file name
// otherwise), and the enclosing function's name.
void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB,
                                                         Instruction &I,
                                                         Value *Data) {
    return;
  }
  const DebugLoc &dbgloc = I.getDebugLoc();
  Value *DataShadow = collapseToPrimitiveShadow(getShadow(Data), IRB);
  ConstantInt *CILine;
  llvm::Value *FilePathPtr;

  if (dbgloc.get() == nullptr) {
    // No debug location: report line 0 and the module's source file name.
    CILine = llvm::ConstantInt::get(I.getContext(), llvm::APInt(32, 0));
    FilePathPtr = IRB.CreateGlobalString(
        I.getFunction()->getParent()->getSourceFileName());
  } else {
    CILine = llvm::ConstantInt::get(I.getContext(),
                                    llvm::APInt(32, dbgloc.getLine()));
    FilePathPtr = IRB.CreateGlobalString(dbgloc->getFilename());
  }

  llvm::Value *FunctionNamePtr =
      IRB.CreateGlobalString(I.getFunction()->getName());

  CallInst *CB;
  std::vector<Value *> args;

  if (DFS.shouldTrackOrigins()) {
    Value *DataOrigin = getOrigin(Data);
    args = { DataShadow, DataOrigin, FilePathPtr, CILine, FunctionNamePtr };
    CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackOriginFn, args);
  } else {
    args = { DataShadow, FilePathPtr, CILine, FunctionNamePtr };
    CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackFn, args);
  }
  // The callback declares its shadow parameter ZExt; mirror that on the call.
  CB->addParamAttr(0, Attribute::ZExt);
  CB->setDebugLoc(dbgloc);
}
1109
// Returns the shadow type corresponding to OrigTy: the primitive shadow type
// for unsized, integer, and vector types, or a structurally parallel
// aggregate of shadow types for arrays and structs.
Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
  if (!OrigTy->isSized())
    return PrimitiveShadowTy;
  if (isa<IntegerType>(OrigTy))
    return PrimitiveShadowTy;
  // A vector's lanes share a single primitive shadow.
  if (isa<VectorType>(OrigTy))
    return PrimitiveShadowTy;
  if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
    return ArrayType::get(getShadowTy(AT->getElementType()),
                          AT->getNumElements());
  if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
    for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
      Elements.push_back(getShadowTy(ST->getElementType(I)));
    return StructType::get(*Ctx, Elements);
  }
  // All remaining sized types get a primitive shadow.
  return PrimitiveShadowTy;
}
1128
1129Type *DataFlowSanitizer::getShadowTy(Value *V) {
1130 return getShadowTy(V->getType());
1131}
1132
// Performs one-time per-module setup: selects the shadow/origin memory map
// parameters for the target, caches frequently used types and constants, and
// builds the function types of every runtime routine the instrumentation may
// call. Always returns true (the module is considered modified).
bool DataFlowSanitizer::initializeModule(Module &M) {
  Triple TargetTriple(M.getTargetTriple());
  const DataLayout &DL = M.getDataLayout();

  // Shadow address arithmetic is only defined for the Linux targets below.
  if (TargetTriple.getOS() != Triple::Linux)
    report_fatal_error("unsupported operating system");
  switch (TargetTriple.getArch()) {
  case Triple::aarch64:
    MapParams = &Linux_AArch64_MemoryMapParams;
    break;
  case Triple::x86_64:
    MapParams = &Linux_X86_64_MemoryMapParams;
    break;
    // NOTE(review): the case label belonging to this break is not visible in
    // this copy of the file — verify against upstream before editing.
    break;
  default:
    report_fatal_error("unsupported architecture");
  }

  Mod = &M;
  Ctx = &M.getContext();
  // Commonly used types and zero constants.
  Int8Ptr = PointerType::getUnqual(*Ctx);
  OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
  OriginPtrTy = PointerType::getUnqual(*Ctx);
  PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  PrimitiveShadowPtrTy = PointerType::getUnqual(*Ctx);
  IntptrTy = DL.getIntPtrType(*Ctx);
  ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
  ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);

  // Signatures of the runtime entry points. These must stay in sync with the
  // declarations in the dfsan runtime library.
  Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
                                         /*isVarArg=*/false);
  Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
  DFSanLoadLabelAndOriginFnTy =
      FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
                        /*isVarArg=*/false);
  DFSanUnimplementedFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
  Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr};
  DFSanWrapperExternWeakNullFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanWrapperExternWeakNullArgs,
                        /*isVarArg=*/false);
  Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
                                PointerType::getUnqual(*Ctx), IntptrTy};
  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
                                        DFSanSetLabelArgs, /*isVarArg=*/false);
  DFSanNonzeroLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), {},
                                            /*isVarArg=*/false);
  DFSanVarargWrapperFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
  // Event-callback signatures (conditional, reaches-function, cmp, ...).
  DFSanConditionalCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanConditionalCallbackOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
      /*isVarArg=*/false);
  Type *DFSanReachesFunctionCallbackArgs[4] = {PrimitiveShadowTy, Int8Ptr,
                                               OriginTy, Int8Ptr};
  DFSanReachesFunctionCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackArgs,
                        /*isVarArg=*/false);
  Type *DFSanReachesFunctionCallbackOriginArgs[5] = {
      PrimitiveShadowTy, OriginTy, Int8Ptr, OriginTy, Int8Ptr};
  DFSanReachesFunctionCallbackOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackOriginArgs,
      /*isVarArg=*/false);
  DFSanCmpCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  // Origin-tracking helper signatures.
  DFSanChainOriginFnTy =
      FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
  Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanChainOriginIfTaintedFnTy = FunctionType::get(
      OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
  Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
                                        Int8Ptr, IntptrTy, OriginTy};
  DFSanMaybeStoreOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
  Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemOriginTransferFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
  Type *DFSanMemShadowOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemShadowOriginTransferFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemShadowOriginTransferArgs,
                        /*isVarArg=*/false);
  Type *DFSanMemShadowOriginConditionalExchangeArgs[5] = {
      IntegerType::get(*Ctx, 8), Int8Ptr, Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemShadowOriginConditionalExchangeFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMemShadowOriginConditionalExchangeArgs,
      /*isVarArg=*/false);
  Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
  DFSanLoadStoreCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
                        /*isVarArg=*/false);
  Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanMemTransferCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
                        /*isVarArg=*/false);

  // Branch-weight metadata marking runtime slow paths as unlikely.
  ColdCallWeights = MDBuilder(*Ctx).createUnlikelyBranchWeights();
  OriginStoreWeights = MDBuilder(*Ctx).createUnlikelyBranchWeights();
  return true;
}
1239
1240bool DataFlowSanitizer::isInstrumented(const Function *F) {
1241 return !ABIList.isIn(*F, "uninstrumented");
1242}
1243
1244bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
1245 return !ABIList.isIn(*GA, "uninstrumented");
1246}
1247
1248bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
1249 return ABIList.isIn(*F, "force_zero_labels");
1250}
1251
1252DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
1253 if (ABIList.isIn(*F, "functional"))
1254 return WK_Functional;
1255 if (ABIList.isIn(*F, "discard"))
1256 return WK_Discard;
1257 if (ABIList.isIn(*F, "custom"))
1258 return WK_Custom;
1259
1260 return WK_Warning;
1261}
1262
// Appends the ".dfsan" suffix to GV's name and, when module inline asm uses a
// ".symver" directive for the old name, rewrites the directive so the
// versioned symbol keeps pointing at the renamed (instrumented) global.
void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
    return;

  std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
  GV->setName(GVName + Suffix);

  // Try to change the name of the function in module inline asm. We only do
  // this for specific asm directives, currently only ".symver", to try to avoid
  // corrupting asm which happens to contain the symbol name as a substring.
  // Note that the substitution for .symver assumes that the versioned symbol
  // also has an instrumented name.
  std::string Asm = GV->getParent()->getModuleInlineAsm();
  std::string SearchStr = ".symver " + GVName + ",";
  size_t Pos = Asm.find(SearchStr);
  if (Pos != std::string::npos) {
    Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
    // The version marker must be present; bail out loudly if it is not.
    Pos = Asm.find('@');

    if (Pos == std::string::npos)
      report_fatal_error(Twine("unsupported .symver: ", Asm));

    Asm.replace(Pos, 1, Suffix + "@");
    GV->getParent()->setModuleInlineAsm(Asm);
  }
}
1289
1290void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,
1291 Function *F) {
1292 // If the function we are wrapping was ExternWeak, it may be null.
1293 // The original code before calling this wrapper may have checked for null,
1294 // but replacing with a known-to-not-be-null wrapper can break this check.
1295 // When replacing uses of the extern weak function with the wrapper we try
1296 // to avoid replacing uses in conditionals, but this is not perfect.
1297 // In the case where we fail, and accidentally optimize out a null check
1298 // for a extern weak function, add a check here to help identify the issue.
1299 if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) {
1300 std::vector<Value *> Args;
1301 Args.push_back(F);
1302 Args.push_back(IRB.CreateGlobalString(F->getName()));
1303 IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args);
1304 }
1305}
1306
// Creates a function named NewFName, of type NewFT, that forwards to F. For
// vararg functions the arguments cannot be forwarded, so the body instead
// calls the runtime vararg-wrapper diagnostic and ends in unreachable.
Function *
DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
                                        FunctionType *NewFT) {
  FunctionType *FT = F->getFunctionType();
  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
                                    NewFName, F->getParent());
  NewF->copyAttributesFrom(F);
  // Drop any copied return attributes that are invalid for the wrapper's
  // return type.
  NewF->removeRetAttrs(AttributeFuncs::typeIncompatible(
      NewFT->getReturnType(), NewF->getAttributes().getRetAttrs()));

  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
  if (F->isVarArg()) {
    NewF->removeFnAttr("split-stack");
    CallInst::Create(DFSanVarargWrapperFn,
                     IRBuilder<>(BB).CreateGlobalString(F->getName()), "", BB);
    new UnreachableInst(*Ctx, BB);
  } else {
    // Forward the wrapper's formal arguments directly to F and return its
    // result (or void).
    auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
    std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());

    CallInst *CI = CallInst::Create(F, Args, "", BB);
    if (FT->getReturnType()->isVoidTy())
      ReturnInst::Create(*Ctx, BB);
    else
      ReturnInst::Create(*Ctx, CI, BB);
  }

  return NewF;
}
1337
1338// Initialize DataFlowSanitizer runtime functions and declare them in the module
1339void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
1340 LLVMContext &C = M.getContext();
1341 {
1342 AttributeList AL;
1343 AL = AL.addFnAttribute(C, Attribute::NoUnwind);
1344 AL = AL.addFnAttribute(
1345 C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly()));
1346 AL = AL.addRetAttribute(C, Attribute::ZExt);
1347 DFSanUnionLoadFn =
1348 Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
1349 }
1350 {
1351 AttributeList AL;
1352 AL = AL.addFnAttribute(C, Attribute::NoUnwind);
1353 AL = AL.addFnAttribute(
1354 C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly()));
1355 AL = AL.addRetAttribute(C, Attribute::ZExt);
1356 DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
1357 "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
1358 }
1359 DFSanUnimplementedFn =
1360 Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
1361 DFSanWrapperExternWeakNullFn = Mod->getOrInsertFunction(
1362 "__dfsan_wrapper_extern_weak_null", DFSanWrapperExternWeakNullFnTy);
1363 {
1364 AttributeList AL;
1365 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1366 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1367 DFSanSetLabelFn =
1368 Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
1369 }
1370 DFSanNonzeroLabelFn =
1371 Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
1372 DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
1373 DFSanVarargWrapperFnTy);
1374 {
1375 AttributeList AL;
1376 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1377 AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1378 DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
1379 DFSanChainOriginFnTy, AL);
1380 }
1381 {
1382 AttributeList AL;
1383 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1384 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1385 AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1386 DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
1387 "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
1388 }
1389 DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
1390 "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
1391
1392 DFSanMemShadowOriginTransferFn = Mod->getOrInsertFunction(
1393 "__dfsan_mem_shadow_origin_transfer", DFSanMemShadowOriginTransferFnTy);
1394
1395 DFSanMemShadowOriginConditionalExchangeFn =
1396 Mod->getOrInsertFunction("__dfsan_mem_shadow_origin_conditional_exchange",
1397 DFSanMemShadowOriginConditionalExchangeFnTy);
1398
1399 {
1400 AttributeList AL;
1401 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1402 AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
1403 DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
1404 "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
1405 }
1406
1407 DFSanRuntimeFunctions.insert(
1408 DFSanUnionLoadFn.getCallee()->stripPointerCasts());
1409 DFSanRuntimeFunctions.insert(
1410 DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
1411 DFSanRuntimeFunctions.insert(
1412 DFSanUnimplementedFn.getCallee()->stripPointerCasts());
1413 DFSanRuntimeFunctions.insert(
1414 DFSanWrapperExternWeakNullFn.getCallee()->stripPointerCasts());
1415 DFSanRuntimeFunctions.insert(
1416 DFSanSetLabelFn.getCallee()->stripPointerCasts());
1417 DFSanRuntimeFunctions.insert(
1418 DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
1419 DFSanRuntimeFunctions.insert(
1420 DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
1421 DFSanRuntimeFunctions.insert(
1422 DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
1423 DFSanRuntimeFunctions.insert(
1424 DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
1425 DFSanRuntimeFunctions.insert(
1426 DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
1427 DFSanRuntimeFunctions.insert(
1428 DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
1429 DFSanRuntimeFunctions.insert(
1430 DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
1431 DFSanRuntimeFunctions.insert(
1432 DFSanReachesFunctionCallbackFn.getCallee()->stripPointerCasts());
1433 DFSanRuntimeFunctions.insert(
1434 DFSanReachesFunctionCallbackOriginFn.getCallee()->stripPointerCasts());
1435 DFSanRuntimeFunctions.insert(
1436 DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
1437 DFSanRuntimeFunctions.insert(
1438 DFSanChainOriginFn.getCallee()->stripPointerCasts());
1439 DFSanRuntimeFunctions.insert(
1440 DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
1441 DFSanRuntimeFunctions.insert(
1442 DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
1443 DFSanRuntimeFunctions.insert(
1444 DFSanMemShadowOriginTransferFn.getCallee()->stripPointerCasts());
1445 DFSanRuntimeFunctions.insert(
1446 DFSanMemShadowOriginConditionalExchangeFn.getCallee()
1447 ->stripPointerCasts());
1448 DFSanRuntimeFunctions.insert(
1449 DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
1450}
1451
// Initializes event callback functions and declares them in the module.
1453void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
1454 {
1455 AttributeList AL;
1456 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1457 DFSanLoadCallbackFn = Mod->getOrInsertFunction(
1458 "__dfsan_load_callback", DFSanLoadStoreCallbackFnTy, AL);
1459 }
1460 {
1461 AttributeList AL;
1462 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1463 DFSanStoreCallbackFn = Mod->getOrInsertFunction(
1464 "__dfsan_store_callback", DFSanLoadStoreCallbackFnTy, AL);
1465 }
1466 DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
1467 "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
1468 {
1469 AttributeList AL;
1470 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1471 DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback",
1472 DFSanCmpCallbackFnTy, AL);
1473 }
1474 {
1475 AttributeList AL;
1476 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1477 DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
1478 "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy, AL);
1479 }
1480 {
1481 AttributeList AL;
1482 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1483 DFSanConditionalCallbackOriginFn =
1484 Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
1485 DFSanConditionalCallbackOriginFnTy, AL);
1486 }
1487 {
1488 AttributeList AL;
1489 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1490 DFSanReachesFunctionCallbackFn =
1491 Mod->getOrInsertFunction("__dfsan_reaches_function_callback",
1492 DFSanReachesFunctionCallbackFnTy, AL);
1493 }
1494 {
1495 AttributeList AL;
1496 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1497 DFSanReachesFunctionCallbackOriginFn =
1498 Mod->getOrInsertFunction("__dfsan_reaches_function_callback_origin",
1499 DFSanReachesFunctionCallbackOriginFnTy, AL);
1500 }
1501}
1502
1503bool DataFlowSanitizer::runImpl(
1504 Module &M, llvm::function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
1505 initializeModule(M);
1506
1507 if (ABIList.isIn(M, "skip"))
1508 return false;
1509
1510 const unsigned InitialGlobalSize = M.global_size();
1511 const unsigned InitialModuleSize = M.size();
1512
1513 bool Changed = false;
1514
1515 auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
1516 Type *Ty) -> Constant * {
1517 GlobalVariable *G = Mod->getOrInsertGlobal(Name, Ty);
1518 Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
1519 G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
1520 return G;
1521 };
1522
1523 // These globals must be kept in sync with the ones in dfsan.cpp.
1524 ArgTLS =
1525 GetOrInsertGlobal("__dfsan_arg_tls",
1526 ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
1527 RetvalTLS = GetOrInsertGlobal(
1528 "__dfsan_retval_tls",
1529 ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
1530 ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
1531 ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
1532 RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);
1533
1534 (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
1535 Changed = true;
1536 return new GlobalVariable(
1537 M, OriginTy, true, GlobalValue::WeakODRLinkage,
1538 ConstantInt::getSigned(OriginTy,
1539 shouldTrackOrigins() ? ClTrackOrigins : 0),
1540 "__dfsan_track_origins");
1541 });
1542
1543 initializeCallbackFunctions(M);
1544 initializeRuntimeFunctions(M);
1545
1546 std::vector<Function *> FnsToInstrument;
1547 SmallPtrSet<Function *, 2> FnsWithNativeABI;
1548 SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
1549 SmallPtrSet<Constant *, 1> PersonalityFns;
1550 for (Function &F : M)
1551 if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F) &&
1552 !LibAtomicFunction(F) &&
1553 !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) {
1554 FnsToInstrument.push_back(&F);
1555 if (F.hasPersonalityFn())
1556 PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());
1557 }
1558
1560 for (auto *C : PersonalityFns) {
1561 assert(isa<Function>(C) && "Personality routine is not a function!");
1563 if (!isInstrumented(F))
1564 llvm::erase(FnsToInstrument, F);
1565 }
1566 }
1567
1568 // Give function aliases prefixes when necessary, and build wrappers where the
1569 // instrumentedness is inconsistent.
1570 for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
1571 // Don't stop on weak. We assume people aren't playing games with the
1572 // instrumentedness of overridden weak aliases.
1574 if (!F)
1575 continue;
1576
1577 bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
1578 if (GAInst && FInst) {
1579 addGlobalNameSuffix(&GA);
1580 } else if (GAInst != FInst) {
1581 // Non-instrumented alias of an instrumented function, or vice versa.
1582 // Replace the alias with a native-ABI wrapper of the aliasee. The pass
1583 // below will take care of instrumenting it.
1584 Function *NewF =
1585 buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
1586 GA.replaceAllUsesWith(NewF);
1587 NewF->takeName(&GA);
1588 GA.eraseFromParent();
1589 FnsToInstrument.push_back(NewF);
1590 }
1591 }
1592
1593 // TODO: This could be more precise.
1594 ReadOnlyNoneAttrs.addAttribute(Attribute::Memory);
1595
1596 // First, change the ABI of every function in the module. ABI-listed
1597 // functions keep their original ABI and get a wrapper function.
1598 for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
1599 FE = FnsToInstrument.end();
1600 FI != FE; ++FI) {
1601 Function &F = **FI;
1602 FunctionType *FT = F.getFunctionType();
1603
1604 bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
1605 FT->getReturnType()->isVoidTy());
1606
1607 if (isInstrumented(&F)) {
1608 if (isForceZeroLabels(&F))
1609 FnsWithForceZeroLabel.insert(&F);
1610
1611 // Instrumented functions get a '.dfsan' suffix. This allows us to more
1612 // easily identify cases of mismatching ABIs. This naming scheme is
1613 // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
1614 addGlobalNameSuffix(&F);
1615 } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
1616 // Build a wrapper function for F. The wrapper simply calls F, and is
1617 // added to FnsToInstrument so that any instrumentation according to its
1618 // WrapperKind is done in the second pass below.
1619
1620 // If the function being wrapped has local linkage, then preserve the
1621 // function's linkage in the wrapper function.
1622 GlobalValue::LinkageTypes WrapperLinkage =
1623 F.hasLocalLinkage() ? F.getLinkage()
1625
1626 Function *NewF = buildWrapperFunction(
1627 &F,
1628 (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
1629 std::string(F.getName()),
1630 WrapperLinkage, FT);
1631 NewF->removeFnAttrs(ReadOnlyNoneAttrs);
1632
1633 // Extern weak functions can sometimes be null at execution time.
1634 // Code will sometimes check if an extern weak function is null.
1635 // This could look something like:
1636 // declare extern_weak i8 @my_func(i8)
1637 // br i1 icmp ne (i8 (i8)* @my_func, i8 (i8)* null), label %use_my_func,
1638 // label %avoid_my_func
1639 // The @"dfsw$my_func" wrapper is never null, so if we replace this use
1640 // in the comparison, the icmp will simplify to false and we have
1641 // accidentally optimized away a null check that is necessary.
1642 // This can lead to a crash when the null extern_weak my_func is called.
1643 //
1644 // To prevent (the most common pattern of) this problem,
1645 // do not replace uses in comparisons with the wrapper.
1646 // We definitely want to replace uses in call instructions.
1647 // Other uses (e.g. store the function address somewhere) might be
1648 // called or compared or both - this case may not be handled correctly.
1649 // We will default to replacing with wrapper in cases we are unsure.
1650 auto IsNotCmpUse = [](Use &U) -> bool {
1651 User *Usr = U.getUser();
1652 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {
1653 // This is the most common case for icmp ne null
1654 if (CE->getOpcode() == Instruction::ICmp) {
1655 return false;
1656 }
1657 }
1658 if (Instruction *I = dyn_cast<Instruction>(Usr)) {
1659 if (I->getOpcode() == Instruction::ICmp) {
1660 return false;
1661 }
1662 }
1663 return true;
1664 };
1665 F.replaceUsesWithIf(NewF, IsNotCmpUse);
1666
1667 UnwrappedFnMap[NewF] = &F;
1668 *FI = NewF;
1669
1670 if (!F.isDeclaration()) {
1671 // This function is probably defining an interposition of an
1672 // uninstrumented function and hence needs to keep the original ABI.
1673 // But any functions it may call need to use the instrumented ABI, so
1674 // we instrument it in a mode which preserves the original ABI.
1675 FnsWithNativeABI.insert(&F);
1676
1677 // This code needs to rebuild the iterators, as they may be invalidated
1678 // by the push_back, taking care that the new range does not include
1679 // any functions added by this code.
1680 size_t N = FI - FnsToInstrument.begin(),
1681 Count = FE - FnsToInstrument.begin();
1682 FnsToInstrument.push_back(&F);
1683 FI = FnsToInstrument.begin() + N;
1684 FE = FnsToInstrument.begin() + Count;
1685 }
1686 // Hopefully, nobody will try to indirectly call a vararg
1687 // function... yet.
1688 } else if (FT->isVarArg()) {
1689 UnwrappedFnMap[&F] = &F;
1690 *FI = nullptr;
1691 }
1692 }
1693
1694 for (Function *F : FnsToInstrument) {
1695 if (!F || F->isDeclaration())
1696 continue;
1697
1699
1700 DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
1701 FnsWithForceZeroLabel.count(F), GetTLI(*F));
1702
1704 // Add callback for arguments reaching this function.
1705 for (auto &FArg : F->args()) {
1706 Instruction *Next = &F->getEntryBlock().front();
1707 Value *FArgShadow = DFSF.getShadow(&FArg);
1708 if (isZeroShadow(FArgShadow))
1709 continue;
1710 if (Instruction *FArgShadowInst = dyn_cast<Instruction>(FArgShadow)) {
1711 Next = FArgShadowInst->getNextNode();
1712 }
1713 if (shouldTrackOrigins()) {
1714 if (Instruction *Origin =
1715 dyn_cast<Instruction>(DFSF.getOrigin(&FArg))) {
1716 // Ensure IRB insertion point is after loads for shadow and origin.
1717 Instruction *OriginNext = Origin->getNextNode();
1718 if (Next->comesBefore(OriginNext)) {
1719 Next = OriginNext;
1720 }
1721 }
1722 }
1723 IRBuilder<> IRB(Next);
1724 DFSF.addReachesFunctionCallbacksIfEnabled(IRB, *Next, &FArg);
1725 }
1726 }
1727
1728 // DFSanVisitor may create new basic blocks, which confuses df_iterator.
1729 // Build a copy of the list before iterating over it.
1730 SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
1731
1732 for (BasicBlock *BB : BBList) {
1733 Instruction *Inst = &BB->front();
1734 while (true) {
1735 // DFSanVisitor may split the current basic block, changing the current
1736 // instruction's next pointer and moving the next instruction to the
1737 // tail block from which we should continue.
1738 Instruction *Next = Inst->getNextNode();
1739 // DFSanVisitor may delete Inst, so keep track of whether it was a
1740 // terminator.
1741 bool IsTerminator = Inst->isTerminator();
1742 if (!DFSF.SkipInsts.count(Inst))
1743 DFSanVisitor(DFSF).visit(Inst);
1744 if (IsTerminator)
1745 break;
1746 Inst = Next;
1747 }
1748 }
1749
1750 // We will not necessarily be able to compute the shadow for every phi node
1751 // until we have visited every block. Therefore, the code that handles phi
1752 // nodes adds them to the PHIFixups list so that they can be properly
1753 // handled here.
1754 for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
1755 for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
1756 ++Val) {
1757 P.ShadowPhi->setIncomingValue(
1758 Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
1759 if (P.OriginPhi)
1760 P.OriginPhi->setIncomingValue(
1761 Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
1762 }
1763 }
1764
1765 // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
1766 // places (i.e. instructions in basic blocks we haven't even begun visiting
1767 // yet). To make our life easier, do this work in a pass after the main
1768 // instrumentation.
1770 for (Value *V : DFSF.NonZeroChecks) {
1772 if (Instruction *I = dyn_cast<Instruction>(V))
1773 Pos = std::next(I->getIterator());
1774 else
1775 Pos = DFSF.F->getEntryBlock().begin();
1776 while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
1777 Pos = std::next(Pos->getIterator());
1778 IRBuilder<> IRB(Pos->getParent(), Pos);
1779 Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
1780 Value *Ne =
1781 IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
1783 Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
1784 IRBuilder<> ThenIRB(BI);
1785 ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
1786 }
1787 }
1788 }
1789
1790 return Changed || !FnsToInstrument.empty() ||
1791 M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
1792}
1793
1794Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
1795 return IRB.CreatePtrAdd(DFS.ArgTLS, ConstantInt::get(DFS.IntptrTy, ArgOffset),
1796 "_dfsarg");
1797}
1798
1799Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
1800 return IRB.CreatePointerCast(DFS.RetvalTLS, PointerType::get(*DFS.Ctx, 0),
1801 "_dfsret");
1802}
1803
1804Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }
1805
1806Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
1807 return IRB.CreateConstInBoundsGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0,
1808 ArgNo, "_dfsarg_o");
1809}
1810
1811Value *DFSanFunction::getOrigin(Value *V) {
1812 assert(DFS.shouldTrackOrigins());
1813 if (!isa<Argument>(V) && !isa<Instruction>(V))
1814 return DFS.ZeroOrigin;
1815 Value *&Origin = ValOriginMap[V];
1816 if (!Origin) {
1817 if (Argument *A = dyn_cast<Argument>(V)) {
1818 if (IsNativeABI)
1819 return DFS.ZeroOrigin;
1820 if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
1821 Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
1822 IRBuilder<> IRB(ArgOriginTLSPos);
1823 Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
1824 Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
1825 } else {
1826 // Overflow
1827 Origin = DFS.ZeroOrigin;
1828 }
1829 } else {
1830 Origin = DFS.ZeroOrigin;
1831 }
1832 }
1833 return Origin;
1834}
1835
1836void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
1837 if (!DFS.shouldTrackOrigins())
1838 return;
1839 assert(!ValOriginMap.count(I));
1840 assert(Origin->getType() == DFS.OriginTy);
1841 ValOriginMap[I] = Origin;
1842}
1843
1844Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
1845 unsigned ArgOffset = 0;
1846 const DataLayout &DL = F->getDataLayout();
1847 for (auto &FArg : F->args()) {
1848 if (!FArg.getType()->isSized()) {
1849 if (A == &FArg)
1850 break;
1851 continue;
1852 }
1853
1854 unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
1855 if (A != &FArg) {
1856 ArgOffset += alignTo(Size, ShadowTLSAlignment);
1857 if (ArgOffset > ArgTLSSize)
1858 break; // ArgTLS overflows, uses a zero shadow.
1859 continue;
1860 }
1861
1862 if (ArgOffset + Size > ArgTLSSize)
1863 break; // ArgTLS overflows, uses a zero shadow.
1864
1865 Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
1866 IRBuilder<> IRB(ArgTLSPos);
1867 Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
1868 return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
1870 }
1871
1872 return DFS.getZeroShadow(A);
1873}
1874
1875Value *DFSanFunction::getShadow(Value *V) {
1876 if (!isa<Argument>(V) && !isa<Instruction>(V))
1877 return DFS.getZeroShadow(V);
1878 if (IsForceZeroLabels)
1879 return DFS.getZeroShadow(V);
1880 Value *&Shadow = ValShadowMap[V];
1881 if (!Shadow) {
1882 if (Argument *A = dyn_cast<Argument>(V)) {
1883 if (IsNativeABI)
1884 return DFS.getZeroShadow(V);
1885 Shadow = getShadowForTLSArgument(A);
1886 NonZeroChecks.push_back(Shadow);
1887 } else {
1888 Shadow = DFS.getZeroShadow(V);
1889 }
1890 }
1891 return Shadow;
1892}
1893
1894void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
1895 assert(!ValShadowMap.count(I));
1896 ValShadowMap[I] = Shadow;
1897}
1898
1899/// Compute the integer shadow offset that corresponds to a given
1900/// application address.
1901///
1902/// Offset = (Addr & ~AndMask) ^ XorMask
1903Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
1904 assert(Addr != RetvalTLS && "Reinstrumenting?");
1905 Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1906
1907 uint64_t AndMask = MapParams->AndMask;
1908 if (AndMask)
1909 OffsetLong =
1910 IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));
1911
1912 uint64_t XorMask = MapParams->XorMask;
1913 if (XorMask)
1914 OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
1915 return OffsetLong;
1916}
1917
1918std::pair<Value *, Value *>
1919DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
1921 // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
1922 IRBuilder<> IRB(Pos->getParent(), Pos);
1923 Value *ShadowOffset = getShadowOffset(Addr, IRB);
1924 Value *ShadowLong = ShadowOffset;
1925 uint64_t ShadowBase = MapParams->ShadowBase;
1926 if (ShadowBase != 0) {
1927 ShadowLong =
1928 IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
1929 }
1930 Value *ShadowPtr = IRB.CreateIntToPtr(ShadowLong, PointerType::get(*Ctx, 0));
1931 Value *OriginPtr = nullptr;
1932 if (shouldTrackOrigins()) {
1933 Value *OriginLong = ShadowOffset;
1934 uint64_t OriginBase = MapParams->OriginBase;
1935 if (OriginBase != 0)
1936 OriginLong =
1937 IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
1938 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1939 // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
1940 // So Mask is unnecessary.
1941 if (Alignment < MinOriginAlignment) {
1942 uint64_t Mask = MinOriginAlignment.value() - 1;
1943 OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
1944 }
1945 OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
1946 }
1947 return std::make_pair(ShadowPtr, OriginPtr);
1948}
1949
1950Value *DataFlowSanitizer::getShadowAddress(Value *Addr,
1952 Value *ShadowOffset) {
1953 IRBuilder<> IRB(Pos->getParent(), Pos);
1954 return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
1955}
1956
1957Value *DataFlowSanitizer::getShadowAddress(Value *Addr,
1959 IRBuilder<> IRB(Pos->getParent(), Pos);
1960 Value *ShadowAddr = getShadowOffset(Addr, IRB);
1961 uint64_t ShadowBase = MapParams->ShadowBase;
1962 if (ShadowBase != 0)
1963 ShadowAddr =
1964 IRB.CreateAdd(ShadowAddr, ConstantInt::get(IntptrTy, ShadowBase));
1965 return getShadowAddress(Addr, Pos, ShadowAddr);
1966}
1967
1968Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
1970 Value *PrimitiveValue = combineShadows(V1, V2, Pos);
1971 return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
1972}
1973
1974// Generates IR to compute the union of the two given shadows, inserting it
1975// before Pos. The combined value is with primitive type.
1976Value *DFSanFunction::combineShadows(Value *V1, Value *V2,
1978 if (DFS.isZeroShadow(V1))
1979 return collapseToPrimitiveShadow(V2, Pos);
1980 if (DFS.isZeroShadow(V2))
1981 return collapseToPrimitiveShadow(V1, Pos);
1982 if (V1 == V2)
1983 return collapseToPrimitiveShadow(V1, Pos);
1984
1985 auto V1Elems = ShadowElements.find(V1);
1986 auto V2Elems = ShadowElements.find(V2);
1987 if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
1988 if (llvm::includes(V1Elems->second, V2Elems->second)) {
1989 return collapseToPrimitiveShadow(V1, Pos);
1990 }
1991 if (llvm::includes(V2Elems->second, V1Elems->second)) {
1992 return collapseToPrimitiveShadow(V2, Pos);
1993 }
1994 } else if (V1Elems != ShadowElements.end()) {
1995 if (V1Elems->second.count(V2))
1996 return collapseToPrimitiveShadow(V1, Pos);
1997 } else if (V2Elems != ShadowElements.end()) {
1998 if (V2Elems->second.count(V1))
1999 return collapseToPrimitiveShadow(V2, Pos);
2000 }
2001
2002 auto Key = std::make_pair(V1, V2);
2003 if (V1 > V2)
2004 std::swap(Key.first, Key.second);
2005 CachedShadow &CCS = CachedShadows[Key];
2006 if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
2007 return CCS.Shadow;
2008
2009 // Converts inputs shadows to shadows with primitive types.
2010 Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
2011 Value *PV2 = collapseToPrimitiveShadow(V2, Pos);
2012
2013 IRBuilder<> IRB(Pos->getParent(), Pos);
2014 CCS.Block = Pos->getParent();
2015 CCS.Shadow = IRB.CreateOr(PV1, PV2);
2016
2017 std::set<Value *> UnionElems;
2018 if (V1Elems != ShadowElements.end()) {
2019 UnionElems = V1Elems->second;
2020 } else {
2021 UnionElems.insert(V1);
2022 }
2023 if (V2Elems != ShadowElements.end()) {
2024 UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
2025 } else {
2026 UnionElems.insert(V2);
2027 }
2028 ShadowElements[CCS.Shadow] = std::move(UnionElems);
2029
2030 return CCS.Shadow;
2031}
2032
2033// A convenience function which folds the shadows of each of the operands
2034// of the provided instruction Inst, inserting the IR before Inst. Returns
2035// the computed union Value.
2036Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
2037 if (Inst->getNumOperands() == 0)
2038 return DFS.getZeroShadow(Inst);
2039
2040 Value *Shadow = getShadow(Inst->getOperand(0));
2041 for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
2042 Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)),
2043 Inst->getIterator());
2044
2045 return expandFromPrimitiveShadow(Inst->getType(), Shadow,
2046 Inst->getIterator());
2047}
2048
2049void DFSanVisitor::visitInstOperands(Instruction &I) {
2050 Value *CombinedShadow = DFSF.combineOperandShadows(&I);
2051 DFSF.setShadow(&I, CombinedShadow);
2052 visitInstOperandOrigins(I);
2053}
2054
2055Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
2056 const std::vector<Value *> &Origins,
2058 ConstantInt *Zero) {
2059 assert(Shadows.size() == Origins.size());
2060 size_t Size = Origins.size();
2061 if (Size == 0)
2062 return DFS.ZeroOrigin;
2063 Value *Origin = nullptr;
2064 if (!Zero)
2065 Zero = DFS.ZeroPrimitiveShadow;
2066 for (size_t I = 0; I != Size; ++I) {
2067 Value *OpOrigin = Origins[I];
2068 Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
2069 if (ConstOpOrigin && ConstOpOrigin->isNullValue())
2070 continue;
2071 if (!Origin) {
2072 Origin = OpOrigin;
2073 continue;
2074 }
2075 Value *OpShadow = Shadows[I];
2076 Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
2077 IRBuilder<> IRB(Pos->getParent(), Pos);
2078 Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
2079 Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2080 }
2081 return Origin ? Origin : DFS.ZeroOrigin;
2082}
2083
2084Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
2085 size_t Size = Inst->getNumOperands();
2086 std::vector<Value *> Shadows(Size);
2087 std::vector<Value *> Origins(Size);
2088 for (unsigned I = 0; I != Size; ++I) {
2089 Shadows[I] = getShadow(Inst->getOperand(I));
2090 Origins[I] = getOrigin(Inst->getOperand(I));
2091 }
2092 return combineOrigins(Shadows, Origins, Inst->getIterator());
2093}
2094
2095void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
2096 if (!DFSF.DFS.shouldTrackOrigins())
2097 return;
2098 Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
2099 DFSF.setOrigin(&I, CombinedOrigin);
2100}
2101
2102Align DFSanFunction::getShadowAlign(Align InstAlignment) {
2103 const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
2104 return Align(Alignment.value() * DFS.ShadowWidthBytes);
2105}
2106
2107Align DFSanFunction::getOriginAlign(Align InstAlignment) {
2108 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2109 return Align(std::max(MinOriginAlignment, Alignment));
2110}
2111
2112bool DFSanFunction::isLookupTableConstant(Value *P) {
2113 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))
2114 if (GV->isConstant() && GV->hasName())
2115 return DFS.CombineTaintLookupTableNames.count(GV->getName());
2116
2117 return false;
2118}
2119
2120bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
2121 Align InstAlignment) {
2122 // When enabling tracking load instructions, we always use
2123 // __dfsan_load_label_and_origin to reduce code size.
2124 if (ClTrackOrigins == 2)
2125 return true;
2126
2127 assert(Size != 0);
2128 // * if Size == 1, it is sufficient to load its origin aligned at 4.
2129 // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
2130 // load its origin aligned at 4. If not, although origins may be lost, it
2131 // should not happen very often.
2132 // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
2133 // Size % 4 == 0, it is more efficient to load origins without callbacks.
2134 // * Otherwise we use __dfsan_load_label_and_origin.
2135 // This should ensure that common cases run efficiently.
2136 if (Size <= 2)
2137 return false;
2138
2139 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2140 return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
2141}
2142
2143Value *DataFlowSanitizer::loadNextOrigin(BasicBlock::iterator Pos,
2144 Align OriginAlign,
2145 Value **OriginAddr) {
2146 IRBuilder<> IRB(Pos->getParent(), Pos);
2147 *OriginAddr =
2148 IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
2149 return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
2150}
2151
2152std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
2153 Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
2154 Align OriginAlign, Value *FirstOrigin, BasicBlock::iterator Pos) {
2155 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2156 const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;
2157
2158 assert(Size >= 4 && "Not large enough load size for fast path!");
2159
2160 // Used for origin tracking.
2161 std::vector<Value *> Shadows;
2162 std::vector<Value *> Origins;
2163
2164 // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
2165 // but this function is only used in a subset of cases that make it possible
2166 // to optimize the instrumentation.
2167 //
2168 // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
2169 // per byte) is either:
2170 // - a multiple of 8 (common)
2171 // - equal to 4 (only for load32)
2172 //
2173 // For the second case, we can fit the wide shadow in a 32-bit integer. In all
2174 // other cases, we use a 64-bit integer to hold the wide shadow.
2175 Type *WideShadowTy =
2176 ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);
2177
2178 IRBuilder<> IRB(Pos->getParent(), Pos);
2179 Value *CombinedWideShadow =
2180 IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
2181
2182 unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
2183 const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
2184
2185 auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
2186 if (BytesPerWideShadow > 4) {
2187 assert(BytesPerWideShadow == 8);
2188 // The wide shadow relates to two origin pointers: one for the first four
2189 // application bytes, and one for the latest four. We use a left shift to
2190 // get just the shadow bytes that correspond to the first origin pointer,
2191 // and then the entire shadow for the second origin pointer (which will be
2192 // chosen by combineOrigins() iff the least-significant half of the wide
2193 // shadow was empty but the other half was not).
2194 Value *WideShadowLo =
2195 F->getParent()->getDataLayout().isLittleEndian()
2196 ? IRB.CreateShl(
2197 WideShadow,
2198 ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2))
2199 : IRB.CreateAnd(
2200 WideShadow,
2201 ConstantInt::get(WideShadowTy,
2202 (1 - (1 << (WideShadowBitWidth / 2)))
2203 << (WideShadowBitWidth / 2)));
2204 Shadows.push_back(WideShadow);
2205 Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));
2206
2207 Shadows.push_back(WideShadowLo);
2208 Origins.push_back(Origin);
2209 } else {
2210 Shadows.push_back(WideShadow);
2211 Origins.push_back(Origin);
2212 }
2213 };
2214
2215 if (ShouldTrackOrigins)
2216 AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);
2217
2218 // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
2219 // then OR individual shadows within the combined WideShadow by binary ORing.
2220 // This is fewer instructions than ORing shadows individually, since it
2221 // needs logN shift/or instructions (N being the bytes of the combined wide
2222 // shadow).
2223 for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
2224 ByteOfs += BytesPerWideShadow) {
2225 ShadowAddr = IRB.CreateGEP(WideShadowTy, ShadowAddr,
2226 ConstantInt::get(DFS.IntptrTy, 1));
2227 Value *NextWideShadow =
2228 IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
2229 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
2230 if (ShouldTrackOrigins) {
2231 Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
2232 AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
2233 }
2234 }
2235 for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
2236 Width >>= 1) {
2237 Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
2238 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
2239 }
2240 return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
2241 ShouldTrackOrigins
2242 ? combineOrigins(Shadows, Origins, Pos,
2244 : DFS.ZeroOrigin};
2245}
2246
2247std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
2248 Value *Addr, uint64_t Size, Align InstAlignment, BasicBlock::iterator Pos) {
2249 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2250
2251 // Non-escaped loads.
2252 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2253 const auto SI = AllocaShadowMap.find(AI);
2254 if (SI != AllocaShadowMap.end()) {
2255 IRBuilder<> IRB(Pos->getParent(), Pos);
2256 Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
2257 const auto OI = AllocaOriginMap.find(AI);
2258 assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
2259 return {ShadowLI, ShouldTrackOrigins
2260 ? IRB.CreateLoad(DFS.OriginTy, OI->second)
2261 : nullptr};
2262 }
2263 }
2264
2265 // Load from constant addresses.
2266 SmallVector<const Value *, 2> Objs;
2267 getUnderlyingObjects(Addr, Objs);
2268 bool AllConstants = true;
2269 for (const Value *Obj : Objs) {
2270 if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
2271 continue;
2273 continue;
2274
2275 AllConstants = false;
2276 break;
2277 }
2278 if (AllConstants)
2279 return {DFS.ZeroPrimitiveShadow,
2280 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2281
2282 if (Size == 0)
2283 return {DFS.ZeroPrimitiveShadow,
2284 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2285
2286 // Use callback to load if this is not an optimizable case for origin
2287 // tracking.
2288 if (ShouldTrackOrigins &&
2289 useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
2290 IRBuilder<> IRB(Pos->getParent(), Pos);
2291 CallInst *Call =
2292 IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
2293 {Addr, ConstantInt::get(DFS.IntptrTy, Size)});
2294 Call->addRetAttr(Attribute::ZExt);
2295 return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
2296 DFS.PrimitiveShadowTy),
2297 IRB.CreateTrunc(Call, DFS.OriginTy)};
2298 }
2299
2300 // Other cases that support loading shadows or origins in a fast way.
2301 Value *ShadowAddr, *OriginAddr;
2302 std::tie(ShadowAddr, OriginAddr) =
2303 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2304
2305 const Align ShadowAlign = getShadowAlign(InstAlignment);
2306 const Align OriginAlign = getOriginAlign(InstAlignment);
2307 Value *Origin = nullptr;
2308 if (ShouldTrackOrigins) {
2309 IRBuilder<> IRB(Pos->getParent(), Pos);
2310 Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
2311 }
2312
2313 // When the byte size is small enough, we can load the shadow directly with
2314 // just a few instructions.
2315 switch (Size) {
2316 case 1: {
2317 LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
2318 LI->setAlignment(ShadowAlign);
2319 return {LI, Origin};
2320 }
2321 case 2: {
2322 IRBuilder<> IRB(Pos->getParent(), Pos);
2323 Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
2324 ConstantInt::get(DFS.IntptrTy, 1));
2325 Value *Load =
2326 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
2327 Value *Load1 =
2328 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
2329 return {combineShadows(Load, Load1, Pos), Origin};
2330 }
2331 }
2332 bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);
2333
2334 if (HasSizeForFastPath)
2335 return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
2336 OriginAlign, Origin, Pos);
2337
2338 IRBuilder<> IRB(Pos->getParent(), Pos);
2339 CallInst *FallbackCall = IRB.CreateCall(
2340 DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
2341 FallbackCall->addRetAttr(Attribute::ZExt);
2342 return {FallbackCall, Origin};
2343}
2344
2345std::pair<Value *, Value *>
2346DFSanFunction::loadShadowOrigin(Value *Addr, uint64_t Size, Align InstAlignment,
2348 Value *PrimitiveShadow, *Origin;
2349 std::tie(PrimitiveShadow, Origin) =
2350 loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
2351 if (DFS.shouldTrackOrigins()) {
2352 if (ClTrackOrigins == 2) {
2353 IRBuilder<> IRB(Pos->getParent(), Pos);
2354 auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
2355 if (!ConstantShadow || !ConstantShadow->isZeroValue())
2356 Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
2357 }
2358 }
2359 return {PrimitiveShadow, Origin};
2360}
2361
2378
2380 if (!V->getType()->isPointerTy())
2381 return V;
2382
2383 // DFSan pass should be running on valid IR, but we'll
2384 // keep a seen set to ensure there are no issues.
2386 Visited.insert(V);
2387 do {
2388 if (auto *GEP = dyn_cast<GEPOperator>(V)) {
2389 V = GEP->getPointerOperand();
2390 } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2391 V = cast<Operator>(V)->getOperand(0);
2392 if (!V->getType()->isPointerTy())
2393 return V;
2394 } else if (isa<GlobalAlias>(V)) {
2395 V = cast<GlobalAlias>(V)->getAliasee();
2396 }
2397 } while (Visited.insert(V).second);
2398
2399 return V;
2400}
2401
// Instruments an application load: computes the loaded value's shadow (and
// origin) from shadow/origin memory for the addressed range and records them
// on the load, optionally combining in the pointer operand's shadow and
// emitting the -dfsan-event-callbacks load callback.
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
  auto &DL = LI.getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(LI.getType());
  // Zero-sized loads carry no taint.
  if (Size == 0) {
    DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
    DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
    return;
  }

  // When an application load is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensures shadow loads return either labels of the
  // initial application data or zeros.
  if (LI.isAtomic())
  // NOTE(review): the statement body of this `if` (presumably upgrading the
  // load's atomic ordering) is missing from this listing — confirm upstream.

  BasicBlock::iterator AfterLi = std::next(LI.getIterator());
  // NOTE(review): the declaration of `Pos` (presumably LI's iterator) is
  // missing from this listing — confirm upstream.
  if (LI.isAtomic())
    Pos = std::next(Pos);

  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *PrimitiveShadow, *Origin;
  std::tie(PrimitiveShadow, Origin) =
      DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  if (ShouldTrackOrigins) {
    Shadows.push_back(PrimitiveShadow);
    Origins.push_back(Origin);
  }
  // NOTE(review): the `if` condition wrapping the next block (combining the
  // pointer operand's shadow, guarded by a flag and/or this lookup-table
  // check) is truncated in this listing — confirm upstream.
      DFSF.isLookupTableConstant(
    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
    PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
    }
  }
  // Remember possibly-tainted shadows for -dfsan-debug-nonzero-labels.
  if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
    DFSF.NonZeroChecks.push_back(PrimitiveShadow);

  Value *Shadow =
      DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
  DFSF.setShadow(&LI, Shadow);

  if (ShouldTrackOrigins) {
    DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
  }

  // Optional event callback with the load's shadow and address.
  if (ClEventCallbacks) {
    IRBuilder<> IRB(Pos->getParent(), Pos);
    Value *Addr = LI.getPointerOperand();
    CallInst *CI =
        IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr});
    CI->addParamAttr(0, Attribute::ZExt);
  }

  // The reaches-function callback is inserted after the load itself.
  IRBuilder<> IRB(AfterLi->getParent(), AfterLi);
  DFSF.addReachesFunctionCallbacksIfEnabled(IRB, LI, &LI);
}
2466
2467Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
2468 IRBuilder<> &IRB) {
2469 assert(DFS.shouldTrackOrigins());
2470 return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
2471}
2472
2473Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
2474 if (!DFS.shouldTrackOrigins())
2475 return V;
2476 return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
2477}
2478
2479Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
2480 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2481 const DataLayout &DL = F->getDataLayout();
2482 unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2483 if (IntptrSize == OriginSize)
2484 return Origin;
2485 assert(IntptrSize == OriginSize * 2);
2486 Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
2487 return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
2488}
2489
// Writes `Origin` into every origin slot backing the byte range
// [StoreOriginAddr, StoreOriginAddr + StoreOriginSize). When alignment
// permits, the origin is replicated to pointer width so two slots are filled
// per store.
void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
                                Value *StoreOriginAddr,
                                uint64_t StoreOriginSize, Align Alignment) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getDataLayout();
  const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  assert(IntptrAlignment >= MinOriginAlignment);
  assert(IntptrSize >= OriginSize);

  // Ofs counts origin slots already painted by the wide-store fast path.
  unsigned Ofs = 0;
  Align CurrentAlignment = Alignment;
  // Fast path: fill pairs of origin slots with intptr-wide stores.
  if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
    Value *IntptrOrigin = originToIntptr(IRB, Origin);
    Value *IntptrStoreOriginPtr =
        IRB.CreatePointerCast(StoreOriginAddr, PointerType::get(*DFS.Ctx, 0));
    for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
      Value *Ptr =
          I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
            : IntptrStoreOriginPtr;
      IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
      Ofs += IntptrSize / OriginSize;
      // After the first store, addresses are intptr-aligned.
      CurrentAlignment = IntptrAlignment;
    }
  }

  // Paint remaining slots one origin at a time; the bound rounds the byte
  // count up to whole origin slots.
  for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
       ++I) {
    Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
                   : StoreOriginAddr;
    IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
    CurrentAlignment = MinOriginAlignment;
  }
}
2524
2525Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
2526 const Twine &Name) {
2527 Type *VTy = V->getType();
2528 assert(VTy->isIntegerTy());
2529 if (VTy->getIntegerBitWidth() == 1)
2530 // Just converting a bool to a bool, so do nothing.
2531 return V;
2532 return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
2533}
2534
2535void DFSanFunction::storeOrigin(BasicBlock::iterator Pos, Value *Addr,
2536 uint64_t Size, Value *Shadow, Value *Origin,
2537 Value *StoreOriginAddr, Align InstAlignment) {
2538 // Do not write origins for zero shadows because we do not trace origins for
2539 // untainted sinks.
2540 const Align OriginAlignment = getOriginAlign(InstAlignment);
2541 Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
2542 IRBuilder<> IRB(Pos->getParent(), Pos);
2543 if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
2544 if (!ConstantShadow->isZeroValue())
2545 paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
2546 OriginAlignment);
2547 return;
2548 }
2549
2550 if (shouldInstrumentWithCall()) {
2551 IRB.CreateCall(
2552 DFS.DFSanMaybeStoreOriginFn,
2553 {CollapsedShadow, Addr, ConstantInt::get(DFS.IntptrTy, Size), Origin});
2554 } else {
2555 Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
2556 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2558 Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DTU);
2559 IRBuilder<> IRBNew(CheckTerm);
2560 paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
2561 OriginAlignment);
2562 ++NumOriginStores;
2563 }
2564}
2565
2566void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
2567 Align ShadowAlign,
2569 IRBuilder<> IRB(Pos->getParent(), Pos);
2570 IntegerType *ShadowTy =
2571 IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
2572 Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
2573 Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
2574 IRB.CreateAlignedStore(ExtZeroShadow, ShadowAddr, ShadowAlign);
2575 // Do not write origins for 0 shadows because we do not trace origins for
2576 // untainted sinks.
2577}
2578
2579void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
2580 Align InstAlignment,
2581 Value *PrimitiveShadow,
2582 Value *Origin,
2584 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;
2585
2586 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2587 const auto SI = AllocaShadowMap.find(AI);
2588 if (SI != AllocaShadowMap.end()) {
2589 IRBuilder<> IRB(Pos->getParent(), Pos);
2590 IRB.CreateStore(PrimitiveShadow, SI->second);
2591
2592 // Do not write origins for 0 shadows because we do not trace origins for
2593 // untainted sinks.
2594 if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
2595 const auto OI = AllocaOriginMap.find(AI);
2596 assert(OI != AllocaOriginMap.end() && Origin);
2597 IRB.CreateStore(Origin, OI->second);
2598 }
2599 return;
2600 }
2601 }
2602
2603 const Align ShadowAlign = getShadowAlign(InstAlignment);
2604 if (DFS.isZeroShadow(PrimitiveShadow)) {
2605 storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
2606 return;
2607 }
2608
2609 IRBuilder<> IRB(Pos->getParent(), Pos);
2610 Value *ShadowAddr, *OriginAddr;
2611 std::tie(ShadowAddr, OriginAddr) =
2612 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2613
2614 const unsigned ShadowVecSize = 8;
2615 assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
2616 "Shadow vector is too large!");
2617
2618 uint64_t Offset = 0;
2619 uint64_t LeftSize = Size;
2620 if (LeftSize >= ShadowVecSize) {
2621 auto *ShadowVecTy =
2622 FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
2623 Value *ShadowVec = PoisonValue::get(ShadowVecTy);
2624 for (unsigned I = 0; I != ShadowVecSize; ++I) {
2625 ShadowVec = IRB.CreateInsertElement(
2626 ShadowVec, PrimitiveShadow,
2627 ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
2628 }
2629 do {
2630 Value *CurShadowVecAddr =
2631 IRB.CreateConstGEP1_32(ShadowVecTy, ShadowAddr, Offset);
2632 IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
2633 LeftSize -= ShadowVecSize;
2634 ++Offset;
2635 } while (LeftSize >= ShadowVecSize);
2636 Offset *= ShadowVecSize;
2637 }
2638 while (LeftSize > 0) {
2639 Value *CurShadowAddr =
2640 IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
2641 IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
2642 --LeftSize;
2643 ++Offset;
2644 }
2645
2646 if (ShouldTrackOrigins) {
2647 storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
2648 InstAlignment);
2649 }
2650}
2651
2668
2669void DFSanVisitor::visitStoreInst(StoreInst &SI) {
2670 auto &DL = SI.getDataLayout();
2671 Value *Val = SI.getValueOperand();
2672 uint64_t Size = DL.getTypeStoreSize(Val->getType());
2673 if (Size == 0)
2674 return;
2675
2676 // When an application store is atomic, increase atomic ordering between
2677 // atomic application loads and stores to ensure happen-before order; load
2678 // shadow data after application data; store zero shadow data before
2679 // application data. This ensure shadow loads return either labels of the
2680 // initial application data or zeros.
2681 if (SI.isAtomic())
2682 SI.setOrdering(addReleaseOrdering(SI.getOrdering()));
2683
2684 const bool ShouldTrackOrigins =
2685 DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
2686 std::vector<Value *> Shadows;
2687 std::vector<Value *> Origins;
2688
2689 Value *Shadow =
2690 SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);
2691
2692 if (ShouldTrackOrigins) {
2693 Shadows.push_back(Shadow);
2694 Origins.push_back(DFSF.getOrigin(Val));
2695 }
2696
2697 Value *PrimitiveShadow;
2699 Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
2700 if (ShouldTrackOrigins) {
2701 Shadows.push_back(PtrShadow);
2702 Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
2703 }
2704 PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, SI.getIterator());
2705 } else {
2706 PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, SI.getIterator());
2707 }
2708 Value *Origin = nullptr;
2709 if (ShouldTrackOrigins)
2710 Origin = DFSF.combineOrigins(Shadows, Origins, SI.getIterator());
2711 DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
2712 PrimitiveShadow, Origin, SI.getIterator());
2713 if (ClEventCallbacks) {
2714 IRBuilder<> IRB(&SI);
2715 Value *Addr = SI.getPointerOperand();
2716 CallInst *CI =
2717 IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr});
2718 CI->addParamAttr(0, Attribute::ZExt);
2719 }
2720}
2721
2722void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
2724
2725 Value *Val = I.getOperand(1);
2726 const auto &DL = I.getDataLayout();
2727 uint64_t Size = DL.getTypeStoreSize(Val->getType());
2728 if (Size == 0)
2729 return;
2730
2731 // Conservatively set data at stored addresses and return with zero shadow to
2732 // prevent shadow data races.
2733 IRBuilder<> IRB(&I);
2734 Value *Addr = I.getOperand(0);
2735 const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
2736 DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, I.getIterator());
2737 DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
2738 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2739}
2740
// Atomic read-modify-write: zero the target's shadow and strengthen ordering.
void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setOrdering(addReleaseOrdering(I.getOrdering()));
}
2747
// Atomic compare-exchange: zero the target's shadow and strengthen ordering.
void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
}
2754
// Unary operators propagate their operand's label unchanged.
void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
  visitInstOperands(UO);
}
2758
// Binary operators union the labels of both operands into the result.
void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
  visitInstOperands(BO);
}
2762
2763void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
2764 // Special case: if this is the bitcast (there is exactly 1 allowed) between
2765 // a musttail call and a ret, don't instrument. New instructions are not
2766 // allowed after a musttail call.
2767 if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
2768 if (CI->isMustTailCall())
2769 return;
2770 visitInstOperands(BCI);
2771}
2772
// All other casts propagate operand labels to the result.
void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }
2774
// Comparisons union operand labels into the result; with -dfsan-event-callbacks
// the combined shadow is additionally reported to __dfsan_cmp_callback.
void DFSanVisitor::visitCmpInst(CmpInst &CI) {
  visitInstOperands(CI);
  if (ClEventCallbacks) {
    IRBuilder<> IRB(&CI);
    Value *CombinedShadow = DFSF.getShadow(&CI);
    CallInst *CallI =
        IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
    // ZExt: PrimitiveShadowTy may be illegal on the target ABI.
    CallI->addParamAttr(0, Attribute::ZExt);
  }
}
2785
// Landing pads produce untainted results; taint on thrown values flows
// through memory instead (see below).
void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
  // We do not need to track data through LandingPadInst.
  //
  // For the C++ exceptions, if a value is thrown, this value will be stored
  // in a memory location provided by __cxa_allocate_exception(...) (on the
  // throw side) or __cxa_begin_catch(...) (on the catch side).
  // This memory will have a shadow, so with the loads and stores we will be
  // able to propagate labels on data thrown through exceptions, without any
  // special handling of the LandingPadInst.
  //
  // The second element in the pair result of the LandingPadInst is a
  // register value, but it is for a type ID and should never be tainted.
  DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
  DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
}
2801
2802void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
2804 DFSF.isLookupTableConstant(
2806 visitInstOperands(GEPI);
2807 return;
2808 }
2809
2810 // Only propagate shadow/origin of base pointer value but ignore those of
2811 // offset operands.
2812 Value *BasePointer = GEPI.getPointerOperand();
2813 DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
2814 if (DFSF.DFS.shouldTrackOrigins())
2815 DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
2816}
2817
// Vector element extraction unions operand labels into the result.
void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
  visitInstOperands(I);
}
2821
// Vector element insertion unions operand labels into the result.
void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
  visitInstOperands(I);
}
2825
// Vector shuffles union operand labels into the result.
void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
  visitInstOperands(I);
}
2829
2830void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
2831 IRBuilder<> IRB(&I);
2832 Value *Agg = I.getAggregateOperand();
2833 Value *AggShadow = DFSF.getShadow(Agg);
2834 Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
2835 DFSF.setShadow(&I, ResShadow);
2836 visitInstOperandOrigins(I);
2837}
2838
2839void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
2840 IRBuilder<> IRB(&I);
2841 Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
2842 Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
2843 Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
2844 DFSF.setShadow(&I, Res);
2845 visitInstOperandOrigins(I);
2846}
2847
2848void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
2849 bool AllLoadsStores = true;
2850 for (User *U : I.users()) {
2851 if (isa<LoadInst>(U))
2852 continue;
2853
2854 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
2855 if (SI->getPointerOperand() == &I)
2856 continue;
2857 }
2858
2859 AllLoadsStores = false;
2860 break;
2861 }
2862 if (AllLoadsStores) {
2863 IRBuilder<> IRB(&I);
2864 DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
2865 if (DFSF.DFS.shouldTrackOrigins()) {
2866 DFSF.AllocaOriginMap[&I] =
2867 IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
2868 }
2869 }
2870 DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
2871 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2872}
2873
2874void DFSanVisitor::visitSelectInst(SelectInst &I) {
2875 Value *CondShadow = DFSF.getShadow(I.getCondition());
2876 Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
2877 Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
2878 Value *ShadowSel = nullptr;
2879 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2880 std::vector<Value *> Shadows;
2881 std::vector<Value *> Origins;
2882 Value *TrueOrigin =
2883 ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
2884 Value *FalseOrigin =
2885 ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;
2886
2887 DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());
2888
2889 if (isa<VectorType>(I.getCondition()->getType())) {
2890 ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
2891 FalseShadow, I.getIterator());
2892 if (ShouldTrackOrigins) {
2893 Shadows.push_back(TrueShadow);
2894 Shadows.push_back(FalseShadow);
2895 Origins.push_back(TrueOrigin);
2896 Origins.push_back(FalseOrigin);
2897 }
2898 } else {
2899 if (TrueShadow == FalseShadow) {
2900 ShadowSel = TrueShadow;
2901 if (ShouldTrackOrigins) {
2902 Shadows.push_back(TrueShadow);
2903 Origins.push_back(TrueOrigin);
2904 }
2905 } else {
2906 ShadowSel = SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow,
2907 "", I.getIterator());
2908 if (ShouldTrackOrigins) {
2909 Shadows.push_back(ShadowSel);
2910 Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
2911 FalseOrigin, "", I.getIterator()));
2912 }
2913 }
2914 }
2915 DFSF.setShadow(&I, ClTrackSelectControlFlow ? DFSF.combineShadowsThenConvert(
2916 I.getType(), CondShadow,
2917 ShadowSel, I.getIterator())
2918 : ShadowSel);
2919 if (ShouldTrackOrigins) {
2921 Shadows.push_back(CondShadow);
2922 Origins.push_back(DFSF.getOrigin(I.getCondition()));
2923 }
2924 DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, I.getIterator()));
2925 }
2926}
2927
// memset: delegate to the runtime's dfsan_set_label so the filled bytes get
// the value operand's label (and origin, when tracked).
void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
  IRBuilder<> IRB(&I);
  Value *ValShadow = DFSF.getShadow(I.getValue());
  Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
                         ? DFSF.getOrigin(I.getValue())
                         : DFSF.DFS.ZeroOrigin;
  IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
                 {ValShadow, ValOrigin, I.getDest(),
                  IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
}
2938
// memcpy/memmove: copy the origin range via the runtime, then copy the shadow
// range by cloning the transfer intrinsic onto the shadow addresses.
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
  IRBuilder<> IRB(&I);

  // CopyOrMoveOrigin transfers origins by referring to their shadows. So we
  // need to move origins before moving shadows.
  if (DFSF.DFS.shouldTrackOrigins()) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemOriginTransferFn,
        {I.getArgOperand(0), I.getArgOperand(1),
         IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
  }

  Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), I.getIterator());
  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), I.getIterator());
  // Shadow is ShadowWidthBytes per application byte, so scale the length.
  Value *LenShadow =
      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
                                                    DFSF.DFS.ShadowWidthBytes));
  auto *MTI = cast<MemTransferInst>(
      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
  MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne()));
  MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne()));
  if (ClEventCallbacks) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemTransferCallbackFn,
        {DestShadow, IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
  }
}
2967
// Conditional branches optionally report the condition's label via the
// conditional callback; unconditional branches need no instrumentation.
void DFSanVisitor::visitBranchInst(BranchInst &BR) {
  if (!BR.isConditional())
    return;

  DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());
}
2974
// Switches optionally report the condition's label via the conditional
// callback.
void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
  DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());
}
2978
2979static bool isAMustTailRetVal(Value *RetVal) {
2980 // Tail call may have a bitcast between return.
2981 if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
2982 RetVal = I->getOperand(0);
2983 }
2984 if (auto *I = dyn_cast<CallInst>(RetVal)) {
2985 return I->isMustTailCall();
2986 }
2987 return false;
2988}
2989
2990void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
2991 if (!DFSF.IsNativeABI && RI.getReturnValue()) {
2992 // Don't emit the instrumentation for musttail call returns.
2994 return;
2995
2996 Value *S = DFSF.getShadow(RI.getReturnValue());
2997 IRBuilder<> IRB(&RI);
2998 Type *RT = DFSF.F->getFunctionType()->getReturnType();
2999 unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
3000 if (Size <= RetvalTLSSize) {
3001 // If the size overflows, stores nothing. At callsite, oversized return
3002 // shadows are set to zero.
3003 IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
3004 }
3005 if (DFSF.DFS.shouldTrackOrigins()) {
3006 Value *O = DFSF.getOrigin(RI.getReturnValue());
3007 IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
3008 }
3009 }
3010}
3011
// Appends the shadow arguments a custom (__dfsw_/__dfso_) wrapper expects:
// one primitive shadow per fixed parameter, a pointer to an array of shadows
// for varargs, and a pointer the wrapper writes the return shadow into.
void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Adds non-variable argument shadows.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(
        DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), CB.getIterator()));

  // Adds variable argument shadows.
  if (FT->isVarArg()) {
    auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
                                     CB.arg_size() - FT->getNumParams());
    // Stack array holding one shadow per variadic argument.
    auto *LabelVAAlloca =
        new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
                       "labelva", DFSF.F->getEntryBlock().begin());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
      IRB.CreateStore(
          DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), CB.getIterator()),
          LabelVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
  }

  // Adds the return value shadow.
  if (!FT->getReturnType()->isVoidTy()) {
    // One alloca per function, reused across all wrapped call sites.
    if (!DFSF.LabelReturnAlloca) {
      DFSF.LabelReturnAlloca = new AllocaInst(
          DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
          "labelreturn", DFSF.F->getEntryBlock().begin());
    }
    Args.push_back(DFSF.LabelReturnAlloca);
  }
}
3052
// Appends the origin arguments a __dfso_ wrapper expects, mirroring
// addShadowArguments: one origin per fixed parameter, an array pointer for
// varargs, and a pointer for the return origin.
void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Add non-variable argument origins.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.getOrigin(*I));

  // Add variable argument origins.
  if (FT->isVarArg()) {
    auto *OriginVATy =
        ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
    // Stack array holding one origin per variadic argument.
    auto *OriginVAAlloca =
        new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
                       "originva", DFSF.F->getEntryBlock().begin());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
      IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
  }

  // Add the return value origin.
  if (!FT->getReturnType()->isVoidTy()) {
    // One alloca per function, reused across all wrapped call sites.
    if (!DFSF.OriginReturnAlloca) {
      DFSF.OriginReturnAlloca = new AllocaInst(
          DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
          "originreturn", DFSF.F->getEntryBlock().begin());
    }
    Args.push_back(DFSF.OriginReturnAlloca);
  }
}
3090
// Rewrites a call to an uninstrumented (ABI-listed) function according to its
// wrapper kind. Returns true if the call was fully handled here; false tells
// the caller (visitCallBase) to fall through to generic instrumentation.
bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
  IRBuilder<> IRB(&CB);
  switch (DFSF.DFS.getWrapperKind(&F)) {
  case DataFlowSanitizer::WK_Warning:
    // Call the real function but emit a runtime "unimplemented" warning and
    // return an untainted result.
    CB.setCalledFunction(&F);
    IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
                   IRB.CreateGlobalString(F.getName()));
    DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Discard:
    // Call the real function; drop all argument labels, result is untainted.
    CB.setCalledFunction(&F);
    DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Functional:
    // Call the real function; treat it as a pure function of its operands.
    CB.setCalledFunction(&F);
    DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
    visitInstOperands(CB);
    return true;
  case DataFlowSanitizer::WK_Custom:
    // Don't try to handle invokes of custom functions, it's too complicated.
    // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
    // wrapper.
    CallInst *CI = dyn_cast<CallInst>(&CB);
    if (!CI)
      return false;

    const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
    FunctionType *FT = F.getFunctionType();
    TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
    std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
    CustomFName += F.getName();
    FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
        CustomFName, CustomFn.TransformedType);
    if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
      CustomFn->copyAttributesFrom(&F);

      // Custom functions returning non-void will write to the return label.
      if (!FT->getReturnType()->isVoidTy()) {
        CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
      }
    }

    std::vector<Value *> Args;

    // Adds non-variable arguments.
    auto *I = CB.arg_begin();
    for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
      Args.push_back(*I);
    }

    // Adds shadow arguments.
    const unsigned ShadowArgStart = Args.size();
    addShadowArguments(F, CB, Args, IRB);

    // Adds origin arguments.
    const unsigned OriginArgStart = Args.size();
    if (ShouldTrackOrigins)
      addOriginArguments(F, CB, Args, IRB);

    // Adds variable arguments.
    append_range(Args, drop_begin(CB.args(), FT->getNumParams()));

    CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
    CustomCI->setCallingConv(CI->getCallingConv());
    CustomCI->setAttributes(transformFunctionAttributes(
        CustomFn, CI->getContext(), CI->getAttributes()));

    // Update the parameter attributes of the custom call instruction to
    // zero extend the shadow parameters. This is required for targets
    // which consider PrimitiveShadowTy an illegal type.
    for (unsigned N = 0; N < FT->getNumParams(); N++) {
      const unsigned ArgNo = ShadowArgStart + N;
      if (CustomCI->getArgOperand(ArgNo)->getType() ==
          DFSF.DFS.PrimitiveShadowTy)
        CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
      if (ShouldTrackOrigins) {
        const unsigned OriginArgNo = OriginArgStart + N;
        if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
            DFSF.DFS.OriginTy)
          CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
      }
    }

    // Loads the return value shadow and origin.
    if (!FT->getReturnType()->isVoidTy()) {
      LoadInst *LabelLoad =
          IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
      DFSF.setShadow(CustomCI,
                     DFSF.expandFromPrimitiveShadow(
                         FT->getReturnType(), LabelLoad, CB.getIterator()));
      if (ShouldTrackOrigins) {
        LoadInst *OriginLoad =
            IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
        DFSF.setOrigin(CustomCI, OriginLoad);
      }
    }

    CI->replaceAllUsesWith(CustomCI);
    CI->eraseFromParent();
    return true;
  }
  return false;
}
3198
// Builds a constant vector mapping each C ABI atomic ordering to one at least
// as strong as acquire, used to strengthen libatomic load calls.
Value *DFSanVisitor::makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
  constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
  uint32_t OrderingTable[NumOrderings] = {};

  // relaxed/consume/acquire -> acquire; release/acq_rel -> acq_rel;
  // seq_cst stays seq_cst.
  OrderingTable[(int)AtomicOrderingCABI::relaxed] =
      OrderingTable[(int)AtomicOrderingCABI::acquire] =
          OrderingTable[(int)AtomicOrderingCABI::consume] =
              (int)AtomicOrderingCABI::acquire;
  OrderingTable[(int)AtomicOrderingCABI::release] =
      OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
          (int)AtomicOrderingCABI::acq_rel;
  OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
      (int)AtomicOrderingCABI::seq_cst;

  return ConstantDataVector::get(IRB.getContext(), OrderingTable);
}
3215
// Instruments __atomic_load(size, src, dst, order): strengthens the ordering
// to at least acquire, then copies shadow/origin from src to dst after the
// call so the shadow matches the loaded data.
void DFSanVisitor::visitLibAtomicLoad(CallBase &CB) {
  // Since we use getNextNode here, we can't have CB terminate the BB.
  assert(isa<CallInst>(CB));

  IRBuilder<> IRB(&CB);
  Value *Size = CB.getArgOperand(0);
  Value *SrcPtr = CB.getArgOperand(1);
  Value *DstPtr = CB.getArgOperand(2);
  Value *Ordering = CB.getArgOperand(3);
  // Convert the call to have at least Acquire ordering to make sure
  // the shadow operations aren't reordered before it.
  Value *NewOrdering =
      IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
  CB.setArgOperand(3, NewOrdering);

  // Shadow must be copied *after* the application load (see visitStoreInst's
  // ordering rationale), hence the builder at the next instruction.
  IRBuilder<> NextIRB(CB.getNextNode());
  NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());

  // TODO: Support ClCombinePointerLabelsOnLoad
  // TODO: Support ClEventCallbacks

  NextIRB.CreateCall(
      DFSF.DFS.DFSanMemShadowOriginTransferFn,
      {DstPtr, SrcPtr, NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
3241
// Builds a constant vector mapping each C ABI atomic ordering to one at least
// as strong as release, used to strengthen libatomic store calls.
Value *DFSanVisitor::makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
  constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
  uint32_t OrderingTable[NumOrderings] = {};

  // relaxed/release -> release; consume/acquire/acq_rel -> acq_rel;
  // seq_cst stays seq_cst.
  OrderingTable[(int)AtomicOrderingCABI::relaxed] =
      OrderingTable[(int)AtomicOrderingCABI::release] =
          (int)AtomicOrderingCABI::release;
  OrderingTable[(int)AtomicOrderingCABI::consume] =
      OrderingTable[(int)AtomicOrderingCABI::acquire] =
          OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
              (int)AtomicOrderingCABI::acq_rel;
  OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
      (int)AtomicOrderingCABI::seq_cst;

  return ConstantDataVector::get(IRB.getContext(), OrderingTable);
}
3258
// Instruments __atomic_store(size, src, dst, order): strengthens the ordering
// to at least release and copies shadow/origin from src to dst before the
// call, mirroring the zero-shadow-before-data rule for atomic stores.
void DFSanVisitor::visitLibAtomicStore(CallBase &CB) {
  IRBuilder<> IRB(&CB);
  Value *Size = CB.getArgOperand(0);
  Value *SrcPtr = CB.getArgOperand(1);
  Value *DstPtr = CB.getArgOperand(2);
  Value *Ordering = CB.getArgOperand(3);
  // Convert the call to have at least Release ordering to make sure
  // the shadow operations aren't reordered after it.
  Value *NewOrdering =
      IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
  CB.setArgOperand(3, NewOrdering);

  // TODO: Support ClCombinePointerLabelsOnStore
  // TODO: Support ClEventCallbacks

  IRB.CreateCall(
      DFSF.DFS.DFSanMemShadowOriginTransferFn,
      {DstPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
3278
void DFSanVisitor::visitLibAtomicExchange(CallBase &CB) {
  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, int
  // ordering)
  IRBuilder<> IRB(&CB);
  Value *Size = CB.getArgOperand(0);
  Value *TargetPtr = CB.getArgOperand(1);
  Value *SrcPtr = CB.getArgOperand(2);
  Value *DstPtr = CB.getArgOperand(3);

  // This operation is not atomic for the shadow and origin memory.
  // This could result in DFSan false positives or false negatives.
  // For now we will assume these operations are rare, and
  // the additional complexity to address this is not warranted.

  // Current Target to Dest
  IRB.CreateCall(
      DFSF.DFS.DFSanMemShadowOriginTransferFn,
      {DstPtr, TargetPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});

  // Current Src to Target (overriding)
  IRB.CreateCall(
      DFSF.DFS.DFSanMemShadowOriginTransferFn,
      {TargetPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
3303
void DFSanVisitor::visitLibAtomicCompareExchange(CallBase &CB) {
  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, void
  // *desired, int success_order, int failure_order)
  Value *Size = CB.getArgOperand(0);
  Value *TargetPtr = CB.getArgOperand(1);
  Value *ExpectedPtr = CB.getArgOperand(2);
  Value *DesiredPtr = CB.getArgOperand(3);

  // This operation is not atomic for the shadow and origin memory.
  // This could result in DFSan false positives or false negatives.
  // For now we will assume these operations are rare, and
  // the additional complexity to address this is not warranted.

  // Shadow is transferred after the call so the runtime can use its boolean
  // result to pick the copy direction.
  IRBuilder<> NextIRB(CB.getNextNode());
  NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());

  // The boolean result itself is untainted.
  DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));

  // If original call returned true, copy Desired to Target.
  // If original call returned false, copy Target to Expected.
  NextIRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginConditionalExchangeFn,
                     {NextIRB.CreateIntCast(&CB, NextIRB.getInt8Ty(), false),
                      TargetPtr, ExpectedPtr, DesiredPtr,
                      NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
3329
3330void DFSanVisitor::visitCallBase(CallBase &CB) {
3332 if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
3333 visitInstOperands(CB);
3334 return;
3335 }
3336
3337 // Calls to this function are synthesized in wrappers, and we shouldn't
3338 // instrument them.
3339 if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
3340 return;
3341
3342 LibFunc LF;
3343 if (DFSF.TLI.getLibFunc(CB, LF)) {
3344 // libatomic.a functions need to have special handling because there isn't
3345 // a good way to intercept them or compile the library with
3346 // instrumentation.
3347 switch (LF) {
3348 case LibFunc_atomic_load:
3349 if (!isa<CallInst>(CB)) {
3350 llvm::errs() << "DFSAN -- cannot instrument invoke of libatomic load. "
3351 "Ignoring!\n";
3352 break;
3353 }
3354 visitLibAtomicLoad(CB);
3355 return;
3356 case LibFunc_atomic_store:
3357 visitLibAtomicStore(CB);
3358 return;
3359 default:
3360 break;
3361 }
3362 }
3363
3364 // TODO: These are not supported by TLI? They are not in the enum.
3365 if (F && F->hasName() && !F->isVarArg()) {
3366 if (F->getName() == "__atomic_exchange") {
3367 visitLibAtomicExchange(CB);
3368 return;
3369 }
3370 if (F->getName() == "__atomic_compare_exchange") {
3371 visitLibAtomicCompareExchange(CB);
3372 return;
3373 }
3374 }
3375
3376 DenseMap<Value *, Function *>::iterator UnwrappedFnIt =
3377 DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
3378 if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
3379 if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
3380 return;
3381
3382 IRBuilder<> IRB(&CB);
3383
3384 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
3385 FunctionType *FT = CB.getFunctionType();
3386 const DataLayout &DL = getDataLayout();
3387
3388 // Stores argument shadows.
3389 unsigned ArgOffset = 0;
3390 for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
3391 if (ShouldTrackOrigins) {
3392 // Ignore overflowed origins
3393 Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
3394 if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
3395 !DFSF.DFS.isZeroShadow(ArgShadow))
3396 IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
3397 DFSF.getArgOriginTLS(I, IRB));
3398 }
3399
3400 unsigned Size =
3401 DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
3402 // Stop storing if arguments' size overflows. Inside a function, arguments
3403 // after overflow have zero shadow values.
3404 if (ArgOffset + Size > ArgTLSSize)
3405 break;
3406 IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
3407 DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
3409 ArgOffset += alignTo(Size, ShadowTLSAlignment);
3410 }
3411
3412 Instruction *Next = nullptr;
3413 if (!CB.getType()->isVoidTy()) {
3414 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
3415 if (II->getNormalDest()->getSinglePredecessor()) {
3416 Next = &II->getNormalDest()->front();
3417 } else {
3418 BasicBlock *NewBB =
3419 SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
3420 Next = &NewBB->front();
3421 }
3422 } else {
3423 assert(CB.getIterator() != CB.getParent()->end());
3424 Next = CB.getNextNode();
3425 }
3426
3427 // Don't emit the epilogue for musttail call returns.
3428 if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
3429 return;
3430
3431 // Loads the return value shadow.
3432 IRBuilder<> NextIRB(Next);
3433 unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
3434 if (Size > RetvalTLSSize) {
3435 // Set overflowed return shadow to be zero.
3436 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3437 } else {
3438 LoadInst *LI = NextIRB.CreateAlignedLoad(
3439 DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
3440 ShadowTLSAlignment, "_dfsret");
3441 DFSF.SkipInsts.insert(LI);
3442 DFSF.setShadow(&CB, LI);
3443 DFSF.NonZeroChecks.push_back(LI);
3444 }
3445
3446 if (ShouldTrackOrigins) {
3447 LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
3448 DFSF.getRetvalOriginTLS(), "_dfsret_o");
3449 DFSF.SkipInsts.insert(LI);
3450 DFSF.setOrigin(&CB, LI);
3451 }
3452
3453 DFSF.addReachesFunctionCallbacksIfEnabled(NextIRB, CB, &CB);
3454 }
3455}
3456
// Creates shadow (and origin) phi nodes mirroring the application phi. The
// incoming values are poison placeholders here; PHIFixups patches them in
// once every predecessor's shadow is available.
void DFSanVisitor::visitPHINode(PHINode &PN) {
  Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
  PHINode *ShadowPN = PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "",
                                      PN.getIterator());

  // Give the shadow phi node valid predecessors to fool SplitEdge into working.
  Value *PoisonShadow = PoisonValue::get(ShadowTy);
  for (BasicBlock *BB : PN.blocks())
    ShadowPN->addIncoming(PoisonShadow, BB);

  DFSF.setShadow(&PN, ShadowPN);

  PHINode *OriginPN = nullptr;
  if (DFSF.DFS.shouldTrackOrigins()) {
    OriginPN = PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "",
                               PN.getIterator());
    Value *PoisonOrigin = PoisonValue::get(DFSF.DFS.OriginTy);
    for (BasicBlock *BB : PN.blocks())
      OriginPN->addIncoming(PoisonOrigin, BB);
    DFSF.setOrigin(&PN, OriginPN);
  }

  // Defer filling in real incoming values until all shadows exist.
  DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
}
3481
3484 // Return early if nosanitize_dataflow module flag is present for the module.
3485 if (checkIfAlreadyInstrumented(M, "nosanitize_dataflow"))
3486 return PreservedAnalyses::all();
3487 auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
3488 auto &FAM =
3490 return FAM.getResult<TargetLibraryAnalysis>(F);
3491 };
3492 if (!DataFlowSanitizer(ABIListFiles, FS).runImpl(M, GetTLI))
3493 return PreservedAnalyses::all();
3494
3496 // GlobalsAA is considered stateless and does not get invalidated unless
3497 // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
3498 // make changes that require GlobalsAA to be invalidated.
3499 PA.abandon<GlobalsAA>();
3500 return PA;
3501}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MemoryMapParams Linux_LoongArch64_MemoryMapParams
const MemoryMapParams Linux_X86_64_MemoryMapParams
static cl::opt< bool > ClAddGlobalNameSuffix("dfsan-add-global-name-suffix", cl::desc("Whether to add .dfsan suffix to global names"), cl::Hidden, cl::init(true))
static cl::opt< bool > ClTrackSelectControlFlow("dfsan-track-select-control-flow", cl::desc("Propagate labels from condition values of select instructions " "to results."), cl::Hidden, cl::init(true))
static cl::list< std::string > ClCombineTaintLookupTables("dfsan-combine-taint-lookup-table", cl::desc("When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables)."), cl::Hidden)
static const Align MinOriginAlignment
static cl::opt< int > ClTrackOrigins("dfsan-track-origins", cl::desc("Track origins of labels"), cl::Hidden, cl::init(0))
static cl::list< std::string > ClABIListFiles("dfsan-abilist", cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden)
static cl::opt< bool > ClReachesFunctionCallbacks("dfsan-reaches-function-callbacks", cl::desc("Insert calls to callback functions on data reaching a function."), cl::Hidden, cl::init(false))
static Value * expandFromPrimitiveShadowRecursive(Value *Shadow, SmallVector< unsigned, 4 > &Indices, Type *SubShadowTy, Value *PrimitiveShadow, IRBuilder<> &IRB)
static cl::opt< int > ClInstrumentWithCallThreshold("dfsan-instrument-with-call-threshold", cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(3500))
static cl::opt< bool > ClPreserveAlignment("dfsan-preserve-alignment", cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false))
static cl::opt< bool > ClDebugNonzeroLabels("dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label"), cl::Hidden)
static cl::opt< bool > ClCombineOffsetLabelsOnGEP("dfsan-combine-offset-labels-on-gep", cl::desc("Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic."), cl::Hidden, cl::init(true))
static cl::opt< bool > ClIgnorePersonalityRoutine("dfsan-ignore-personality-routine", cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it."), cl::Hidden, cl::init(false))
static const Align ShadowTLSAlignment
static AtomicOrdering addReleaseOrdering(AtomicOrdering AO)
static AtomicOrdering addAcquireOrdering(AtomicOrdering AO)
Value * StripPointerGEPsAndCasts(Value *V)
const MemoryMapParams Linux_AArch64_MemoryMapParams
static cl::opt< bool > ClConditionalCallbacks("dfsan-conditional-callbacks", cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, cl::init(false))
static cl::opt< bool > ClCombinePointerLabelsOnLoad("dfsan-combine-pointer-labels-on-load", cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory."), cl::Hidden, cl::init(true))
static StringRef getGlobalTypeString(const GlobalValue &G)
static cl::opt< bool > ClCombinePointerLabelsOnStore("dfsan-combine-pointer-labels-on-store", cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory."), cl::Hidden, cl::init(false))
static const unsigned ArgTLSSize
static const unsigned RetvalTLSSize
static bool isAMustTailRetVal(Value *RetVal)
static cl::opt< bool > ClEventCallbacks("dfsan-event-callbacks", cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false))
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC)
Definition ExpandFp.cpp:993
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
#define T
nvptx lower args
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
Defines the virtual file system interface vfs::FileSystem.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
const Instruction & front() const
Definition BasicBlock.h:482
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
bool isConditional() const
Value * getCondition() const
bool isInlineAsm() const
Check if this call is an inline asm statement.
void setCallingConv(CallingConv::ID CC)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI unsigned getLine() const
Definition DebugLoc.cpp:52
LLVM_ABI DILocation * get() const
Get the underlying DILocation.
Definition DebugLoc.cpp:48
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
Type * getReturnType() const
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
const BasicBlock & getEntryBlock() const
Definition Function.h:807
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
void removeFnAttrs(const AttributeMask &Attrs)
Definition Function.cpp:696
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
void removeFnAttr(Attribute::AttrKind Kind)
Remove function attributes from this function.
Definition Function.cpp:688
arg_iterator arg_begin()
Definition Function.h:866
void removeRetAttrs(const AttributeMask &Attrs)
removes the attributes from the return value list of attributes.
Definition Function.cpp:708
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition Function.cpp:859
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Globals.cpp:628
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
static bool isExternalWeakLinkage(LinkageTypes Linkage)
LinkageTypes getLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition GlobalValue.h:52
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition GlobalValue.h:56
Type * getValueType() const
Analysis pass providing a never-invalidated alias analysis result.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2579
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1939
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition IRBuilder.h:1833
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2633
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2567
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1867
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2254
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2626
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
Definition IRBuilder.h:2032
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2202
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2039
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition IRBuilder.h:567
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2336
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:1926
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1850
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1492
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1551
Value * CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
Definition IRBuilder.h:2019
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1863
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2280
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1886
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1599
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
LLVM_ABI GlobalVariable * CreateGlobalString(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr, bool AddNull=true)
Make a new global variable with initializer type i8*.
Definition IRBuilder.cpp:44
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
bool isTerminator() const
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
void setAlignment(Align Align)
Value * getPointerOperand()
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static MemoryEffectsBase readOnly()
Definition ModRef.h:130
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
const std::string & getModuleInlineAsm() const
Get any module-scope inline assembly blocks.
Definition Module.h:289
void setModuleInlineAsm(StringRef Asm)
Set the module-scope inline assembly blocks.
Definition Module.h:328
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition Module.cpp:206
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & abandon()
Mark an analysis as abandoned.
Definition Analysis.h:171
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static LLVM_ABI std::unique_ptr< SpecialCaseList > createOrDie(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS)
Parses the special case list entries from files.
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition StringMap.h:285
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
void insert_range(Range &&R)
Definition StringSet.h:49
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
Value * getCondition() const
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
@ loongarch64
Definition Triple.h:65
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
bool hasName() const
Definition Value.h:262
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool includes(R1 &&Range1, R2 &&Range2)
Provide wrappers to std::includes which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1934
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2128
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
iterator_range< df_iterator< T > > depth_first(const T &G)
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Remove all blocks that can not be reached from the function's entry.
Definition Local.cpp:2883
LLVM_ABI bool checkIfAlreadyInstrumented(Module &M, StringRef Flag)
Check if module has flag attached, if not add the flag.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77