LLVM 19.0.0git
DataFlowSanitizer.cpp
Go to the documentation of this file.
1//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
11/// analysis.
12///
13/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
14/// class of bugs on its own. Instead, it provides a generic dynamic data flow
15/// analysis framework to be used by clients to help detect application-specific
16/// issues within their own code.
17///
18/// The analysis is based on automatic propagation of data flow labels (also
19/// known as taint labels) through a program as it performs computation.
20///
21/// Argument and return value labels are passed through TLS variables
22/// __dfsan_arg_tls and __dfsan_retval_tls.
23///
24/// Each byte of application memory is backed by a shadow memory byte. The
25/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
26/// laid out as follows:
27///
28/// +--------------------+ 0x800000000000 (top of memory)
29/// | application 3 |
30/// +--------------------+ 0x700000000000
31/// | invalid |
32/// +--------------------+ 0x610000000000
33/// | origin 1 |
34/// +--------------------+ 0x600000000000
35/// | application 2 |
36/// +--------------------+ 0x510000000000
37/// | shadow 1 |
38/// +--------------------+ 0x500000000000
39/// | invalid |
40/// +--------------------+ 0x400000000000
41/// | origin 3 |
42/// +--------------------+ 0x300000000000
43/// | shadow 3 |
44/// +--------------------+ 0x200000000000
45/// | origin 2 |
46/// +--------------------+ 0x110000000000
47/// | invalid |
48/// +--------------------+ 0x100000000000
49/// | shadow 2 |
50/// +--------------------+ 0x010000000000
51/// | application 1 |
52/// +--------------------+ 0x000000000000
53///
54/// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
55/// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
56///
57/// For more information, please refer to the design document:
58/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
59//
60//===----------------------------------------------------------------------===//
61
63#include "llvm/ADT/DenseMap.h"
64#include "llvm/ADT/DenseSet.h"
68#include "llvm/ADT/StringRef.h"
69#include "llvm/ADT/StringSet.h"
70#include "llvm/ADT/iterator.h"
75#include "llvm/IR/Argument.h"
77#include "llvm/IR/Attributes.h"
78#include "llvm/IR/BasicBlock.h"
79#include "llvm/IR/Constant.h"
80#include "llvm/IR/Constants.h"
81#include "llvm/IR/DataLayout.h"
83#include "llvm/IR/Dominators.h"
84#include "llvm/IR/Function.h"
85#include "llvm/IR/GlobalAlias.h"
86#include "llvm/IR/GlobalValue.h"
88#include "llvm/IR/IRBuilder.h"
89#include "llvm/IR/InstVisitor.h"
90#include "llvm/IR/InstrTypes.h"
91#include "llvm/IR/Instruction.h"
94#include "llvm/IR/MDBuilder.h"
95#include "llvm/IR/Module.h"
96#include "llvm/IR/PassManager.h"
97#include "llvm/IR/Type.h"
98#include "llvm/IR/User.h"
99#include "llvm/IR/Value.h"
101#include "llvm/Support/Casting.h"
110#include <algorithm>
111#include <cassert>
112#include <cstddef>
113#include <cstdint>
114#include <memory>
115#include <set>
116#include <string>
117#include <utility>
118#include <vector>
119
120using namespace llvm;
121
122// This must be consistent with ShadowWidthBits.
124
126
// The size of TLS variables. These constants must be kept in sync with the
// ones in dfsan.cpp.
// constexpr (rather than static const) makes these usable in constant
// expressions and guarantees no runtime initialization.
static constexpr unsigned ArgTLSSize = 800;
static constexpr unsigned RetvalTLSSize = 800;
131
132// The -dfsan-preserve-alignment flag controls whether this pass assumes that
133// alignment requirements provided by the input IR are correct. For example,
134// if the input IR contains a load with alignment 8, this flag will cause
135// the shadow load to have alignment 16. This flag is disabled by default as
136// we have unfortunately encountered too much code (including Clang itself;
137// see PR14291) which performs misaligned access.
139 "dfsan-preserve-alignment",
140 cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
141 cl::init(false));
142
143// The ABI list files control how shadow parameters are passed. The pass treats
144// every function labelled "uninstrumented" in the ABI list file as conforming
145// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
146// additional annotations for those functions, a call to one of those functions
147// will produce a warning message, as the labelling behaviour of the function is
148// unknown. The other supported annotations for uninstrumented functions are
149// "functional" and "discard", which are described below under
150// DataFlowSanitizer::WrapperKind.
151// Functions will often be labelled with both "uninstrumented" and one of
152// "functional" or "discard". This will leave the function unchanged by this
153// pass, and create a wrapper function that will call the original.
154//
155// Instrumented functions can also be annotated as "force_zero_labels", which
156// will make all shadow and return values set zero labels.
157// Functions should never be labelled with both "force_zero_labels" and
158// "uninstrumented" or any of the unistrumented wrapper kinds.
160 "dfsan-abilist",
161 cl::desc("File listing native ABI functions and how the pass treats them"),
162 cl::Hidden);
163
164// Controls whether the pass includes or ignores the labels of pointers in load
165// instructions.
167 "dfsan-combine-pointer-labels-on-load",
168 cl::desc("Combine the label of the pointer with the label of the data when "
169 "loading from memory."),
170 cl::Hidden, cl::init(true));
171
172// Controls whether the pass includes or ignores the labels of pointers in
173// stores instructions.
175 "dfsan-combine-pointer-labels-on-store",
176 cl::desc("Combine the label of the pointer with the label of the data when "
177 "storing in memory."),
178 cl::Hidden, cl::init(false));
179
180// Controls whether the pass propagates labels of offsets in GEP instructions.
182 "dfsan-combine-offset-labels-on-gep",
183 cl::desc(
184 "Combine the label of the offset with the label of the pointer when "
185 "doing pointer arithmetic."),
186 cl::Hidden, cl::init(true));
187
189 "dfsan-combine-taint-lookup-table",
190 cl::desc(
191 "When dfsan-combine-offset-labels-on-gep and/or "
192 "dfsan-combine-pointer-labels-on-load are false, this flag can "
193 "be used to re-enable combining offset and/or pointer taint when "
194 "loading specific constant global variables (i.e. lookup tables)."),
195 cl::Hidden);
196
198 "dfsan-debug-nonzero-labels",
199 cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
200 "load or return with a nonzero label"),
201 cl::Hidden);
202
203// Experimental feature that inserts callbacks for certain data events.
204// Currently callbacks are only inserted for loads, stores, memory transfers
205// (i.e. memcpy and memmove), and comparisons.
206//
207// If this flag is set to true, the user must provide definitions for the
208// following callback functions:
209// void __dfsan_load_callback(dfsan_label Label, void* addr);
210// void __dfsan_store_callback(dfsan_label Label, void* addr);
211// void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
212// void __dfsan_cmp_callback(dfsan_label CombinedLabel);
214 "dfsan-event-callbacks",
215 cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
216 cl::Hidden, cl::init(false));
217
218// Experimental feature that inserts callbacks for conditionals, including:
219// conditional branch, switch, select.
220// This must be true for dfsan_set_conditional_callback() to have effect.
222 "dfsan-conditional-callbacks",
223 cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
224 cl::init(false));
225
226// Experimental feature that inserts callbacks for data reaching a function,
227// either via function arguments and loads.
228// This must be true for dfsan_set_reaches_function_callback() to have effect.
230 "dfsan-reaches-function-callbacks",
231 cl::desc("Insert calls to callback functions on data reaching a function."),
232 cl::Hidden, cl::init(false));
233
234// Controls whether the pass tracks the control flow of select instructions.
236 "dfsan-track-select-control-flow",
237 cl::desc("Propagate labels from condition values of select instructions "
238 "to results."),
239 cl::Hidden, cl::init(true));
240
241// TODO: This default value follows MSan. DFSan may use a different value.
243 "dfsan-instrument-with-call-threshold",
244 cl::desc("If the function being instrumented requires more than "
245 "this number of origin stores, use callbacks instead of "
246 "inline checks (-1 means never use callbacks)."),
247 cl::Hidden, cl::init(3500));
248
249// Controls how to track origins.
250// * 0: do not track origins.
251// * 1: track origins at memory store operations.
252// * 2: track origins at memory load and store operations.
253// TODO: track callsites.
254static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
255 cl::desc("Track origins of labels"),
256 cl::Hidden, cl::init(0));
257
259 "dfsan-ignore-personality-routine",
260 cl::desc("If a personality routine is marked uninstrumented from the ABI "
261 "list, do not create a wrapper for it."),
262 cl::Hidden, cl::init(false));
263
265 // Types of GlobalVariables are always pointer types.
266 Type *GType = G.getValueType();
267 // For now we support excluding struct types only.
268 if (StructType *SGType = dyn_cast<StructType>(GType)) {
269 if (!SGType->isLiteral())
270 return SGType->getName();
271 }
272 return "<unknown type>";
273}
274
275namespace {
276
// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = (OriginBase + Offset) & ~3ULL
struct MemoryMapParams {
  uint64_t AndMask;    // Bits cleared from the application address (0 = unused).
  uint64_t XorMask;    // XORed into the masked address to form the offset.
  uint64_t ShadowBase; // Added to the offset to form the shadow address.
  uint64_t OriginBase; // Added to the offset to form the origin address.
};
287
288} // end anonymous namespace
289
// NOLINTBEGIN(readability-identifier-naming)
// aarch64 Linux
const MemoryMapParams Linux_AArch64_MemoryMapParams = {
    0,               // AndMask (not used)
    0x0B00000000000, // XorMask
    0,               // ShadowBase (not used)
    0x0200000000000, // OriginBase
};

// x86_64 Linux
// These values correspond to the memory layout diagram in the file header:
// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000,
// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000.
const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};
// NOLINTEND(readability-identifier-naming)
307
308// loongarch64 Linux
309const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
310 0, // AndMask (not used)
311 0x500000000000, // XorMask
312 0, // ShadowBase (not used)
313 0x100000000000, // OriginBase
314};
315
316namespace {
317
318class DFSanABIList {
319 std::unique_ptr<SpecialCaseList> SCL;
320
321public:
322 DFSanABIList() = default;
323
324 void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
325
326 /// Returns whether either this function or its source file are listed in the
327 /// given category.
328 bool isIn(const Function &F, StringRef Category) const {
329 return isIn(*F.getParent(), Category) ||
330 SCL->inSection("dataflow", "fun", F.getName(), Category);
331 }
332
333 /// Returns whether this global alias is listed in the given category.
334 ///
335 /// If GA aliases a function, the alias's name is matched as a function name
336 /// would be. Similarly, aliases of globals are matched like globals.
337 bool isIn(const GlobalAlias &GA, StringRef Category) const {
338 if (isIn(*GA.getParent(), Category))
339 return true;
340
341 if (isa<FunctionType>(GA.getValueType()))
342 return SCL->inSection("dataflow", "fun", GA.getName(), Category);
343
344 return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
345 SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
346 Category);
347 }
348
349 /// Returns whether this module is listed in the given category.
350 bool isIn(const Module &M, StringRef Category) const {
351 return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
352 }
353};
354
355/// TransformedFunction is used to express the result of transforming one
356/// function type into another. This struct is immutable. It holds metadata
357/// useful for updating calls of the old function to the new type.
358struct TransformedFunction {
359 TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
360 std::vector<unsigned> ArgumentIndexMapping)
361 : OriginalType(OriginalType), TransformedType(TransformedType),
362 ArgumentIndexMapping(ArgumentIndexMapping) {}
363
364 // Disallow copies.
365 TransformedFunction(const TransformedFunction &) = delete;
366 TransformedFunction &operator=(const TransformedFunction &) = delete;
367
368 // Allow moves.
369 TransformedFunction(TransformedFunction &&) = default;
370 TransformedFunction &operator=(TransformedFunction &&) = default;
371
372 /// Type of the function before the transformation.
373 FunctionType *OriginalType;
374
375 /// Type of the function after the transformation.
377
378 /// Transforming a function may change the position of arguments. This
379 /// member records the mapping from each argument's old position to its new
380 /// position. Argument positions are zero-indexed. If the transformation
381 /// from F to F' made the first argument of F into the third argument of F',
382 /// then ArgumentIndexMapping[0] will equal 2.
383 std::vector<unsigned> ArgumentIndexMapping;
384};
385
386/// Given function attributes from a call site for the original function,
387/// return function attributes appropriate for a call to the transformed
388/// function.
390transformFunctionAttributes(const TransformedFunction &TransformedFunction,
391 LLVMContext &Ctx, AttributeList CallSiteAttrs) {
392
393 // Construct a vector of AttributeSet for each function argument.
394 std::vector<llvm::AttributeSet> ArgumentAttributes(
395 TransformedFunction.TransformedType->getNumParams());
396
397 // Copy attributes from the parameter of the original function to the
398 // transformed version. 'ArgumentIndexMapping' holds the mapping from
399 // old argument position to new.
400 for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
401 I < IE; ++I) {
402 unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
403 ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
404 }
405
406 // Copy annotations on varargs arguments.
407 for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
408 IE = CallSiteAttrs.getNumAttrSets();
409 I < IE; ++I) {
410 ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
411 }
412
413 return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
414 CallSiteAttrs.getRetAttrs(),
415 llvm::ArrayRef(ArgumentAttributes));
416}
417
/// Module-level state for the DataFlowSanitizer pass: cached IR types and
/// constants, declarations of the __dfsan_* runtime entry points, the ABI
/// list, and the platform memory-map parameters. The per-function work is
/// done by DFSanFunction/DFSanVisitor, which are friends of this class.
class DataFlowSanitizer {
  friend struct DFSanFunction;
  friend class DFSanVisitor;

  // One shadow byte per application byte; a shadow byte holds up to 8 labels
  // (see the file header comment).
  enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };

  // Origins are 32-bit chain IDs.
  enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };

  /// How should calls to uninstrumented functions be handled?
  enum WrapperKind {
    /// This function is present in an uninstrumented form but we don't know
    /// how it should be handled. Print a warning and call the function anyway.
    /// Don't label the return value.
    WK_Warning,

    /// This function does not write to (user-accessible) memory, and its return
    /// value is unlabelled.
    WK_Discard,

    /// This function does not write to (user-accessible) memory, and the label
    /// of its return value is the union of the label of its arguments.
    WK_Functional,

    /// Instead of calling the function, a custom wrapper __dfsw_F is called,
    /// where F is the name of the function. This function may wrap the
    /// original function or provide its own implementation. WK_Custom uses an
    /// extra pointer argument to return the shadow. This allows the wrapped
    /// form of the function type to be expressed in C.
    WK_Custom
  };

  // Cached module/context and frequently used types and constants.
  Module *Mod;
  LLVMContext *Ctx;
  Type *Int8Ptr;
  IntegerType *OriginTy;
  PointerType *OriginPtrTy;
  ConstantInt *ZeroOrigin;
  /// The shadow type for all primitive types and vector types.
  IntegerType *PrimitiveShadowTy;
  PointerType *PrimitiveShadowPtrTy;
  IntegerType *IntptrTy;
  ConstantInt *ZeroPrimitiveShadow;
  // TLS globals used to pass argument/return shadows and origins between
  // instrumented callers and callees (__dfsan_arg_tls / __dfsan_retval_tls,
  // see the file header).
  Constant *ArgTLS;
  ArrayType *ArgOriginTLSTy;
  Constant *ArgOriginTLS;
  Constant *RetvalTLS;
  Constant *RetvalOriginTLS;
  // Signatures of the __dfsan_* runtime entry points...
  FunctionType *DFSanUnionLoadFnTy;
  FunctionType *DFSanLoadLabelAndOriginFnTy;
  FunctionType *DFSanUnimplementedFnTy;
  FunctionType *DFSanWrapperExternWeakNullFnTy;
  FunctionType *DFSanSetLabelFnTy;
  FunctionType *DFSanNonzeroLabelFnTy;
  FunctionType *DFSanVarargWrapperFnTy;
  FunctionType *DFSanConditionalCallbackFnTy;
  FunctionType *DFSanConditionalCallbackOriginFnTy;
  FunctionType *DFSanReachesFunctionCallbackFnTy;
  FunctionType *DFSanReachesFunctionCallbackOriginFnTy;
  FunctionType *DFSanCmpCallbackFnTy;
  FunctionType *DFSanLoadStoreCallbackFnTy;
  FunctionType *DFSanMemTransferCallbackFnTy;
  FunctionType *DFSanChainOriginFnTy;
  FunctionType *DFSanChainOriginIfTaintedFnTy;
  FunctionType *DFSanMemOriginTransferFnTy;
  FunctionType *DFSanMemShadowOriginTransferFnTy;
  FunctionType *DFSanMemShadowOriginConditionalExchangeFnTy;
  FunctionType *DFSanMaybeStoreOriginFnTy;
  // ...and the declared callees themselves.
  FunctionCallee DFSanUnionLoadFn;
  FunctionCallee DFSanLoadLabelAndOriginFn;
  FunctionCallee DFSanUnimplementedFn;
  FunctionCallee DFSanWrapperExternWeakNullFn;
  FunctionCallee DFSanSetLabelFn;
  FunctionCallee DFSanNonzeroLabelFn;
  FunctionCallee DFSanVarargWrapperFn;
  FunctionCallee DFSanLoadCallbackFn;
  FunctionCallee DFSanStoreCallbackFn;
  FunctionCallee DFSanMemTransferCallbackFn;
  FunctionCallee DFSanConditionalCallbackFn;
  FunctionCallee DFSanConditionalCallbackOriginFn;
  FunctionCallee DFSanReachesFunctionCallbackFn;
  FunctionCallee DFSanReachesFunctionCallbackOriginFn;
  FunctionCallee DFSanCmpCallbackFn;
  FunctionCallee DFSanChainOriginFn;
  FunctionCallee DFSanChainOriginIfTaintedFn;
  FunctionCallee DFSanMemOriginTransferFn;
  FunctionCallee DFSanMemShadowOriginTransferFn;
  FunctionCallee DFSanMemShadowOriginConditionalExchangeFn;
  FunctionCallee DFSanMaybeStoreOriginFn;
  // Set of the runtime functions above, used to recognize and skip them.
  SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
  MDNode *ColdCallWeights;
  MDNode *OriginStoreWeights;
  DFSanABIList ABIList;
  DenseMap<Value *, Function *> UnwrappedFnMap;
  AttributeMask ReadOnlyNoneAttrs;
  StringSet<> CombineTaintLookupTableNames;

  /// Memory map parameters used in calculation mapping application addresses
  /// to shadow addresses and origin addresses.
  const MemoryMapParams *MapParams;

  Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
  Value *getShadowAddress(Value *Addr, BasicBlock::iterator Pos);
  Value *getShadowAddress(Value *Addr, BasicBlock::iterator Pos,
                          Value *ShadowOffset);
  std::pair<Value *, Value *> getShadowOriginAddress(Value *Addr,
                                                     Align InstAlignment,
  // NOTE(review): the final parameter line of getShadowOriginAddress (likely
  // "BasicBlock::iterator Pos);", matching the sibling declarations above)
  // was lost in extraction — verify against upstream before building.
  bool isInstrumented(const Function *F);
  bool isInstrumented(const GlobalAlias *GA);
  bool isForceZeroLabels(const Function *F);
  TransformedFunction getCustomFunctionType(FunctionType *T);
  WrapperKind getWrapperKind(Function *F);
  void addGlobalNameSuffix(GlobalValue *GV);
  void buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, Function *F);
  Function *buildWrapperFunction(Function *F, StringRef NewFName,
  // NOTE(review): a parameter line was lost in extraction here (presumably
  // the wrapper's linkage type) — verify against upstream.
                                 FunctionType *NewFT);
  void initializeCallbackFunctions(Module &M);
  void initializeRuntimeFunctions(Module &M);
  bool initializeModule(Module &M);

  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
  /// from it. Returns the origin's loaded value.
  Value *loadNextOrigin(BasicBlock::iterator Pos, Align OriginAlign,
                        Value **OriginAddr);

  /// Returns whether the given load byte size is amenable to inlined
  /// optimization patterns.
  bool hasLoadSizeForFastPath(uint64_t Size);

  /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
  bool shouldTrackOrigins();

  /// Returns a zero constant with the shadow type of OrigTy.
  ///
  /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2,...}
  /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
  /// getZeroShadow(other type) = i16(0)
  Constant *getZeroShadow(Type *OrigTy);
  /// Returns a zero constant with the shadow type of V's type.
  Constant *getZeroShadow(Value *V);

  /// Checks if V is a zero shadow.
  bool isZeroShadow(Value *V);

  /// Returns the shadow type of OrigTy.
  ///
  /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
  /// getShadowTy([n x T]) = [n x getShadowTy(T)]
  /// getShadowTy(other type) = i16
  Type *getShadowTy(Type *OrigTy);
  /// Returns the shadow type of V's type.
  Type *getShadowTy(Value *V);

  // Number of 4-byte origin slots available in the argument-origin TLS array.
  const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;

public:
  DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);

  bool runImpl(Module &M,
  // NOTE(review): the remaining parameter line(s) of runImpl were lost in
  // extraction (presumably a callback for obtaining per-function analyses) —
  // verify against upstream.
};
580
/// Per-function instrumentation state: caches the shadow and origin value
/// computed for each IR value while one function is being instrumented, and
/// provides the helpers that emit shadow/origin propagation IR.
struct DFSanFunction {
  DataFlowSanitizer &DFS;
  Function *F;
  DominatorTree DT;
  bool IsNativeABI;
  bool IsForceZeroLabels;
  // NOTE(review): a member declaration was lost in extraction here — the
  // constructor below initializes TLI(TLI), so it is presumably
  // "TargetLibraryInfo &TLI;". Verify against upstream.
  AllocaInst *LabelReturnAlloca = nullptr;
  AllocaInst *OriginReturnAlloca = nullptr;
  // Per-value caches of the shadow/origin computed for each IR value.
  DenseMap<Value *, Value *> ValShadowMap;
  DenseMap<Value *, Value *> ValOriginMap;
  // NOTE(review): one or more member declarations appear to have been lost in
  // extraction here — verify against upstream.

  // Shadow/origin PHI nodes are created empty while visiting and patched up
  // afterwards, once the incoming values' shadows exist.
  struct PHIFixupElement {
    PHINode *Phi;
    PHINode *ShadowPhi;
    PHINode *OriginPhi;
  };
  std::vector<PHIFixupElement> PHIFixups;

  DenseSet<Instruction *> SkipInsts;
  std::vector<Value *> NonZeroChecks;

  struct CachedShadow {
    BasicBlock *Block; // The block where Shadow is defined.
    Value *Shadow;
  };
  /// Maps a value to its latest shadow value in terms of domination tree.
  DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
  /// Maps a value to its latest collapsed shadow value it was converted to in
  /// terms of domination tree. When ClDebugNonzeroLabels is on, this cache is
  /// used at a post process where CFG blocks are split. So it does not cache
  /// BasicBlock like CachedShadows, but uses domination between values.
  DenseMap<Value *, Value *> CachedCollapsedShadows;
  // NOTE(review): a member declaration appears to have been lost in extraction
  // here — verify against upstream.

  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
                bool IsForceZeroLabels, TargetLibraryInfo &TLI)
      : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
        IsForceZeroLabels(IsForceZeroLabels), TLI(TLI) {
    // Domination queries back the shadow caches above.
    DT.recalculate(*F);
  }

  /// Computes the shadow address for a given function argument.
  ///
  /// Shadow = ArgTLS+ArgOffset.
  Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);

  /// Computes the shadow address for a return value.
  Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);

  /// Computes the origin address for a given function argument.
  ///
  /// Origin = ArgOriginTLS[ArgNo].
  Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);

  /// Computes the origin address for a return value.
  Value *getRetvalOriginTLS();

  Value *getOrigin(Value *V);
  void setOrigin(Instruction *I, Value *Origin);
  /// Generates IR to compute the origin of the last operand with a taint label.
  Value *combineOperandOrigins(Instruction *Inst);
  /// Before the instruction Pos, generates IR to compute the last origin with a
  /// taint label. Labels and origins are from vectors Shadows and Origins
  /// correspondingly. The generated IR is like
  /// Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
  /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
  /// zeros with other bitwidths.
  Value *combineOrigins(const std::vector<Value *> &Shadows,
                        const std::vector<Value *> &Origins,
                        BasicBlock::iterator Pos, ConstantInt *Zero = nullptr);

  Value *getShadow(Value *V);
  void setShadow(Instruction *I, Value *Shadow);
  /// Generates IR to compute the union of the two given shadows, inserting it
  /// before Pos. The combined value is with primitive type.
  Value *combineShadows(Value *V1, Value *V2, BasicBlock::iterator Pos);
  /// Combines the shadow values of V1 and V2, then converts the combined value
  /// with primitive type into a shadow value with the original type T.
  Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
  // NOTE(review): the trailing parameter line (likely
  // "BasicBlock::iterator Pos);") was lost in extraction — verify upstream.
  Value *combineOperandShadows(Instruction *Inst);

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  ///
  /// When tracking loads is enabled, the returned origin is a chain at the
  /// current stack if the returned shadow is tainted.
  std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
  // NOTE(review): the trailing parameter line (likely
  // "BasicBlock::iterator Pos);") was lost in extraction — verify upstream.

  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                  Align InstAlignment, Value *PrimitiveShadow,
                                  Value *Origin, BasicBlock::iterator Pos);
  /// Applies PrimitiveShadow to all primitive subtypes of T, returning
  /// the expanded shadow value.
  ///
  /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
  /// EFP([n x T], PS) = [n x EFP(T,PS)]
  /// EFP(other types, PS) = PS
  Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
  // NOTE(review): the trailing parameter line (likely
  // "BasicBlock::iterator Pos);") was lost in extraction — verify upstream.
  /// Collapses Shadow into a single primitive shadow value, unioning all
  /// primitive shadow values in the process. Returns the final primitive
  /// shadow value.
  ///
  /// CTP({V1,V2, ...}) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP([V1,V2,...]) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP(other types, PS) = PS
  Value *collapseToPrimitiveShadow(Value *Shadow, BasicBlock::iterator Pos);

  void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
  // NOTE(review): the trailing parameter line (likely
  // "BasicBlock::iterator Pos);") was lost in extraction — verify upstream.

  Align getShadowAlign(Align InstAlignment);

  // If ClConditionalCallbacks is enabled, insert a callback after a given
  // branch instruction using the given conditional expression.
  void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);

  // If ClReachesFunctionCallbacks is enabled, insert a callback for each
  // argument and load instruction.
  void addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, Instruction &I,
                                            Value *Data);

  bool isLookupTableConstant(Value *P);

private:
  /// Collapses the shadow with aggregate type into a single primitive shadow
  /// value.
  template <class AggregateType>
  Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                 IRBuilder<> &IRB);

  Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);

  /// Returns the shadow value of an argument A.
  Value *getShadowForTLSArgument(Argument *A);

  /// The fast path of loading shadows.
  std::pair<Value *, Value *>
  loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
                 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
  // NOTE(review): the trailing parameter line (likely
  // "BasicBlock::iterator Pos);") was lost in extraction — verify upstream.

  Align getOriginAlign(Align InstAlignment);

  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
  /// is __dfsan_load_label_and_origin. This function returns the union of all
  /// labels and the origin of the first taint label. However this is an
  /// additional call with many instructions. To ensure common cases are fast,
  /// checks if it is possible to load labels and origins without using the
  /// callback function.
  ///
  /// When enabling tracking load instructions, we always use
  /// __dfsan_load_label_and_origin to reduce code size.
  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);

  /// Returns a chain at the current stack with previous origin V.
  Value *updateOrigin(Value *V, IRBuilder<> &IRB);

  /// Returns a chain at the current stack with previous origin V if Shadow is
  /// tainted.
  Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);

  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
  /// Origin otherwise.
  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);

  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
  /// Size).
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
                   uint64_t StoreOriginSize, Align Alignment);

  /// Stores Origin in terms of its Shadow value.
  /// * Do not write origins for zero shadows because we do not trace origins
  ///   for untainted sinks.
  /// * Use __dfsan_maybe_store_origin if there are too many origin store
  ///   instrumentations.
  void storeOrigin(BasicBlock::iterator Pos, Value *Addr, uint64_t Size,
                   Value *Shadow, Value *Origin, Value *StoreOriginAddr,
                   Align InstAlignment);

  /// Convert a scalar value to an i1 by comparing with 0.
  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");

  bool shouldInstrumentWithCall();

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  std::pair<Value *, Value *>
  loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
                                   Align InstAlignment,
  // NOTE(review): the trailing parameter line (likely
  // "BasicBlock::iterator Pos);") was lost in extraction — verify upstream.
  int NumOriginStores = 0;
};
784
/// InstVisitor that walks each instruction of a function being instrumented
/// and emits the shadow/origin propagation IR via the owning DFSanFunction.
class DFSanVisitor : public InstVisitor<DFSanVisitor> {
public:
  DFSanFunction &DFSF;

  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}

  const DataLayout &getDataLayout() const {
    return DFSF.F->getParent()->getDataLayout();
  }

  // Combines shadow values and origins for all of I's operands.
  void visitInstOperands(Instruction &I);

  // NOTE(review): one or more visit* declarations appear to have been lost in
  // extraction before this point — verify against upstream.
  void visitBitCastInst(BitCastInst &BCI);
  void visitCastInst(CastInst &CI);
  void visitCmpInst(CmpInst &CI);
  // NOTE(review): extraction gap — visit* declarations missing here.
  void visitLoadInst(LoadInst &LI);
  void visitStoreInst(StoreInst &SI);
  // NOTE(review): extraction gap — visit* declarations missing here.
  void visitReturnInst(ReturnInst &RI);
  // The __atomic_* libcalls (matched by LibAtomicFunction) get dedicated
  // handling rather than the generic call path.
  void visitLibAtomicLoad(CallBase &CB);
  void visitLibAtomicStore(CallBase &CB);
  void visitLibAtomicExchange(CallBase &CB);
  void visitLibAtomicCompareExchange(CallBase &CB);
  void visitCallBase(CallBase &CB);
  void visitPHINode(PHINode &PN);
  // NOTE(review): extraction gap — several visit* declarations missing here.
  void visitBranchInst(BranchInst &BR);
  void visitSwitchInst(SwitchInst &SW);

private:
  void visitCASOrRMW(Align InstAlignment, Instruction &I);

  // Returns false when this is an invoke of a custom function.
  bool visitWrappedCallBase(Function &F, CallBase &CB);

  // Combines origins for all of I's operands.
  void visitInstOperandOrigins(Instruction &I);

  void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  // Presumably build memory-ordering translation tables for the atomic
  // libcall wrappers — TODO confirm against the definitions (not visible in
  // this chunk).
  Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB);
  Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB);
};
846
847bool LibAtomicFunction(const Function &F) {
848 // This is a bit of a hack because TargetLibraryInfo is a function pass.
849 // The DFSan pass would need to be refactored to be function pass oriented
850 // (like MSan is) in order to fit together nicely with TargetLibraryInfo.
851 // We need this check to prevent them from being instrumented, or wrapped.
852 // Match on name and number of arguments.
853 if (!F.hasName() || F.isVarArg())
854 return false;
855 switch (F.arg_size()) {
856 case 4:
857 return F.getName() == "__atomic_load" || F.getName() == "__atomic_store";
858 case 5:
859 return F.getName() == "__atomic_exchange";
860 case 6:
861 return F.getName() == "__atomic_compare_exchange";
862 default:
863 return false;
864 }
865}
866
867} // end anonymous namespace
868
869DataFlowSanitizer::DataFlowSanitizer(
870 const std::vector<std::string> &ABIListFiles) {
871 std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
872 llvm::append_range(AllABIListFiles, ClABIListFiles);
873 // FIXME: should we propagate vfs::FileSystem to this constructor?
874 ABIList.set(
876
878 CombineTaintLookupTableNames.insert(v);
879}
880
881TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
882 SmallVector<Type *, 4> ArgTypes;
883
884 // Some parameters of the custom function being constructed are
885 // parameters of T. Record the mapping from parameters of T to
886 // parameters of the custom function, so that parameter attributes
887 // at call sites can be updated.
888 std::vector<unsigned> ArgumentIndexMapping;
889 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
890 Type *ParamType = T->getParamType(I);
891 ArgumentIndexMapping.push_back(ArgTypes.size());
892 ArgTypes.push_back(ParamType);
893 }
894 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
895 ArgTypes.push_back(PrimitiveShadowTy);
896 if (T->isVarArg())
897 ArgTypes.push_back(PrimitiveShadowPtrTy);
898 Type *RetType = T->getReturnType();
899 if (!RetType->isVoidTy())
900 ArgTypes.push_back(PrimitiveShadowPtrTy);
901
902 if (shouldTrackOrigins()) {
903 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
904 ArgTypes.push_back(OriginTy);
905 if (T->isVarArg())
906 ArgTypes.push_back(OriginPtrTy);
907 if (!RetType->isVoidTy())
908 ArgTypes.push_back(OriginPtrTy);
909 }
910
911 return TransformedFunction(
912 T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
913 ArgumentIndexMapping);
914}
915
916bool DataFlowSanitizer::isZeroShadow(Value *V) {
917 Type *T = V->getType();
918 if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
919 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
920 return CI->isZero();
921 return false;
922 }
923
924 return isa<ConstantAggregateZero>(V);
925}
926
927bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
928 uint64_t ShadowSize = Size * ShadowWidthBytes;
929 return ShadowSize % 8 == 0 || ShadowSize == 4;
930}
931
932bool DataFlowSanitizer::shouldTrackOrigins() {
933 static const bool ShouldTrackOrigins = ClTrackOrigins;
934 return ShouldTrackOrigins;
935}
936
937Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
938 if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
939 return ZeroPrimitiveShadow;
940 Type *ShadowTy = getShadowTy(OrigTy);
941 return ConstantAggregateZero::get(ShadowTy);
942}
943
944Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
945 return getZeroShadow(V->getType());
946}
947
949 Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
950 Value *PrimitiveShadow, IRBuilder<> &IRB) {
951 if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
952 return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);
953
954 if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
955 for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
956 Indices.push_back(Idx);
958 Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
959 Indices.pop_back();
960 }
961 return Shadow;
962 }
963
964 if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
965 for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
966 Indices.push_back(Idx);
968 Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
969 Indices.pop_back();
970 }
971 return Shadow;
972 }
973 llvm_unreachable("Unexpected shadow type");
974}
975
976bool DFSanFunction::shouldInstrumentWithCall() {
977 return ClInstrumentWithCallThreshold >= 0 &&
978 NumOriginStores >= ClInstrumentWithCallThreshold;
979}
980
981Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
983 Type *ShadowTy = DFS.getShadowTy(T);
984
985 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
986 return PrimitiveShadow;
987
988 if (DFS.isZeroShadow(PrimitiveShadow))
989 return DFS.getZeroShadow(ShadowTy);
990
991 IRBuilder<> IRB(Pos->getParent(), Pos);
993 Value *Shadow = UndefValue::get(ShadowTy);
994 Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
995 PrimitiveShadow, IRB);
996
997 // Caches the primitive shadow value that built the shadow value.
998 CachedCollapsedShadows[Shadow] = PrimitiveShadow;
999 return Shadow;
1000}
1001
1002template <class AggregateType>
1003Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
1004 IRBuilder<> &IRB) {
1005 if (!AT->getNumElements())
1006 return DFS.ZeroPrimitiveShadow;
1007
1008 Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1009 Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);
1010
1011 for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
1012 Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1013 Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
1014 Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1015 }
1016 return Aggregator;
1017}
1018
1019Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
1020 IRBuilder<> &IRB) {
1021 Type *ShadowTy = Shadow->getType();
1022 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
1023 return Shadow;
1024 if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
1025 return collapseAggregateShadow<>(AT, Shadow, IRB);
1026 if (StructType *ST = dyn_cast<StructType>(ShadowTy))
1027 return collapseAggregateShadow<>(ST, Shadow, IRB);
1028 llvm_unreachable("Unexpected shadow type");
1029}
1030
1031Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
1033 Type *ShadowTy = Shadow->getType();
1034 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
1035 return Shadow;
1036
1037 // Checks if the cached collapsed shadow value dominates Pos.
1038 Value *&CS = CachedCollapsedShadows[Shadow];
1039 if (CS && DT.dominates(CS, Pos))
1040 return CS;
1041
1042 IRBuilder<> IRB(Pos->getParent(), Pos);
1043 Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
1044 // Caches the converted primitive shadow value.
1045 CS = PrimitiveShadow;
1046 return PrimitiveShadow;
1047}
1048
1049void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
1050 Value *Condition) {
1052 return;
1053 }
1054 IRBuilder<> IRB(&I);
1055 Value *CondShadow = getShadow(Condition);
1056 CallInst *CI;
1057 if (DFS.shouldTrackOrigins()) {
1058 Value *CondOrigin = getOrigin(Condition);
1059 CI = IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
1060 {CondShadow, CondOrigin});
1061 } else {
1062 CI = IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
1063 }
1064 CI->addParamAttr(0, Attribute::ZExt);
1065}
1066
1067void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB,
1068 Instruction &I,
1069 Value *Data) {
1071 return;
1072 }
1073 const DebugLoc &dbgloc = I.getDebugLoc();
1074 Value *DataShadow = collapseToPrimitiveShadow(getShadow(Data), IRB);
1075 ConstantInt *CILine;
1076 llvm::Value *FilePathPtr;
1077
1078 if (dbgloc.get() == nullptr) {
1079 CILine = llvm::ConstantInt::get(I.getContext(), llvm::APInt(32, 0));
1080 FilePathPtr = IRB.CreateGlobalStringPtr(
1081 I.getFunction()->getParent()->getSourceFileName());
1082 } else {
1083 CILine = llvm::ConstantInt::get(I.getContext(),
1084 llvm::APInt(32, dbgloc.getLine()));
1085 FilePathPtr =
1086 IRB.CreateGlobalStringPtr(dbgloc->getFilename());
1087 }
1088
1089 llvm::Value *FunctionNamePtr =
1090 IRB.CreateGlobalStringPtr(I.getFunction()->getName());
1091
1092 CallInst *CB;
1093 std::vector<Value *> args;
1094
1095 if (DFS.shouldTrackOrigins()) {
1096 Value *DataOrigin = getOrigin(Data);
1097 args = { DataShadow, DataOrigin, FilePathPtr, CILine, FunctionNamePtr };
1098 CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackOriginFn, args);
1099 } else {
1100 args = { DataShadow, FilePathPtr, CILine, FunctionNamePtr };
1101 CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackFn, args);
1102 }
1103 CB->addParamAttr(0, Attribute::ZExt);
1104 CB->setDebugLoc(dbgloc);
1105}
1106
1107Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
1108 if (!OrigTy->isSized())
1109 return PrimitiveShadowTy;
1110 if (isa<IntegerType>(OrigTy))
1111 return PrimitiveShadowTy;
1112 if (isa<VectorType>(OrigTy))
1113 return PrimitiveShadowTy;
1114 if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
1115 return ArrayType::get(getShadowTy(AT->getElementType()),
1116 AT->getNumElements());
1117 if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1119 for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
1120 Elements.push_back(getShadowTy(ST->getElementType(I)));
1121 return StructType::get(*Ctx, Elements);
1122 }
1123 return PrimitiveShadowTy;
1124}
1125
1126Type *DataFlowSanitizer::getShadowTy(Value *V) {
1127 return getShadowTy(V->getType());
1128}
1129
1130bool DataFlowSanitizer::initializeModule(Module &M) {
1131 Triple TargetTriple(M.getTargetTriple());
1132 const DataLayout &DL = M.getDataLayout();
1133
1134 if (TargetTriple.getOS() != Triple::Linux)
1135 report_fatal_error("unsupported operating system");
1136 switch (TargetTriple.getArch()) {
1137 case Triple::aarch64:
1138 MapParams = &Linux_AArch64_MemoryMapParams;
1139 break;
1140 case Triple::x86_64:
1141 MapParams = &Linux_X86_64_MemoryMapParams;
1142 break;
1145 break;
1146 default:
1147 report_fatal_error("unsupported architecture");
1148 }
1149
1150 Mod = &M;
1151 Ctx = &M.getContext();
1152 Int8Ptr = PointerType::getUnqual(*Ctx);
1153 OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
1154 OriginPtrTy = PointerType::getUnqual(OriginTy);
1155 PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
1156 PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
1157 IntptrTy = DL.getIntPtrType(*Ctx);
1158 ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
1159 ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);
1160
1161 Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
1162 DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
1163 /*isVarArg=*/false);
1164 Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
1165 DFSanLoadLabelAndOriginFnTy =
1166 FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
1167 /*isVarArg=*/false);
1168 DFSanUnimplementedFnTy = FunctionType::get(
1169 Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
1170 Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr};
1171 DFSanWrapperExternWeakNullFnTy =
1172 FunctionType::get(Type::getVoidTy(*Ctx), DFSanWrapperExternWeakNullArgs,
1173 /*isVarArg=*/false);
1174 Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
1175 PointerType::getUnqual(*Ctx), IntptrTy};
1176 DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
1177 DFSanSetLabelArgs, /*isVarArg=*/false);
1178 DFSanNonzeroLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), std::nullopt,
1179 /*isVarArg=*/false);
1180 DFSanVarargWrapperFnTy = FunctionType::get(
1181 Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
1182 DFSanConditionalCallbackFnTy =
1183 FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
1184 /*isVarArg=*/false);
1185 Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
1186 DFSanConditionalCallbackOriginFnTy = FunctionType::get(
1187 Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
1188 /*isVarArg=*/false);
1189 Type *DFSanReachesFunctionCallbackArgs[4] = {PrimitiveShadowTy, Int8Ptr,
1190 OriginTy, Int8Ptr};
1191 DFSanReachesFunctionCallbackFnTy =
1192 FunctionType::get(Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackArgs,
1193 /*isVarArg=*/false);
1194 Type *DFSanReachesFunctionCallbackOriginArgs[5] = {
1195 PrimitiveShadowTy, OriginTy, Int8Ptr, OriginTy, Int8Ptr};
1196 DFSanReachesFunctionCallbackOriginFnTy = FunctionType::get(
1197 Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackOriginArgs,
1198 /*isVarArg=*/false);
1199 DFSanCmpCallbackFnTy =
1200 FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
1201 /*isVarArg=*/false);
1202 DFSanChainOriginFnTy =
1203 FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
1204 Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
1205 DFSanChainOriginIfTaintedFnTy = FunctionType::get(
1206 OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
1207 Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
1208 Int8Ptr, IntptrTy, OriginTy};
1209 DFSanMaybeStoreOriginFnTy = FunctionType::get(
1210 Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
1211 Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
1212 DFSanMemOriginTransferFnTy = FunctionType::get(
1213 Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
1214 Type *DFSanMemShadowOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
1215 DFSanMemShadowOriginTransferFnTy =
1216 FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemShadowOriginTransferArgs,
1217 /*isVarArg=*/false);
1218 Type *DFSanMemShadowOriginConditionalExchangeArgs[5] = {
1219 IntegerType::get(*Ctx, 8), Int8Ptr, Int8Ptr, Int8Ptr, IntptrTy};
1220 DFSanMemShadowOriginConditionalExchangeFnTy = FunctionType::get(
1221 Type::getVoidTy(*Ctx), DFSanMemShadowOriginConditionalExchangeArgs,
1222 /*isVarArg=*/false);
1223 Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
1224 DFSanLoadStoreCallbackFnTy =
1225 FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
1226 /*isVarArg=*/false);
1227 Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
1228 DFSanMemTransferCallbackFnTy =
1229 FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
1230 /*isVarArg=*/false);
1231
1232 ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
1233 OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
1234 return true;
1235}
1236
1237bool DataFlowSanitizer::isInstrumented(const Function *F) {
1238 return !ABIList.isIn(*F, "uninstrumented");
1239}
1240
1241bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
1242 return !ABIList.isIn(*GA, "uninstrumented");
1243}
1244
1245bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
1246 return ABIList.isIn(*F, "force_zero_labels");
1247}
1248
1249DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
1250 if (ABIList.isIn(*F, "functional"))
1251 return WK_Functional;
1252 if (ABIList.isIn(*F, "discard"))
1253 return WK_Discard;
1254 if (ABIList.isIn(*F, "custom"))
1255 return WK_Custom;
1256
1257 return WK_Warning;
1258}
1259
1260void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
1261 std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
1262 GV->setName(GVName + Suffix);
1263
1264 // Try to change the name of the function in module inline asm. We only do
1265 // this for specific asm directives, currently only ".symver", to try to avoid
1266 // corrupting asm which happens to contain the symbol name as a substring.
1267 // Note that the substitution for .symver assumes that the versioned symbol
1268 // also has an instrumented name.
1269 std::string Asm = GV->getParent()->getModuleInlineAsm();
1270 std::string SearchStr = ".symver " + GVName + ",";
1271 size_t Pos = Asm.find(SearchStr);
1272 if (Pos != std::string::npos) {
1273 Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
1274 Pos = Asm.find('@');
1275
1276 if (Pos == std::string::npos)
1277 report_fatal_error(Twine("unsupported .symver: ", Asm));
1278
1279 Asm.replace(Pos, 1, Suffix + "@");
1280 GV->getParent()->setModuleInlineAsm(Asm);
1281 }
1282}
1283
1284void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,
1285 Function *F) {
1286 // If the function we are wrapping was ExternWeak, it may be null.
1287 // The original code before calling this wrapper may have checked for null,
1288 // but replacing with a known-to-not-be-null wrapper can break this check.
1289 // When replacing uses of the extern weak function with the wrapper we try
1290 // to avoid replacing uses in conditionals, but this is not perfect.
1291 // In the case where we fail, and accidentally optimize out a null check
1292 // for a extern weak function, add a check here to help identify the issue.
1293 if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) {
1294 std::vector<Value *> Args;
1295 Args.push_back(F);
1296 Args.push_back(IRB.CreateGlobalStringPtr(F->getName()));
1297 IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args);
1298 }
1299}
1300
1301Function *
1302DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
1304 FunctionType *NewFT) {
1305 FunctionType *FT = F->getFunctionType();
1306 Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
1307 NewFName, F->getParent());
1308 NewF->copyAttributesFrom(F);
1309 NewF->removeRetAttrs(
1310 AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
1311
1312 BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
1313 if (F->isVarArg()) {
1314 NewF->removeFnAttr("split-stack");
1315 CallInst::Create(DFSanVarargWrapperFn,
1316 IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
1317 BB);
1318 new UnreachableInst(*Ctx, BB);
1319 } else {
1320 auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
1321 std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());
1322
1323 CallInst *CI = CallInst::Create(F, Args, "", BB);
1324 if (FT->getReturnType()->isVoidTy())
1325 ReturnInst::Create(*Ctx, BB);
1326 else
1327 ReturnInst::Create(*Ctx, CI, BB);
1328 }
1329
1330 return NewF;
1331}
1332
1333// Initialize DataFlowSanitizer runtime functions and declare them in the module
1334void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
1335 LLVMContext &C = M.getContext();
1336 {
1338 AL = AL.addFnAttribute(C, Attribute::NoUnwind);
1339 AL = AL.addFnAttribute(
1341 AL = AL.addRetAttribute(C, Attribute::ZExt);
1342 DFSanUnionLoadFn =
1343 Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
1344 }
1345 {
1347 AL = AL.addFnAttribute(C, Attribute::NoUnwind);
1348 AL = AL.addFnAttribute(
1350 AL = AL.addRetAttribute(C, Attribute::ZExt);
1351 DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
1352 "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
1353 }
1354 DFSanUnimplementedFn =
1355 Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
1356 DFSanWrapperExternWeakNullFn = Mod->getOrInsertFunction(
1357 "__dfsan_wrapper_extern_weak_null", DFSanWrapperExternWeakNullFnTy);
1358 {
1360 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1361 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1362 DFSanSetLabelFn =
1363 Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
1364 }
1365 DFSanNonzeroLabelFn =
1366 Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
1367 DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
1368 DFSanVarargWrapperFnTy);
1369 {
1371 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1372 AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1373 DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
1374 DFSanChainOriginFnTy, AL);
1375 }
1376 {
1378 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1379 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1380 AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1381 DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
1382 "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
1383 }
1384 DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
1385 "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
1386
1387 DFSanMemShadowOriginTransferFn = Mod->getOrInsertFunction(
1388 "__dfsan_mem_shadow_origin_transfer", DFSanMemShadowOriginTransferFnTy);
1389
1390 DFSanMemShadowOriginConditionalExchangeFn =
1391 Mod->getOrInsertFunction("__dfsan_mem_shadow_origin_conditional_exchange",
1392 DFSanMemShadowOriginConditionalExchangeFnTy);
1393
1394 {
1396 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1397 AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
1398 DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
1399 "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
1400 }
1401
1402 DFSanRuntimeFunctions.insert(
1403 DFSanUnionLoadFn.getCallee()->stripPointerCasts());
1404 DFSanRuntimeFunctions.insert(
1405 DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
1406 DFSanRuntimeFunctions.insert(
1407 DFSanUnimplementedFn.getCallee()->stripPointerCasts());
1408 DFSanRuntimeFunctions.insert(
1409 DFSanWrapperExternWeakNullFn.getCallee()->stripPointerCasts());
1410 DFSanRuntimeFunctions.insert(
1411 DFSanSetLabelFn.getCallee()->stripPointerCasts());
1412 DFSanRuntimeFunctions.insert(
1413 DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
1414 DFSanRuntimeFunctions.insert(
1415 DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
1416 DFSanRuntimeFunctions.insert(
1417 DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
1418 DFSanRuntimeFunctions.insert(
1419 DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
1420 DFSanRuntimeFunctions.insert(
1421 DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
1422 DFSanRuntimeFunctions.insert(
1423 DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
1424 DFSanRuntimeFunctions.insert(
1425 DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
1426 DFSanRuntimeFunctions.insert(
1427 DFSanReachesFunctionCallbackFn.getCallee()->stripPointerCasts());
1428 DFSanRuntimeFunctions.insert(
1429 DFSanReachesFunctionCallbackOriginFn.getCallee()->stripPointerCasts());
1430 DFSanRuntimeFunctions.insert(
1431 DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
1432 DFSanRuntimeFunctions.insert(
1433 DFSanChainOriginFn.getCallee()->stripPointerCasts());
1434 DFSanRuntimeFunctions.insert(
1435 DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
1436 DFSanRuntimeFunctions.insert(
1437 DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
1438 DFSanRuntimeFunctions.insert(
1439 DFSanMemShadowOriginTransferFn.getCallee()->stripPointerCasts());
1440 DFSanRuntimeFunctions.insert(
1441 DFSanMemShadowOriginConditionalExchangeFn.getCallee()
1442 ->stripPointerCasts());
1443 DFSanRuntimeFunctions.insert(
1444 DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
1445}
1446
1447// Initializes event callback functions and declare them in the module
1448void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
1449 {
1451 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1452 DFSanLoadCallbackFn = Mod->getOrInsertFunction(
1453 "__dfsan_load_callback", DFSanLoadStoreCallbackFnTy, AL);
1454 }
1455 {
1457 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1458 DFSanStoreCallbackFn = Mod->getOrInsertFunction(
1459 "__dfsan_store_callback", DFSanLoadStoreCallbackFnTy, AL);
1460 }
1461 DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
1462 "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
1463 {
1465 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1466 DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback",
1467 DFSanCmpCallbackFnTy, AL);
1468 }
1469 {
1471 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1472 DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
1473 "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy, AL);
1474 }
1475 {
1477 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1478 DFSanConditionalCallbackOriginFn =
1479 Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
1480 DFSanConditionalCallbackOriginFnTy, AL);
1481 }
1482 {
1484 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1485 DFSanReachesFunctionCallbackFn =
1486 Mod->getOrInsertFunction("__dfsan_reaches_function_callback",
1487 DFSanReachesFunctionCallbackFnTy, AL);
1488 }
1489 {
1491 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1492 DFSanReachesFunctionCallbackOriginFn =
1493 Mod->getOrInsertFunction("__dfsan_reaches_function_callback_origin",
1494 DFSanReachesFunctionCallbackOriginFnTy, AL);
1495 }
1496}
1497
1498bool DataFlowSanitizer::runImpl(
1500 initializeModule(M);
1501
1502 if (ABIList.isIn(M, "skip"))
1503 return false;
1504
1505 const unsigned InitialGlobalSize = M.global_size();
1506 const unsigned InitialModuleSize = M.size();
1507
1508 bool Changed = false;
1509
1510 auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
1511 Type *Ty) -> Constant * {
1513 if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
1514 Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
1515 G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
1516 }
1517 return C;
1518 };
1519
1520 // These globals must be kept in sync with the ones in dfsan.cpp.
1521 ArgTLS =
1522 GetOrInsertGlobal("__dfsan_arg_tls",
1523 ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
1524 RetvalTLS = GetOrInsertGlobal(
1525 "__dfsan_retval_tls",
1526 ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
1527 ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
1528 ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
1529 RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);
1530
1531 (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
1532 Changed = true;
1533 return new GlobalVariable(
1534 M, OriginTy, true, GlobalValue::WeakODRLinkage,
1535 ConstantInt::getSigned(OriginTy,
1536 shouldTrackOrigins() ? ClTrackOrigins : 0),
1537 "__dfsan_track_origins");
1538 });
1539
1540 initializeCallbackFunctions(M);
1541 initializeRuntimeFunctions(M);
1542
1543 std::vector<Function *> FnsToInstrument;
1544 SmallPtrSet<Function *, 2> FnsWithNativeABI;
1545 SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
1546 SmallPtrSet<Constant *, 1> PersonalityFns;
1547 for (Function &F : M)
1548 if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F) &&
1549 !LibAtomicFunction(F)) {
1550 FnsToInstrument.push_back(&F);
1551 if (F.hasPersonalityFn())
1552 PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());
1553 }
1554
1556 for (auto *C : PersonalityFns) {
1557 assert(isa<Function>(C) && "Personality routine is not a function!");
1558 Function *F = cast<Function>(C);
1559 if (!isInstrumented(F))
1560 llvm::erase(FnsToInstrument, F);
1561 }
1562 }
1563
1564 // Give function aliases prefixes when necessary, and build wrappers where the
1565 // instrumentedness is inconsistent.
1566 for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
1567 // Don't stop on weak. We assume people aren't playing games with the
1568 // instrumentedness of overridden weak aliases.
1569 auto *F = dyn_cast<Function>(GA.getAliaseeObject());
1570 if (!F)
1571 continue;
1572
1573 bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
1574 if (GAInst && FInst) {
1575 addGlobalNameSuffix(&GA);
1576 } else if (GAInst != FInst) {
1577 // Non-instrumented alias of an instrumented function, or vice versa.
1578 // Replace the alias with a native-ABI wrapper of the aliasee. The pass
1579 // below will take care of instrumenting it.
1580 Function *NewF =
1581 buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
1582 GA.replaceAllUsesWith(NewF);
1583 NewF->takeName(&GA);
1584 GA.eraseFromParent();
1585 FnsToInstrument.push_back(NewF);
1586 }
1587 }
1588
1589 // TODO: This could be more precise.
1590 ReadOnlyNoneAttrs.addAttribute(Attribute::Memory);
1591
1592 // First, change the ABI of every function in the module. ABI-listed
1593 // functions keep their original ABI and get a wrapper function.
1594 for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
1595 FE = FnsToInstrument.end();
1596 FI != FE; ++FI) {
1597 Function &F = **FI;
1598 FunctionType *FT = F.getFunctionType();
1599
1600 bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
1601 FT->getReturnType()->isVoidTy());
1602
1603 if (isInstrumented(&F)) {
1604 if (isForceZeroLabels(&F))
1605 FnsWithForceZeroLabel.insert(&F);
1606
1607 // Instrumented functions get a '.dfsan' suffix. This allows us to more
1608 // easily identify cases of mismatching ABIs. This naming scheme is
1609 // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
1610 addGlobalNameSuffix(&F);
1611 } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
1612 // Build a wrapper function for F. The wrapper simply calls F, and is
1613 // added to FnsToInstrument so that any instrumentation according to its
1614 // WrapperKind is done in the second pass below.
1615
1616 // If the function being wrapped has local linkage, then preserve the
1617 // function's linkage in the wrapper function.
1618 GlobalValue::LinkageTypes WrapperLinkage =
1619 F.hasLocalLinkage() ? F.getLinkage()
1621
1622 Function *NewF = buildWrapperFunction(
1623 &F,
1624 (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
1625 std::string(F.getName()),
1626 WrapperLinkage, FT);
1627 NewF->removeFnAttrs(ReadOnlyNoneAttrs);
1628
1629 // Extern weak functions can sometimes be null at execution time.
1630 // Code will sometimes check if an extern weak function is null.
1631 // This could look something like:
1632 // declare extern_weak i8 @my_func(i8)
1633 // br i1 icmp ne (i8 (i8)* @my_func, i8 (i8)* null), label %use_my_func,
1634 // label %avoid_my_func
1635 // The @"dfsw$my_func" wrapper is never null, so if we replace this use
1636 // in the comparison, the icmp will simplify to false and we have
1637 // accidentally optimized away a null check that is necessary.
1638 // This can lead to a crash when the null extern_weak my_func is called.
1639 //
1640 // To prevent (the most common pattern of) this problem,
1641 // do not replace uses in comparisons with the wrapper.
1642 // We definitely want to replace uses in call instructions.
1643 // Other uses (e.g. store the function address somewhere) might be
1644 // called or compared or both - this case may not be handled correctly.
1645 // We will default to replacing with wrapper in cases we are unsure.
1646 auto IsNotCmpUse = [](Use &U) -> bool {
1647 User *Usr = U.getUser();
1648 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {
1649 // This is the most common case for icmp ne null
1650 if (CE->getOpcode() == Instruction::ICmp) {
1651 return false;
1652 }
1653 }
1654 if (Instruction *I = dyn_cast<Instruction>(Usr)) {
1655 if (I->getOpcode() == Instruction::ICmp) {
1656 return false;
1657 }
1658 }
1659 return true;
1660 };
1661 F.replaceUsesWithIf(NewF, IsNotCmpUse);
1662
1663 UnwrappedFnMap[NewF] = &F;
1664 *FI = NewF;
1665
1666 if (!F.isDeclaration()) {
1667 // This function is probably defining an interposition of an
1668 // uninstrumented function and hence needs to keep the original ABI.
1669 // But any functions it may call need to use the instrumented ABI, so
1670 // we instrument it in a mode which preserves the original ABI.
1671 FnsWithNativeABI.insert(&F);
1672
1673 // This code needs to rebuild the iterators, as they may be invalidated
1674 // by the push_back, taking care that the new range does not include
1675 // any functions added by this code.
1676 size_t N = FI - FnsToInstrument.begin(),
1677 Count = FE - FnsToInstrument.begin();
1678 FnsToInstrument.push_back(&F);
1679 FI = FnsToInstrument.begin() + N;
1680 FE = FnsToInstrument.begin() + Count;
1681 }
1682 // Hopefully, nobody will try to indirectly call a vararg
1683 // function... yet.
1684 } else if (FT->isVarArg()) {
1685 UnwrappedFnMap[&F] = &F;
1686 *FI = nullptr;
1687 }
1688 }
1689
1690 for (Function *F : FnsToInstrument) {
1691 if (!F || F->isDeclaration())
1692 continue;
1693
1695
1696 DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
1697 FnsWithForceZeroLabel.count(F), GetTLI(*F));
1698
1700 // Add callback for arguments reaching this function.
1701 for (auto &FArg : F->args()) {
1702 Instruction *Next = &F->getEntryBlock().front();
1703 Value *FArgShadow = DFSF.getShadow(&FArg);
1704 if (isZeroShadow(FArgShadow))
1705 continue;
1706 if (Instruction *FArgShadowInst = dyn_cast<Instruction>(FArgShadow)) {
1707 Next = FArgShadowInst->getNextNode();
1708 }
1709 if (shouldTrackOrigins()) {
1710 if (Instruction *Origin =
1711 dyn_cast<Instruction>(DFSF.getOrigin(&FArg))) {
1712 // Ensure IRB insertion point is after loads for shadow and origin.
1713 Instruction *OriginNext = Origin->getNextNode();
1714 if (Next->comesBefore(OriginNext)) {
1715 Next = OriginNext;
1716 }
1717 }
1718 }
1719 IRBuilder<> IRB(Next);
1720 DFSF.addReachesFunctionCallbacksIfEnabled(IRB, *Next, &FArg);
1721 }
1722 }
1723
1724 // DFSanVisitor may create new basic blocks, which confuses df_iterator.
1725 // Build a copy of the list before iterating over it.
1726 SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
1727
1728 for (BasicBlock *BB : BBList) {
1729 Instruction *Inst = &BB->front();
1730 while (true) {
1731 // DFSanVisitor may split the current basic block, changing the current
1732 // instruction's next pointer and moving the next instruction to the
1733 // tail block from which we should continue.
1734 Instruction *Next = Inst->getNextNode();
1735 // DFSanVisitor may delete Inst, so keep track of whether it was a
1736 // terminator.
1737 bool IsTerminator = Inst->isTerminator();
1738 if (!DFSF.SkipInsts.count(Inst))
1739 DFSanVisitor(DFSF).visit(Inst);
1740 if (IsTerminator)
1741 break;
1742 Inst = Next;
1743 }
1744 }
1745
1746 // We will not necessarily be able to compute the shadow for every phi node
1747 // until we have visited every block. Therefore, the code that handles phi
1748 // nodes adds them to the PHIFixups list so that they can be properly
1749 // handled here.
1750 for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
1751 for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
1752 ++Val) {
1753 P.ShadowPhi->setIncomingValue(
1754 Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
1755 if (P.OriginPhi)
1756 P.OriginPhi->setIncomingValue(
1757 Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
1758 }
1759 }
1760
1761 // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
1762 // places (i.e. instructions in basic blocks we haven't even begun visiting
1763 // yet). To make our life easier, do this work in a pass after the main
1764 // instrumentation.
1766 for (Value *V : DFSF.NonZeroChecks) {
1768 if (Instruction *I = dyn_cast<Instruction>(V))
1769 Pos = std::next(I->getIterator());
1770 else
1771 Pos = DFSF.F->getEntryBlock().begin();
1772 while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
1773 Pos = std::next(Pos->getIterator());
1774 IRBuilder<> IRB(Pos->getParent(), Pos);
1775 Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
1776 Value *Ne =
1777 IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
1778 BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1779 Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
1780 IRBuilder<> ThenIRB(BI);
1781 ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
1782 }
1783 }
1784 }
1785
1786 return Changed || !FnsToInstrument.empty() ||
1787 M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
1788}
1789
1790Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
1791 Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
1792 if (ArgOffset)
1793 Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
1794 return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
1795 "_dfsarg");
1796}
1797
1798Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
1799 return IRB.CreatePointerCast(
1800 DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
1801}
1802
// Returns the TLS global that carries the origin of a callee's return value.
Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }
1804
1805Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
1806 return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo,
1807 "_dfsarg_o");
1808}
1809
// Returns the origin id tracked for V, computing and memoizing it on first
// use. Only arguments and instructions can carry a non-zero origin; all other
// values map to the zero origin.
Value *DFSanFunction::getOrigin(Value *V) {
  assert(DFS.shouldTrackOrigins());
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.ZeroOrigin;
  // operator[] default-inserts a null entry; Origin is a reference into the
  // map, so assigning it below caches the computed value.
  Value *&Origin = ValOriginMap[V];
  if (!Origin) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      // Native-ABI functions do not receive origins through TLS. Note this
      // returns without assigning Origin, leaving the cached entry null.
      if (IsNativeABI)
        return DFS.ZeroOrigin;
      if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
        // Load the argument's origin from its __dfsan_arg_origin_tls slot at
        // the very start of the entry block.
        Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
        IRBuilder<> IRB(ArgOriginTLSPos);
        Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
        Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
      } else {
        // Overflow: more arguments than origin TLS slots; fall back to zero.
        Origin = DFS.ZeroOrigin;
      }
    } else {
      // Values with no recorded origin default to the zero origin.
      Origin = DFS.ZeroOrigin;
    }
  }
  return Origin;
}
1834
1835void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
1836 if (!DFS.shouldTrackOrigins())
1837 return;
1838 assert(!ValOriginMap.count(I));
1839 assert(Origin->getType() == DFS.OriginTy);
1840 ValOriginMap[I] = Origin;
1841}
1842
1843Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
1844 unsigned ArgOffset = 0;
1845 const DataLayout &DL = F->getParent()->getDataLayout();
1846 for (auto &FArg : F->args()) {
1847 if (!FArg.getType()->isSized()) {
1848 if (A == &FArg)
1849 break;
1850 continue;
1851 }
1852
1853 unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
1854 if (A != &FArg) {
1855 ArgOffset += alignTo(Size, ShadowTLSAlignment);
1856 if (ArgOffset > ArgTLSSize)
1857 break; // ArgTLS overflows, uses a zero shadow.
1858 continue;
1859 }
1860
1861 if (ArgOffset + Size > ArgTLSSize)
1862 break; // ArgTLS overflows, uses a zero shadow.
1863
1864 Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
1865 IRBuilder<> IRB(ArgTLSPos);
1866 Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
1867 return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
1869 }
1870
1871 return DFS.getZeroShadow(A);
1872}
1873
1874Value *DFSanFunction::getShadow(Value *V) {
1875 if (!isa<Argument>(V) && !isa<Instruction>(V))
1876 return DFS.getZeroShadow(V);
1877 if (IsForceZeroLabels)
1878 return DFS.getZeroShadow(V);
1879 Value *&Shadow = ValShadowMap[V];
1880 if (!Shadow) {
1881 if (Argument *A = dyn_cast<Argument>(V)) {
1882 if (IsNativeABI)
1883 return DFS.getZeroShadow(V);
1884 Shadow = getShadowForTLSArgument(A);
1885 NonZeroChecks.push_back(Shadow);
1886 } else {
1887 Shadow = DFS.getZeroShadow(V);
1888 }
1889 }
1890 return Shadow;
1891}
1892
// Records Shadow as the shadow value of instruction I. Each instruction's
// shadow may be recorded only once.
void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
  assert(!ValShadowMap.count(I));
  ValShadowMap[I] = Shadow;
}
1897
1898/// Compute the integer shadow offset that corresponds to a given
1899/// application address.
1900///
1901/// Offset = (Addr & ~AndMask) ^ XorMask
1902Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
1903 assert(Addr != RetvalTLS && "Reinstrumenting?");
1904 Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1905
1906 uint64_t AndMask = MapParams->AndMask;
1907 if (AndMask)
1908 OffsetLong =
1909 IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));
1910
1911 uint64_t XorMask = MapParams->XorMask;
1912 if (XorMask)
1913 OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
1914 return OffsetLong;
1915}
1916
1917std::pair<Value *, Value *>
1918DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
1920 // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
1921 IRBuilder<> IRB(Pos->getParent(), Pos);
1922 Value *ShadowOffset = getShadowOffset(Addr, IRB);
1923 Value *ShadowLong = ShadowOffset;
1924 uint64_t ShadowBase = MapParams->ShadowBase;
1925 if (ShadowBase != 0) {
1926 ShadowLong =
1927 IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
1928 }
1929 IntegerType *ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
1930 Value *ShadowPtr =
1931 IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1932 Value *OriginPtr = nullptr;
1933 if (shouldTrackOrigins()) {
1934 Value *OriginLong = ShadowOffset;
1935 uint64_t OriginBase = MapParams->OriginBase;
1936 if (OriginBase != 0)
1937 OriginLong =
1938 IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
1939 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1940 // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
1941 // So Mask is unnecessary.
1942 if (Alignment < MinOriginAlignment) {
1944 OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
1945 }
1946 OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
1947 }
1948 return std::make_pair(ShadowPtr, OriginPtr);
1949}
1950
1951Value *DataFlowSanitizer::getShadowAddress(Value *Addr,
1953 Value *ShadowOffset) {
1954 IRBuilder<> IRB(Pos->getParent(), Pos);
1955 return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
1956}
1957
1958Value *DataFlowSanitizer::getShadowAddress(Value *Addr,
1960 IRBuilder<> IRB(Pos->getParent(), Pos);
1961 Value *ShadowOffset = getShadowOffset(Addr, IRB);
1962 return getShadowAddress(Addr, Pos, ShadowOffset);
1963}
1964
1965Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
1967 Value *PrimitiveValue = combineShadows(V1, V2, Pos);
1968 return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
1969}
1970
1971// Generates IR to compute the union of the two given shadows, inserting it
1972// before Pos. The combined value is with primitive type.
1973Value *DFSanFunction::combineShadows(Value *V1, Value *V2,
1975 if (DFS.isZeroShadow(V1))
1976 return collapseToPrimitiveShadow(V2, Pos);
1977 if (DFS.isZeroShadow(V2))
1978 return collapseToPrimitiveShadow(V1, Pos);
1979 if (V1 == V2)
1980 return collapseToPrimitiveShadow(V1, Pos);
1981
1982 auto V1Elems = ShadowElements.find(V1);
1983 auto V2Elems = ShadowElements.find(V2);
1984 if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
1985 if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
1986 V2Elems->second.begin(), V2Elems->second.end())) {
1987 return collapseToPrimitiveShadow(V1, Pos);
1988 }
1989 if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
1990 V1Elems->second.begin(), V1Elems->second.end())) {
1991 return collapseToPrimitiveShadow(V2, Pos);
1992 }
1993 } else if (V1Elems != ShadowElements.end()) {
1994 if (V1Elems->second.count(V2))
1995 return collapseToPrimitiveShadow(V1, Pos);
1996 } else if (V2Elems != ShadowElements.end()) {
1997 if (V2Elems->second.count(V1))
1998 return collapseToPrimitiveShadow(V2, Pos);
1999 }
2000
2001 auto Key = std::make_pair(V1, V2);
2002 if (V1 > V2)
2003 std::swap(Key.first, Key.second);
2004 CachedShadow &CCS = CachedShadows[Key];
2005 if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
2006 return CCS.Shadow;
2007
2008 // Converts inputs shadows to shadows with primitive types.
2009 Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
2010 Value *PV2 = collapseToPrimitiveShadow(V2, Pos);
2011
2012 IRBuilder<> IRB(Pos->getParent(), Pos);
2013 CCS.Block = Pos->getParent();
2014 CCS.Shadow = IRB.CreateOr(PV1, PV2);
2015
2016 std::set<Value *> UnionElems;
2017 if (V1Elems != ShadowElements.end()) {
2018 UnionElems = V1Elems->second;
2019 } else {
2020 UnionElems.insert(V1);
2021 }
2022 if (V2Elems != ShadowElements.end()) {
2023 UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
2024 } else {
2025 UnionElems.insert(V2);
2026 }
2027 ShadowElements[CCS.Shadow] = std::move(UnionElems);
2028
2029 return CCS.Shadow;
2030}
2031
2032// A convenience function which folds the shadows of each of the operands
2033// of the provided instruction Inst, inserting the IR before Inst. Returns
2034// the computed union Value.
2035Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
2036 if (Inst->getNumOperands() == 0)
2037 return DFS.getZeroShadow(Inst);
2038
2039 Value *Shadow = getShadow(Inst->getOperand(0));
2040 for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
2041 Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)),
2042 Inst->getIterator());
2043
2044 return expandFromPrimitiveShadow(Inst->getType(), Shadow,
2045 Inst->getIterator());
2046}
2047
2048void DFSanVisitor::visitInstOperands(Instruction &I) {
2049 Value *CombinedShadow = DFSF.combineOperandShadows(&I);
2050 DFSF.setShadow(&I, CombinedShadow);
2051 visitInstOperandOrigins(I);
2052}
2053
2054Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
2055 const std::vector<Value *> &Origins,
2057 ConstantInt *Zero) {
2058 assert(Shadows.size() == Origins.size());
2059 size_t Size = Origins.size();
2060 if (Size == 0)
2061 return DFS.ZeroOrigin;
2062 Value *Origin = nullptr;
2063 if (!Zero)
2064 Zero = DFS.ZeroPrimitiveShadow;
2065 for (size_t I = 0; I != Size; ++I) {
2066 Value *OpOrigin = Origins[I];
2067 Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
2068 if (ConstOpOrigin && ConstOpOrigin->isNullValue())
2069 continue;
2070 if (!Origin) {
2071 Origin = OpOrigin;
2072 continue;
2073 }
2074 Value *OpShadow = Shadows[I];
2075 Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
2076 IRBuilder<> IRB(Pos->getParent(), Pos);
2077 Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
2078 Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2079 }
2080 return Origin ? Origin : DFS.ZeroOrigin;
2081}
2082
2083Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
2084 size_t Size = Inst->getNumOperands();
2085 std::vector<Value *> Shadows(Size);
2086 std::vector<Value *> Origins(Size);
2087 for (unsigned I = 0; I != Size; ++I) {
2088 Shadows[I] = getShadow(Inst->getOperand(I));
2089 Origins[I] = getOrigin(Inst->getOperand(I));
2090 }
2091 return combineOrigins(Shadows, Origins, Inst->getIterator());
2092}
2093
2094void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
2095 if (!DFSF.DFS.shouldTrackOrigins())
2096 return;
2097 Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
2098 DFSF.setOrigin(&I, CombinedOrigin);
2099}
2100
2101Align DFSanFunction::getShadowAlign(Align InstAlignment) {
2102 const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
2103 return Align(Alignment.value() * DFS.ShadowWidthBytes);
2104}
2105
2106Align DFSanFunction::getOriginAlign(Align InstAlignment) {
2107 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2108 return Align(std::max(MinOriginAlignment, Alignment));
2109}
2110
2111bool DFSanFunction::isLookupTableConstant(Value *P) {
2112 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))
2113 if (GV->isConstant() && GV->hasName())
2114 return DFS.CombineTaintLookupTableNames.count(GV->getName());
2115
2116 return false;
2117}
2118
2119bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
2120 Align InstAlignment) {
2121 // When enabling tracking load instructions, we always use
2122 // __dfsan_load_label_and_origin to reduce code size.
2123 if (ClTrackOrigins == 2)
2124 return true;
2125
2126 assert(Size != 0);
2127 // * if Size == 1, it is sufficient to load its origin aligned at 4.
2128 // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
2129 // load its origin aligned at 4. If not, although origins may be lost, it
2130 // should not happen very often.
2131 // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
2132 // Size % 4 == 0, it is more efficient to load origins without callbacks.
2133 // * Otherwise we use __dfsan_load_label_and_origin.
2134 // This should ensure that common cases run efficiently.
2135 if (Size <= 2)
2136 return false;
2137
2138 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2139 return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
2140}
2141
2142Value *DataFlowSanitizer::loadNextOrigin(BasicBlock::iterator Pos,
2143 Align OriginAlign,
2144 Value **OriginAddr) {
2145 IRBuilder<> IRB(Pos->getParent(), Pos);
2146 *OriginAddr =
2147 IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
2148 return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
2149}
2150
2151std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
2152 Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
2153 Align OriginAlign, Value *FirstOrigin, BasicBlock::iterator Pos) {
2154 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2155 const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;
2156
2157 assert(Size >= 4 && "Not large enough load size for fast path!");
2158
2159 // Used for origin tracking.
2160 std::vector<Value *> Shadows;
2161 std::vector<Value *> Origins;
2162
2163 // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
2164 // but this function is only used in a subset of cases that make it possible
2165 // to optimize the instrumentation.
2166 //
2167 // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
2168 // per byte) is either:
2169 // - a multiple of 8 (common)
2170 // - equal to 4 (only for load32)
2171 //
2172 // For the second case, we can fit the wide shadow in a 32-bit integer. In all
2173 // other cases, we use a 64-bit integer to hold the wide shadow.
2174 Type *WideShadowTy =
2175 ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);
2176
2177 IRBuilder<> IRB(Pos->getParent(), Pos);
2178 Value *CombinedWideShadow =
2179 IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
2180
2181 unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
2182 const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
2183
2184 auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
2185 if (BytesPerWideShadow > 4) {
2186 assert(BytesPerWideShadow == 8);
2187 // The wide shadow relates to two origin pointers: one for the first four
2188 // application bytes, and one for the latest four. We use a left shift to
2189 // get just the shadow bytes that correspond to the first origin pointer,
2190 // and then the entire shadow for the second origin pointer (which will be
2191 // chosen by combineOrigins() iff the least-significant half of the wide
2192 // shadow was empty but the other half was not).
2193 Value *WideShadowLo = IRB.CreateShl(
2194 WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
2195 Shadows.push_back(WideShadow);
2196 Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));
2197
2198 Shadows.push_back(WideShadowLo);
2199 Origins.push_back(Origin);
2200 } else {
2201 Shadows.push_back(WideShadow);
2202 Origins.push_back(Origin);
2203 }
2204 };
2205
2206 if (ShouldTrackOrigins)
2207 AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);
2208
2209 // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
2210 // then OR individual shadows within the combined WideShadow by binary ORing.
2211 // This is fewer instructions than ORing shadows individually, since it
2212 // needs logN shift/or instructions (N being the bytes of the combined wide
2213 // shadow).
2214 for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
2215 ByteOfs += BytesPerWideShadow) {
2216 ShadowAddr = IRB.CreateGEP(WideShadowTy, ShadowAddr,
2217 ConstantInt::get(DFS.IntptrTy, 1));
2218 Value *NextWideShadow =
2219 IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
2220 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
2221 if (ShouldTrackOrigins) {
2222 Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
2223 AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
2224 }
2225 }
2226 for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
2227 Width >>= 1) {
2228 Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
2229 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
2230 }
2231 return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
2232 ShouldTrackOrigins
2233 ? combineOrigins(Shadows, Origins, Pos,
2235 : DFS.ZeroOrigin};
2236}
2237
2238std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
2239 Value *Addr, uint64_t Size, Align InstAlignment, BasicBlock::iterator Pos) {
2240 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2241
2242 // Non-escaped loads.
2243 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2244 const auto SI = AllocaShadowMap.find(AI);
2245 if (SI != AllocaShadowMap.end()) {
2246 IRBuilder<> IRB(Pos->getParent(), Pos);
2247 Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
2248 const auto OI = AllocaOriginMap.find(AI);
2249 assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
2250 return {ShadowLI, ShouldTrackOrigins
2251 ? IRB.CreateLoad(DFS.OriginTy, OI->second)
2252 : nullptr};
2253 }
2254 }
2255
2256 // Load from constant addresses.
2259 bool AllConstants = true;
2260 for (const Value *Obj : Objs) {
2261 if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
2262 continue;
2263 if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
2264 continue;
2265
2266 AllConstants = false;
2267 break;
2268 }
2269 if (AllConstants)
2270 return {DFS.ZeroPrimitiveShadow,
2271 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2272
2273 if (Size == 0)
2274 return {DFS.ZeroPrimitiveShadow,
2275 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2276
2277 // Use callback to load if this is not an optimizable case for origin
2278 // tracking.
2279 if (ShouldTrackOrigins &&
2280 useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
2281 IRBuilder<> IRB(Pos->getParent(), Pos);
2282 CallInst *Call =
2283 IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
2284 {Addr, ConstantInt::get(DFS.IntptrTy, Size)});
2285 Call->addRetAttr(Attribute::ZExt);
2286 return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
2287 DFS.PrimitiveShadowTy),
2288 IRB.CreateTrunc(Call, DFS.OriginTy)};
2289 }
2290
2291 // Other cases that support loading shadows or origins in a fast way.
2292 Value *ShadowAddr, *OriginAddr;
2293 std::tie(ShadowAddr, OriginAddr) =
2294 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2295
2296 const Align ShadowAlign = getShadowAlign(InstAlignment);
2297 const Align OriginAlign = getOriginAlign(InstAlignment);
2298 Value *Origin = nullptr;
2299 if (ShouldTrackOrigins) {
2300 IRBuilder<> IRB(Pos->getParent(), Pos);
2301 Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
2302 }
2303
2304 // When the byte size is small enough, we can load the shadow directly with
2305 // just a few instructions.
2306 switch (Size) {
2307 case 1: {
2308 LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
2309 LI->setAlignment(ShadowAlign);
2310 return {LI, Origin};
2311 }
2312 case 2: {
2313 IRBuilder<> IRB(Pos->getParent(), Pos);
2314 Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
2315 ConstantInt::get(DFS.IntptrTy, 1));
2316 Value *Load =
2317 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
2318 Value *Load1 =
2319 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
2320 return {combineShadows(Load, Load1, Pos), Origin};
2321 }
2322 }
2323 bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);
2324
2325 if (HasSizeForFastPath)
2326 return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
2327 OriginAlign, Origin, Pos);
2328
2329 IRBuilder<> IRB(Pos->getParent(), Pos);
2330 CallInst *FallbackCall = IRB.CreateCall(
2331 DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
2332 FallbackCall->addRetAttr(Attribute::ZExt);
2333 return {FallbackCall, Origin};
2334}
2335
2336std::pair<Value *, Value *>
2337DFSanFunction::loadShadowOrigin(Value *Addr, uint64_t Size, Align InstAlignment,
2339 Value *PrimitiveShadow, *Origin;
2340 std::tie(PrimitiveShadow, Origin) =
2341 loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
2342 if (DFS.shouldTrackOrigins()) {
2343 if (ClTrackOrigins == 2) {
2344 IRBuilder<> IRB(Pos->getParent(), Pos);
2345 auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
2346 if (!ConstantShadow || !ConstantShadow->isZeroValue())
2347 Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
2348 }
2349 }
2350 return {PrimitiveShadow, Origin};
2351}
2352
2354 switch (AO) {
2355 case AtomicOrdering::NotAtomic:
2356 return AtomicOrdering::NotAtomic;
2357 case AtomicOrdering::Unordered:
2358 case AtomicOrdering::Monotonic:
2359 case AtomicOrdering::Acquire:
2360 return AtomicOrdering::Acquire;
2361 case AtomicOrdering::Release:
2362 case AtomicOrdering::AcquireRelease:
2363 return AtomicOrdering::AcquireRelease;
2364 case AtomicOrdering::SequentiallyConsistent:
2365 return AtomicOrdering::SequentiallyConsistent;
2366 }
2367 llvm_unreachable("Unknown ordering");
2368}
2369
2371 if (!V->getType()->isPointerTy())
2372 return V;
2373
2374 // DFSan pass should be running on valid IR, but we'll
2375 // keep a seen set to ensure there are no issues.
2377 Visited.insert(V);
2378 do {
2379 if (auto *GEP = dyn_cast<GEPOperator>(V)) {
2380 V = GEP->getPointerOperand();
2381 } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2382 V = cast<Operator>(V)->getOperand(0);
2383 if (!V->getType()->isPointerTy())
2384 return V;
2385 } else if (isa<GlobalAlias>(V)) {
2386 V = cast<GlobalAlias>(V)->getAliasee();
2387 }
2388 } while (Visited.insert(V).second);
2389
2390 return V;
2391}
2392
2393void DFSanVisitor::visitLoadInst(LoadInst &LI) {
2394 auto &DL = LI.getModule()->getDataLayout();
2395 uint64_t Size = DL.getTypeStoreSize(LI.getType());
2396 if (Size == 0) {
2397 DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
2398 DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
2399 return;
2400 }
2401
2402 // When an application load is atomic, increase atomic ordering between
2403 // atomic application loads and stores to ensure happen-before order; load
2404 // shadow data after application data; store zero shadow data before
2405 // application data. This ensure shadow loads return either labels of the
2406 // initial application data or zeros.
2407 if (LI.isAtomic())
2409
2410 BasicBlock::iterator AfterLi = std::next(LI.getIterator());
2412 if (LI.isAtomic())
2413 Pos = std::next(Pos);
2414
2415 std::vector<Value *> Shadows;
2416 std::vector<Value *> Origins;
2417 Value *PrimitiveShadow, *Origin;
2418 std::tie(PrimitiveShadow, Origin) =
2419 DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
2420 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2421 if (ShouldTrackOrigins) {
2422 Shadows.push_back(PrimitiveShadow);
2423 Origins.push_back(Origin);
2424 }
2426 DFSF.isLookupTableConstant(
2428 Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
2429 PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
2430 if (ShouldTrackOrigins) {
2431 Shadows.push_back(PtrShadow);
2432 Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
2433 }
2434 }
2435 if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
2436 DFSF.NonZeroChecks.push_back(PrimitiveShadow);
2437
2438 Value *Shadow =
2439 DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
2440 DFSF.setShadow(&LI, Shadow);
2441
2442 if (ShouldTrackOrigins) {
2443 DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
2444 }
2445
2446 if (ClEventCallbacks) {
2447 IRBuilder<> IRB(Pos->getParent(), Pos);
2449 CallInst *CI =
2450 IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr});
2451 CI->addParamAttr(0, Attribute::ZExt);
2452 }
2453
2454 IRBuilder<> IRB(AfterLi->getParent(), AfterLi);
2455 DFSF.addReachesFunctionCallbacksIfEnabled(IRB, LI, &LI);
2456}
2457
2458Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
2459 IRBuilder<> &IRB) {
2460 assert(DFS.shouldTrackOrigins());
2461 return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
2462}
2463
2464Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
2465 if (!DFS.shouldTrackOrigins())
2466 return V;
2467 return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
2468}
2469
2470Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
2471 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2472 const DataLayout &DL = F->getParent()->getDataLayout();
2473 unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2474 if (IntptrSize == OriginSize)
2475 return Origin;
2476 assert(IntptrSize == OriginSize * 2);
2477 Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
2478 return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
2479}
2480
// Writes Origin into the origin memory covering StoreOriginSize bytes
// starting at StoreOriginAddr. When alignment permits, the origin is
// replicated to intptr width so two slots are written per store; any
// remainder is written one origin slot at a time.
void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
                                Value *StoreOriginAddr,
                                uint64_t StoreOriginSize, Align Alignment) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getParent()->getDataLayout();
  const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  assert(IntptrAlignment >= MinOriginAlignment);
  assert(IntptrSize >= OriginSize);

  unsigned Ofs = 0;
  Align CurrentAlignment = Alignment;
  if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
    // Fast path: store the doubled-up origin IntptrSize bytes at a time.
    Value *IntptrOrigin = originToIntptr(IRB, Origin);
    Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
        StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
    for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
      Value *Ptr =
          I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
            : IntptrStoreOriginPtr;
      IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
      // Ofs counts origin slots, not bytes.
      Ofs += IntptrSize / OriginSize;
      CurrentAlignment = IntptrAlignment;
    }
  }

  // Tail (or the whole range when the fast path was skipped): one origin slot
  // per store, with the slot count rounded up.
  for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
       ++I) {
    Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
                   : StoreOriginAddr;
    IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
    CurrentAlignment = MinOriginAlignment;
  }
}
2515
2516Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
2517 const Twine &Name) {
2518 Type *VTy = V->getType();
2519 assert(VTy->isIntegerTy());
2520 if (VTy->getIntegerBitWidth() == 1)
2521 // Just converting a bool to a bool, so do nothing.
2522 return V;
2523 return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
2524}
2525
2526void DFSanFunction::storeOrigin(BasicBlock::iterator Pos, Value *Addr,
2527 uint64_t Size, Value *Shadow, Value *Origin,
2528 Value *StoreOriginAddr, Align InstAlignment) {
2529 // Do not write origins for zero shadows because we do not trace origins for
2530 // untainted sinks.
2531 const Align OriginAlignment = getOriginAlign(InstAlignment);
2532 Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
2533 IRBuilder<> IRB(Pos->getParent(), Pos);
2534 if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
2535 if (!ConstantShadow->isZeroValue())
2536 paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
2537 OriginAlignment);
2538 return;
2539 }
2540
2541 if (shouldInstrumentWithCall()) {
2542 IRB.CreateCall(
2543 DFS.DFSanMaybeStoreOriginFn,
2544 {CollapsedShadow, Addr, ConstantInt::get(DFS.IntptrTy, Size), Origin});
2545 } else {
2546 Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
2547 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2549 Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DTU);
2550 IRBuilder<> IRBNew(CheckTerm);
2551 paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
2552 OriginAlignment);
2553 ++NumOriginStores;
2554 }
2555}
2556
2557void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
2558 Align ShadowAlign,
2560 IRBuilder<> IRB(Pos->getParent(), Pos);
2561 IntegerType *ShadowTy =
2562 IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
2563 Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
2564 Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
2565 IRB.CreateAlignedStore(ExtZeroShadow, ShadowAddr, ShadowAlign);
2566 // Do not write origins for 0 shadows because we do not trace origins for
2567 // untainted sinks.
2568}
2569
2570void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
2571 Align InstAlignment,
2572 Value *PrimitiveShadow,
2573 Value *Origin,
2575 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;
2576
2577 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2578 const auto SI = AllocaShadowMap.find(AI);
2579 if (SI != AllocaShadowMap.end()) {
2580 IRBuilder<> IRB(Pos->getParent(), Pos);
2581 IRB.CreateStore(PrimitiveShadow, SI->second);
2582
2583 // Do not write origins for 0 shadows because we do not trace origins for
2584 // untainted sinks.
2585 if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
2586 const auto OI = AllocaOriginMap.find(AI);
2587 assert(OI != AllocaOriginMap.end() && Origin);
2588 IRB.CreateStore(Origin, OI->second);
2589 }
2590 return;
2591 }
2592 }
2593
2594 const Align ShadowAlign = getShadowAlign(InstAlignment);
2595 if (DFS.isZeroShadow(PrimitiveShadow)) {
2596 storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
2597 return;
2598 }
2599
2600 IRBuilder<> IRB(Pos->getParent(), Pos);
2601 Value *ShadowAddr, *OriginAddr;
2602 std::tie(ShadowAddr, OriginAddr) =
2603 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2604
2605 const unsigned ShadowVecSize = 8;
2606 assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
2607 "Shadow vector is too large!");
2608
2609 uint64_t Offset = 0;
2610 uint64_t LeftSize = Size;
2611 if (LeftSize >= ShadowVecSize) {
2612 auto *ShadowVecTy =
2613 FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
2614 Value *ShadowVec = PoisonValue::get(ShadowVecTy);
2615 for (unsigned I = 0; I != ShadowVecSize; ++I) {
2616 ShadowVec = IRB.CreateInsertElement(
2617 ShadowVec, PrimitiveShadow,
2618 ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
2619 }
2620 do {
2621 Value *CurShadowVecAddr =
2622 IRB.CreateConstGEP1_32(ShadowVecTy, ShadowAddr, Offset);
2623 IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
2624 LeftSize -= ShadowVecSize;
2625 ++Offset;
2626 } while (LeftSize >= ShadowVecSize);
2627 Offset *= ShadowVecSize;
2628 }
2629 while (LeftSize > 0) {
2630 Value *CurShadowAddr =
2631 IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
2632 IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
2633 --LeftSize;
2634 ++Offset;
2635 }
2636
2637 if (ShouldTrackOrigins) {
2638 storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
2639 InstAlignment);
2640 }
2641}
2642
2644 switch (AO) {
2645 case AtomicOrdering::NotAtomic:
2646 return AtomicOrdering::NotAtomic;
2647 case AtomicOrdering::Unordered:
2648 case AtomicOrdering::Monotonic:
2649 case AtomicOrdering::Release:
2650 return AtomicOrdering::Release;
2651 case AtomicOrdering::Acquire:
2652 case AtomicOrdering::AcquireRelease:
2653 return AtomicOrdering::AcquireRelease;
2654 case AtomicOrdering::SequentiallyConsistent:
2655 return AtomicOrdering::SequentiallyConsistent;
2656 }
2657 llvm_unreachable("Unknown ordering");
2658}
2659
2660void DFSanVisitor::visitStoreInst(StoreInst &SI) {
2661 auto &DL = SI.getModule()->getDataLayout();
2662 Value *Val = SI.getValueOperand();
2663 uint64_t Size = DL.getTypeStoreSize(Val->getType());
2664 if (Size == 0)
2665 return;
2666
2667 // When an application store is atomic, increase atomic ordering between
2668 // atomic application loads and stores to ensure happen-before order; load
2669 // shadow data after application data; store zero shadow data before
2670 // application data. This ensure shadow loads return either labels of the
2671 // initial application data or zeros.
2672 if (SI.isAtomic())
2673 SI.setOrdering(addReleaseOrdering(SI.getOrdering()));
2674
2675 const bool ShouldTrackOrigins =
2676 DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
2677 std::vector<Value *> Shadows;
2678 std::vector<Value *> Origins;
2679
2680 Value *Shadow =
2681 SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);
2682
2683 if (ShouldTrackOrigins) {
2684 Shadows.push_back(Shadow);
2685 Origins.push_back(DFSF.getOrigin(Val));
2686 }
2687
2688 Value *PrimitiveShadow;
2690 Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
2691 if (ShouldTrackOrigins) {
2692 Shadows.push_back(PtrShadow);
2693 Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
2694 }
2695 PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, SI.getIterator());
2696 } else {
2697 PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, SI.getIterator());
2698 }
2699 Value *Origin = nullptr;
2700 if (ShouldTrackOrigins)
2701 Origin = DFSF.combineOrigins(Shadows, Origins, SI.getIterator());
2702 DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
2703 PrimitiveShadow, Origin, SI.getIterator());
2704 if (ClEventCallbacks) {
2705 IRBuilder<> IRB(&SI);
2706 Value *Addr = SI.getPointerOperand();
2707 CallInst *CI =
2708 IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr});
2709 CI->addParamAttr(0, Attribute::ZExt);
2710 }
2711}
2712
// Shared lowering for atomic read-modify-write and compare-exchange
// instructions: instead of racing with concurrent shadow updates, clear the
// shadow at the target address and give the instruction's result a zero
// shadow and origin.
void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
  assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));

  // Operand 1 is the stored value for both AtomicRMW and AtomicCmpXchg.
  Value *Val = I.getOperand(1);
  const auto &DL = I.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(Val->getType());
  if (Size == 0)
    return;

  // Conservatively set data at stored addresses and return with zero shadow to
  // prevent shadow data races.
  IRBuilder<> IRB(&I);
  Value *Addr = I.getOperand(0);
  const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
  DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, I.getIterator());
  DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
  DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
}
2731
// Atomic RMW: clear shadow conservatively, then strengthen the ordering to
// at least Release (see visitCASOrRMW).
void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setOrdering(addReleaseOrdering(I.getOrdering()));
}
2738
// Atomic compare-exchange: clear shadow conservatively, then strengthen the
// success ordering to at least Release (see visitCASOrRMW).
void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
}
2745
// Unary operators propagate their operand's shadow/origin to the result.
void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
  visitInstOperands(UO);
}
2749
// Binary operators union both operands' shadows into the result's shadow.
void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
  visitInstOperands(BO);
}
2753
2754void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
2755 // Special case: if this is the bitcast (there is exactly 1 allowed) between
2756 // a musttail call and a ret, don't instrument. New instructions are not
2757 // allowed after a musttail call.
2758 if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
2759 if (CI->isMustTailCall())
2760 return;
2761 visitInstOperands(BCI);
2762}
2763
// Casts carry their operand's shadow/origin through unchanged.
void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }
2765
2766void DFSanVisitor::visitCmpInst(CmpInst &CI) {
2767 visitInstOperands(CI);
2768 if (ClEventCallbacks) {
2769 IRBuilder<> IRB(&CI);
2770 Value *CombinedShadow = DFSF.getShadow(&CI);
2771 CallInst *CallI =
2772 IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
2773 CallI->addParamAttr(0, Attribute::ZExt);
2774 }
2775}
2776
void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
  // We do not need to track data through LandingPadInst.
  //
  // For C++ exceptions, if a value is thrown, this value will be stored
  // in a memory location provided by __cxa_allocate_exception(...) (on the
  // throw side) or __cxa_begin_catch(...) (on the catch side).
  // This memory will have a shadow, so with the loads and stores we will be
  // able to propagate labels on data thrown through exceptions, without any
  // special handling of the LandingPadInst.
  //
  // The second element in the pair result of the LandingPadInst is a
  // register value, but it is for a type ID and should never be tainted.
  DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
  DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
}
2792
2793void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
2795 DFSF.isLookupTableConstant(
2797 visitInstOperands(GEPI);
2798 return;
2799 }
2800
2801 // Only propagate shadow/origin of base pointer value but ignore those of
2802 // offset operands.
2803 Value *BasePointer = GEPI.getPointerOperand();
2804 DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
2805 if (DFSF.DFS.shouldTrackOrigins())
2806 DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
2807}
2808
// Extractelement results take the union of operand shadows.
void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
  visitInstOperands(I);
}
2812
// Insertelement results take the union of operand shadows.
void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
  visitInstOperands(I);
}
2816
// Shufflevector results take the union of operand shadows.
void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
  visitInstOperands(I);
}
2820
2821void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
2822 IRBuilder<> IRB(&I);
2823 Value *Agg = I.getAggregateOperand();
2824 Value *AggShadow = DFSF.getShadow(Agg);
2825 Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
2826 DFSF.setShadow(&I, ResShadow);
2827 visitInstOperandOrigins(I);
2828}
2829
2830void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
2831 IRBuilder<> IRB(&I);
2832 Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
2833 Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
2834 Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
2835 DFSF.setShadow(&I, Res);
2836 visitInstOperandOrigins(I);
2837}
2838
2839void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
2840 bool AllLoadsStores = true;
2841 for (User *U : I.users()) {
2842 if (isa<LoadInst>(U))
2843 continue;
2844
2845 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
2846 if (SI->getPointerOperand() == &I)
2847 continue;
2848 }
2849
2850 AllLoadsStores = false;
2851 break;
2852 }
2853 if (AllLoadsStores) {
2854 IRBuilder<> IRB(&I);
2855 DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
2856 if (DFSF.DFS.shouldTrackOrigins()) {
2857 DFSF.AllocaOriginMap[&I] =
2858 IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
2859 }
2860 }
2861 DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
2862 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2863}
2864
2865void DFSanVisitor::visitSelectInst(SelectInst &I) {
2866 Value *CondShadow = DFSF.getShadow(I.getCondition());
2867 Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
2868 Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
2869 Value *ShadowSel = nullptr;
2870 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2871 std::vector<Value *> Shadows;
2872 std::vector<Value *> Origins;
2873 Value *TrueOrigin =
2874 ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
2875 Value *FalseOrigin =
2876 ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;
2877
2878 DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());
2879
2880 if (isa<VectorType>(I.getCondition()->getType())) {
2881 ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
2882 FalseShadow, I.getIterator());
2883 if (ShouldTrackOrigins) {
2884 Shadows.push_back(TrueShadow);
2885 Shadows.push_back(FalseShadow);
2886 Origins.push_back(TrueOrigin);
2887 Origins.push_back(FalseOrigin);
2888 }
2889 } else {
2890 if (TrueShadow == FalseShadow) {
2891 ShadowSel = TrueShadow;
2892 if (ShouldTrackOrigins) {
2893 Shadows.push_back(TrueShadow);
2894 Origins.push_back(TrueOrigin);
2895 }
2896 } else {
2897 ShadowSel = SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow,
2898 "", I.getIterator());
2899 if (ShouldTrackOrigins) {
2900 Shadows.push_back(ShadowSel);
2901 Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
2902 FalseOrigin, "", I.getIterator()));
2903 }
2904 }
2905 }
2906 DFSF.setShadow(&I, ClTrackSelectControlFlow ? DFSF.combineShadowsThenConvert(
2907 I.getType(), CondShadow,
2908 ShadowSel, I.getIterator())
2909 : ShadowSel);
2910 if (ShouldTrackOrigins) {
2912 Shadows.push_back(CondShadow);
2913 Origins.push_back(DFSF.getOrigin(I.getCondition()));
2914 }
2915 DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, I.getIterator()));
2916 }
2917}
2918
2919void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
2920 IRBuilder<> IRB(&I);
2921 Value *ValShadow = DFSF.getShadow(I.getValue());
2922 Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
2923 ? DFSF.getOrigin(I.getValue())
2924 : DFSF.DFS.ZeroOrigin;
2925 IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
2926 {ValShadow, ValOrigin, I.getDest(),
2927 IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
2928}
2929
// memcpy/memmove: copy origins first (they are derived from shadows), then
// copy the shadow region with a cloned transfer scaled by the shadow width.
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
  IRBuilder<> IRB(&I);

  // CopyOrMoveOrigin transfers origins by referring to their shadows. So we
  // need to move origins before moving shadows.
  if (DFSF.DFS.shouldTrackOrigins()) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemOriginTransferFn,
        {I.getArgOperand(0), I.getArgOperand(1),
         IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
  }

  Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), I.getIterator());
  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), I.getIterator());
  // The shadow region is ShadowWidthBytes times larger than the data region.
  Value *LenShadow =
      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
                                                    DFSF.DFS.ShadowWidthBytes));
  // Clone the original transfer intrinsic, retargeted at the shadow region.
  auto *MTI = cast<MemTransferInst>(
      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
  MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne()));
  MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne()));
  if (ClEventCallbacks) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemTransferCallbackFn,
        {DestShadow, IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
  }
}
2958
// Branches produce no value; only notify the optional conditional callback
// about the condition's taint.
void DFSanVisitor::visitBranchInst(BranchInst &BR) {
  if (!BR.isConditional())
    return;

  DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());
}
2965
// Switches produce no value; only notify the optional conditional callback
// about the switch condition's taint.
void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
  DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());
}
2969
2970static bool isAMustTailRetVal(Value *RetVal) {
2971 // Tail call may have a bitcast between return.
2972 if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
2973 RetVal = I->getOperand(0);
2974 }
2975 if (auto *I = dyn_cast<CallInst>(RetVal)) {
2976 return I->isMustTailCall();
2977 }
2978 return false;
2979}
2980
2981void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
2982 if (!DFSF.IsNativeABI && RI.getReturnValue()) {
2983 // Don't emit the instrumentation for musttail call returns.
2985 return;
2986
2987 Value *S = DFSF.getShadow(RI.getReturnValue());
2988 IRBuilder<> IRB(&RI);
2989 Type *RT = DFSF.F->getFunctionType()->getReturnType();
2990 unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
2991 if (Size <= RetvalTLSSize) {
2992 // If the size overflows, stores nothing. At callsite, oversized return
2993 // shadows are set to zero.
2994 IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
2995 }
2996 if (DFSF.DFS.shouldTrackOrigins()) {
2997 Value *O = DFSF.getOrigin(RI.getReturnValue());
2998 IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
2999 }
3000 }
3001}
3002
// Appends to Args the shadow arguments for a call to a custom (__dfsw_/
// __dfso_) wrapper: one primitive shadow per fixed parameter, a pointer to a
// stack array of shadows for the variadic tail, and a pointer to a shadow
// slot the wrapper writes the return label into.
void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Adds non-variable argument shadows.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(
        DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), CB.getIterator()));

  // Adds variable argument shadows.
  if (FT->isVarArg()) {
    auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
                                     CB.arg_size() - FT->getNumParams());
    // Materialized in the entry block so the alloca dominates all uses.
    auto *LabelVAAlloca =
        new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
                       "labelva", DFSF.F->getEntryBlock().begin());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
      IRB.CreateStore(
          DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), CB.getIterator()),
          LabelVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
  }

  // Adds the return value shadow.
  if (!FT->getReturnType()->isVoidTy()) {
    // One slot per instrumented function, reused across call sites.
    if (!DFSF.LabelReturnAlloca) {
      DFSF.LabelReturnAlloca = new AllocaInst(
          DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
          "labelreturn", DFSF.F->getEntryBlock().begin());
    }
    Args.push_back(DFSF.LabelReturnAlloca);
  }
}
3043
// Appends to Args the origin arguments for a call to a __dfso_ wrapper,
// mirroring addShadowArguments: one origin per fixed parameter, a stack
// array for the variadic tail, and a slot for the return origin.
void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Add non-variable argument origins.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.getOrigin(*I));

  // Add variable argument origins.
  if (FT->isVarArg()) {
    auto *OriginVATy =
        ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
    // Materialized in the entry block so the alloca dominates all uses.
    auto *OriginVAAlloca =
        new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
                       "originva", DFSF.F->getEntryBlock().begin());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
      IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
  }

  // Add the return value origin.
  if (!FT->getReturnType()->isVoidTy()) {
    // One slot per instrumented function, reused across call sites.
    if (!DFSF.OriginReturnAlloca) {
      DFSF.OriginReturnAlloca = new AllocaInst(
          DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
          "originreturn", DFSF.F->getEntryBlock().begin());
    }
    Args.push_back(DFSF.OriginReturnAlloca);
  }
}
3081
// Rewrites a call to an ABI-listed function according to its wrapper kind.
// Returns true if the call was fully handled here; false tells the caller
// (visitCallBase) to fall through to generic instrumentation.
bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
  IRBuilder<> IRB(&CB);
  switch (DFSF.DFS.getWrapperKind(&F)) {
  case DataFlowSanitizer::WK_Warning:
    // Call the original, but emit a runtime warning that the function is
    // uninstrumented; the result gets a zero shadow.
    CB.setCalledFunction(&F);
    IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
                   IRB.CreateGlobalStringPtr(F.getName()));
    DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Discard:
    // Call the original and drop all taint from the result.
    CB.setCalledFunction(&F);
    DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Functional:
    // Treat the callee as a pure function: the result's shadow is the union
    // of the operand shadows.
    CB.setCalledFunction(&F);
    DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
    visitInstOperands(CB);
    return true;
  case DataFlowSanitizer::WK_Custom:
    // Don't try to handle invokes of custom functions, it's too complicated.
    // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
    // wrapper.
    CallInst *CI = dyn_cast<CallInst>(&CB);
    if (!CI)
      return false;

    const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
    FunctionType *FT = F.getFunctionType();
    TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
    std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
    CustomFName += F.getName();
    FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
        CustomFName, CustomFn.TransformedType);
    if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
      CustomFn->copyAttributesFrom(&F);

      // Custom functions returning non-void will write to the return label.
      if (!FT->getReturnType()->isVoidTy()) {
        CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
      }
    }

    std::vector<Value *> Args;

    // Adds non-variable arguments.
    auto *I = CB.arg_begin();
    for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
      Args.push_back(*I);
    }

    // Adds shadow arguments.
    const unsigned ShadowArgStart = Args.size();
    addShadowArguments(F, CB, Args, IRB);

    // Adds origin arguments.
    const unsigned OriginArgStart = Args.size();
    if (ShouldTrackOrigins)
      addOriginArguments(F, CB, Args, IRB);

    // Adds variable arguments.
    append_range(Args, drop_begin(CB.args(), FT->getNumParams()));

    CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
    CustomCI->setCallingConv(CI->getCallingConv());
    CustomCI->setAttributes(transformFunctionAttributes(
        CustomFn, CI->getContext(), CI->getAttributes()));

    // Update the parameter attributes of the custom call instruction to
    // zero extend the shadow parameters. This is required for targets
    // which consider PrimitiveShadowTy an illegal type.
    for (unsigned N = 0; N < FT->getNumParams(); N++) {
      const unsigned ArgNo = ShadowArgStart + N;
      if (CustomCI->getArgOperand(ArgNo)->getType() ==
          DFSF.DFS.PrimitiveShadowTy)
        CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
      if (ShouldTrackOrigins) {
        const unsigned OriginArgNo = OriginArgStart + N;
        if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
            DFSF.DFS.OriginTy)
          CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
      }
    }

    // Loads the return value shadow and origin.
    if (!FT->getReturnType()->isVoidTy()) {
      LoadInst *LabelLoad =
          IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
      DFSF.setShadow(CustomCI,
                     DFSF.expandFromPrimitiveShadow(
                         FT->getReturnType(), LabelLoad, CB.getIterator()));
      if (ShouldTrackOrigins) {
        LoadInst *OriginLoad =
            IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
        DFSF.setOrigin(CustomCI, OriginLoad);
      }
    }

    CI->replaceAllUsesWith(CustomCI);
    CI->eraseFromParent();
    return true;
  }
  return false;
}
3189
3190Value *DFSanVisitor::makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
3191 constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
3192 uint32_t OrderingTable[NumOrderings] = {};
3193
3194 OrderingTable[(int)AtomicOrderingCABI::relaxed] =
3195 OrderingTable[(int)AtomicOrderingCABI::acquire] =
3196 OrderingTable[(int)AtomicOrderingCABI::consume] =
3197 (int)AtomicOrderingCABI::acquire;
3198 OrderingTable[(int)AtomicOrderingCABI::release] =
3199 OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
3200 (int)AtomicOrderingCABI::acq_rel;
3201 OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
3202 (int)AtomicOrderingCABI::seq_cst;
3203
3205 ArrayRef(OrderingTable, NumOrderings));
3206}
3207
// Instruments a call to libatomic's __atomic_load: strengthen its ordering to
// at least Acquire, then copy shadow+origin from the source to the
// destination AFTER the call (so the shadow matches the data actually read).
// Argument layout inferred from the operand uses below: (size, src, dst,
// ordering).
void DFSanVisitor::visitLibAtomicLoad(CallBase &CB) {
  // Since we use getNextNode here, we can't have CB terminate the BB.
  assert(isa<CallInst>(CB));

  IRBuilder<> IRB(&CB);
  Value *Size = CB.getArgOperand(0);
  Value *SrcPtr = CB.getArgOperand(1);
  Value *DstPtr = CB.getArgOperand(2);
  Value *Ordering = CB.getArgOperand(3);
  // Convert the call to have at least Acquire ordering to make sure
  // the shadow operations aren't reordered before it.
  Value *NewOrdering =
      IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
  CB.setArgOperand(3, NewOrdering);

  // Shadow copy is inserted after the call.
  IRBuilder<> NextIRB(CB.getNextNode());
  NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());

  // TODO: Support ClCombinePointerLabelsOnLoad
  // TODO: Support ClEventCallbacks

  NextIRB.CreateCall(
      DFSF.DFS.DFSanMemShadowOriginTransferFn,
      {DstPtr, SrcPtr, NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
3233
3234Value *DFSanVisitor::makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
3235 constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
3236 uint32_t OrderingTable[NumOrderings] = {};
3237
3238 OrderingTable[(int)AtomicOrderingCABI::relaxed] =
3239 OrderingTable[(int)AtomicOrderingCABI::release] =
3240 (int)AtomicOrderingCABI::release;
3241 OrderingTable[(int)AtomicOrderingCABI::consume] =
3242 OrderingTable[(int)AtomicOrderingCABI::acquire] =
3243 OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
3244 (int)AtomicOrderingCABI::acq_rel;
3245 OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
3246 (int)AtomicOrderingCABI::seq_cst;
3247
3249 ArrayRef(OrderingTable, NumOrderings));
3250}
3251
3252void DFSanVisitor::visitLibAtomicStore(CallBase &CB) {
3253 IRBuilder<> IRB(&CB);
3254 Value *Size = CB.getArgOperand(0);
3255 Value *SrcPtr = CB.getArgOperand(1);
3256 Value *DstPtr = CB.getArgOperand(2);
3257 Value *Ordering = CB.getArgOperand(3);
3258 // Convert the call to have at least Release ordering to make sure
3259 // the shadow operations aren't reordered after it.
3260 Value *NewOrdering =
3261 IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
3262 CB.setArgOperand(3, NewOrdering);
3263
3264 // TODO: Support ClCombinePointerLabelsOnStore
3265 // TODO: Support ClEventCallbacks
3266
3267 IRB.CreateCall(
3268 DFSF.DFS.DFSanMemShadowOriginTransferFn,
3269 {DstPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3270}
3271
void DFSanVisitor::visitLibAtomicExchange(CallBase &CB) {
  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, int
  // ordering)
  IRBuilder<> IRB(&CB);
  Value *Size = CB.getArgOperand(0);
  Value *TargetPtr = CB.getArgOperand(1);
  Value *SrcPtr = CB.getArgOperand(2);
  Value *DstPtr = CB.getArgOperand(3);

  // This operation is not atomic for the shadow and origin memory.
  // This could result in DFSan false positives or false negatives.
  // For now we will assume these operations are rare, and
  // the additional complexity to address this is not warranted.

  // Current Target to Dest
  IRB.CreateCall(
      DFSF.DFS.DFSanMemShadowOriginTransferFn,
      {DstPtr, TargetPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});

  // Current Src to Target (overriding)
  IRB.CreateCall(
      DFSF.DFS.DFSanMemShadowOriginTransferFn,
      {TargetPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
3296
void DFSanVisitor::visitLibAtomicCompareExchange(CallBase &CB) {
  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, void
  // *desired, int success_order, int failure_order)
  Value *Size = CB.getArgOperand(0);
  Value *TargetPtr = CB.getArgOperand(1);
  Value *ExpectedPtr = CB.getArgOperand(2);
  Value *DesiredPtr = CB.getArgOperand(3);

  // This operation is not atomic for the shadow and origin memory.
  // This could result in DFSan false positives or false negatives.
  // For now we will assume these operations are rare, and
  // the additional complexity to address this is not warranted.

  // The shadow update depends on the call's boolean result, so it is
  // inserted after the call.
  IRBuilder<> NextIRB(CB.getNextNode());
  NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());

  // The boolean result itself is untainted.
  DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));

  // If original call returned true, copy Desired to Target.
  // If original call returned false, copy Target to Expected.
  NextIRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginConditionalExchangeFn,
                     {NextIRB.CreateIntCast(&CB, NextIRB.getInt8Ty(), false),
                      TargetPtr, ExpectedPtr, DesiredPtr,
                      NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
3322
3323void DFSanVisitor::visitCallBase(CallBase &CB) {
3325 if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
3326 visitInstOperands(CB);
3327 return;
3328 }
3329
3330 // Calls to this function are synthesized in wrappers, and we shouldn't
3331 // instrument them.
3332 if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
3333 return;
3334
3335 LibFunc LF;
3336 if (DFSF.TLI.getLibFunc(CB, LF)) {
3337 // libatomic.a functions need to have special handling because there isn't
3338 // a good way to intercept them or compile the library with
3339 // instrumentation.
3340 switch (LF) {
3341 case LibFunc_atomic_load:
3342 if (!isa<CallInst>(CB)) {
3343 llvm::errs() << "DFSAN -- cannot instrument invoke of libatomic load. "
3344 "Ignoring!\n";
3345 break;
3346 }
3347 visitLibAtomicLoad(CB);
3348 return;
3349 case LibFunc_atomic_store:
3350 visitLibAtomicStore(CB);
3351 return;
3352 default:
3353 break;
3354 }
3355 }
3356
3357 // TODO: These are not supported by TLI? They are not in the enum.
3358 if (F && F->hasName() && !F->isVarArg()) {
3359 if (F->getName() == "__atomic_exchange") {
3360 visitLibAtomicExchange(CB);
3361 return;
3362 }
3363 if (F->getName() == "__atomic_compare_exchange") {
3364 visitLibAtomicCompareExchange(CB);
3365 return;
3366 }
3367 }
3368
3370 DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
3371 if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
3372 if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
3373 return;
3374
3375 IRBuilder<> IRB(&CB);
3376
3377 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
3378 FunctionType *FT = CB.getFunctionType();
3379 const DataLayout &DL = getDataLayout();
3380
3381 // Stores argument shadows.
3382 unsigned ArgOffset = 0;
3383 for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
3384 if (ShouldTrackOrigins) {
3385 // Ignore overflowed origins
3386 Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
3387 if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
3388 !DFSF.DFS.isZeroShadow(ArgShadow))
3389 IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
3390 DFSF.getArgOriginTLS(I, IRB));
3391 }
3392
3393 unsigned Size =
3394 DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
3395 // Stop storing if arguments' size overflows. Inside a function, arguments
3396 // after overflow have zero shadow values.
3397 if (ArgOffset + Size > ArgTLSSize)
3398 break;
3399 IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
3400 DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
3402 ArgOffset += alignTo(Size, ShadowTLSAlignment);
3403 }
3404
3405 Instruction *Next = nullptr;
3406 if (!CB.getType()->isVoidTy()) {
3407 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
3408 if (II->getNormalDest()->getSinglePredecessor()) {
3409 Next = &II->getNormalDest()->front();
3410 } else {
3411 BasicBlock *NewBB =
3412 SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
3413 Next = &NewBB->front();
3414 }
3415 } else {
3416 assert(CB.getIterator() != CB.getParent()->end());
3417 Next = CB.getNextNode();
3418 }
3419
3420 // Don't emit the epilogue for musttail call returns.
3421 if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
3422 return;
3423
3424 // Loads the return value shadow.
3425 IRBuilder<> NextIRB(Next);
3426 unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
3427 if (Size > RetvalTLSSize) {
3428 // Set overflowed return shadow to be zero.
3429 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3430 } else {
3431 LoadInst *LI = NextIRB.CreateAlignedLoad(
3432 DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
3433 ShadowTLSAlignment, "_dfsret");
3434 DFSF.SkipInsts.insert(LI);
3435 DFSF.setShadow(&CB, LI);
3436 DFSF.NonZeroChecks.push_back(LI);
3437 }
3438
3439 if (ShouldTrackOrigins) {
3440 LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
3441 DFSF.getRetvalOriginTLS(), "_dfsret_o");
3442 DFSF.SkipInsts.insert(LI);
3443 DFSF.setOrigin(&CB, LI);
3444 }
3445
3446 DFSF.addReachesFunctionCallbacksIfEnabled(NextIRB, CB, &CB);
3447 }
3448}
3449
// Creates placeholder shadow/origin phis alongside an application phi. The
// incoming values start as undef and are patched to the real incoming
// shadows/origins later via DFSF.PHIFixups, once all values have shadows.
void DFSanVisitor::visitPHINode(PHINode &PN) {
  Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
  PHINode *ShadowPN = PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "",
                                      PN.getIterator());

  // Give the shadow phi node valid predecessors to fool SplitEdge into working.
  Value *UndefShadow = UndefValue::get(ShadowTy);
  for (BasicBlock *BB : PN.blocks())
    ShadowPN->addIncoming(UndefShadow, BB);

  DFSF.setShadow(&PN, ShadowPN);

  PHINode *OriginPN = nullptr;
  if (DFSF.DFS.shouldTrackOrigins()) {
    OriginPN = PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "",
                               PN.getIterator());
    Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
    for (BasicBlock *BB : PN.blocks())
      OriginPN->addIncoming(UndefOrigin, BB);
    DFSF.setOrigin(&PN, OriginPN);
  }

  // Patched up after the whole function has been visited.
  DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
}
3474
3477 auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
3478 auto &FAM =
3481 };
3482 if (!DataFlowSanitizer(ABIListFiles).runImpl(M, GetTLI))
3483 return PreservedAnalyses::all();
3484
3486 // GlobalsAA is considered stateless and does not get invalidated unless
3487 // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
3488 // make changes that require GlobalsAA to be invalidated.
3489 PA.abandon<GlobalsAA>();
3490 return PA;
3491}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isConstant(const MachineInstr &MI)
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MemoryMapParams Linux_LoongArch64_MemoryMapParams
const MemoryMapParams Linux_X86_64_MemoryMapParams
static cl::opt< bool > ClTrackSelectControlFlow("dfsan-track-select-control-flow", cl::desc("Propagate labels from condition values of select instructions " "to results."), cl::Hidden, cl::init(true))
static cl::list< std::string > ClCombineTaintLookupTables("dfsan-combine-taint-lookup-table", cl::desc("When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables)."), cl::Hidden)
static const Align MinOriginAlignment
static cl::opt< int > ClTrackOrigins("dfsan-track-origins", cl::desc("Track origins of labels"), cl::Hidden, cl::init(0))
static cl::list< std::string > ClABIListFiles("dfsan-abilist", cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden)
static cl::opt< bool > ClReachesFunctionCallbacks("dfsan-reaches-function-callbacks", cl::desc("Insert calls to callback functions on data reaching a function."), cl::Hidden, cl::init(false))
static Value * expandFromPrimitiveShadowRecursive(Value *Shadow, SmallVector< unsigned, 4 > &Indices, Type *SubShadowTy, Value *PrimitiveShadow, IRBuilder<> &IRB)
static cl::opt< int > ClInstrumentWithCallThreshold("dfsan-instrument-with-call-threshold", cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(3500))
static cl::opt< bool > ClPreserveAlignment("dfsan-preserve-alignment", cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false))
static cl::opt< bool > ClDebugNonzeroLabels("dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label"), cl::Hidden)
static cl::opt< bool > ClCombineOffsetLabelsOnGEP("dfsan-combine-offset-labels-on-gep", cl::desc("Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic."), cl::Hidden, cl::init(true))
static cl::opt< bool > ClIgnorePersonalityRoutine("dfsan-ignore-personality-routine", cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it."), cl::Hidden, cl::init(false))
static const Align ShadowTLSAlignment
static AtomicOrdering addReleaseOrdering(AtomicOrdering AO)
static AtomicOrdering addAcquireOrdering(AtomicOrdering AO)
Value * StripPointerGEPsAndCasts(Value *V)
const MemoryMapParams Linux_AArch64_MemoryMapParams
static cl::opt< bool > ClConditionalCallbacks("dfsan-conditional-callbacks", cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, cl::init(false))
static cl::opt< bool > ClCombinePointerLabelsOnLoad("dfsan-combine-pointer-labels-on-load", cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory."), cl::Hidden, cl::init(true))
static StringRef getGlobalTypeString(const GlobalValue &G)
static cl::opt< bool > ClCombinePointerLabelsOnStore("dfsan-combine-pointer-labels-on-store", cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory."), cl::Hidden, cl::init(false))
static const unsigned ArgTLSSize
static const unsigned RetvalTLSSize
static bool isAMustTailRetVal(Value *RetVal)
static cl::opt< bool > ClEventCallbacks("dfsan-event-callbacks", cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false))
Returns the sub-type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
uint64_t Addr
std::string Name
uint64_t Size
static bool runImpl(Function &F, const TargetLowering &TLI)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
Module.h This file contains the declarations for the Module class.
nvptx lower args
#define P(N)
Module * Mod
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
Defines the virtual file system interface vfs::FileSystem.
Class for arbitrary precision integers.
Definition: APInt.h:76
an instruction to allocate memory on the stack
Definition: Instructions.h:59
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
unsigned getNumAttrSets() const
AttributeSet getParamAttrs(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Definition: AttributeMask.h:44
static Attribute getWithMemoryEffects(LLVMContext &Context, MemoryEffects ME)
Definition: Attributes.cpp:241
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
const Instruction & front() const
Definition: BasicBlock.h:452
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:198
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
This class represents a no-op cast from one type to another.
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1455
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1770
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1765
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1703
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1761
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1623
Value * getCalledOperand() const
Definition: InstrTypes.h:1696
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1784
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Definition: InstrTypes.h:1822
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1648
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1653
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1629
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1561
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1639
unsigned arg_size() const
Definition: InstrTypes.h:1646
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1780
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1832
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1742
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr, BasicBlock::iterator InsertBefore)
bool isMustTailCall() const
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:579
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:955
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1663
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:2893
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1016
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:122
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
unsigned getLine() const
Definition: DebugLoc.cpp:24
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:20
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a single (scalar) element from a VectorType value.
This instruction extracts a struct member or array element value from an aggregate value.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:162
void removeFnAttrs(const AttributeMask &Attrs)
Definition: Function.cpp:635
void removeFnAttr(Attribute::AttrKind Kind)
Remove function attributes from this function.
Definition: Function.cpp:627
arg_iterator arg_begin()
Definition: Function.h:813
void removeRetAttrs(const AttributeMask &Attrs)
removes the attributes from the return value list of attributes.
Definition: Function.cpp:647
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition: Function.cpp:785
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:548
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:556
static bool isExternalWeakLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:411
LinkageTypes getLinkage() const
Definition: GlobalValue.h:545
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ WeakODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:57
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
Type * getValueType() const
Definition: GlobalValue.h:296
Analysis pass providing a never-invalidated alias analysis result.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2455
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1880
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition: IRBuilder.h:1772
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2506
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2443
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1806
Constant * CreateGlobalStringPtr(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr)
Same as CreateGlobalString, but return a pointer with "i8*" type instead of a pointer to array of i8.
Definition: IRBuilder.h:1992
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2153
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2499
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
Value * CreateConstGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
Definition: IRBuilder.h:1946
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
Definition: IRBuilder.h:1972
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2105
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1431
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:525
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2228
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1789
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1410
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1802
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2179
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1825
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2395
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1513
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1865
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1355
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:658
This instruction inserts a single (scalar) element into a VectorType value.
This instruction inserts a struct field of array element value into an aggregate value.
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCmpInst(CmpInst &I)
Definition: InstVisitor.h:262
RetTy visitExtractElementInst(ExtractElementInst &I)
Definition: InstVisitor.h:191
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:267
RetTy visitInsertValueInst(InsertValueInst &I)
Definition: InstVisitor.h:195
RetTy visitShuffleVectorInst(ShuffleVectorInst &I)
Definition: InstVisitor.h:193
RetTy visitLandingPadInst(LandingPadInst &I)
Definition: InstVisitor.h:196
RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I)
Definition: InstVisitor.h:171
RetTy visitBitCastInst(BitCastInst &I)
Definition: InstVisitor.h:187
RetTy visitSwitchInst(SwitchInst &I)
Definition: InstVisitor.h:232
RetTy visitPHINode(PHINode &I)
Definition: InstVisitor.h:175
RetTy visitReturnInst(ReturnInst &I)
Definition: InstVisitor.h:226
RetTy visitExtractValueInst(ExtractValueInst &I)
Definition: InstVisitor.h:194
RetTy visitUnaryOperator(UnaryOperator &I)
Definition: InstVisitor.h:260
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:170
RetTy visitInsertElementInst(InsertElementInst &I)
Definition: InstVisitor.h:192
RetTy visitAtomicRMWInst(AtomicRMWInst &I)
Definition: InstVisitor.h:172
RetTy visitAllocaInst(AllocaInst &I)
Definition: InstVisitor.h:168
RetTy visitBinaryOperator(BinaryOperator &I)
Definition: InstVisitor.h:261
RetTy visitMemTransferInst(MemTransferInst &I)
Definition: InstVisitor.h:214
RetTy visitMemSetInst(MemSetInst &I)
Definition: InstVisitor.h:209
RetTy visitCastInst(CastInst &I)
Definition: InstVisitor.h:259
RetTy visitBranchInst(BranchInst &I)
Definition: InstVisitor.h:229
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
RetTy visitGetElementPtrInst(GetElementPtrInst &I)
Definition: InstVisitor.h:174
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:453
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:80
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
bool isTerminator() const
Definition: Instruction.h:254
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:450
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:184
void setAlignment(Align Align)
Definition: Instructions.h:240
Value * getPointerOperand()
Definition: Instructions.h:280
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:250
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:245
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
static MemoryEffectsBase readOnly()
Create MemoryEffectsBase that can read any memory.
Definition: ModRef.h:122
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const std::string & getModuleInlineAsm() const
Get any module-scope inline assembly blocks.
Definition: Module.h:299
void setModuleInlineAsm(StringRef Asm)
Set the module-scope inline assembly blocks.
Definition: Module.h:338
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:167
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
Constant * getOrInsertGlobal(StringRef Name, Type *Ty, function_ref< GlobalVariable *()> CreateGlobalCallback)
Look up the specified global in the module symbol table.
Definition: Module.cpp:221
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:41
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void abandon()
Mark an analysis as abandoned.
Definition: Analysis.h:162
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal, BasicBlock::iterator InsertBefore)
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
static std::unique_ptr< SpecialCaseList > createOrDie(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS)
Parses the special case list entries from files.
An instruction for storing to memory.
Definition: Instructions.h:317
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
Multiway switch.
Value * getCondition() const
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
@ loongarch64
Definition: Triple.h:62
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Key
PAL metadata keys.
AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1047
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< BlockNode * > Block
Definition: RDFGraph.h:392
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2082
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2068
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
iterator_range< df_iterator< T > > depth_first(const T &G)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Remove all blocks that can not be reached from the function's entry.
Definition: Local.cpp:3180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85