LLVM 19.0.0git
DataFlowSanitizer.cpp
Go to the documentation of this file.
1//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
11/// analysis.
12///
13/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
14/// class of bugs on its own. Instead, it provides a generic dynamic data flow
15/// analysis framework to be used by clients to help detect application-specific
16/// issues within their own code.
17///
18/// The analysis is based on automatic propagation of data flow labels (also
19/// known as taint labels) through a program as it performs computation.
20///
21/// Argument and return value labels are passed through TLS variables
22/// __dfsan_arg_tls and __dfsan_retval_tls.
23///
24/// Each byte of application memory is backed by a shadow memory byte. The
25/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
26/// laid out as follows:
27///
28/// +--------------------+ 0x800000000000 (top of memory)
29/// | application 3 |
30/// +--------------------+ 0x700000000000
31/// | invalid |
32/// +--------------------+ 0x610000000000
33/// | origin 1 |
34/// +--------------------+ 0x600000000000
35/// | application 2 |
36/// +--------------------+ 0x510000000000
37/// | shadow 1 |
38/// +--------------------+ 0x500000000000
39/// | invalid |
40/// +--------------------+ 0x400000000000
41/// | origin 3 |
42/// +--------------------+ 0x300000000000
43/// | shadow 3 |
44/// +--------------------+ 0x200000000000
45/// | origin 2 |
46/// +--------------------+ 0x110000000000
47/// | invalid |
48/// +--------------------+ 0x100000000000
49/// | shadow 2 |
50/// +--------------------+ 0x010000000000
51/// | application 1 |
52/// +--------------------+ 0x000000000000
53///
54/// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
55/// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
56///
57/// For more information, please refer to the design document:
58/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
59//
60//===----------------------------------------------------------------------===//
61
63#include "llvm/ADT/DenseMap.h"
64#include "llvm/ADT/DenseSet.h"
68#include "llvm/ADT/StringRef.h"
69#include "llvm/ADT/StringSet.h"
70#include "llvm/ADT/iterator.h"
75#include "llvm/IR/Argument.h"
77#include "llvm/IR/Attributes.h"
78#include "llvm/IR/BasicBlock.h"
79#include "llvm/IR/Constant.h"
80#include "llvm/IR/Constants.h"
81#include "llvm/IR/DataLayout.h"
83#include "llvm/IR/Dominators.h"
84#include "llvm/IR/Function.h"
85#include "llvm/IR/GlobalAlias.h"
86#include "llvm/IR/GlobalValue.h"
88#include "llvm/IR/IRBuilder.h"
89#include "llvm/IR/InstVisitor.h"
90#include "llvm/IR/InstrTypes.h"
91#include "llvm/IR/Instruction.h"
94#include "llvm/IR/MDBuilder.h"
95#include "llvm/IR/Module.h"
96#include "llvm/IR/PassManager.h"
97#include "llvm/IR/Type.h"
98#include "llvm/IR/User.h"
99#include "llvm/IR/Value.h"
101#include "llvm/Support/Casting.h"
110#include <algorithm>
111#include <cassert>
112#include <cstddef>
113#include <cstdint>
114#include <memory>
115#include <set>
116#include <string>
117#include <utility>
118#include <vector>
119
120using namespace llvm;
121
122// This must be consistent with ShadowWidthBits.
124
126
// Sizes of the argument and return-value TLS variables. Both constants must be
// kept in sync with the ones in dfsan.cpp.
static constexpr unsigned ArgTLSSize = 800;
static constexpr unsigned RetvalTLSSize = 800;
131
132// The -dfsan-preserve-alignment flag controls whether this pass assumes that
133// alignment requirements provided by the input IR are correct. For example,
134// if the input IR contains a load with alignment 8, this flag will cause
135// the shadow load to have alignment 16. This flag is disabled by default as
136// we have unfortunately encountered too much code (including Clang itself;
137// see PR14291) which performs misaligned access.
139 "dfsan-preserve-alignment",
140 cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
141 cl::init(false));
142
143// The ABI list files control how shadow parameters are passed. The pass treats
144// every function labelled "uninstrumented" in the ABI list file as conforming
145// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
146// additional annotations for those functions, a call to one of those functions
147// will produce a warning message, as the labelling behaviour of the function is
148// unknown. The other supported annotations for uninstrumented functions are
149// "functional" and "discard", which are described below under
150// DataFlowSanitizer::WrapperKind.
151// Functions will often be labelled with both "uninstrumented" and one of
152// "functional" or "discard". This will leave the function unchanged by this
153// pass, and create a wrapper function that will call the original.
154//
155// Instrumented functions can also be annotated as "force_zero_labels", which
156// will make all shadow and return values set zero labels.
157// Functions should never be labelled with both "force_zero_labels" and
158// "uninstrumented" or any of the uninstrumented wrapper kinds.
160 "dfsan-abilist",
161 cl::desc("File listing native ABI functions and how the pass treats them"),
162 cl::Hidden);
163
164// Controls whether the pass includes or ignores the labels of pointers in load
165// instructions.
167 "dfsan-combine-pointer-labels-on-load",
168 cl::desc("Combine the label of the pointer with the label of the data when "
169 "loading from memory."),
170 cl::Hidden, cl::init(true));
171
172// Controls whether the pass includes or ignores the labels of pointers in
173// store instructions.
175 "dfsan-combine-pointer-labels-on-store",
176 cl::desc("Combine the label of the pointer with the label of the data when "
177 "storing in memory."),
178 cl::Hidden, cl::init(false));
179
180// Controls whether the pass propagates labels of offsets in GEP instructions.
182 "dfsan-combine-offset-labels-on-gep",
183 cl::desc(
184 "Combine the label of the offset with the label of the pointer when "
185 "doing pointer arithmetic."),
186 cl::Hidden, cl::init(true));
187
189 "dfsan-combine-taint-lookup-table",
190 cl::desc(
191 "When dfsan-combine-offset-labels-on-gep and/or "
192 "dfsan-combine-pointer-labels-on-load are false, this flag can "
193 "be used to re-enable combining offset and/or pointer taint when "
194 "loading specific constant global variables (i.e. lookup tables)."),
195 cl::Hidden);
196
198 "dfsan-debug-nonzero-labels",
199 cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
200 "load or return with a nonzero label"),
201 cl::Hidden);
202
203// Experimental feature that inserts callbacks for certain data events.
204// Currently callbacks are only inserted for loads, stores, memory transfers
205// (i.e. memcpy and memmove), and comparisons.
206//
207// If this flag is set to true, the user must provide definitions for the
208// following callback functions:
209// void __dfsan_load_callback(dfsan_label Label, void* addr);
210// void __dfsan_store_callback(dfsan_label Label, void* addr);
211// void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
212// void __dfsan_cmp_callback(dfsan_label CombinedLabel);
214 "dfsan-event-callbacks",
215 cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
216 cl::Hidden, cl::init(false));
217
218// Experimental feature that inserts callbacks for conditionals, including:
219// conditional branch, switch, select.
220// This must be true for dfsan_set_conditional_callback() to have effect.
222 "dfsan-conditional-callbacks",
223 cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
224 cl::init(false));
225
226// Experimental feature that inserts callbacks for data reaching a function,
227// either via function arguments or loads.
228// This must be true for dfsan_set_reaches_function_callback() to have effect.
230 "dfsan-reaches-function-callbacks",
231 cl::desc("Insert calls to callback functions on data reaching a function."),
232 cl::Hidden, cl::init(false));
233
234// Controls whether the pass tracks the control flow of select instructions.
236 "dfsan-track-select-control-flow",
237 cl::desc("Propagate labels from condition values of select instructions "
238 "to results."),
239 cl::Hidden, cl::init(true));
240
241// TODO: This default value follows MSan. DFSan may use a different value.
243 "dfsan-instrument-with-call-threshold",
244 cl::desc("If the function being instrumented requires more than "
245 "this number of origin stores, use callbacks instead of "
246 "inline checks (-1 means never use callbacks)."),
247 cl::Hidden, cl::init(3500));
248
249// Controls how to track origins.
250// * 0: do not track origins.
251// * 1: track origins at memory store operations.
252// * 2: track origins at memory load and store operations.
253// TODO: track callsites.
254static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
255 cl::desc("Track origins of labels"),
256 cl::Hidden, cl::init(0));
257
259 "dfsan-ignore-personality-routine",
260 cl::desc("If a personality routine is marked uninstrumented from the ABI "
261 "list, do not create a wrapper for it."),
262 cl::Hidden, cl::init(false));
263
265 // Types of GlobalVariables are always pointer types.
266 Type *GType = G.getValueType();
267 // For now we support excluding struct types only.
268 if (StructType *SGType = dyn_cast<StructType>(GType)) {
269 if (!SGType->isLiteral())
270 return SGType->getName();
271 }
272 return "<unknown type>";
273}
274
namespace {

/// Memory map parameters used in application-to-shadow address calculation.
///   Offset = (Addr & ~AndMask) ^ XorMask
///   Shadow = ShadowBase + Offset
///   Origin = (OriginBase + Offset) & ~3ULL
struct MemoryMapParams {
  uint64_t AndMask;    ///< Bits cleared from the address before the XOR.
  uint64_t XorMask;    ///< Value XORed into the masked address.
  uint64_t ShadowBase; ///< Added to Offset to form the shadow address.
  uint64_t OriginBase; ///< Added to Offset to form the origin address.
};

} // end anonymous namespace

// All per-platform tables below use underscore-separated names, so the whole
// group (including the LoongArch table) is kept inside the lint-suppression
// region.
// NOLINTBEGIN(readability-identifier-naming)
// aarch64 Linux
const MemoryMapParams Linux_AArch64_MemoryMapParams = {
    0,               // AndMask (not used)
    0x0B00000000000, // XorMask
    0,               // ShadowBase (not used)
    0x0200000000000, // OriginBase
};

// x86_64 Linux
const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};

// loongarch64 Linux
const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};
// NOLINTEND(readability-identifier-naming)
315
316namespace {
317
318class DFSanABIList {
319 std::unique_ptr<SpecialCaseList> SCL;
320
321public:
322 DFSanABIList() = default;
323
324 void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
325
326 /// Returns whether either this function or its source file are listed in the
327 /// given category.
328 bool isIn(const Function &F, StringRef Category) const {
329 return isIn(*F.getParent(), Category) ||
330 SCL->inSection("dataflow", "fun", F.getName(), Category);
331 }
332
333 /// Returns whether this global alias is listed in the given category.
334 ///
335 /// If GA aliases a function, the alias's name is matched as a function name
336 /// would be. Similarly, aliases of globals are matched like globals.
337 bool isIn(const GlobalAlias &GA, StringRef Category) const {
338 if (isIn(*GA.getParent(), Category))
339 return true;
340
341 if (isa<FunctionType>(GA.getValueType()))
342 return SCL->inSection("dataflow", "fun", GA.getName(), Category);
343
344 return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
345 SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
346 Category);
347 }
348
349 /// Returns whether this module is listed in the given category.
350 bool isIn(const Module &M, StringRef Category) const {
351 return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
352 }
353};
354
355/// TransformedFunction is used to express the result of transforming one
356/// function type into another. This struct is immutable. It holds metadata
357/// useful for updating calls of the old function to the new type.
358struct TransformedFunction {
359 TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
360 std::vector<unsigned> ArgumentIndexMapping)
361 : OriginalType(OriginalType), TransformedType(TransformedType),
362 ArgumentIndexMapping(ArgumentIndexMapping) {}
363
364 // Disallow copies.
365 TransformedFunction(const TransformedFunction &) = delete;
366 TransformedFunction &operator=(const TransformedFunction &) = delete;
367
368 // Allow moves.
369 TransformedFunction(TransformedFunction &&) = default;
370 TransformedFunction &operator=(TransformedFunction &&) = default;
371
372 /// Type of the function before the transformation.
373 FunctionType *OriginalType;
374
375 /// Type of the function after the transformation.
377
378 /// Transforming a function may change the position of arguments. This
379 /// member records the mapping from each argument's old position to its new
380 /// position. Argument positions are zero-indexed. If the transformation
381 /// from F to F' made the first argument of F into the third argument of F',
382 /// then ArgumentIndexMapping[0] will equal 2.
383 std::vector<unsigned> ArgumentIndexMapping;
384};
385
386/// Given function attributes from a call site for the original function,
387/// return function attributes appropriate for a call to the transformed
388/// function.
///
/// \param TransformedFunction supplies the transformed type (which sizes the
///        per-parameter attribute vector) and the old-to-new argument index
///        mapping.
/// \param Ctx context handed to AttributeList::get.
/// \param CallSiteAttrs attributes of the original call site; its function and
///        return attributes are passed through unchanged.
// NOTE(review): the listing line that carries the return type (389) is absent
// here; the function returns the result of AttributeList::get.
390transformFunctionAttributes(const TransformedFunction &TransformedFunction,
391 LLVMContext &Ctx, AttributeList CallSiteAttrs) {
392
393 // Construct a vector of AttributeSet for each function argument.
// One default-constructed entry per parameter of the transformed type.
394 std::vector<llvm::AttributeSet> ArgumentAttributes(
395 TransformedFunction.TransformedType->getNumParams());
396
397 // Copy attributes from the parameter of the original function to the
398 // transformed version. 'ArgumentIndexMapping' holds the mapping from
399 // old argument position to new.
400 for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
401 I < IE; ++I) {
402 unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
403 ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
404 }
405
406 // Copy annotations on varargs arguments.
// These are appended after the fixed-parameter slots, starting at the
// original type's parameter count.
// NOTE(review): the upper bound is getNumAttrSets(), not an argument count;
// confirm it matches the number of call-site arguments intended here.
407 for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
408 IE = CallSiteAttrs.getNumAttrSets();
409 I < IE; ++I) {
410 ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
411 }
412
413 return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
414 CallSiteAttrs.getRetAttrs(),
415 llvm::ArrayRef(ArgumentAttributes));
416}
417
/// Module-level state of the instrumentation: cached IR types and constants,
/// TLS globals, runtime function types and callees, the ABI list, and the
/// memory map parameters. DFSanFunction and DFSanVisitor are friends and read
/// this state directly.
418class DataFlowSanitizer {
419 friend struct DFSanFunction;
420 friend class DFSanVisitor;
421
// Width of one shadow label, in bits and in bytes.
422 enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };
423
// Width of one origin value, in bits and in bytes.
424 enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };
425
426 /// How should calls to uninstrumented functions be handled?
427 enum WrapperKind {
428 /// This function is present in an uninstrumented form but we don't know
429 /// how it should be handled. Print a warning and call the function anyway.
430 /// Don't label the return value.
431 WK_Warning,
432
433 /// This function does not write to (user-accessible) memory, and its return
434 /// value is unlabelled.
435 WK_Discard,
436
437 /// This function does not write to (user-accessible) memory, and the label
438 /// of its return value is the union of the label of its arguments.
439 WK_Functional,
440
441 /// Instead of calling the function, a custom wrapper __dfsw_F is called,
442 /// where F is the name of the function. This function may wrap the
443 /// original function or provide its own implementation. WK_Custom uses an
444 /// extra pointer argument to return the shadow. This allows the wrapped
445 /// form of the function type to be expressed in C.
446 WK_Custom
447 };
448
// Cached module, context, and commonly used IR types/constants.
449 Module *Mod;
450 LLVMContext *Ctx;
451 Type *Int8Ptr;
452 IntegerType *OriginTy;
453 PointerType *OriginPtrTy;
454 ConstantInt *ZeroOrigin;
455 /// The shadow type for all primitive types and vector types.
456 IntegerType *PrimitiveShadowTy;
457 PointerType *PrimitiveShadowPtrTy;
458 IntegerType *IntptrTy;
459 ConstantInt *ZeroPrimitiveShadow;
// TLS globals used to pass argument/return-value labels and origins.
460 Constant *ArgTLS;
461 ArrayType *ArgOriginTLSTy;
462 Constant *ArgOriginTLS;
463 Constant *RetvalTLS;
464 Constant *RetvalOriginTLS;
// Function types of the runtime entry points and callbacks.
465 FunctionType *DFSanUnionLoadFnTy;
466 FunctionType *DFSanLoadLabelAndOriginFnTy;
467 FunctionType *DFSanUnimplementedFnTy;
468 FunctionType *DFSanWrapperExternWeakNullFnTy;
469 FunctionType *DFSanSetLabelFnTy;
470 FunctionType *DFSanNonzeroLabelFnTy;
471 FunctionType *DFSanVarargWrapperFnTy;
472 FunctionType *DFSanConditionalCallbackFnTy;
473 FunctionType *DFSanConditionalCallbackOriginFnTy;
474 FunctionType *DFSanReachesFunctionCallbackFnTy;
475 FunctionType *DFSanReachesFunctionCallbackOriginFnTy;
476 FunctionType *DFSanCmpCallbackFnTy;
477 FunctionType *DFSanLoadStoreCallbackFnTy;
478 FunctionType *DFSanMemTransferCallbackFnTy;
479 FunctionType *DFSanChainOriginFnTy;
480 FunctionType *DFSanChainOriginIfTaintedFnTy;
481 FunctionType *DFSanMemOriginTransferFnTy;
482 FunctionType *DFSanMemShadowOriginTransferFnTy;
483 FunctionType *DFSanMemShadowOriginConditionalExchangeFnTy;
484 FunctionType *DFSanMaybeStoreOriginFnTy;
// Callees for the runtime entry points and callbacks.
485 FunctionCallee DFSanUnionLoadFn;
486 FunctionCallee DFSanLoadLabelAndOriginFn;
487 FunctionCallee DFSanUnimplementedFn;
488 FunctionCallee DFSanWrapperExternWeakNullFn;
489 FunctionCallee DFSanSetLabelFn;
490 FunctionCallee DFSanNonzeroLabelFn;
491 FunctionCallee DFSanVarargWrapperFn;
492 FunctionCallee DFSanLoadCallbackFn;
493 FunctionCallee DFSanStoreCallbackFn;
494 FunctionCallee DFSanMemTransferCallbackFn;
495 FunctionCallee DFSanConditionalCallbackFn;
496 FunctionCallee DFSanConditionalCallbackOriginFn;
497 FunctionCallee DFSanReachesFunctionCallbackFn;
498 FunctionCallee DFSanReachesFunctionCallbackOriginFn;
499 FunctionCallee DFSanCmpCallbackFn;
500 FunctionCallee DFSanChainOriginFn;
501 FunctionCallee DFSanChainOriginIfTaintedFn;
502 FunctionCallee DFSanMemOriginTransferFn;
503 FunctionCallee DFSanMemShadowOriginTransferFn;
504 FunctionCallee DFSanMemShadowOriginConditionalExchangeFn;
505 FunctionCallee DFSanMaybeStoreOriginFn;
506 SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
507 MDNode *ColdCallWeights;
508 MDNode *OriginStoreWeights;
509 DFSanABIList ABIList;
510 DenseMap<Value *, Function *> UnwrappedFnMap;
511 AttributeMask ReadOnlyNoneAttrs;
512 StringSet<> CombineTaintLookupTableNames;
513
514 /// Memory map parameters used in calculation mapping application addresses
515 /// to shadow addresses and origin addresses.
516 const MemoryMapParams *MapParams;
517
518 Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
519 Value *getShadowAddress(Value *Addr, Instruction *Pos);
520 Value *getShadowAddress(Value *Addr, Instruction *Pos, Value *ShadowOffset);
521 std::pair<Value *, Value *>
522 getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
523 bool isInstrumented(const Function *F);
524 bool isInstrumented(const GlobalAlias *GA);
525 bool isForceZeroLabels(const Function *F);
526 TransformedFunction getCustomFunctionType(FunctionType *T);
527 WrapperKind getWrapperKind(Function *F);
528 void addGlobalNameSuffix(GlobalValue *GV);
529 void buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, Function *F);
530 Function *buildWrapperFunction(Function *F, StringRef NewFName,
// NOTE(review): listing line 531 is absent here, so this parameter list is
// incomplete as shown.
532 FunctionType *NewFT);
533 void initializeCallbackFunctions(Module &M);
534 void initializeRuntimeFunctions(Module &M);
535 bool initializeModule(Module &M);
536
537 /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
538 /// from it. Returns the origin's loaded value.
539 Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
540 Value **OriginAddr);
541
542 /// Returns whether the given load byte size is amenable to inlined
543 /// optimization patterns.
544 bool hasLoadSizeForFastPath(uint64_t Size);
545
546 /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
547 bool shouldTrackOrigins();
548
549 /// Returns a zero constant with the shadow type of OrigTy.
550 ///
551 /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2),...}
552 /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
553 /// getZeroShadow(other type) = zero of the primitive shadow type
///
/// NOTE(review): this comment previously said i16(0), which disagrees with
/// ShadowWidthBits = 8 above; confirm against the definition.
554 Constant *getZeroShadow(Type *OrigTy);
555 /// Returns a zero constant with the shadow type of V's type.
556 Constant *getZeroShadow(Value *V);
557
558 /// Checks if V is a zero shadow.
559 bool isZeroShadow(Value *V);
560
561 /// Returns the shadow type of OrigTy.
562 ///
563 /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
564 /// getShadowTy([n x T]) = [n x getShadowTy(T)]
565 /// getShadowTy(other type) = the primitive shadow type
///
/// NOTE(review): this comment previously said i16, which disagrees with
/// ShadowWidthBits = 8 above; confirm against the definition.
566 Type *getShadowTy(Type *OrigTy);
567 /// Returns the shadow type of V's type.
568 Type *getShadowTy(Value *V);
569
// Number of origin-sized (OriginWidthBytes) slots obtained by dividing
// ArgTLSSize by the origin width.
570 const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;
571
572public:
573 DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);
574
575 bool runImpl(Module &M,
// NOTE(review): listing line 576 (the remainder of runImpl's parameter list) is
// absent here.
577};
578
/// Per-function instrumentation state: the function being processed, its
/// dominator tree, the value-to-shadow and value-to-origin maps, pending PHI
/// fixups, and shadow caches, plus helpers that generate shadow/origin IR.
// NOTE(review): the listing's numbering skips lines 585, 590-591 and 614 inside
// this struct, so some member declarations are absent here. In particular the
// constructor initializes a TLI member whose declaration is not shown.
579struct DFSanFunction {
580 DataFlowSanitizer &DFS;
581 Function *F;
582 DominatorTree DT;
583 bool IsNativeABI;
584 bool IsForceZeroLabels;
586 AllocaInst *LabelReturnAlloca = nullptr;
587 AllocaInst *OriginReturnAlloca = nullptr;
// Shadow and origin values recorded per IR value.
588 DenseMap<Value *, Value *> ValShadowMap;
589 DenseMap<Value *, Value *> ValOriginMap;
592
// Associates an original PHI node with its shadow and origin PHI nodes.
593 struct PHIFixupElement {
594 PHINode *Phi;
595 PHINode *ShadowPhi;
596 PHINode *OriginPhi;
597 };
598 std::vector<PHIFixupElement> PHIFixups;
599
600 DenseSet<Instruction *> SkipInsts;
601 std::vector<Value *> NonZeroChecks;
602
603 struct CachedShadow {
604 BasicBlock *Block; // The block where Shadow is defined.
605 Value *Shadow;
606 };
607 /// Maps a value to its latest shadow value in terms of domination tree.
608 DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
609 /// Maps a value to its latest collapsed shadow value it was converted to in
610 /// terms of domination tree. When ClDebugNonzeroLabels is on, this cache is
611 /// used at a post process where CFG blocks are split. So it does not cache
612 /// BasicBlock like CachedShadows, but uses domination between values.
613 DenseMap<Value *, Value *> CachedCollapsedShadows;
615
// Stores the given state and computes the dominator tree for F immediately.
616 DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
617 bool IsForceZeroLabels, TargetLibraryInfo &TLI)
618 : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
619 IsForceZeroLabels(IsForceZeroLabels), TLI(TLI) {
620 DT.recalculate(*F);
621 }
622
623 /// Computes the shadow address for a given function argument.
624 ///
625 /// Shadow = ArgTLS+ArgOffset.
626 Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);
627
628 /// Computes the shadow address for a return value.
629 Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);
630
631 /// Computes the origin address for a given function argument.
632 ///
633 /// Origin = ArgOriginTLS[ArgNo].
634 Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);
635
636 /// Computes the origin address for a return value.
637 Value *getRetvalOriginTLS();
638
639 Value *getOrigin(Value *V);
640 void setOrigin(Instruction *I, Value *Origin);
641 /// Generates IR to compute the origin of the last operand with a taint label.
642 Value *combineOperandOrigins(Instruction *Inst);
643 /// Before the instruction Pos, generates IR to compute the last origin with a
644 /// taint label. Labels and origins are from vectors Shadows and Origins
645 /// correspondingly. The generated IR is like
646 /// Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
647 /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
648 /// zeros with other bitwidths.
649 Value *combineOrigins(const std::vector<Value *> &Shadows,
650 const std::vector<Value *> &Origins, Instruction *Pos,
651 ConstantInt *Zero = nullptr);
652
653 Value *getShadow(Value *V);
654 void setShadow(Instruction *I, Value *Shadow);
655 /// Generates IR to compute the union of the two given shadows, inserting it
656 /// before Pos. The combined value is with primitive type.
657 Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
658 /// Combines the shadow values of V1 and V2, then converts the combined value
659 /// with primitive type into a shadow value with the original type T.
660 Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
661 Instruction *Pos);
662 Value *combineOperandShadows(Instruction *Inst);
663
664 /// Generates IR to load shadow and origin corresponding to bytes [\p
665 /// Addr, \p Addr + \p Size), where addr has alignment \p
666 /// InstAlignment, and take the union of each of those shadows. The returned
667 /// shadow always has primitive type.
668 ///
669 /// When tracking loads is enabled, the returned origin is a chain at the
670 /// current stack if the returned shadow is tainted.
671 std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
672 Align InstAlignment,
673 Instruction *Pos);
674
675 void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
676 Align InstAlignment, Value *PrimitiveShadow,
677 Value *Origin, Instruction *Pos);
678 /// Applies PrimitiveShadow to all primitive subtypes of T, returning
679 /// the expanded shadow value.
680 ///
681 /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
682 /// EFP([n x T], PS) = [n x EFP(T,PS)]
683 /// EFP(other types, PS) = PS
684 Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
685 Instruction *Pos);
686 /// Collapses Shadow into a single primitive shadow value, unioning all
687 /// primitive shadow values in the process. Returns the final primitive
688 /// shadow value.
689 ///
690 /// CTP({V1,V2, ...}) = UNION(CTP(V1),CTP(V2),...)
691 /// CTP([V1,V2,...]) = UNION(CTP(V1),CTP(V2),...)
692 /// CTP(other types S) = S
693 Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos);
694
695 void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
696 Instruction *Pos);
697
698 Align getShadowAlign(Align InstAlignment);
699
700 // If ClConditionalCallbacks is enabled, insert a callback after a given
701 // branch instruction using the given conditional expression.
702 void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);
703
704 // If ClReachesFunctionCallbacks is enabled, insert a callback for each
705 // argument and load instruction.
706 void addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, Instruction &I,
707 Value *Data);
708
709 bool isLookupTableConstant(Value *P);
710
711private:
712 /// Collapses the shadow with aggregate type into a single primitive shadow
713 /// value.
714 template <class AggregateType>
715 Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
716 IRBuilder<> &IRB);
717
718 Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);
719
720 /// Returns the shadow value of an argument A.
721 Value *getShadowForTLSArgument(Argument *A);
722
723 /// The fast path of loading shadows.
724 std::pair<Value *, Value *>
725 loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
726 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
727 Instruction *Pos);
728
729 Align getOriginAlign(Align InstAlignment);
730
731 /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
732 /// is __dfsan_load_label_and_origin. This function returns the union of all
733 /// labels and the origin of the first taint label. However this is an
734 /// additional call with many instructions. To ensure common cases are fast,
735 /// checks if it is possible to load labels and origins without using the
736 /// callback function.
737 ///
738 /// When enabling tracking load instructions, we always use
739 /// __dfsan_load_label_and_origin to reduce code size.
740 bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);
741
742 /// Returns a chain at the current stack with previous origin V.
743 Value *updateOrigin(Value *V, IRBuilder<> &IRB);
744
745 /// Returns a chain at the current stack with previous origin V if Shadow is
746 /// tainted.
747 Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);
748
749 /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
750 /// Origin otherwise.
751 Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);
752
753 /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
754 /// Size).
755 void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
756 uint64_t StoreOriginSize, Align Alignment);
757
758 /// Stores Origin in terms of its Shadow value.
759 /// * Do not write origins for zero shadows because we do not trace origins
760 /// for untainted sinks.
761 /// * Use __dfsan_maybe_store_origin if there are too many origin store
762 /// instrumentations.
763 void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow,
764 Value *Origin, Value *StoreOriginAddr, Align InstAlignment);
765
766 /// Convert a scalar value to an i1 by comparing with 0.
767 Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");
768
769 bool shouldInstrumentWithCall();
770
771 /// Generates IR to load shadow and origin corresponding to bytes [\p
772 /// Addr, \p Addr + \p Size), where addr has alignment \p
773 /// InstAlignment, and take the union of each of those shadows. The returned
774 /// shadow always has primitive type.
775 std::pair<Value *, Value *>
776 loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
777 Align InstAlignment, Instruction *Pos);
// Counter of origin stores for this function; starts at zero.
778 int NumOriginStores = 0;
779};
780
/// InstVisitor subclass that carries a reference to the DFSanFunction being
/// processed and declares the per-instruction visit handlers.
// NOTE(review): the listing's numbering skips several lines inside this class
// (e.g. 794-795, 799-800, 803-804, 812-820), so further visit* declarations
// may be absent here.
781class DFSanVisitor : public InstVisitor<DFSanVisitor> {
782public:
783 DFSanFunction &DFSF;
784
785 DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
786
// Returns the data layout of the module that owns the function being visited.
787 const DataLayout &getDataLayout() const {
788 return DFSF.F->getParent()->getDataLayout();
789 }
790
791 // Combines shadow values and origins for all of I's operands.
792 void visitInstOperands(Instruction &I);
793
796 void visitBitCastInst(BitCastInst &BCI);
797 void visitCastInst(CastInst &CI);
798 void visitCmpInst(CmpInst &CI);
801 void visitLoadInst(LoadInst &LI);
802 void visitStoreInst(StoreInst &SI);
805 void visitReturnInst(ReturnInst &RI);
806 void visitLibAtomicLoad(CallBase &CB);
807 void visitLibAtomicStore(CallBase &CB);
808 void visitLibAtomicExchange(CallBase &CB);
809 void visitLibAtomicCompareExchange(CallBase &CB);
810 void visitCallBase(CallBase &CB);
811 void visitPHINode(PHINode &PN);
821 void visitBranchInst(BranchInst &BR);
822 void visitSwitchInst(SwitchInst &SW);
823
824private:
825 void visitCASOrRMW(Align InstAlignment, Instruction &I);
826
827 // Returns false when this is an invoke of a custom function.
828 bool visitWrappedCallBase(Function &F, CallBase &CB);
829
830 // Combines origins for all of I's operands.
831 void visitInstOperandOrigins(Instruction &I);
832
833 void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
834 IRBuilder<> &IRB);
835
836 void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
837 IRBuilder<> &IRB);
838
839 Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB);
840 Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB);
841};
842
843bool LibAtomicFunction(const Function &F) {
844 // This is a bit of a hack because TargetLibraryInfo is a function pass.
845 // The DFSan pass would need to be refactored to be function pass oriented
846 // (like MSan is) in order to fit together nicely with TargetLibraryInfo.
847 // We need this check to prevent them from being instrumented, or wrapped.
848 // Match on name and number of arguments.
849 if (!F.hasName() || F.isVarArg())
850 return false;
851 switch (F.arg_size()) {
852 case 4:
853 return F.getName() == "__atomic_load" || F.getName() == "__atomic_store";
854 case 5:
855 return F.getName() == "__atomic_exchange";
856 case 6:
857 return F.getName() == "__atomic_compare_exchange";
858 default:
859 return false;
860 }
861}
862
863} // end anonymous namespace
864
// Constructs the pass from the caller-supplied ABI list files combined with
// the files named by ClABIListFiles.
// NOTE(review): the argument of ABIList.set( and the loop header that defines
// `v` are not visible in this listing (embedded lines 871 and 873 are
// missing); confirm against the real file before editing this function.
865DataFlowSanitizer::DataFlowSanitizer(
866 const std::vector<std::string> &ABIListFiles) {
 // ABIListFiles is a const reference, so std::move here cannot move from it:
 // the vector is copied.
867 std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
868 llvm::append_range(AllABIListFiles, ClABIListFiles);
869 // FIXME: should we propagate vfs::FileSystem to this constructor?
870 ABIList.set(
872
 // Records each `v` in CombineTaintLookupTableNames.
874 CombineTaintLookupTableNames.insert(v);
875}
876
877TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
878 SmallVector<Type *, 4> ArgTypes;
879
880 // Some parameters of the custom function being constructed are
881 // parameters of T. Record the mapping from parameters of T to
882 // parameters of the custom function, so that parameter attributes
883 // at call sites can be updated.
884 std::vector<unsigned> ArgumentIndexMapping;
885 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
886 Type *ParamType = T->getParamType(I);
887 ArgumentIndexMapping.push_back(ArgTypes.size());
888 ArgTypes.push_back(ParamType);
889 }
890 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
891 ArgTypes.push_back(PrimitiveShadowTy);
892 if (T->isVarArg())
893 ArgTypes.push_back(PrimitiveShadowPtrTy);
894 Type *RetType = T->getReturnType();
895 if (!RetType->isVoidTy())
896 ArgTypes.push_back(PrimitiveShadowPtrTy);
897
898 if (shouldTrackOrigins()) {
899 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
900 ArgTypes.push_back(OriginTy);
901 if (T->isVarArg())
902 ArgTypes.push_back(OriginPtrTy);
903 if (!RetType->isVoidTy())
904 ArgTypes.push_back(OriginPtrTy);
905 }
906
907 return TransformedFunction(
908 T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
909 ArgumentIndexMapping);
910}
911
912bool DataFlowSanitizer::isZeroShadow(Value *V) {
913 Type *T = V->getType();
914 if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
915 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
916 return CI->isZero();
917 return false;
918 }
919
920 return isa<ConstantAggregateZero>(V);
921}
922
923bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
924 uint64_t ShadowSize = Size * ShadowWidthBytes;
925 return ShadowSize % 8 == 0 || ShadowSize == 4;
926}
927
928bool DataFlowSanitizer::shouldTrackOrigins() {
929 static const bool ShouldTrackOrigins = ClTrackOrigins;
930 return ShouldTrackOrigins;
931}
932
933Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
934 if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
935 return ZeroPrimitiveShadow;
936 Type *ShadowTy = getShadowTy(OrigTy);
937 return ConstantAggregateZero::get(ShadowTy);
938}
939
940Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
941 return getZeroShadow(V->getType());
942}
943
945 Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
946 Value *PrimitiveShadow, IRBuilder<> &IRB) {
 // NOTE(review): the first line of this signature (embedded line 944) and the
 // left-hand sides of the two recursive calls (embedded lines 953 and 963) are
 // not visible in this listing; confirm against the real file.
 //
 // Leaf case: SubShadowTy is neither an array nor a struct, so PrimitiveShadow
 // is inserted into Shadow at the position described by Indices.
947 if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
948 return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);
949
 // Array case: visit every element; Indices is extended by the element index
 // for the duration of the visit and restored afterwards.
950 if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
951 for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
952 Indices.push_back(Idx);
954 Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
955 Indices.pop_back();
956 }
957 return Shadow;
958 }
959
 // Struct case: same walk, but each field has its own element type.
960 if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
961 for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
962 Indices.push_back(Idx);
964 Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
965 Indices.pop_back();
966 }
967 return Shadow;
968 }
 // The three cases above cover every type, so control cannot reach here.
969 llvm_unreachable("Unexpected shadow type");
970}
971
972bool DFSanFunction::shouldInstrumentWithCall() {
973 return ClInstrumentWithCallThreshold >= 0 &&
974 NumOriginStores >= ClInstrumentWithCallThreshold;
975}
976
// Expands PrimitiveShadow into a shadow value of the shadow type of T.
//  - If that shadow type is not an array or struct, PrimitiveShadow is
//    returned unchanged.
//  - If PrimitiveShadow is a constant zero shadow, a zero shadow constant is
//    returned and no instructions are created.
//  - Otherwise the aggregate is built with an IRBuilder positioned at Pos, and
//    the (aggregate -> primitive) pair is remembered in CachedCollapsedShadows.
// NOTE(review): the declaration of `Indices` (embedded line 988) is not
// visible in this listing.
977Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
978 Instruction *Pos) {
979 Type *ShadowTy = DFS.getShadowTy(T);
980
981 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
982 return PrimitiveShadow;
983
984 if (DFS.isZeroShadow(PrimitiveShadow))
985 return DFS.getZeroShadow(ShadowTy);
986
987 IRBuilder<> IRB(Pos);
 // Start from undef; the recursive helper fills in the elements.
989 Value *Shadow = UndefValue::get(ShadowTy);
990 Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
991 PrimitiveShadow, IRB);
992
993 // Caches the primitive shadow value that built the shadow value.
994 CachedCollapsedShadows[Shadow] = PrimitiveShadow;
995 return Shadow;
996}
997
998template <class AggregateType>
999Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
1000 IRBuilder<> &IRB) {
1001 if (!AT->getNumElements())
1002 return DFS.ZeroPrimitiveShadow;
1003
1004 Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1005 Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);
1006
1007 for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
1008 Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1009 Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
1010 Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1011 }
1012 return Aggregator;
1013}
1014
1015Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
1016 IRBuilder<> &IRB) {
1017 Type *ShadowTy = Shadow->getType();
1018 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
1019 return Shadow;
1020 if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
1021 return collapseAggregateShadow<>(AT, Shadow, IRB);
1022 if (StructType *ST = dyn_cast<StructType>(ShadowTy))
1023 return collapseAggregateShadow<>(ST, Shadow, IRB);
1024 llvm_unreachable("Unexpected shadow type");
1025}
1026
1027Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
1028 Instruction *Pos) {
1029 Type *ShadowTy = Shadow->getType();
1030 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
1031 return Shadow;
1032
1033 // Checks if the cached collapsed shadow value dominates Pos.
1034 Value *&CS = CachedCollapsedShadows[Shadow];
1035 if (CS && DT.dominates(CS, Pos))
1036 return CS;
1037
1038 IRBuilder<> IRB(Pos);
1039 Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
1040 // Caches the converted primitive shadow value.
1041 CS = PrimitiveShadow;
1042 return PrimitiveShadow;
1043}
1044
// Emits, at instruction I, a call to the conditional callback with the shadow
// of Condition (and its origin as well when origins are tracked).
// NOTE(review): the condition guarding the early return (embedded line 1047)
// is not visible in this listing; going by the function name it is presumably
// a check that conditional callbacks are enabled -- confirm.
1045void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
1046 Value *Condition) {
1048 return;
1049 }
1050 IRBuilder<> IRB(&I);
1051 Value *CondShadow = getShadow(Condition);
1052 CallInst *CI;
1053 if (DFS.shouldTrackOrigins()) {
1054 Value *CondOrigin = getOrigin(Condition);
1055 CI = IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
1056 {CondShadow, CondOrigin});
1057 } else {
1058 CI = IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
1059 }
 // The shadow is always argument 0 and is marked zero-extended.
1060 CI->addParamAttr(0, Attribute::ZExt);
1061}
1062
// Emits, through IRB, a call to the reaches-function callback for Data.
// Arguments passed: the collapsed shadow of Data, (its origin when origins are
// tracked), a file path string, a 32-bit line number, and the name of the
// function containing I. The call takes I's debug location.
// NOTE(review): the condition guarding the early return (embedded line 1066)
// is not visible in this listing; going by the function name it is presumably
// a check that these callbacks are enabled -- confirm.
1063void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB,
1064 Instruction &I,
1065 Value *Data) {
1067 return;
1068 }
1069 const DebugLoc &dbgloc = I.getDebugLoc();
1070 Value *DataShadow = collapseToPrimitiveShadow(getShadow(Data), IRB);
1071 ConstantInt *CILine;
1072 llvm::Value *FilePathPtr;
1073
 // Without a debug location, report line 0 and the module's source file name;
 // otherwise use the line and file name from the debug location.
1074 if (dbgloc.get() == nullptr) {
1075 CILine = llvm::ConstantInt::get(I.getContext(), llvm::APInt(32, 0));
1076 FilePathPtr = IRB.CreateGlobalStringPtr(
1077 I.getFunction()->getParent()->getSourceFileName());
1078 } else {
1079 CILine = llvm::ConstantInt::get(I.getContext(),
1080 llvm::APInt(32, dbgloc.getLine()));
1081 FilePathPtr =
1082 IRB.CreateGlobalStringPtr(dbgloc->getFilename());
1083 }
1084
1085 llvm::Value *FunctionNamePtr =
1086 IRB.CreateGlobalStringPtr(I.getFunction()->getName());
1087
1088 CallInst *CB;
1089 std::vector<Value *> args;
1090
1091 if (DFS.shouldTrackOrigins()) {
1092 Value *DataOrigin = getOrigin(Data);
1093 args = { DataShadow, DataOrigin, FilePathPtr, CILine, FunctionNamePtr };
1094 CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackOriginFn, args);
1095 } else {
1096 args = { DataShadow, FilePathPtr, CILine, FunctionNamePtr };
1097 CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackFn, args);
1098 }
 // The shadow is always argument 0 and is marked zero-extended.
1099 CB->addParamAttr(0, Attribute::ZExt);
1100 CB->setDebugLoc(dbgloc);
1101}
1102
// Maps an application type to its shadow type:
//  - unsized, integer and vector types -> PrimitiveShadowTy;
//  - arrays  -> array with the same element count over the element shadow type;
//  - structs -> literal struct of the field shadow types, in field order;
//  - anything else -> PrimitiveShadowTy.
// NOTE(review): the declaration of `Elements` (embedded line 1114) is not
// visible in this listing.
1103Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
1104 if (!OrigTy->isSized())
1105 return PrimitiveShadowTy;
1106 if (isa<IntegerType>(OrigTy))
1107 return PrimitiveShadowTy;
1108 if (isa<VectorType>(OrigTy))
1109 return PrimitiveShadowTy;
1110 if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
1111 return ArrayType::get(getShadowTy(AT->getElementType()),
1112 AT->getNumElements());
1113 if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1115 for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
1116 Elements.push_back(getShadowTy(ST->getElementType(I)));
1117 return StructType::get(*Ctx, Elements);
1118 }
1119 return PrimitiveShadowTy;
1120}
1121
1122Type *DataFlowSanitizer::getShadowTy(Value *V) {
1123 return getShadowTy(V->getType());
1124}
1125
// Caches per-module state for M: the memory-map parameters for the target,
// Mod/Ctx, the commonly used IR types and zero constants, the FunctionType of
// every DFSan runtime/callback function, and the branch-weight metadata.
// Calls report_fatal_error when the target OS is not Linux or the architecture
// has no case in the switch below. Always returns true.
// NOTE(review): a case label and its assignment (embedded lines 1139-1140) are
// not visible in this listing, leaving a bare `break;` before `default:`.
1126bool DataFlowSanitizer::initializeModule(Module &M) {
1127 Triple TargetTriple(M.getTargetTriple());
1128 const DataLayout &DL = M.getDataLayout();
1129
1130 if (TargetTriple.getOS() != Triple::Linux)
1131 report_fatal_error("unsupported operating system");
1132 switch (TargetTriple.getArch()) {
1133 case Triple::aarch64:
1134 MapParams = &Linux_AArch64_MemoryMapParams;
1135 break;
1136 case Triple::x86_64:
1137 MapParams = &Linux_X86_64_MemoryMapParams;
1138 break;
1141 break;
1142 default:
1143 report_fatal_error("unsupported architecture");
1144 }
1145
 // Basic types and constants shared by the rest of the pass.
1146 Mod = &M;
1147 Ctx = &M.getContext();
1148 Int8Ptr = PointerType::getUnqual(*Ctx);
1149 OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
1150 OriginPtrTy = PointerType::getUnqual(OriginTy);
1151 PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
1152 PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
1153 IntptrTy = DL.getIntPtrType(*Ctx);
1154 ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
1155 ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);
1156
 // Function types of the runtime entry points. None of them is variadic.
1157 Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
1158 DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
1159 /*isVarArg=*/false);
1160 Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
 // Returns a single 64-bit integer.
1161 DFSanLoadLabelAndOriginFnTy =
1162 FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
1163 /*isVarArg=*/false);
1164 DFSanUnimplementedFnTy = FunctionType::get(
1165 Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
1166 Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr};
1167 DFSanWrapperExternWeakNullFnTy =
1168 FunctionType::get(Type::getVoidTy(*Ctx), DFSanWrapperExternWeakNullArgs,
1169 /*isVarArg=*/false);
1170 Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
1171 PointerType::getUnqual(*Ctx), IntptrTy};
1172 DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
1173 DFSanSetLabelArgs, /*isVarArg=*/false);
1174 DFSanNonzeroLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), std::nullopt,
1175 /*isVarArg=*/false);
1176 DFSanVarargWrapperFnTy = FunctionType::get(
1177 Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
1178 DFSanConditionalCallbackFnTy =
1179 FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
1180 /*isVarArg=*/false);
1181 Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
1182 DFSanConditionalCallbackOriginFnTy = FunctionType::get(
1183 Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
1184 /*isVarArg=*/false);
 // NOTE(review): the third slot here (fourth in the origin variant below)
 // lines up with the 32-bit line number passed by
 // addReachesFunctionCallbacksIfEnabled; OriginTy appears to be reused as the
 // integer type for it -- confirm.
1185 Type *DFSanReachesFunctionCallbackArgs[4] = {PrimitiveShadowTy, Int8Ptr,
1186 OriginTy, Int8Ptr};
1187 DFSanReachesFunctionCallbackFnTy =
1188 FunctionType::get(Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackArgs,
1189 /*isVarArg=*/false);
1190 Type *DFSanReachesFunctionCallbackOriginArgs[5] = {
1191 PrimitiveShadowTy, OriginTy, Int8Ptr, OriginTy, Int8Ptr};
1192 DFSanReachesFunctionCallbackOriginFnTy = FunctionType::get(
1193 Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackOriginArgs,
1194 /*isVarArg=*/false);
1195 DFSanCmpCallbackFnTy =
1196 FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
1197 /*isVarArg=*/false);
1198 DFSanChainOriginFnTy =
1199 FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
1200 Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
1201 DFSanChainOriginIfTaintedFnTy = FunctionType::get(
1202 OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
1203 Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
1204 Int8Ptr, IntptrTy, OriginTy};
1205 DFSanMaybeStoreOriginFnTy = FunctionType::get(
1206 Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
1207 Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
1208 DFSanMemOriginTransferFnTy = FunctionType::get(
1209 Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
1210 Type *DFSanMemShadowOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
1211 DFSanMemShadowOriginTransferFnTy =
1212 FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemShadowOriginTransferArgs,
1213 /*isVarArg=*/false);
1214 Type *DFSanMemShadowOriginConditionalExchangeArgs[5] = {
1215 IntegerType::get(*Ctx, 8), Int8Ptr, Int8Ptr, Int8Ptr, IntptrTy};
1216 DFSanMemShadowOriginConditionalExchangeFnTy = FunctionType::get(
1217 Type::getVoidTy(*Ctx), DFSanMemShadowOriginConditionalExchangeArgs,
1218 /*isVarArg=*/false);
1219 Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
1220 DFSanLoadStoreCallbackFnTy =
1221 FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
1222 /*isVarArg=*/false);
1223 Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
1224 DFSanMemTransferCallbackFnTy =
1225 FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
1226 /*isVarArg=*/false);
1227
 // Both metadata nodes use the same 1:1000 branch weights.
1228 ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
1229 OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
1230 return true;
1231}
1232
1233bool DataFlowSanitizer::isInstrumented(const Function *F) {
1234 return !ABIList.isIn(*F, "uninstrumented");
1235}
1236
1237bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
1238 return !ABIList.isIn(*GA, "uninstrumented");
1239}
1240
1241bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
1242 return ABIList.isIn(*F, "force_zero_labels");
1243}
1244
1245DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
1246 if (ABIList.isIn(*F, "functional"))
1247 return WK_Functional;
1248 if (ABIList.isIn(*F, "discard"))
1249 return WK_Discard;
1250 if (ABIList.isIn(*F, "custom"))
1251 return WK_Custom;
1252
1253 return WK_Warning;
1254}
1255
1256void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
1257 std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
1258 GV->setName(GVName + Suffix);
1259
1260 // Try to change the name of the function in module inline asm. We only do
1261 // this for specific asm directives, currently only ".symver", to try to avoid
1262 // corrupting asm which happens to contain the symbol name as a substring.
1263 // Note that the substitution for .symver assumes that the versioned symbol
1264 // also has an instrumented name.
1265 std::string Asm = GV->getParent()->getModuleInlineAsm();
1266 std::string SearchStr = ".symver " + GVName + ",";
1267 size_t Pos = Asm.find(SearchStr);
1268 if (Pos != std::string::npos) {
1269 Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
1270 Pos = Asm.find('@');
1271
1272 if (Pos == std::string::npos)
1273 report_fatal_error(Twine("unsupported .symver: ", Asm));
1274
1275 Asm.replace(Pos, 1, Suffix + "@");
1276 GV->getParent()->setModuleInlineAsm(Asm);
1277 }
1278}
1279
1280void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,
1281 Function *F) {
1282 // If the function we are wrapping was ExternWeak, it may be null.
1283 // The original code before calling this wrapper may have checked for null,
1284 // but replacing with a known-to-not-be-null wrapper can break this check.
1285 // When replacing uses of the extern weak function with the wrapper we try
1286 // to avoid replacing uses in conditionals, but this is not perfect.
1287 // In the case where we fail, and accidentally optimize out a null check
1288 // for a extern weak function, add a check here to help identify the issue.
1289 if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) {
1290 std::vector<Value *> Args;
1291 Args.push_back(F);
1292 Args.push_back(IRB.CreateGlobalStringPtr(F->getName()));
1293 IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args);
1294 }
1295}
1296
// Creates a new function named NewFName of type NewFT in F's module and
// address space, with linkage NewFLink, that forwards to F.
//  - Attributes are copied from F; return attributes incompatible with the new
//    return type are removed.
//  - If F is variadic, the body calls DFSanVarargWrapperFn with F's name and
//    ends in unreachable.
//  - Otherwise the body calls F with the first FT->getNumParams() arguments of
//    the new function and returns the call's result (or void).
// NOTE(review): the parameter line declaring NewFLink (embedded line 1299) is
// not visible in this listing.
1297Function *
1298DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
1300 FunctionType *NewFT) {
1301 FunctionType *FT = F->getFunctionType();
1302 Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
1303 NewFName, F->getParent());
1304 NewF->copyAttributesFrom(F);
1305 NewF->removeRetAttrs(
1306 AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
1307
1308 BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
1309 if (F->isVarArg()) {
1310 NewF->removeFnAttr("split-stack");
1311 CallInst::Create(DFSanVarargWrapperFn,
1312 IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
1313 BB);
1314 new UnreachableInst(*Ctx, BB);
1315 } else {
 // Forward only as many arguments as F itself declares.
1316 auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
1317 std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());
1318
1319 CallInst *CI = CallInst::Create(F, Args, "", BB);
1320 if (FT->getReturnType()->isVoidTy())
1321 ReturnInst::Create(*Ctx, BB);
1322 else
1323 ReturnInst::Create(*Ctx, CI, BB);
1324 }
1325
1326 return NewF;
1327}
1328
1329// Initialize DataFlowSanitizer runtime functions and declare them in the module
// Each runtime function is obtained with Mod->getOrInsertFunction using the
// function types built in initializeModule, then every callee (including the
// callback functions) is recorded in DFSanRuntimeFunctions.
// The callback FunctionCallees inserted near the end (DFSanLoadCallbackFn,
// DFSanStoreCallbackFn, ...) are assigned by initializeCallbackFunctions, so
// that function has to run before this one.
// NOTE(review): the declarations of `AL` and the argument of the second
// addFnAttribute call in the first two scopes are not visible in this listing;
// `AL` is presumably an AttributeList -- confirm.
1330void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
1331 LLVMContext &C = M.getContext();
1332 {
1334 AL = AL.addFnAttribute(C, Attribute::NoUnwind);
1335 AL = AL.addFnAttribute(
1337 AL = AL.addRetAttribute(C, Attribute::ZExt);
1338 DFSanUnionLoadFn =
1339 Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
1340 }
1341 {
1343 AL = AL.addFnAttribute(C, Attribute::NoUnwind);
1344 AL = AL.addFnAttribute(
1346 AL = AL.addRetAttribute(C, Attribute::ZExt);
1347 DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
1348 "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
1349 }
1350 DFSanUnimplementedFn =
1351 Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
1352 DFSanWrapperExternWeakNullFn = Mod->getOrInsertFunction(
1353 "__dfsan_wrapper_extern_weak_null", DFSanWrapperExternWeakNullFnTy);
1354 {
 // Parameters 0 and 1 are marked zero-extended.
1356 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1357 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1358 DFSanSetLabelFn =
1359 Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
1360 }
1361 DFSanNonzeroLabelFn =
1362 Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
1363 DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
1364 DFSanVarargWrapperFnTy);
1365 {
1367 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1368 AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1369 DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
1370 DFSanChainOriginFnTy, AL);
1371 }
1372 {
1374 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1375 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1376 AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1377 DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
1378 "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
1379 }
1380 DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
1381 "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
1382
1383 DFSanMemShadowOriginTransferFn = Mod->getOrInsertFunction(
1384 "__dfsan_mem_shadow_origin_transfer", DFSanMemShadowOriginTransferFnTy);
1385
1386 DFSanMemShadowOriginConditionalExchangeFn =
1387 Mod->getOrInsertFunction("__dfsan_mem_shadow_origin_conditional_exchange",
1388 DFSanMemShadowOriginConditionalExchangeFnTy);
1389
1390 {
 // Parameters 0 and 3 are marked zero-extended.
1392 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1393 AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
1394 DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
1395 "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
1396 }
1397
 // Record every runtime and callback function (pointer casts stripped) in
 // DFSanRuntimeFunctions.
1398 DFSanRuntimeFunctions.insert(
1399 DFSanUnionLoadFn.getCallee()->stripPointerCasts());
1400 DFSanRuntimeFunctions.insert(
1401 DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
1402 DFSanRuntimeFunctions.insert(
1403 DFSanUnimplementedFn.getCallee()->stripPointerCasts());
1404 DFSanRuntimeFunctions.insert(
1405 DFSanWrapperExternWeakNullFn.getCallee()->stripPointerCasts());
1406 DFSanRuntimeFunctions.insert(
1407 DFSanSetLabelFn.getCallee()->stripPointerCasts());
1408 DFSanRuntimeFunctions.insert(
1409 DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
1410 DFSanRuntimeFunctions.insert(
1411 DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
1412 DFSanRuntimeFunctions.insert(
1413 DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
1414 DFSanRuntimeFunctions.insert(
1415 DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
1416 DFSanRuntimeFunctions.insert(
1417 DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
1418 DFSanRuntimeFunctions.insert(
1419 DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
1420 DFSanRuntimeFunctions.insert(
1421 DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
1422 DFSanRuntimeFunctions.insert(
1423 DFSanReachesFunctionCallbackFn.getCallee()->stripPointerCasts());
1424 DFSanRuntimeFunctions.insert(
1425 DFSanReachesFunctionCallbackOriginFn.getCallee()->stripPointerCasts());
1426 DFSanRuntimeFunctions.insert(
1427 DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
1428 DFSanRuntimeFunctions.insert(
1429 DFSanChainOriginFn.getCallee()->stripPointerCasts());
1430 DFSanRuntimeFunctions.insert(
1431 DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
1432 DFSanRuntimeFunctions.insert(
1433 DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
1434 DFSanRuntimeFunctions.insert(
1435 DFSanMemShadowOriginTransferFn.getCallee()->stripPointerCasts());
1436 DFSanRuntimeFunctions.insert(
1437 DFSanMemShadowOriginConditionalExchangeFn.getCallee()
1438 ->stripPointerCasts());
1439 DFSanRuntimeFunctions.insert(
1440 DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
1441}
1442
1443// Initializes event callback functions and declare them in the module
// Every callback except __dfsan_mem_transfer_callback is declared with its
// first parameter (index 0) marked zero-extended.
// NOTE(review): the declarations of `AL` at the top of each scope are not
// visible in this listing; `AL` is presumably an AttributeList -- confirm.
1444void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
1445 {
1447 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1448 DFSanLoadCallbackFn = Mod->getOrInsertFunction(
1449 "__dfsan_load_callback", DFSanLoadStoreCallbackFnTy, AL);
1450 }
1451 {
 // The store callback shares the load callback's function type.
1453 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1454 DFSanStoreCallbackFn = Mod->getOrInsertFunction(
1455 "__dfsan_store_callback", DFSanLoadStoreCallbackFnTy, AL);
1456 }
1457 DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
1458 "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
1459 {
1461 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1462 DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback",
1463 DFSanCmpCallbackFnTy, AL);
1464 }
1465 {
1467 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1468 DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
1469 "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy, AL);
1470 }
1471 {
1473 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1474 DFSanConditionalCallbackOriginFn =
1475 Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
1476 DFSanConditionalCallbackOriginFnTy, AL);
1477 }
1478 {
1480 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1481 DFSanReachesFunctionCallbackFn =
1482 Mod->getOrInsertFunction("__dfsan_reaches_function_callback",
1483 DFSanReachesFunctionCallbackFnTy, AL);
1484 }
1485 {
1487 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1488 DFSanReachesFunctionCallbackOriginFn =
1489 Mod->getOrInsertFunction("__dfsan_reaches_function_callback_origin",
1490 DFSanReachesFunctionCallbackOriginFnTy, AL);
1491 }
1492}
1493
1494bool DataFlowSanitizer::runImpl(
1496 initializeModule(M);
1497
1498 if (ABIList.isIn(M, "skip"))
1499 return false;
1500
1501 const unsigned InitialGlobalSize = M.global_size();
1502 const unsigned InitialModuleSize = M.size();
1503
1504 bool Changed = false;
1505
1506 auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
1507 Type *Ty) -> Constant * {
1509 if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
1510 Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
1511 G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
1512 }
1513 return C;
1514 };
1515
1516 // These globals must be kept in sync with the ones in dfsan.cpp.
1517 ArgTLS =
1518 GetOrInsertGlobal("__dfsan_arg_tls",
1519 ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
1520 RetvalTLS = GetOrInsertGlobal(
1521 "__dfsan_retval_tls",
1522 ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
1523 ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
1524 ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
1525 RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);
1526
1527 (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
1528 Changed = true;
1529 return new GlobalVariable(
1530 M, OriginTy, true, GlobalValue::WeakODRLinkage,
1531 ConstantInt::getSigned(OriginTy,
1532 shouldTrackOrigins() ? ClTrackOrigins : 0),
1533 "__dfsan_track_origins");
1534 });
1535
1536 initializeCallbackFunctions(M);
1537 initializeRuntimeFunctions(M);
1538
1539 std::vector<Function *> FnsToInstrument;
1540 SmallPtrSet<Function *, 2> FnsWithNativeABI;
1541 SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
1542 SmallPtrSet<Constant *, 1> PersonalityFns;
1543 for (Function &F : M)
1544 if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F) &&
1545 !LibAtomicFunction(F)) {
1546 FnsToInstrument.push_back(&F);
1547 if (F.hasPersonalityFn())
1548 PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());
1549 }
1550
1552 for (auto *C : PersonalityFns) {
1553 assert(isa<Function>(C) && "Personality routine is not a function!");
1554 Function *F = cast<Function>(C);
1555 if (!isInstrumented(F))
1556 llvm::erase(FnsToInstrument, F);
1557 }
1558 }
1559
1560 // Give function aliases prefixes when necessary, and build wrappers where the
1561 // instrumentedness is inconsistent.
1562 for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
1563 // Don't stop on weak. We assume people aren't playing games with the
1564 // instrumentedness of overridden weak aliases.
1565 auto *F = dyn_cast<Function>(GA.getAliaseeObject());
1566 if (!F)
1567 continue;
1568
1569 bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
1570 if (GAInst && FInst) {
1571 addGlobalNameSuffix(&GA);
1572 } else if (GAInst != FInst) {
1573 // Non-instrumented alias of an instrumented function, or vice versa.
1574 // Replace the alias with a native-ABI wrapper of the aliasee. The pass
1575 // below will take care of instrumenting it.
1576 Function *NewF =
1577 buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
1578 GA.replaceAllUsesWith(NewF);
1579 NewF->takeName(&GA);
1580 GA.eraseFromParent();
1581 FnsToInstrument.push_back(NewF);
1582 }
1583 }
1584
1585 // TODO: This could be more precise.
1586 ReadOnlyNoneAttrs.addAttribute(Attribute::Memory);
1587
1588 // First, change the ABI of every function in the module. ABI-listed
1589 // functions keep their original ABI and get a wrapper function.
1590 for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
1591 FE = FnsToInstrument.end();
1592 FI != FE; ++FI) {
1593 Function &F = **FI;
1594 FunctionType *FT = F.getFunctionType();
1595
1596 bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
1597 FT->getReturnType()->isVoidTy());
1598
1599 if (isInstrumented(&F)) {
1600 if (isForceZeroLabels(&F))
1601 FnsWithForceZeroLabel.insert(&F);
1602
1603 // Instrumented functions get a '.dfsan' suffix. This allows us to more
1604 // easily identify cases of mismatching ABIs. This naming scheme is
1605 // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
1606 addGlobalNameSuffix(&F);
1607 } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
1608 // Build a wrapper function for F. The wrapper simply calls F, and is
1609 // added to FnsToInstrument so that any instrumentation according to its
1610 // WrapperKind is done in the second pass below.
1611
1612 // If the function being wrapped has local linkage, then preserve the
1613 // function's linkage in the wrapper function.
1614 GlobalValue::LinkageTypes WrapperLinkage =
1615 F.hasLocalLinkage() ? F.getLinkage()
1617
1618 Function *NewF = buildWrapperFunction(
1619 &F,
1620 (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
1621 std::string(F.getName()),
1622 WrapperLinkage, FT);
1623 NewF->removeFnAttrs(ReadOnlyNoneAttrs);
1624
1625 // Extern weak functions can sometimes be null at execution time.
1626 // Code will sometimes check if an extern weak function is null.
1627 // This could look something like:
1628 // declare extern_weak i8 @my_func(i8)
1629 // br i1 icmp ne (i8 (i8)* @my_func, i8 (i8)* null), label %use_my_func,
1630 // label %avoid_my_func
1631 // The @"dfsw$my_func" wrapper is never null, so if we replace this use
1632 // in the comparison, the icmp will simplify to false and we have
1633 // accidentally optimized away a null check that is necessary.
1634 // This can lead to a crash when the null extern_weak my_func is called.
1635 //
1636 // To prevent (the most common pattern of) this problem,
1637 // do not replace uses in comparisons with the wrapper.
1638 // We definitely want to replace uses in call instructions.
1639 // Other uses (e.g. store the function address somewhere) might be
1640 // called or compared or both - this case may not be handled correctly.
1641 // We will default to replacing with wrapper in cases we are unsure.
1642 auto IsNotCmpUse = [](Use &U) -> bool {
1643 User *Usr = U.getUser();
1644 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {
1645 // This is the most common case for icmp ne null
1646 if (CE->getOpcode() == Instruction::ICmp) {
1647 return false;
1648 }
1649 }
1650 if (Instruction *I = dyn_cast<Instruction>(Usr)) {
1651 if (I->getOpcode() == Instruction::ICmp) {
1652 return false;
1653 }
1654 }
1655 return true;
1656 };
1657 F.replaceUsesWithIf(NewF, IsNotCmpUse);
1658
1659 UnwrappedFnMap[NewF] = &F;
1660 *FI = NewF;
1661
1662 if (!F.isDeclaration()) {
1663 // This function is probably defining an interposition of an
1664 // uninstrumented function and hence needs to keep the original ABI.
1665 // But any functions it may call need to use the instrumented ABI, so
1666 // we instrument it in a mode which preserves the original ABI.
1667 FnsWithNativeABI.insert(&F);
1668
1669 // This code needs to rebuild the iterators, as they may be invalidated
1670 // by the push_back, taking care that the new range does not include
1671 // any functions added by this code.
1672 size_t N = FI - FnsToInstrument.begin(),
1673 Count = FE - FnsToInstrument.begin();
1674 FnsToInstrument.push_back(&F);
1675 FI = FnsToInstrument.begin() + N;
1676 FE = FnsToInstrument.begin() + Count;
1677 }
1678 // Hopefully, nobody will try to indirectly call a vararg
1679 // function... yet.
1680 } else if (FT->isVarArg()) {
1681 UnwrappedFnMap[&F] = &F;
1682 *FI = nullptr;
1683 }
1684 }
1685
1686 for (Function *F : FnsToInstrument) {
1687 if (!F || F->isDeclaration())
1688 continue;
1689
1691
1692 DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
1693 FnsWithForceZeroLabel.count(F), GetTLI(*F));
1694
1696 // Add callback for arguments reaching this function.
1697 for (auto &FArg : F->args()) {
1698 Instruction *Next = &F->getEntryBlock().front();
1699 Value *FArgShadow = DFSF.getShadow(&FArg);
1700 if (isZeroShadow(FArgShadow))
1701 continue;
1702 if (Instruction *FArgShadowInst = dyn_cast<Instruction>(FArgShadow)) {
1703 Next = FArgShadowInst->getNextNode();
1704 }
1705 if (shouldTrackOrigins()) {
1706 if (Instruction *Origin =
1707 dyn_cast<Instruction>(DFSF.getOrigin(&FArg))) {
1708 // Ensure IRB insertion point is after loads for shadow and origin.
1709 Instruction *OriginNext = Origin->getNextNode();
1710 if (Next->comesBefore(OriginNext)) {
1711 Next = OriginNext;
1712 }
1713 }
1714 }
1715 IRBuilder<> IRB(Next);
1716 DFSF.addReachesFunctionCallbacksIfEnabled(IRB, *Next, &FArg);
1717 }
1718 }
1719
1720 // DFSanVisitor may create new basic blocks, which confuses df_iterator.
1721 // Build a copy of the list before iterating over it.
1722 SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
1723
1724 for (BasicBlock *BB : BBList) {
1725 Instruction *Inst = &BB->front();
1726 while (true) {
1727 // DFSanVisitor may split the current basic block, changing the current
1728 // instruction's next pointer and moving the next instruction to the
1729 // tail block from which we should continue.
1730 Instruction *Next = Inst->getNextNode();
1731 // DFSanVisitor may delete Inst, so keep track of whether it was a
1732 // terminator.
1733 bool IsTerminator = Inst->isTerminator();
1734 if (!DFSF.SkipInsts.count(Inst))
1735 DFSanVisitor(DFSF).visit(Inst);
1736 if (IsTerminator)
1737 break;
1738 Inst = Next;
1739 }
1740 }
1741
1742 // We will not necessarily be able to compute the shadow for every phi node
1743 // until we have visited every block. Therefore, the code that handles phi
1744 // nodes adds them to the PHIFixups list so that they can be properly
1745 // handled here.
1746 for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
1747 for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
1748 ++Val) {
1749 P.ShadowPhi->setIncomingValue(
1750 Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
1751 if (P.OriginPhi)
1752 P.OriginPhi->setIncomingValue(
1753 Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
1754 }
1755 }
1756
1757 // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
1758 // places (i.e. instructions in basic blocks we haven't even begun visiting
1759 // yet). To make our life easier, do this work in a pass after the main
1760 // instrumentation.
1762 for (Value *V : DFSF.NonZeroChecks) {
1763 Instruction *Pos;
1764 if (Instruction *I = dyn_cast<Instruction>(V))
1765 Pos = I->getNextNode();
1766 else
1767 Pos = &DFSF.F->getEntryBlock().front();
1768 while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
1769 Pos = Pos->getNextNode();
1770 IRBuilder<> IRB(Pos);
1771 Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
1772 Value *Ne =
1773 IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
1774 BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1775 Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
1776 IRBuilder<> ThenIRB(BI);
1777 ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
1778 }
1779 }
1780 }
1781
1782 return Changed || !FnsToInstrument.empty() ||
1783 M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
1784}
1785
1786Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
1787 Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
1788 if (ArgOffset)
1789 Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
1790 return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
1791 "_dfsarg");
1792}
1793
1794Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
1795 return IRB.CreatePointerCast(
1796 DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
1797}
1798
1799Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }
1800
1801Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
1802 return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo,
1803 "_dfsarg_o");
1804}
1805
1806Value *DFSanFunction::getOrigin(Value *V) {
1807 assert(DFS.shouldTrackOrigins());
1808 if (!isa<Argument>(V) && !isa<Instruction>(V))
1809 return DFS.ZeroOrigin;
1810 Value *&Origin = ValOriginMap[V];
1811 if (!Origin) {
1812 if (Argument *A = dyn_cast<Argument>(V)) {
1813 if (IsNativeABI)
1814 return DFS.ZeroOrigin;
1815 if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
1816 Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
1817 IRBuilder<> IRB(ArgOriginTLSPos);
1818 Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
1819 Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
1820 } else {
1821 // Overflow
1822 Origin = DFS.ZeroOrigin;
1823 }
1824 } else {
1825 Origin = DFS.ZeroOrigin;
1826 }
1827 }
1828 return Origin;
1829}
1830
1831void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
1832 if (!DFS.shouldTrackOrigins())
1833 return;
1834 assert(!ValOriginMap.count(I));
1835 assert(Origin->getType() == DFS.OriginTy);
1836 ValOriginMap[I] = Origin;
1837}
1838
1839Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
1840 unsigned ArgOffset = 0;
1841 const DataLayout &DL = F->getParent()->getDataLayout();
1842 for (auto &FArg : F->args()) {
1843 if (!FArg.getType()->isSized()) {
1844 if (A == &FArg)
1845 break;
1846 continue;
1847 }
1848
1849 unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
1850 if (A != &FArg) {
1851 ArgOffset += alignTo(Size, ShadowTLSAlignment);
1852 if (ArgOffset > ArgTLSSize)
1853 break; // ArgTLS overflows, uses a zero shadow.
1854 continue;
1855 }
1856
1857 if (ArgOffset + Size > ArgTLSSize)
1858 break; // ArgTLS overflows, uses a zero shadow.
1859
1860 Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
1861 IRBuilder<> IRB(ArgTLSPos);
1862 Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
1863 return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
1865 }
1866
1867 return DFS.getZeroShadow(A);
1868}
1869
1870Value *DFSanFunction::getShadow(Value *V) {
1871 if (!isa<Argument>(V) && !isa<Instruction>(V))
1872 return DFS.getZeroShadow(V);
1873 if (IsForceZeroLabels)
1874 return DFS.getZeroShadow(V);
1875 Value *&Shadow = ValShadowMap[V];
1876 if (!Shadow) {
1877 if (Argument *A = dyn_cast<Argument>(V)) {
1878 if (IsNativeABI)
1879 return DFS.getZeroShadow(V);
1880 Shadow = getShadowForTLSArgument(A);
1881 NonZeroChecks.push_back(Shadow);
1882 } else {
1883 Shadow = DFS.getZeroShadow(V);
1884 }
1885 }
1886 return Shadow;
1887}
1888
1889void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
1890 assert(!ValShadowMap.count(I));
1891 ValShadowMap[I] = Shadow;
1892}
1893
1894/// Compute the integer shadow offset that corresponds to a given
1895/// application address.
1896///
1897/// Offset = (Addr & ~AndMask) ^ XorMask
1898Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
1899 assert(Addr != RetvalTLS && "Reinstrumenting?");
1900 Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1901
1902 uint64_t AndMask = MapParams->AndMask;
1903 if (AndMask)
1904 OffsetLong =
1905 IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));
1906
1907 uint64_t XorMask = MapParams->XorMask;
1908 if (XorMask)
1909 OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
1910 return OffsetLong;
1911}
1912
1913std::pair<Value *, Value *>
1914DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
1915 Instruction *Pos) {
1916 // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
1917 IRBuilder<> IRB(Pos);
1918 Value *ShadowOffset = getShadowOffset(Addr, IRB);
1919 Value *ShadowLong = ShadowOffset;
1920 uint64_t ShadowBase = MapParams->ShadowBase;
1921 if (ShadowBase != 0) {
1922 ShadowLong =
1923 IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
1924 }
1925 IntegerType *ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
1926 Value *ShadowPtr =
1927 IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1928 Value *OriginPtr = nullptr;
1929 if (shouldTrackOrigins()) {
1930 Value *OriginLong = ShadowOffset;
1931 uint64_t OriginBase = MapParams->OriginBase;
1932 if (OriginBase != 0)
1933 OriginLong =
1934 IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
1935 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1936 // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
1937 // So Mask is unnecessary.
1938 if (Alignment < MinOriginAlignment) {
1940 OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
1941 }
1942 OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
1943 }
1944 return std::make_pair(ShadowPtr, OriginPtr);
1945}
1946
1947Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos,
1948 Value *ShadowOffset) {
1949 IRBuilder<> IRB(Pos);
1950 return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
1951}
1952
1953Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
1954 IRBuilder<> IRB(Pos);
1955 Value *ShadowOffset = getShadowOffset(Addr, IRB);
1956 return getShadowAddress(Addr, Pos, ShadowOffset);
1957}
1958
1959Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
1960 Instruction *Pos) {
1961 Value *PrimitiveValue = combineShadows(V1, V2, Pos);
1962 return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
1963}
1964
1965// Generates IR to compute the union of the two given shadows, inserting it
1966// before Pos. The combined value is with primitive type.
1967Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
1968 if (DFS.isZeroShadow(V1))
1969 return collapseToPrimitiveShadow(V2, Pos);
1970 if (DFS.isZeroShadow(V2))
1971 return collapseToPrimitiveShadow(V1, Pos);
1972 if (V1 == V2)
1973 return collapseToPrimitiveShadow(V1, Pos);
1974
1975 auto V1Elems = ShadowElements.find(V1);
1976 auto V2Elems = ShadowElements.find(V2);
1977 if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
1978 if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
1979 V2Elems->second.begin(), V2Elems->second.end())) {
1980 return collapseToPrimitiveShadow(V1, Pos);
1981 }
1982 if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
1983 V1Elems->second.begin(), V1Elems->second.end())) {
1984 return collapseToPrimitiveShadow(V2, Pos);
1985 }
1986 } else if (V1Elems != ShadowElements.end()) {
1987 if (V1Elems->second.count(V2))
1988 return collapseToPrimitiveShadow(V1, Pos);
1989 } else if (V2Elems != ShadowElements.end()) {
1990 if (V2Elems->second.count(V1))
1991 return collapseToPrimitiveShadow(V2, Pos);
1992 }
1993
1994 auto Key = std::make_pair(V1, V2);
1995 if (V1 > V2)
1996 std::swap(Key.first, Key.second);
1997 CachedShadow &CCS = CachedShadows[Key];
1998 if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
1999 return CCS.Shadow;
2000
2001 // Converts inputs shadows to shadows with primitive types.
2002 Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
2003 Value *PV2 = collapseToPrimitiveShadow(V2, Pos);
2004
2005 IRBuilder<> IRB(Pos);
2006 CCS.Block = Pos->getParent();
2007 CCS.Shadow = IRB.CreateOr(PV1, PV2);
2008
2009 std::set<Value *> UnionElems;
2010 if (V1Elems != ShadowElements.end()) {
2011 UnionElems = V1Elems->second;
2012 } else {
2013 UnionElems.insert(V1);
2014 }
2015 if (V2Elems != ShadowElements.end()) {
2016 UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
2017 } else {
2018 UnionElems.insert(V2);
2019 }
2020 ShadowElements[CCS.Shadow] = std::move(UnionElems);
2021
2022 return CCS.Shadow;
2023}
2024
2025// A convenience function which folds the shadows of each of the operands
2026// of the provided instruction Inst, inserting the IR before Inst. Returns
2027// the computed union Value.
2028Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
2029 if (Inst->getNumOperands() == 0)
2030 return DFS.getZeroShadow(Inst);
2031
2032 Value *Shadow = getShadow(Inst->getOperand(0));
2033 for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
2034 Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)), Inst);
2035
2036 return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
2037}
2038
2039void DFSanVisitor::visitInstOperands(Instruction &I) {
2040 Value *CombinedShadow = DFSF.combineOperandShadows(&I);
2041 DFSF.setShadow(&I, CombinedShadow);
2042 visitInstOperandOrigins(I);
2043}
2044
2045Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
2046 const std::vector<Value *> &Origins,
2047 Instruction *Pos, ConstantInt *Zero) {
2048 assert(Shadows.size() == Origins.size());
2049 size_t Size = Origins.size();
2050 if (Size == 0)
2051 return DFS.ZeroOrigin;
2052 Value *Origin = nullptr;
2053 if (!Zero)
2054 Zero = DFS.ZeroPrimitiveShadow;
2055 for (size_t I = 0; I != Size; ++I) {
2056 Value *OpOrigin = Origins[I];
2057 Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
2058 if (ConstOpOrigin && ConstOpOrigin->isNullValue())
2059 continue;
2060 if (!Origin) {
2061 Origin = OpOrigin;
2062 continue;
2063 }
2064 Value *OpShadow = Shadows[I];
2065 Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
2066 IRBuilder<> IRB(Pos);
2067 Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
2068 Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2069 }
2070 return Origin ? Origin : DFS.ZeroOrigin;
2071}
2072
2073Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
2074 size_t Size = Inst->getNumOperands();
2075 std::vector<Value *> Shadows(Size);
2076 std::vector<Value *> Origins(Size);
2077 for (unsigned I = 0; I != Size; ++I) {
2078 Shadows[I] = getShadow(Inst->getOperand(I));
2079 Origins[I] = getOrigin(Inst->getOperand(I));
2080 }
2081 return combineOrigins(Shadows, Origins, Inst);
2082}
2083
2084void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
2085 if (!DFSF.DFS.shouldTrackOrigins())
2086 return;
2087 Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
2088 DFSF.setOrigin(&I, CombinedOrigin);
2089}
2090
2091Align DFSanFunction::getShadowAlign(Align InstAlignment) {
2092 const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
2093 return Align(Alignment.value() * DFS.ShadowWidthBytes);
2094}
2095
2096Align DFSanFunction::getOriginAlign(Align InstAlignment) {
2097 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2098 return Align(std::max(MinOriginAlignment, Alignment));
2099}
2100
2101bool DFSanFunction::isLookupTableConstant(Value *P) {
2102 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))
2103 if (GV->isConstant() && GV->hasName())
2104 return DFS.CombineTaintLookupTableNames.count(GV->getName());
2105
2106 return false;
2107}
2108
2109bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
2110 Align InstAlignment) {
2111 // When enabling tracking load instructions, we always use
2112 // __dfsan_load_label_and_origin to reduce code size.
2113 if (ClTrackOrigins == 2)
2114 return true;
2115
2116 assert(Size != 0);
2117 // * if Size == 1, it is sufficient to load its origin aligned at 4.
2118 // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
2119 // load its origin aligned at 4. If not, although origins may be lost, it
2120 // should not happen very often.
2121 // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
2122 // Size % 4 == 0, it is more efficient to load origins without callbacks.
2123 // * Otherwise we use __dfsan_load_label_and_origin.
2124 // This should ensure that common cases run efficiently.
2125 if (Size <= 2)
2126 return false;
2127
2128 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2129 return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
2130}
2131
2132Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
2133 Value **OriginAddr) {
2134 IRBuilder<> IRB(Pos);
2135 *OriginAddr =
2136 IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
2137 return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
2138}
2139
2140std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
2141 Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
2142 Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
2143 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2144 const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;
2145
2146 assert(Size >= 4 && "Not large enough load size for fast path!");
2147
2148 // Used for origin tracking.
2149 std::vector<Value *> Shadows;
2150 std::vector<Value *> Origins;
2151
2152 // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
2153 // but this function is only used in a subset of cases that make it possible
2154 // to optimize the instrumentation.
2155 //
2156 // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
2157 // per byte) is either:
2158 // - a multiple of 8 (common)
2159 // - equal to 4 (only for load32)
2160 //
2161 // For the second case, we can fit the wide shadow in a 32-bit integer. In all
2162 // other cases, we use a 64-bit integer to hold the wide shadow.
2163 Type *WideShadowTy =
2164 ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);
2165
2166 IRBuilder<> IRB(Pos);
2167 Value *CombinedWideShadow =
2168 IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
2169
2170 unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
2171 const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
2172
2173 auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
2174 if (BytesPerWideShadow > 4) {
2175 assert(BytesPerWideShadow == 8);
2176 // The wide shadow relates to two origin pointers: one for the first four
2177 // application bytes, and one for the latest four. We use a left shift to
2178 // get just the shadow bytes that correspond to the first origin pointer,
2179 // and then the entire shadow for the second origin pointer (which will be
2180 // chosen by combineOrigins() iff the least-significant half of the wide
2181 // shadow was empty but the other half was not).
2182 Value *WideShadowLo = IRB.CreateShl(
2183 WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
2184 Shadows.push_back(WideShadow);
2185 Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));
2186
2187 Shadows.push_back(WideShadowLo);
2188 Origins.push_back(Origin);
2189 } else {
2190 Shadows.push_back(WideShadow);
2191 Origins.push_back(Origin);
2192 }
2193 };
2194
2195 if (ShouldTrackOrigins)
2196 AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);
2197
2198 // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
2199 // then OR individual shadows within the combined WideShadow by binary ORing.
2200 // This is fewer instructions than ORing shadows individually, since it
2201 // needs logN shift/or instructions (N being the bytes of the combined wide
2202 // shadow).
2203 for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
2204 ByteOfs += BytesPerWideShadow) {
2205 ShadowAddr = IRB.CreateGEP(WideShadowTy, ShadowAddr,
2206 ConstantInt::get(DFS.IntptrTy, 1));
2207 Value *NextWideShadow =
2208 IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
2209 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
2210 if (ShouldTrackOrigins) {
2211 Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
2212 AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
2213 }
2214 }
2215 for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
2216 Width >>= 1) {
2217 Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
2218 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
2219 }
2220 return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
2221 ShouldTrackOrigins
2222 ? combineOrigins(Shadows, Origins, Pos,
2224 : DFS.ZeroOrigin};
2225}
2226
2227std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
2228 Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
2229 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2230
2231 // Non-escaped loads.
2232 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2233 const auto SI = AllocaShadowMap.find(AI);
2234 if (SI != AllocaShadowMap.end()) {
2235 IRBuilder<> IRB(Pos);
2236 Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
2237 const auto OI = AllocaOriginMap.find(AI);
2238 assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
2239 return {ShadowLI, ShouldTrackOrigins
2240 ? IRB.CreateLoad(DFS.OriginTy, OI->second)
2241 : nullptr};
2242 }
2243 }
2244
2245 // Load from constant addresses.
2248 bool AllConstants = true;
2249 for (const Value *Obj : Objs) {
2250 if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
2251 continue;
2252 if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
2253 continue;
2254
2255 AllConstants = false;
2256 break;
2257 }
2258 if (AllConstants)
2259 return {DFS.ZeroPrimitiveShadow,
2260 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2261
2262 if (Size == 0)
2263 return {DFS.ZeroPrimitiveShadow,
2264 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2265
2266 // Use callback to load if this is not an optimizable case for origin
2267 // tracking.
2268 if (ShouldTrackOrigins &&
2269 useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
2270 IRBuilder<> IRB(Pos);
2271 CallInst *Call =
2272 IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
2273 {Addr, ConstantInt::get(DFS.IntptrTy, Size)});
2274 Call->addRetAttr(Attribute::ZExt);
2275 return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
2276 DFS.PrimitiveShadowTy),
2277 IRB.CreateTrunc(Call, DFS.OriginTy)};
2278 }
2279
2280 // Other cases that support loading shadows or origins in a fast way.
2281 Value *ShadowAddr, *OriginAddr;
2282 std::tie(ShadowAddr, OriginAddr) =
2283 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2284
2285 const Align ShadowAlign = getShadowAlign(InstAlignment);
2286 const Align OriginAlign = getOriginAlign(InstAlignment);
2287 Value *Origin = nullptr;
2288 if (ShouldTrackOrigins) {
2289 IRBuilder<> IRB(Pos);
2290 Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
2291 }
2292
2293 // When the byte size is small enough, we can load the shadow directly with
2294 // just a few instructions.
2295 switch (Size) {
2296 case 1: {
2297 LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
2298 LI->setAlignment(ShadowAlign);
2299 return {LI, Origin};
2300 }
2301 case 2: {
2302 IRBuilder<> IRB(Pos);
2303 Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
2304 ConstantInt::get(DFS.IntptrTy, 1));
2305 Value *Load =
2306 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
2307 Value *Load1 =
2308 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
2309 return {combineShadows(Load, Load1, Pos), Origin};
2310 }
2311 }
2312 bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);
2313
2314 if (HasSizeForFastPath)
2315 return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
2316 OriginAlign, Origin, Pos);
2317
2318 IRBuilder<> IRB(Pos);
2319 CallInst *FallbackCall = IRB.CreateCall(
2320 DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
2321 FallbackCall->addRetAttr(Attribute::ZExt);
2322 return {FallbackCall, Origin};
2323}
2324
2325std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
2326 uint64_t Size,
2327 Align InstAlignment,
2328 Instruction *Pos) {
2329 Value *PrimitiveShadow, *Origin;
2330 std::tie(PrimitiveShadow, Origin) =
2331 loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
2332 if (DFS.shouldTrackOrigins()) {
2333 if (ClTrackOrigins == 2) {
2334 IRBuilder<> IRB(Pos);
2335 auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
2336 if (!ConstantShadow || !ConstantShadow->isZeroValue())
2337 Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
2338 }
2339 }
2340 return {PrimitiveShadow, Origin};
2341}
2342
2344 switch (AO) {
2345 case AtomicOrdering::NotAtomic:
2346 return AtomicOrdering::NotAtomic;
2347 case AtomicOrdering::Unordered:
2348 case AtomicOrdering::Monotonic:
2349 case AtomicOrdering::Acquire:
2350 return AtomicOrdering::Acquire;
2351 case AtomicOrdering::Release:
2352 case AtomicOrdering::AcquireRelease:
2353 return AtomicOrdering::AcquireRelease;
2354 case AtomicOrdering::SequentiallyConsistent:
2355 return AtomicOrdering::SequentiallyConsistent;
2356 }
2357 llvm_unreachable("Unknown ordering");
2358}
2359
2361 if (!V->getType()->isPointerTy())
2362 return V;
2363
2364 // DFSan pass should be running on valid IR, but we'll
2365 // keep a seen set to ensure there are no issues.
2367 Visited.insert(V);
2368 do {
2369 if (auto *GEP = dyn_cast<GEPOperator>(V)) {
2370 V = GEP->getPointerOperand();
2371 } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2372 V = cast<Operator>(V)->getOperand(0);
2373 if (!V->getType()->isPointerTy())
2374 return V;
2375 } else if (isa<GlobalAlias>(V)) {
2376 V = cast<GlobalAlias>(V)->getAliasee();
2377 }
2378 } while (Visited.insert(V).second);
2379
2380 return V;
2381}
2382
2383void DFSanVisitor::visitLoadInst(LoadInst &LI) {
2384 auto &DL = LI.getModule()->getDataLayout();
2385 uint64_t Size = DL.getTypeStoreSize(LI.getType());
2386 if (Size == 0) {
2387 DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
2388 DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
2389 return;
2390 }
2391
2392 // When an application load is atomic, increase atomic ordering between
2393 // atomic application loads and stores to ensure happen-before order; load
2394 // shadow data after application data; store zero shadow data before
2395 // application data. This ensure shadow loads return either labels of the
2396 // initial application data or zeros.
2397 if (LI.isAtomic())
2399
2400 Instruction *AfterLi = LI.getNextNode();
2401 Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
2402 std::vector<Value *> Shadows;
2403 std::vector<Value *> Origins;
2404 Value *PrimitiveShadow, *Origin;
2405 std::tie(PrimitiveShadow, Origin) =
2406 DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
2407 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2408 if (ShouldTrackOrigins) {
2409 Shadows.push_back(PrimitiveShadow);
2410 Origins.push_back(Origin);
2411 }
2413 DFSF.isLookupTableConstant(
2415 Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
2416 PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
2417 if (ShouldTrackOrigins) {
2418 Shadows.push_back(PtrShadow);
2419 Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
2420 }
2421 }
2422 if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
2423 DFSF.NonZeroChecks.push_back(PrimitiveShadow);
2424
2425 Value *Shadow =
2426 DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
2427 DFSF.setShadow(&LI, Shadow);
2428
2429 if (ShouldTrackOrigins) {
2430 DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
2431 }
2432
2433 if (ClEventCallbacks) {
2434 IRBuilder<> IRB(Pos);
2436 CallInst *CI =
2437 IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr});
2438 CI->addParamAttr(0, Attribute::ZExt);
2439 }
2440
2441 IRBuilder<> IRB(AfterLi);
2442 DFSF.addReachesFunctionCallbacksIfEnabled(IRB, LI, &LI);
2443}
2444
2445Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
2446 IRBuilder<> &IRB) {
2447 assert(DFS.shouldTrackOrigins());
2448 return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
2449}
2450
2451Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
2452 if (!DFS.shouldTrackOrigins())
2453 return V;
2454 return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
2455}
2456
2457Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
2458 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2459 const DataLayout &DL = F->getParent()->getDataLayout();
2460 unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2461 if (IntptrSize == OriginSize)
2462 return Origin;
2463 assert(IntptrSize == OriginSize * 2);
2464 Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
2465 return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
2466}
2467
2468void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
2469 Value *StoreOriginAddr,
2470 uint64_t StoreOriginSize, Align Alignment) {
2471 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2472 const DataLayout &DL = F->getParent()->getDataLayout();
2473 const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
2474 unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2475 assert(IntptrAlignment >= MinOriginAlignment);
2476 assert(IntptrSize >= OriginSize);
2477
2478 unsigned Ofs = 0;
2479 Align CurrentAlignment = Alignment;
2480 if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
2481 Value *IntptrOrigin = originToIntptr(IRB, Origin);
2482 Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
2483 StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
2484 for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
2485 Value *Ptr =
2486 I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
2487 : IntptrStoreOriginPtr;
2488 IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
2489 Ofs += IntptrSize / OriginSize;
2490 CurrentAlignment = IntptrAlignment;
2491 }
2492 }
2493
2494 for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
2495 ++I) {
2496 Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
2497 : StoreOriginAddr;
2498 IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
2499 CurrentAlignment = MinOriginAlignment;
2500 }
2501}
2502
2503Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
2504 const Twine &Name) {
2505 Type *VTy = V->getType();
2506 assert(VTy->isIntegerTy());
2507 if (VTy->getIntegerBitWidth() == 1)
2508 // Just converting a bool to a bool, so do nothing.
2509 return V;
2510 return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
2511}
2512
2513void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
2514 Value *Shadow, Value *Origin,
2515 Value *StoreOriginAddr, Align InstAlignment) {
2516 // Do not write origins for zero shadows because we do not trace origins for
2517 // untainted sinks.
2518 const Align OriginAlignment = getOriginAlign(InstAlignment);
2519 Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
2520 IRBuilder<> IRB(Pos);
2521 if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
2522 if (!ConstantShadow->isZeroValue())
2523 paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
2524 OriginAlignment);
2525 return;
2526 }
2527
2528 if (shouldInstrumentWithCall()) {
2529 IRB.CreateCall(
2530 DFS.DFSanMaybeStoreOriginFn,
2531 {CollapsedShadow, Addr, ConstantInt::get(DFS.IntptrTy, Size), Origin});
2532 } else {
2533 Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
2534 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2536 Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DTU);
2537 IRBuilder<> IRBNew(CheckTerm);
2538 paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
2539 OriginAlignment);
2540 ++NumOriginStores;
2541 }
2542}
2543
2544void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
2545 Align ShadowAlign,
2546 Instruction *Pos) {
2547 IRBuilder<> IRB(Pos);
2548 IntegerType *ShadowTy =
2549 IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
2550 Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
2551 Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
2552 IRB.CreateAlignedStore(ExtZeroShadow, ShadowAddr, ShadowAlign);
2553 // Do not write origins for 0 shadows because we do not trace origins for
2554 // untainted sinks.
2555}
2556
2557void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
2558 Align InstAlignment,
2559 Value *PrimitiveShadow,
2560 Value *Origin,
2561 Instruction *Pos) {
2562 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;
2563
2564 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2565 const auto SI = AllocaShadowMap.find(AI);
2566 if (SI != AllocaShadowMap.end()) {
2567 IRBuilder<> IRB(Pos);
2568 IRB.CreateStore(PrimitiveShadow, SI->second);
2569
2570 // Do not write origins for 0 shadows because we do not trace origins for
2571 // untainted sinks.
2572 if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
2573 const auto OI = AllocaOriginMap.find(AI);
2574 assert(OI != AllocaOriginMap.end() && Origin);
2575 IRB.CreateStore(Origin, OI->second);
2576 }
2577 return;
2578 }
2579 }
2580
2581 const Align ShadowAlign = getShadowAlign(InstAlignment);
2582 if (DFS.isZeroShadow(PrimitiveShadow)) {
2583 storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
2584 return;
2585 }
2586
2587 IRBuilder<> IRB(Pos);
2588 Value *ShadowAddr, *OriginAddr;
2589 std::tie(ShadowAddr, OriginAddr) =
2590 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2591
2592 const unsigned ShadowVecSize = 8;
2593 assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
2594 "Shadow vector is too large!");
2595
2596 uint64_t Offset = 0;
2597 uint64_t LeftSize = Size;
2598 if (LeftSize >= ShadowVecSize) {
2599 auto *ShadowVecTy =
2600 FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
2601 Value *ShadowVec = PoisonValue::get(ShadowVecTy);
2602 for (unsigned I = 0; I != ShadowVecSize; ++I) {
2603 ShadowVec = IRB.CreateInsertElement(
2604 ShadowVec, PrimitiveShadow,
2605 ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
2606 }
2607 do {
2608 Value *CurShadowVecAddr =
2609 IRB.CreateConstGEP1_32(ShadowVecTy, ShadowAddr, Offset);
2610 IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
2611 LeftSize -= ShadowVecSize;
2612 ++Offset;
2613 } while (LeftSize >= ShadowVecSize);
2614 Offset *= ShadowVecSize;
2615 }
2616 while (LeftSize > 0) {
2617 Value *CurShadowAddr =
2618 IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
2619 IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
2620 --LeftSize;
2621 ++Offset;
2622 }
2623
2624 if (ShouldTrackOrigins) {
2625 storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
2626 InstAlignment);
2627 }
2628}
2629
2631 switch (AO) {
2632 case AtomicOrdering::NotAtomic:
2633 return AtomicOrdering::NotAtomic;
2634 case AtomicOrdering::Unordered:
2635 case AtomicOrdering::Monotonic:
2636 case AtomicOrdering::Release:
2637 return AtomicOrdering::Release;
2638 case AtomicOrdering::Acquire:
2639 case AtomicOrdering::AcquireRelease:
2640 return AtomicOrdering::AcquireRelease;
2641 case AtomicOrdering::SequentiallyConsistent:
2642 return AtomicOrdering::SequentiallyConsistent;
2643 }
2644 llvm_unreachable("Unknown ordering");
2645}
2646
2647void DFSanVisitor::visitStoreInst(StoreInst &SI) {
2648 auto &DL = SI.getModule()->getDataLayout();
2649 Value *Val = SI.getValueOperand();
2650 uint64_t Size = DL.getTypeStoreSize(Val->getType());
2651 if (Size == 0)
2652 return;
2653
2654 // When an application store is atomic, increase atomic ordering between
2655 // atomic application loads and stores to ensure happen-before order; load
2656 // shadow data after application data; store zero shadow data before
2657 // application data. This ensure shadow loads return either labels of the
2658 // initial application data or zeros.
2659 if (SI.isAtomic())
2660 SI.setOrdering(addReleaseOrdering(SI.getOrdering()));
2661
2662 const bool ShouldTrackOrigins =
2663 DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
2664 std::vector<Value *> Shadows;
2665 std::vector<Value *> Origins;
2666
2667 Value *Shadow =
2668 SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);
2669
2670 if (ShouldTrackOrigins) {
2671 Shadows.push_back(Shadow);
2672 Origins.push_back(DFSF.getOrigin(Val));
2673 }
2674
2675 Value *PrimitiveShadow;
2677 Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
2678 if (ShouldTrackOrigins) {
2679 Shadows.push_back(PtrShadow);
2680 Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
2681 }
2682 PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
2683 } else {
2684 PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
2685 }
2686 Value *Origin = nullptr;
2687 if (ShouldTrackOrigins)
2688 Origin = DFSF.combineOrigins(Shadows, Origins, &SI);
2689 DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
2690 PrimitiveShadow, Origin, &SI);
2691 if (ClEventCallbacks) {
2692 IRBuilder<> IRB(&SI);
2693 Value *Addr = SI.getPointerOperand();
2694 CallInst *CI =
2695 IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr});
2696 CI->addParamAttr(0, Attribute::ZExt);
2697 }
2698}
2699
2700void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
2701 assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2702
2703 Value *Val = I.getOperand(1);
2704 const auto &DL = I.getModule()->getDataLayout();
2705 uint64_t Size = DL.getTypeStoreSize(Val->getType());
2706 if (Size == 0)
2707 return;
2708
2709 // Conservatively set data at stored addresses and return with zero shadow to
2710 // prevent shadow data races.
2711 IRBuilder<> IRB(&I);
2712 Value *Addr = I.getOperand(0);
2713 const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
2714 DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, &I);
2715 DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
2716 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2717}
2718
2719void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
2720 visitCASOrRMW(I.getAlign(), I);
2721 // TODO: The ordering change follows MSan. It is possible not to change
2722 // ordering because we always set and use 0 shadows.
2723 I.setOrdering(addReleaseOrdering(I.getOrdering()));
2724}
2725
2726void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2727 visitCASOrRMW(I.getAlign(), I);
2728 // TODO: The ordering change follows MSan. It is possible not to change
2729 // ordering because we always set and use 0 shadows.
2730 I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
2731}
2732
2733void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
2734 visitInstOperands(UO);
2735}
2736
2737void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
2738 visitInstOperands(BO);
2739}
2740
2741void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
2742 // Special case: if this is the bitcast (there is exactly 1 allowed) between
2743 // a musttail call and a ret, don't instrument. New instructions are not
2744 // allowed after a musttail call.
2745 if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
2746 if (CI->isMustTailCall())
2747 return;
2748 visitInstOperands(BCI);
2749}
2750
2751void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }
2752
2753void DFSanVisitor::visitCmpInst(CmpInst &CI) {
2754 visitInstOperands(CI);
2755 if (ClEventCallbacks) {
2756 IRBuilder<> IRB(&CI);
2757 Value *CombinedShadow = DFSF.getShadow(&CI);
2758 CallInst *CallI =
2759 IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
2760 CallI->addParamAttr(0, Attribute::ZExt);
2761 }
2762}
2763
2764void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
2765 // We do not need to track data through LandingPadInst.
2766 //
2767 // For the C++ exceptions, if a value is thrown, this value will be stored
2768 // in a memory location provided by __cxa_allocate_exception(...) (on the
2769 // throw side) or __cxa_begin_catch(...) (on the catch side).
2770 // This memory will have a shadow, so with the loads and stores we will be
2771 // able to propagate labels on data thrown through exceptions, without any
2772 // special handling of the LandingPadInst.
2773 //
2774 // The second element in the pair result of the LandingPadInst is a
2775 // register value, but it is for a type ID and should never be tainted.
2776 DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
2777 DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
2778}
2779
2780void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
2782 DFSF.isLookupTableConstant(
2784 visitInstOperands(GEPI);
2785 return;
2786 }
2787
2788 // Only propagate shadow/origin of base pointer value but ignore those of
2789 // offset operands.
2790 Value *BasePointer = GEPI.getPointerOperand();
2791 DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
2792 if (DFSF.DFS.shouldTrackOrigins())
2793 DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
2794}
2795
2796void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
2797 visitInstOperands(I);
2798}
2799
2800void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
2801 visitInstOperands(I);
2802}
2803
2804void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
2805 visitInstOperands(I);
2806}
2807
2808void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
2809 IRBuilder<> IRB(&I);
2810 Value *Agg = I.getAggregateOperand();
2811 Value *AggShadow = DFSF.getShadow(Agg);
2812 Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
2813 DFSF.setShadow(&I, ResShadow);
2814 visitInstOperandOrigins(I);
2815}
2816
2817void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
2818 IRBuilder<> IRB(&I);
2819 Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
2820 Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
2821 Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
2822 DFSF.setShadow(&I, Res);
2823 visitInstOperandOrigins(I);
2824}
2825
2826void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
2827 bool AllLoadsStores = true;
2828 for (User *U : I.users()) {
2829 if (isa<LoadInst>(U))
2830 continue;
2831
2832 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
2833 if (SI->getPointerOperand() == &I)
2834 continue;
2835 }
2836
2837 AllLoadsStores = false;
2838 break;
2839 }
2840 if (AllLoadsStores) {
2841 IRBuilder<> IRB(&I);
2842 DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
2843 if (DFSF.DFS.shouldTrackOrigins()) {
2844 DFSF.AllocaOriginMap[&I] =
2845 IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
2846 }
2847 }
2848 DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
2849 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2850}
2851
2852void DFSanVisitor::visitSelectInst(SelectInst &I) {
2853 Value *CondShadow = DFSF.getShadow(I.getCondition());
2854 Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
2855 Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
2856 Value *ShadowSel = nullptr;
2857 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2858 std::vector<Value *> Shadows;
2859 std::vector<Value *> Origins;
2860 Value *TrueOrigin =
2861 ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
2862 Value *FalseOrigin =
2863 ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;
2864
2865 DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());
2866
2867 if (isa<VectorType>(I.getCondition()->getType())) {
2868 ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
2869 FalseShadow, &I);
2870 if (ShouldTrackOrigins) {
2871 Shadows.push_back(TrueShadow);
2872 Shadows.push_back(FalseShadow);
2873 Origins.push_back(TrueOrigin);
2874 Origins.push_back(FalseOrigin);
2875 }
2876 } else {
2877 if (TrueShadow == FalseShadow) {
2878 ShadowSel = TrueShadow;
2879 if (ShouldTrackOrigins) {
2880 Shadows.push_back(TrueShadow);
2881 Origins.push_back(TrueOrigin);
2882 }
2883 } else {
2884 ShadowSel =
2885 SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
2886 if (ShouldTrackOrigins) {
2887 Shadows.push_back(ShadowSel);
2888 Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
2889 FalseOrigin, "", &I));
2890 }
2891 }
2892 }
2893 DFSF.setShadow(&I, ClTrackSelectControlFlow
2894 ? DFSF.combineShadowsThenConvert(
2895 I.getType(), CondShadow, ShadowSel, &I)
2896 : ShadowSel);
2897 if (ShouldTrackOrigins) {
2899 Shadows.push_back(CondShadow);
2900 Origins.push_back(DFSF.getOrigin(I.getCondition()));
2901 }
2902 DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I));
2903 }
2904}
2905
2906void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
2907 IRBuilder<> IRB(&I);
2908 Value *ValShadow = DFSF.getShadow(I.getValue());
2909 Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
2910 ? DFSF.getOrigin(I.getValue())
2911 : DFSF.DFS.ZeroOrigin;
2912 IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
2913 {ValShadow, ValOrigin, I.getDest(),
2914 IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
2915}
2916
2917void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
2918 IRBuilder<> IRB(&I);
2919
2920 // CopyOrMoveOrigin transfers origins by refering to their shadows. So we
2921 // need to move origins before moving shadows.
2922 if (DFSF.DFS.shouldTrackOrigins()) {
2923 IRB.CreateCall(
2924 DFSF.DFS.DFSanMemOriginTransferFn,
2925 {I.getArgOperand(0), I.getArgOperand(1),
2926 IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
2927 }
2928
2929 Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
2930 Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
2931 Value *LenShadow =
2932 IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
2933 DFSF.DFS.ShadowWidthBytes));
2934 auto *MTI = cast<MemTransferInst>(
2935 IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
2936 {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
2937 MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne()));
2938 MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne()));
2939 if (ClEventCallbacks) {
2940 IRB.CreateCall(
2941 DFSF.DFS.DFSanMemTransferCallbackFn,
2942 {DestShadow, IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
2943 }
2944}
2945
2946void DFSanVisitor::visitBranchInst(BranchInst &BR) {
2947 if (!BR.isConditional())
2948 return;
2949
2950 DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());
2951}
2952
2953void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
2954 DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());
2955}
2956
2957static bool isAMustTailRetVal(Value *RetVal) {
2958 // Tail call may have a bitcast between return.
2959 if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
2960 RetVal = I->getOperand(0);
2961 }
2962 if (auto *I = dyn_cast<CallInst>(RetVal)) {
2963 return I->isMustTailCall();
2964 }
2965 return false;
2966}
2967
2968void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
2969 if (!DFSF.IsNativeABI && RI.getReturnValue()) {
2970 // Don't emit the instrumentation for musttail call returns.
2972 return;
2973
2974 Value *S = DFSF.getShadow(RI.getReturnValue());
2975 IRBuilder<> IRB(&RI);
2976 Type *RT = DFSF.F->getFunctionType()->getReturnType();
2977 unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
2978 if (Size <= RetvalTLSSize) {
2979 // If the size overflows, stores nothing. At callsite, oversized return
2980 // shadows are set to zero.
2981 IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
2982 }
2983 if (DFSF.DFS.shouldTrackOrigins()) {
2984 Value *O = DFSF.getOrigin(RI.getReturnValue());
2985 IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
2986 }
2987 }
2988}
2989
2990void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
2991 std::vector<Value *> &Args,
2992 IRBuilder<> &IRB) {
2993 FunctionType *FT = F.getFunctionType();
2994
2995 auto *I = CB.arg_begin();
2996
2997 // Adds non-variable argument shadows.
2998 for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
2999 Args.push_back(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB));
3000
3001 // Adds variable argument shadows.
3002 if (FT->isVarArg()) {
3003 auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
3004 CB.arg_size() - FT->getNumParams());
3005 auto *LabelVAAlloca =
3006 new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
3007 "labelva", &DFSF.F->getEntryBlock().front());
3008
3009 for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
3010 auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
3011 IRB.CreateStore(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB),
3012 LabelVAPtr);
3013 }
3014
3015 Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
3016 }
3017
3018 // Adds the return value shadow.
3019 if (!FT->getReturnType()->isVoidTy()) {
3020 if (!DFSF.LabelReturnAlloca) {
3021 DFSF.LabelReturnAlloca = new AllocaInst(
3022 DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
3023 "labelreturn", &DFSF.F->getEntryBlock().front());
3024 }
3025 Args.push_back(DFSF.LabelReturnAlloca);
3026 }
3027}
3028
3029void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
3030 std::vector<Value *> &Args,
3031 IRBuilder<> &IRB) {
3032 FunctionType *FT = F.getFunctionType();
3033
3034 auto *I = CB.arg_begin();
3035
3036 // Add non-variable argument origins.
3037 for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
3038 Args.push_back(DFSF.getOrigin(*I));
3039
3040 // Add variable argument origins.
3041 if (FT->isVarArg()) {
3042 auto *OriginVATy =
3043 ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
3044 auto *OriginVAAlloca =
3045 new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
3046 "originva", &DFSF.F->getEntryBlock().front());
3047
3048 for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
3049 auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
3050 IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
3051 }
3052
3053 Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
3054 }
3055
3056 // Add the return value origin.
3057 if (!FT->getReturnType()->isVoidTy()) {
3058 if (!DFSF.OriginReturnAlloca) {
3059 DFSF.OriginReturnAlloca = new AllocaInst(
3060 DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
3061 "originreturn", &DFSF.F->getEntryBlock().front());
3062 }
3063 Args.push_back(DFSF.OriginReturnAlloca);
3064 }
3065}
3066
3067bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
3068 IRBuilder<> IRB(&CB);
3069 switch (DFSF.DFS.getWrapperKind(&F)) {
3070 case DataFlowSanitizer::WK_Warning:
3071 CB.setCalledFunction(&F);
3072 IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
3073 IRB.CreateGlobalStringPtr(F.getName()));
3074 DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
3075 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3076 DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
3077 return true;
3078 case DataFlowSanitizer::WK_Discard:
3079 CB.setCalledFunction(&F);
3080 DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
3081 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3082 DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
3083 return true;
3084 case DataFlowSanitizer::WK_Functional:
3085 CB.setCalledFunction(&F);
3086 DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
3087 visitInstOperands(CB);
3088 return true;
3089 case DataFlowSanitizer::WK_Custom:
3090 // Don't try to handle invokes of custom functions, it's too complicated.
3091 // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
3092 // wrapper.
3093 CallInst *CI = dyn_cast<CallInst>(&CB);
3094 if (!CI)
3095 return false;
3096
3097 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
3098 FunctionType *FT = F.getFunctionType();
3099 TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
3100 std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
3101 CustomFName += F.getName();
3102 FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
3103 CustomFName, CustomFn.TransformedType);
3104 if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
3105 CustomFn->copyAttributesFrom(&F);
3106
3107 // Custom functions returning non-void will write to the return label.
3108 if (!FT->getReturnType()->isVoidTy()) {
3109 CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
3110 }
3111 }
3112
3113 std::vector<Value *> Args;
3114
3115 // Adds non-variable arguments.
3116 auto *I = CB.arg_begin();
3117 for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
3118 Args.push_back(*I);
3119 }
3120
3121 // Adds shadow arguments.
3122 const unsigned ShadowArgStart = Args.size();
3123 addShadowArguments(F, CB, Args, IRB);
3124
3125 // Adds origin arguments.
3126 const unsigned OriginArgStart = Args.size();
3127 if (ShouldTrackOrigins)
3128 addOriginArguments(F, CB, Args, IRB);
3129
3130 // Adds variable arguments.
3131 append_range(Args, drop_begin(CB.args(), FT->getNumParams()));
3132
3133 CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
3134 CustomCI->setCallingConv(CI->getCallingConv());
3135 CustomCI->setAttributes(transformFunctionAttributes(
3136 CustomFn, CI->getContext(), CI->getAttributes()));
3137
3138 // Update the parameter attributes of the custom call instruction to
3139 // zero extend the shadow parameters. This is required for targets
3140 // which consider PrimitiveShadowTy an illegal type.
3141 for (unsigned N = 0; N < FT->getNumParams(); N++) {
3142 const unsigned ArgNo = ShadowArgStart + N;
3143 if (CustomCI->getArgOperand(ArgNo)->getType() ==
3144 DFSF.DFS.PrimitiveShadowTy)
3145 CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
3146 if (ShouldTrackOrigins) {
3147 const unsigned OriginArgNo = OriginArgStart + N;
3148 if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
3149 DFSF.DFS.OriginTy)
3150 CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
3151 }
3152 }
3153
3154 // Loads the return value shadow and origin.
3155 if (!FT->getReturnType()->isVoidTy()) {
3156 LoadInst *LabelLoad =
3157 IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
3158 DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
3159 FT->getReturnType(), LabelLoad, &CB));
3160 if (ShouldTrackOrigins) {
3161 LoadInst *OriginLoad =
3162 IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
3163 DFSF.setOrigin(CustomCI, OriginLoad);
3164 }
3165 }
3166
3167 CI->replaceAllUsesWith(CustomCI);
3168 CI->eraseFromParent();
3169 return true;
3170 }
3171 return false;
3172}
3173
3174Value *DFSanVisitor::makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
3175 constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
3176 uint32_t OrderingTable[NumOrderings] = {};
3177
3178 OrderingTable[(int)AtomicOrderingCABI::relaxed] =
3179 OrderingTable[(int)AtomicOrderingCABI::acquire] =
3180 OrderingTable[(int)AtomicOrderingCABI::consume] =
3181 (int)AtomicOrderingCABI::acquire;
3182 OrderingTable[(int)AtomicOrderingCABI::release] =
3183 OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
3184 (int)AtomicOrderingCABI::acq_rel;
3185 OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
3186 (int)AtomicOrderingCABI::seq_cst;
3187
3189 ArrayRef(OrderingTable, NumOrderings));
3190}
3191
3192void DFSanVisitor::visitLibAtomicLoad(CallBase &CB) {
3193 // Since we use getNextNode here, we can't have CB terminate the BB.
3194 assert(isa<CallInst>(CB));
3195
3196 IRBuilder<> IRB(&CB);
3197 Value *Size = CB.getArgOperand(0);
3198 Value *SrcPtr = CB.getArgOperand(1);
3199 Value *DstPtr = CB.getArgOperand(2);
3200 Value *Ordering = CB.getArgOperand(3);
3201 // Convert the call to have at least Acquire ordering to make sure
3202 // the shadow operations aren't reordered before it.
3203 Value *NewOrdering =
3204 IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
3205 CB.setArgOperand(3, NewOrdering);
3206
3207 IRBuilder<> NextIRB(CB.getNextNode());
3208 NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());
3209
3210 // TODO: Support ClCombinePointerLabelsOnLoad
3211 // TODO: Support ClEventCallbacks
3212
3213 NextIRB.CreateCall(
3214 DFSF.DFS.DFSanMemShadowOriginTransferFn,
3215 {DstPtr, SrcPtr, NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3216}
3217
3218Value *DFSanVisitor::makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
3219 constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
3220 uint32_t OrderingTable[NumOrderings] = {};
3221
3222 OrderingTable[(int)AtomicOrderingCABI::relaxed] =
3223 OrderingTable[(int)AtomicOrderingCABI::release] =
3224 (int)AtomicOrderingCABI::release;
3225 OrderingTable[(int)AtomicOrderingCABI::consume] =
3226 OrderingTable[(int)AtomicOrderingCABI::acquire] =
3227 OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
3228 (int)AtomicOrderingCABI::acq_rel;
3229 OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
3230 (int)AtomicOrderingCABI::seq_cst;
3231
3233 ArrayRef(OrderingTable, NumOrderings));
3234}
3235
3236void DFSanVisitor::visitLibAtomicStore(CallBase &CB) {
3237 IRBuilder<> IRB(&CB);
3238 Value *Size = CB.getArgOperand(0);
3239 Value *SrcPtr = CB.getArgOperand(1);
3240 Value *DstPtr = CB.getArgOperand(2);
3241 Value *Ordering = CB.getArgOperand(3);
3242 // Convert the call to have at least Release ordering to make sure
3243 // the shadow operations aren't reordered after it.
3244 Value *NewOrdering =
3245 IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
3246 CB.setArgOperand(3, NewOrdering);
3247
3248 // TODO: Support ClCombinePointerLabelsOnStore
3249 // TODO: Support ClEventCallbacks
3250
3251 IRB.CreateCall(
3252 DFSF.DFS.DFSanMemShadowOriginTransferFn,
3253 {DstPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3254}
3255
3256void DFSanVisitor::visitLibAtomicExchange(CallBase &CB) {
3257 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, int
3258 // ordering)
3259 IRBuilder<> IRB(&CB);
3260 Value *Size = CB.getArgOperand(0);
3261 Value *TargetPtr = CB.getArgOperand(1);
3262 Value *SrcPtr = CB.getArgOperand(2);
3263 Value *DstPtr = CB.getArgOperand(3);
3264
3265 // This operation is not atomic for the shadow and origin memory.
3266 // This could result in DFSan false positives or false negatives.
3267 // For now we will assume these operations are rare, and
3268 // the additional complexity to address this is not warrented.
3269
3270 // Current Target to Dest
3271 IRB.CreateCall(
3272 DFSF.DFS.DFSanMemShadowOriginTransferFn,
3273 {DstPtr, TargetPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3274
3275 // Current Src to Target (overriding)
3276 IRB.CreateCall(
3277 DFSF.DFS.DFSanMemShadowOriginTransferFn,
3278 {TargetPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3279}
3280
3281void DFSanVisitor::visitLibAtomicCompareExchange(CallBase &CB) {
3282 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, void
3283 // *desired, int success_order, int failure_order)
3284 Value *Size = CB.getArgOperand(0);
3285 Value *TargetPtr = CB.getArgOperand(1);
3286 Value *ExpectedPtr = CB.getArgOperand(2);
3287 Value *DesiredPtr = CB.getArgOperand(3);
3288
3289 // This operation is not atomic for the shadow and origin memory.
3290 // This could result in DFSan false positives or false negatives.
3291 // For now we will assume these operations are rare, and
3292 // the additional complexity to address this is not warrented.
3293
3294 IRBuilder<> NextIRB(CB.getNextNode());
3295 NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());
3296
3297 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3298
3299 // If original call returned true, copy Desired to Target.
3300 // If original call returned false, copy Target to Expected.
3301 NextIRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginConditionalExchangeFn,
3302 {NextIRB.CreateIntCast(&CB, NextIRB.getInt8Ty(), false),
3303 TargetPtr, ExpectedPtr, DesiredPtr,
3304 NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3305}
3306
3307void DFSanVisitor::visitCallBase(CallBase &CB) {
3309 if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
3310 visitInstOperands(CB);
3311 return;
3312 }
3313
3314 // Calls to this function are synthesized in wrappers, and we shouldn't
3315 // instrument them.
3316 if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
3317 return;
3318
3319 LibFunc LF;
3320 if (DFSF.TLI.getLibFunc(CB, LF)) {
3321 // libatomic.a functions need to have special handling because there isn't
3322 // a good way to intercept them or compile the library with
3323 // instrumentation.
3324 switch (LF) {
3325 case LibFunc_atomic_load:
3326 if (!isa<CallInst>(CB)) {
3327 llvm::errs() << "DFSAN -- cannot instrument invoke of libatomic load. "
3328 "Ignoring!\n";
3329 break;
3330 }
3331 visitLibAtomicLoad(CB);
3332 return;
3333 case LibFunc_atomic_store:
3334 visitLibAtomicStore(CB);
3335 return;
3336 default:
3337 break;
3338 }
3339 }
3340
3341 // TODO: These are not supported by TLI? They are not in the enum.
3342 if (F && F->hasName() && !F->isVarArg()) {
3343 if (F->getName() == "__atomic_exchange") {
3344 visitLibAtomicExchange(CB);
3345 return;
3346 }
3347 if (F->getName() == "__atomic_compare_exchange") {
3348 visitLibAtomicCompareExchange(CB);
3349 return;
3350 }
3351 }
3352
3354 DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
3355 if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
3356 if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
3357 return;
3358
3359 IRBuilder<> IRB(&CB);
3360
3361 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
3362 FunctionType *FT = CB.getFunctionType();
3363 const DataLayout &DL = getDataLayout();
3364
3365 // Stores argument shadows.
3366 unsigned ArgOffset = 0;
3367 for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
3368 if (ShouldTrackOrigins) {
3369 // Ignore overflowed origins
3370 Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
3371 if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
3372 !DFSF.DFS.isZeroShadow(ArgShadow))
3373 IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
3374 DFSF.getArgOriginTLS(I, IRB));
3375 }
3376
3377 unsigned Size =
3378 DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
3379 // Stop storing if arguments' size overflows. Inside a function, arguments
3380 // after overflow have zero shadow values.
3381 if (ArgOffset + Size > ArgTLSSize)
3382 break;
3383 IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
3384 DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
3386 ArgOffset += alignTo(Size, ShadowTLSAlignment);
3387 }
3388
3389 Instruction *Next = nullptr;
3390 if (!CB.getType()->isVoidTy()) {
3391 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
3392 if (II->getNormalDest()->getSinglePredecessor()) {
3393 Next = &II->getNormalDest()->front();
3394 } else {
3395 BasicBlock *NewBB =
3396 SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
3397 Next = &NewBB->front();
3398 }
3399 } else {
3400 assert(CB.getIterator() != CB.getParent()->end());
3401 Next = CB.getNextNode();
3402 }
3403
3404 // Don't emit the epilogue for musttail call returns.
3405 if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
3406 return;
3407
3408 // Loads the return value shadow.
3409 IRBuilder<> NextIRB(Next);
3410 unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
3411 if (Size > RetvalTLSSize) {
3412 // Set overflowed return shadow to be zero.
3413 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3414 } else {
3415 LoadInst *LI = NextIRB.CreateAlignedLoad(
3416 DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
3417 ShadowTLSAlignment, "_dfsret");
3418 DFSF.SkipInsts.insert(LI);
3419 DFSF.setShadow(&CB, LI);
3420 DFSF.NonZeroChecks.push_back(LI);
3421 }
3422
3423 if (ShouldTrackOrigins) {
3424 LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
3425 DFSF.getRetvalOriginTLS(), "_dfsret_o");
3426 DFSF.SkipInsts.insert(LI);
3427 DFSF.setOrigin(&CB, LI);
3428 }
3429
3430 DFSF.addReachesFunctionCallbacksIfEnabled(NextIRB, CB, &CB);
3431 }
3432}
3433
3434void DFSanVisitor::visitPHINode(PHINode &PN) {
3435 Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
3436 PHINode *ShadowPN =
3437 PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);
3438
3439 // Give the shadow phi node valid predecessors to fool SplitEdge into working.
3440 Value *UndefShadow = UndefValue::get(ShadowTy);
3441 for (BasicBlock *BB : PN.blocks())
3442 ShadowPN->addIncoming(UndefShadow, BB);
3443
3444 DFSF.setShadow(&PN, ShadowPN);
3445
3446 PHINode *OriginPN = nullptr;
3447 if (DFSF.DFS.shouldTrackOrigins()) {
3448 OriginPN =
3449 PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN);
3450 Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
3451 for (BasicBlock *BB : PN.blocks())
3452 OriginPN->addIncoming(UndefOrigin, BB);
3453 DFSF.setOrigin(&PN, OriginPN);
3454 }
3455
3456 DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
3457}
3458
3461 auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
3462 auto &FAM =
3465 };
3466 if (!DataFlowSanitizer(ABIListFiles).runImpl(M, GetTLI))
3467 return PreservedAnalyses::all();
3468
3470 // GlobalsAA is considered stateless and does not get invalidated unless
3471 // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
3472 // make changes that require GlobalsAA to be invalidated.
3473 PA.abandon<GlobalsAA>();
3474 return PA;
3475}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isConstant(const MachineInstr &MI)
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MemoryMapParams Linux_LoongArch64_MemoryMapParams
const MemoryMapParams Linux_X86_64_MemoryMapParams
static cl::opt< bool > ClTrackSelectControlFlow("dfsan-track-select-control-flow", cl::desc("Propagate labels from condition values of select instructions " "to results."), cl::Hidden, cl::init(true))
static cl::list< std::string > ClCombineTaintLookupTables("dfsan-combine-taint-lookup-table", cl::desc("When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables)."), cl::Hidden)
static const Align MinOriginAlignment
static cl::opt< int > ClTrackOrigins("dfsan-track-origins", cl::desc("Track origins of labels"), cl::Hidden, cl::init(0))
static cl::list< std::string > ClABIListFiles("dfsan-abilist", cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden)
static cl::opt< bool > ClReachesFunctionCallbacks("dfsan-reaches-function-callbacks", cl::desc("Insert calls to callback functions on data reaching a function."), cl::Hidden, cl::init(false))
static Value * expandFromPrimitiveShadowRecursive(Value *Shadow, SmallVector< unsigned, 4 > &Indices, Type *SubShadowTy, Value *PrimitiveShadow, IRBuilder<> &IRB)
static cl::opt< int > ClInstrumentWithCallThreshold("dfsan-instrument-with-call-threshold", cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(3500))
static cl::opt< bool > ClPreserveAlignment("dfsan-preserve-alignment", cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false))
static cl::opt< bool > ClDebugNonzeroLabels("dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label"), cl::Hidden)
static cl::opt< bool > ClCombineOffsetLabelsOnGEP("dfsan-combine-offset-labels-on-gep", cl::desc("Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic."), cl::Hidden, cl::init(true))
static cl::opt< bool > ClIgnorePersonalityRoutine("dfsan-ignore-personality-routine", cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it."), cl::Hidden, cl::init(false))
static const Align ShadowTLSAlignment
static AtomicOrdering addReleaseOrdering(AtomicOrdering AO)
static AtomicOrdering addAcquireOrdering(AtomicOrdering AO)
Value * StripPointerGEPsAndCasts(Value *V)
const MemoryMapParams Linux_AArch64_MemoryMapParams
static cl::opt< bool > ClConditionalCallbacks("dfsan-conditional-callbacks", cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, cl::init(false))
static cl::opt< bool > ClCombinePointerLabelsOnLoad("dfsan-combine-pointer-labels-on-load", cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory."), cl::Hidden, cl::init(true))
static StringRef getGlobalTypeString(const GlobalValue &G)
static cl::opt< bool > ClCombinePointerLabelsOnStore("dfsan-combine-pointer-labels-on-store", cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory."), cl::Hidden, cl::init(false))
static const unsigned ArgTLSSize
static const unsigned RetvalTLSSize
static bool isAMustTailRetVal(Value *RetVal)
static cl::opt< bool > ClEventCallbacks("dfsan-event-callbacks", cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
uint64_t Addr
std::string Name
uint64_t Size
static bool runImpl(Function &F, const TargetLowering &TLI)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
Module.h This file contains the declarations for the Module class.
nvptx lower args
#define P(N)
Module * Mod
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
Defines the virtual file system interface vfs::FileSystem.
Class for arbitrary precision integers.
Definition: APInt.h:76
an instruction to allocate memory on the stack
Definition: Instructions.h:59
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
unsigned getNumAttrSets() const
AttributeSet getParamAttrs(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Definition: AttributeMask.h:44
static Attribute getWithMemoryEffects(LLVMContext &Context, MemoryEffects ME)
Definition: Attributes.cpp:215
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
const Instruction & front() const
Definition: BasicBlock.h:452
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:198
This class represents a no-op cast from one type to another.
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1455
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1770
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1765
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1703
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1761
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1623
Value * getCalledOperand() const
Definition: InstrTypes.h:1696
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1784
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Definition: InstrTypes.h:1822
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1648
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1653
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1629
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1561
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1639
unsigned arg_size() const
Definition: InstrTypes.h:1646
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1780
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1832
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1742
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr, BasicBlock::iterator InsertBefore)
bool isMustTailCall() const
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:579
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:955
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1663
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:2893
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1016
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:122
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
unsigned getLine() const
Definition: DebugLoc.cpp:24
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:20
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a single (scalar) element from a VectorType value.
This instruction extracts a struct member or array element value from an aggregate value.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:162
void removeFnAttrs(const AttributeMask &Attrs)
Definition: Function.cpp:635
void removeFnAttr(Attribute::AttrKind Kind)
Remove function attributes from this function.
Definition: Function.cpp:627
arg_iterator arg_begin()
Definition: Function.h:810
void removeRetAttrs(const AttributeMask &Attrs)
removes the attributes from the return value list of attributes.
Definition: Function.cpp:647
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition: Function.cpp:781
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:548
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:556
static bool isExternalWeakLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:411
LinkageTypes getLinkage() const
Definition: GlobalValue.h:545
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ WeakODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:57
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
Type * getValueType() const
Definition: GlobalValue.h:296
Analysis pass providing a never-invalidated alias analysis result.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2455
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1880
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition: IRBuilder.h:1772
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2506
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2443
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1806
Constant * CreateGlobalStringPtr(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr)
Same as CreateGlobalString, but return a pointer with "i8*" type instead of a pointer to array of i8.
Definition: IRBuilder.h:1992
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2153
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2499
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1108
Value * CreateConstGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
Definition: IRBuilder.h:1946
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
Definition: IRBuilder.h:1972
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2105
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1431
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:525
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2228
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1789
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1410
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1802
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2179
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1825
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2395
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1513
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1865
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1355
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:658
This instruction inserts a single (scalar) element into a VectorType value.
This instruction inserts a struct field or array element value into an aggregate value.
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCmpInst(CmpInst &I)
Definition: InstVisitor.h:262
RetTy visitExtractElementInst(ExtractElementInst &I)
Definition: InstVisitor.h:191
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:267
RetTy visitInsertValueInst(InsertValueInst &I)
Definition: InstVisitor.h:195
RetTy visitShuffleVectorInst(ShuffleVectorInst &I)
Definition: InstVisitor.h:193
RetTy visitLandingPadInst(LandingPadInst &I)
Definition: InstVisitor.h:196
RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I)
Definition: InstVisitor.h:171
RetTy visitBitCastInst(BitCastInst &I)
Definition: InstVisitor.h:187
RetTy visitSwitchInst(SwitchInst &I)
Definition: InstVisitor.h:232
RetTy visitPHINode(PHINode &I)
Definition: InstVisitor.h:175
RetTy visitReturnInst(ReturnInst &I)
Definition: InstVisitor.h:226
RetTy visitExtractValueInst(ExtractValueInst &I)
Definition: InstVisitor.h:194
RetTy visitUnaryOperator(UnaryOperator &I)
Definition: InstVisitor.h:260
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:170
RetTy visitInsertElementInst(InsertElementInst &I)
Definition: InstVisitor.h:192
RetTy visitAtomicRMWInst(AtomicRMWInst &I)
Definition: InstVisitor.h:172
RetTy visitAllocaInst(AllocaInst &I)
Definition: InstVisitor.h:168
RetTy visitBinaryOperator(BinaryOperator &I)
Definition: InstVisitor.h:261
RetTy visitMemTransferInst(MemTransferInst &I)
Definition: InstVisitor.h:214
RetTy visitMemSetInst(MemSetInst &I)
Definition: InstVisitor.h:209
RetTy visitCastInst(CastInst &I)
Definition: InstVisitor.h:259
RetTy visitBranchInst(BranchInst &I)
Definition: InstVisitor.h:229
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
RetTy visitGetElementPtrInst(GetElementPtrInst &I)
Definition: InstVisitor.h:174
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:452
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:80
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const BasicBlock * getParent() const
Definition: Instruction.h:150
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
bool isTerminator() const
Definition: Instruction.h:253
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:449
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:184
void setAlignment(Align Align)
Definition: Instructions.h:240
Value * getPointerOperand()
Definition: Instructions.h:280
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:250
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:245
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
static MemoryEffectsBase readOnly()
Create MemoryEffectsBase that can read any memory.
Definition: ModRef.h:122
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const std::string & getModuleInlineAsm() const
Get any module-scope inline assembly blocks.
Definition: Module.h:299
void setModuleInlineAsm(StringRef Asm)
Set the module-scope inline assembly blocks.
Definition: Module.h:338
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:167
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
Constant * getOrInsertGlobal(StringRef Name, Type *Ty, function_ref< GlobalVariable *()> CreateGlobalCallback)
Look up the specified global in the module symbol table.
Definition: Module.cpp:221
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:41
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void abandon()
Mark an analysis as abandoned.
Definition: Analysis.h:162
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal, BasicBlock::iterator InsertBefore)
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
static std::unique_ptr< SpecialCaseList > createOrDie(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS)
Parses the special case list entries from files.
An instruction for storing to memory.
Definition: Instructions.h:317
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
Multiway switch.
Value * getCondition() const
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
@ loongarch64
Definition: Triple.h:62
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Key
PAL metadata keys.
AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1045
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< BlockNode * > Block
Definition: RDFGraph.h:392
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2053
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2039
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
iterator_range< df_iterator< T > > depth_first(const T &G)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Remove all blocks that can not be reached from the function's entry.
Definition: Local.cpp:3162
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85