LLVM 23.0.0git
BoundsChecking.cpp
Go to the documentation of this file.
1//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/Statistic.h"
11#include "llvm/ADT/StringRef.h"
12#include "llvm/ADT/Twine.h"
17#include "llvm/IR/BasicBlock.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/DataLayout.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/Value.h"
29#include "llvm/Support/Debug.h"
31#include <utility>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "bounds-checking"
36
37static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
38 cl::desc("Use one trap block per function"));
39
40STATISTIC(ChecksAdded, "Bounds checks added");
41STATISTIC(ChecksSkipped, "Bounds checks skipped");
42STATISTIC(ChecksUnable, "Bounds checks unable to add");
43
45 mutable MDNode *NoSanitizeMD = nullptr;
46
47public:
48 NoSanitizeInserter() = default;
49
50 void InsertHelper(Instruction *I, const Twine &Name,
51 BasicBlock::iterator InsertPt) const override {
53 if (!NoSanitizeMD)
54 NoSanitizeMD = MDNode::get(I->getContext(), {});
55 I->setMetadata(LLVMContext::MD_nosanitize, NoSanitizeMD);
56 }
57};
58
60
61/// Gets the conditions under which memory accessing instructions will overflow.
62///
63/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
64/// the result from the load or the value being stored. It is used to determine
65/// the size of memory block that is touched.
66///
67/// Returns the condition under which the access will overflow.
68static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
69 const DataLayout &DL, TargetLibraryInfo &TLI,
70 ObjectSizeOffsetEvaluator &ObjSizeEval,
71 BuilderTy &IRB, ScalarEvolution &SE) {
72 TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType());
73 LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
74 << " bytes\n");
75
76 SizeOffsetValue SizeOffset = ObjSizeEval.compute(Ptr);
77
78 if (!SizeOffset.bothKnown()) {
79 ++ChecksUnable;
80 return nullptr;
81 }
82
83 Value *Size = SizeOffset.Size;
84 Value *Offset = SizeOffset.Offset;
86
87 Type *IndexTy = DL.getIndexType(Ptr->getType());
88 Value *NeededSizeVal = IRB.CreateTypeSize(IndexTy, NeededSize);
89
90 auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
91 auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
92 auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal));
93
94 // three checks are required to ensure safety:
95 // . Offset >= 0 (since the offset is given from the base ptr)
96 // . Size >= Offset (unsigned)
97 // . Size - Offset >= NeededSize (unsigned)
98 //
99 // optimization: if Size >= 0 (signed), skip 1st check
100 // FIXME: add NSW/NUW here? -- we don't care if the subtraction overflows
101 Value *ObjSize = IRB.CreateSub(Size, Offset);
102 Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax())
104 : IRB.CreateICmpULT(Size, Offset);
105 Value *Cmp3 = SizeRange.sub(OffsetRange)
106 .getUnsignedMin()
107 .uge(NeededSizeRange.getUnsignedMax())
109 : IRB.CreateICmpULT(ObjSize, NeededSizeVal);
110 Value *Or = IRB.CreateOr(Cmp2, Cmp3);
111 if ((!SizeCI || SizeCI->getValue().slt(0)) &&
112 !SizeRange.getSignedMin().isNonNegative()) {
113 Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0));
114 Or = IRB.CreateOr(Cmp1, Or);
115 }
116
117 return Or;
118}
119
120static CallInst *InsertTrap(BuilderTy &IRB, bool DebugTrapBB,
121 std::optional<int8_t> GuardKind) {
122 if (!DebugTrapBB)
123 return IRB.CreateIntrinsicWithoutFolding(Intrinsic::trap, {});
124
125 uint64_t ImmArg = GuardKind.has_value()
126 ? GuardKind.value()
127 : IRB.GetInsertBlock()->getParent()->size();
128 // Ensure we constrain ImmArg to fit within an 8-bit unsigned integer to
129 // prevent overflow.
130 if (ImmArg > 255)
131 ImmArg = 255;
132
134 Intrinsic::ubsantrap, ConstantInt::get(IRB.getInt8Ty(), ImmArg));
135}
136
137static CallInst *InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name) {
138 Function *Fn = IRB.GetInsertBlock()->getParent();
139 LLVMContext &Ctx = Fn->getContext();
140 llvm::AttrBuilder B(Ctx);
141 B.addAttribute(llvm::Attribute::NoUnwind);
142 if (!MayReturn)
143 B.addAttribute(llvm::Attribute::NoReturn);
145 Name,
146 llvm::AttributeList::get(Ctx, llvm::AttributeList::FunctionIndex, B),
147 Type::getVoidTy(Ctx));
148 return IRB.CreateCall(Callee);
149}
150
151/// Adds run-time bounds checks to memory accessing instructions.
152///
153/// \p Or is the condition that should guard the trap.
154///
155/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
156template <typename GetTrapBBT>
157static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) {
158 // check if the comparison is always false
160 if (C) {
161 ++ChecksSkipped;
162 // If zero (the guard condition is always false), nothing to do.
163 if (!C->getZExtValue())
164 return;
165 }
166 ++ChecksAdded;
167
169 BasicBlock *OldBB = SplitI->getParent();
170 BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
171 OldBB->getTerminator()->eraseFromParent();
172
173 BasicBlock *TrapBB = GetTrapBB(IRB, Cont);
174
175 if (C) {
176 // If the condition is a non-zero constant, always branch to the trap.
177 // FIXME: We should really handle this differently to bypass splitting
178 // the block.
179 UncondBrInst::Create(TrapBB, OldBB);
180 return;
181 }
182
183 // Create the conditional branch.
184 CondBrInst::Create(Or, TrapBB, Cont, OldBB);
185}
186
187static std::string
189 std::string Name = "__ubsan_handle_local_out_of_bounds";
190 if (Opts.MinRuntime)
191 Name += "_minimal";
192 if (!Opts.MayReturn)
193 Name += "_abort";
194 else if (Opts.HandlerPreserveAllRegs)
195 Name += "_preserve";
196 return Name;
197}
198
200 ScalarEvolution &SE,
201 const BoundsCheckingPass::Options &Opts) {
202 if (F.hasFnAttribute(Attribute::NoSanitizeBounds))
203 return false;
204
205 const DataLayout &DL = F.getDataLayout();
206 ObjectSizeOpts EvalOpts;
207 EvalOpts.RoundToAlign = true;
209 ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
210
211 // check HANDLE_MEMORY_INST in include/llvm/IR/Instruction.def for memory
212 // touching instructions
214 for (Instruction &I : instructions(F)) {
215 Value *Or = nullptr;
216 BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
217 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
218 if (!LI->isVolatile())
219 Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
220 ObjSizeEval, IRB, SE);
221 } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
222 if (!SI->isVolatile())
223 Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
224 DL, TLI, ObjSizeEval, IRB, SE);
226 if (!AI->isVolatile())
227 Or =
228 getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
229 DL, TLI, ObjSizeEval, IRB, SE);
230 } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
231 if (!AI->isVolatile())
232 Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(),
233 DL, TLI, ObjSizeEval, IRB, SE);
234 }
235 if (Or) {
236 if (Opts.GuardKind) {
237 llvm::Value *Allow = IRB.CreateIntrinsic(
238 IRB.getInt1Ty(), Intrinsic::allow_ubsan_check,
239 {llvm::ConstantInt::getSigned(IRB.getInt8Ty(), *Opts.GuardKind)});
240 Or = IRB.CreateAnd(Or, Allow);
241 }
242 TrapInfo.push_back(std::make_pair(&I, Or));
243 }
244 }
245
246 std::string Name;
247 if (Opts.Rt)
248 Name = getRuntimeCallName(*Opts.Rt);
249
250 // Create a trapping basic block on demand using a callback. Depending on
251 // flags, this will either create a single block for the entire function or
252 // will create a fresh block every time it is called.
253 BasicBlock *ReuseTrapBB = nullptr;
254 auto GetTrapBB = [&ReuseTrapBB, &Opts, &Name](BuilderTy &IRB,
255 BasicBlock *Cont) {
256 Function *Fn = IRB.GetInsertBlock()->getParent();
257 auto DebugLoc = IRB.getCurrentDebugLocation();
259
260 // If an earlier call already created the shared trap block, hand it back
261 // instead of building a new one. ReuseTrapBB is only set below when the
262 // handler cannot return, SingleTrapBB is set, and Opts.Merge is enabled.
263 if (ReuseTrapBB)
264 return ReuseTrapBB;
265
266 BasicBlock *TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
267 IRB.SetInsertPoint(TrapBB);
268
269 bool DebugTrapBB = !Opts.Merge;
270 CallInst *TrapCall = Opts.Rt ? InsertCall(IRB, Opts.Rt->MayReturn, Name)
271 : InsertTrap(IRB, DebugTrapBB, Opts.GuardKind);
272 if (DebugTrapBB)
273 TrapCall->addFnAttr(llvm::Attribute::NoMerge);
274
275 TrapCall->setDoesNotThrow();
276 TrapCall->setDebugLoc(DebugLoc);
277
278 bool MayReturn = Opts.Rt && Opts.Rt->MayReturn;
279 if (MayReturn) {
280 IRB.CreateBr(Cont);
281 } else {
282 TrapCall->setDoesNotReturn();
283 IRB.CreateUnreachable();
284 }
285 // The preserve-all logic is somewhat duplicated in CGExpr.cpp for
286 // local-bounds. Make sure to change that too.
287 if (Opts.Rt && Opts.Rt->HandlerPreserveAllRegs && MayReturn)
289 if (!MayReturn && SingleTrapBB && !DebugTrapBB)
290 ReuseTrapBB = TrapBB;
291
292 return TrapBB;
293 };
294
295 for (const auto &Entry : TrapInfo) {
296 Instruction *Inst = Entry.first;
298 insertBoundsCheck(Entry.second, IRB, GetTrapBB);
299 }
300
301 return !TrapInfo.empty();
302}
303
313
315 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
317 OS, MapClassName2PassName);
318 OS << "<";
319 if (Opts.Rt) {
320 if (Opts.Rt->MinRuntime)
321 OS << "min-";
322 OS << "rt";
323 if (!Opts.Rt->MayReturn)
324 OS << "-abort";
325 } else {
326 OS << "trap";
327 }
328 if (Opts.Merge)
329 OS << ";merge";
330 if (Opts.GuardKind)
331 OS << ";guard=" << static_cast<int>(*Opts.GuardKind);
332 OS << ">";
333}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB)
Adds run-time bounds checks to memory accessing instructions.
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ScalarEvolution &SE, const BoundsCheckingPass::Options &Opts)
static CallInst * InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name)
static Value * getBoundsCheckCond(Value *Ptr, Value *InstVal, const DataLayout &DL, TargetLibraryInfo &TLI, ObjectSizeOffsetEvaluator &ObjSizeEval, BuilderTy &IRB, ScalarEvolution &SE)
Gets the conditions under which memory accessing instructions will overflow.
static std::string getRuntimeCallName(const BoundsCheckingPass::Options::Runtime &Opts)
static CallInst * InsertTrap(BuilderTy &IRB, bool DebugTrapBB, std::optional< int8_t > GuardKind)
static cl::opt< bool > SingleTrapBB("bounds-checking-single-trap", cl::desc("Use one trap block per function"))
IRBuilder< TargetFolder, NoSanitizeInserter > BuilderTy
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
NoSanitizeInserter()=default
void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const override
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
void setCallingConv(CallingConv::ID CC)
void setDoesNotReturn()
void setDoesNotThrow()
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
This class represents a function call, abstracting a target machine's calling convention.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
size_t size() const
Definition Function.h:858
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:354
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI CallInst * CreateIntrinsicWithoutFolding(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2380
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition IRBuilder.h:519
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:176
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:175
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1439
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1570
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2543
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2396
LLVM_ABI Value * CreateTypeSize(Type *Ty, TypeSize Size)
Create an expression which evaluates to the number of units in Size at runtime.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1592
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:524
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
Definition IRBuilder.h:61
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
Definition IRBuilder.h:65
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition Module.cpp:211
Evaluate the size and offset of an object pointed to by a Value*.
LLVM_ABI SizeOffsetValue compute(Value *V)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
TargetFolder - Create constants with target dependent folding.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
std::optional< Runtime > Rt
std::optional< int8_t > GuardKind
Various options to control the behavior of getObjectSize.
Mode EvalMode
How we want to evaluate this object's size.
bool RoundToAlign
Whether to round the result up to the alignment of allocas, byval arguments, and global variables.
@ ExactUnderlyingSizeAndOffset
All branches must be known and have the same underlying size and offset to be merged.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89