LLVM 20.0.0git
BoundsChecking.cpp
Go to the documentation of this file.
1//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/Statistic.h"
11#include "llvm/ADT/StringRef.h"
12#include "llvm/ADT/Twine.h"
17#include "llvm/IR/BasicBlock.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/DataLayout.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/Value.h"
29#include "llvm/Support/Debug.h"
31#include <utility>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "bounds-checking"
36
37static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
38 cl::desc("Use one trap block per function"));
39
40static cl::opt<bool> DebugTrapBB("bounds-checking-unique-traps",
41 cl::desc("Always use one trap per check"));
42
43STATISTIC(ChecksAdded, "Bounds checks added");
44STATISTIC(ChecksSkipped, "Bounds checks skipped");
45STATISTIC(ChecksUnable, "Bounds checks unable to add");
46
48
49/// Gets the conditions under which memory accessing instructions will overflow.
50///
51/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
52/// the result from the load or the value being stored. It is used to determine
53/// the size of memory block that is touched.
54///
55/// Returns the condition under which the access will overflow.
57 const DataLayout &DL, TargetLibraryInfo &TLI,
58 ObjectSizeOffsetEvaluator &ObjSizeEval,
59 BuilderTy &IRB, ScalarEvolution &SE) {
60 TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType());
61 LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
62 << " bytes\n");
63
64 SizeOffsetValue SizeOffset = ObjSizeEval.compute(Ptr);
65
66 if (!SizeOffset.bothKnown()) {
67 ++ChecksUnable;
68 return nullptr;
69 }
70
71 Value *Size = SizeOffset.Size;
72 Value *Offset = SizeOffset.Offset;
73 ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
74
75 Type *IndexTy = DL.getIndexType(Ptr->getType());
76 Value *NeededSizeVal = IRB.CreateTypeSize(IndexTy, NeededSize);
77
78 auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
79 auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
80 auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal));
81
82 // three checks are required to ensure safety:
83 // . Offset >= 0 (since the offset is given from the base ptr)
84 // . Size >= Offset (unsigned)
85 // . Size - Offset >= NeededSize (unsigned)
86 //
87 // optimization: if Size >= 0 (signed), skip 1st check
88 // FIXME: add NSW/NUW here? -- we dont care if the subtraction overflows
89 Value *ObjSize = IRB.CreateSub(Size, Offset);
90 Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax())
91 ? ConstantInt::getFalse(Ptr->getContext())
93 Value *Cmp3 = SizeRange.sub(OffsetRange)
94 .getUnsignedMin()
95 .uge(NeededSizeRange.getUnsignedMax())
96 ? ConstantInt::getFalse(Ptr->getContext())
97 : IRB.CreateICmpULT(ObjSize, NeededSizeVal);
98 Value *Or = IRB.CreateOr(Cmp2, Cmp3);
99 if ((!SizeCI || SizeCI->getValue().slt(0)) &&
100 !SizeRange.getSignedMin().isNonNegative()) {
101 Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0));
102 Or = IRB.CreateOr(Cmp1, Or);
103 }
104
105 return Or;
106}
107
109 if (!DebugTrapBB)
110 return IRB.CreateIntrinsic(Intrinsic::trap, {}, {});
111 // FIXME: Ideally we would use the SanitizerHandler::OutOfBounds constant.
112 return IRB.CreateIntrinsic(
113 Intrinsic::ubsantrap, {},
114 ConstantInt::get(IRB.getInt8Ty(),
115 IRB.GetInsertBlock()->getParent()->size()));
116}
117
118static CallInst *InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name) {
119 Function *Fn = IRB.GetInsertBlock()->getParent();
120 LLVMContext &Ctx = Fn->getContext();
121 llvm::AttrBuilder B(Ctx);
122 B.addAttribute(llvm::Attribute::NoUnwind);
123 if (!MayReturn)
124 B.addAttribute(llvm::Attribute::NoReturn);
126 Name,
128 Type::getVoidTy(Ctx));
129 return IRB.CreateCall(Callee);
130}
131
132/// Adds run-time bounds checks to memory accessing instructions.
133///
134/// \p Or is the condition that should guard the trap.
135///
136/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
137template <typename GetTrapBBT>
138static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) {
139 // check if the comparison is always false
140 ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
141 if (C) {
142 ++ChecksSkipped;
143 // If non-zero, nothing to do.
144 if (!C->getZExtValue())
145 return;
146 }
147 ++ChecksAdded;
148
150 BasicBlock *OldBB = SplitI->getParent();
151 BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
152 OldBB->getTerminator()->eraseFromParent();
153
154 BasicBlock *TrapBB = GetTrapBB(IRB, Cont);
155
156 if (C) {
157 // If we have a constant zero, unconditionally branch.
158 // FIXME: We should really handle this differently to bypass the splitting
159 // the block.
160 BranchInst::Create(TrapBB, OldBB);
161 return;
162 }
163
164 // Create the conditional branch.
165 BranchInst::Create(TrapBB, Cont, Or, OldBB);
166}
167
169 bool MayReturn = false;
170 bool UseTrap = false;
171 bool MinRuntime = false;
173
175 switch (Mode) {
176 case BoundsCheckingPass::ReportingMode::Trap:
177 UseTrap = true;
178 break;
179 case BoundsCheckingPass::ReportingMode::MinRuntime:
180 Name = "__ubsan_handle_local_out_of_bounds_minimal";
181 MinRuntime = true;
182 MayReturn = true;
183 break;
184 case BoundsCheckingPass::ReportingMode::MinRuntimeAbort:
185 Name = "__ubsan_handle_local_out_of_bounds_minimal_abort";
186 MinRuntime = true;
187 break;
188 case BoundsCheckingPass::ReportingMode::FullRuntime:
189 Name = "__ubsan_handle_local_out_of_bounds";
190 MayReturn = true;
191 break;
192 case BoundsCheckingPass::ReportingMode::FullRuntimeAbort:
193 Name = "__ubsan_handle_local_out_of_bounds_abort";
194 break;
195 }
196 }
197};
198
200 ScalarEvolution &SE, const ReportingOpts &Opts) {
201 if (F.hasFnAttribute(Attribute::NoSanitizeBounds))
202 return false;
203
204 const DataLayout &DL = F.getDataLayout();
205 ObjectSizeOpts EvalOpts;
206 EvalOpts.RoundToAlign = true;
207 EvalOpts.EvalMode = ObjectSizeOpts::Mode::ExactUnderlyingSizeAndOffset;
208 ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
209
210 // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
211 // touching instructions
213 for (Instruction &I : instructions(F)) {
214 Value *Or = nullptr;
215 BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
216 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
217 if (!LI->isVolatile())
218 Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
219 ObjSizeEval, IRB, SE);
220 } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
221 if (!SI->isVolatile())
222 Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
223 DL, TLI, ObjSizeEval, IRB, SE);
224 } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
225 if (!AI->isVolatile())
226 Or =
227 getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
228 DL, TLI, ObjSizeEval, IRB, SE);
229 } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
230 if (!AI->isVolatile())
231 Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(),
232 DL, TLI, ObjSizeEval, IRB, SE);
233 }
234 if (Or)
235 TrapInfo.push_back(std::make_pair(&I, Or));
236 }
237
238 // Create a trapping basic block on demand using a callback. Depending on
239 // flags, this will either create a single block for the entire function or
240 // will create a fresh block every time it is called.
241 BasicBlock *ReuseTrapBB = nullptr;
242 auto GetTrapBB = [&ReuseTrapBB, &Opts](BuilderTy &IRB, BasicBlock *Cont) {
243 Function *Fn = IRB.GetInsertBlock()->getParent();
244 auto DebugLoc = IRB.getCurrentDebugLocation();
246
247 // Create a trapping basic block on demand using a callback. Depending on
248 // flags, this will either create a single block for the entire function or
249 // will create a fresh block every time it is called.
250 if (ReuseTrapBB)
251 return ReuseTrapBB;
252
253 BasicBlock *TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
254 IRB.SetInsertPoint(TrapBB);
255
256 CallInst *TrapCall = Opts.UseTrap
257 ? InsertTrap(IRB)
258 : InsertCall(IRB, Opts.MayReturn, Opts.Name);
259 if (DebugTrapBB) {
260 // FIXME: Pass option form clang.
261 TrapCall->addFnAttr(llvm::Attribute::NoMerge);
262 }
263
264 TrapCall->setDoesNotThrow();
265 TrapCall->setDebugLoc(DebugLoc);
266 if (Opts.MayReturn) {
267 IRB.CreateBr(Cont);
268 } else {
269 TrapCall->setDoesNotReturn();
270 IRB.CreateUnreachable();
271 }
272
273 if (!Opts.MayReturn && SingleTrapBB && !DebugTrapBB)
274 ReuseTrapBB = TrapBB;
275
276 return TrapBB;
277 };
278
279 for (const auto &Entry : TrapInfo) {
280 Instruction *Inst = Entry.first;
282 insertBoundsCheck(Entry.second, IRB, GetTrapBB);
283 }
284
285 return !TrapInfo.empty();
286}
287
289 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
290 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
291
292 if (!addBoundsChecking(F, TLI, SE, ReportingOpts(Mode)))
293 return PreservedAnalyses::all();
294
296}
297
299 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
301 OS, MapClassName2PassName);
302 switch (Mode) {
304 OS << "<trap>";
305 break;
307 OS << "<min-rt>";
308 break;
310 OS << "<min-rt-abort>";
311 break;
313 OS << "<rt>";
314 break;
316 OS << "<rt-abort>";
317 break;
318 }
319}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB)
Adds run-time bounds checks to memory accessing instructions.
static CallInst * InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name)
static cl::opt< bool > DebugTrapBB("bounds-checking-unique-traps", cl::desc("Always use one trap per check"))
static CallInst * InsertTrap(BuilderTy &IRB)
static Value * getBoundsCheckCond(Value *Ptr, Value *InstVal, const DataLayout &DL, TargetLibraryInfo &TLI, ObjectSizeOffsetEvaluator &ObjSizeEval, BuilderTy &IRB, ScalarEvolution &SE)
Gets the conditions under which memory accessing instructions will overflow.
static cl::opt< bool > SingleTrapBB("bounds-checking-single-trap", cl::desc("Use one trap block per function"))
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ScalarEvolution &SE, const ReportingOpts &Opts)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(...)
Definition: Debug.h:106
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
raw_pwrite_stream & OS
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:577
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setDoesNotReturn()
Definition: InstrTypes.h:1917
void setDoesNotThrow()
Definition: InstrTypes.h:1924
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
Definition: InstrTypes.h:1482
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:873
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
size_t size() const
Definition: Function.h:858
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2289
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateTypeSize(Type *DstType, TypeSize Size)
Create an expression which evaluates to the number of units in Size at runtime.
Definition: IRBuilder.cpp:103
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2305
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:468
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:204
Evaluate the size and offset of an object pointed to by a Value*.
SizeOffsetValue compute(Value *V)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
bool empty() const
Definition: SmallVector.h:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getVoidTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ Or
Bitwise or logical OR of integers.
ReportingOpts(BoundsCheckingPass::ReportingMode Mode)
Various options to control the behavior of getObjectSize.
Mode EvalMode
How we want to evaluate this object's size.
bool RoundToAlign
Whether to round the result up to the alignment of allocas, byval arguments, and global variables.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69
bool bothKnown() const