LLVM 20.0.0git
BoundsChecking.cpp
Go to the documentation of this file.
1//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/Statistic.h"
11#include "llvm/ADT/StringRef.h"
12#include "llvm/ADT/Twine.h"
17#include "llvm/IR/BasicBlock.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/DataLayout.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/Value.h"
29#include "llvm/Support/Debug.h"
31#include <utility>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "bounds-checking"
36
// Command-line flag. When it is set, the failure handler cannot return, and
// trap merging is allowed, the first trap block created for a function is
// reused for every later check in that function (see addBoundsChecking).
37static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
38 cl::desc("Use one trap block per function"));
39
// Pass statistics:
//  - ChecksAdded:   bumped by insertBoundsCheck when it emits a branch.
//  - ChecksSkipped: bumped by insertBoundsCheck when the condition is a
//                   constant.
//  - ChecksUnable:  bumped by getBoundsCheckCond when the object size and
//                   offset are not both known.
40STATISTIC(ChecksAdded, "Bounds checks added");
41STATISTIC(ChecksSkipped, "Bounds checks skipped");
42STATISTIC(ChecksUnable, "Bounds checks unable to add");
43
45
47 ReportingMode Mode, bool Merge)
48 : Mode(Mode), Merge(Merge) {}
49
50/// Gets the conditions under which memory accessing instructions will overflow.
51///
52/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
53/// the result from the load or the value being stored. It is used to determine
54/// the size of memory block that is touched.
55///
56/// Returns the condition under which the access will overflow, or nullptr
/// (after incrementing ChecksUnable) when \p ObjSizeEval cannot determine both
/// the size and the offset of the object behind \p Ptr. Any instructions needed
/// to compute the condition are emitted through \p IRB.
58 const DataLayout &DL, TargetLibraryInfo &TLI,
59 ObjectSizeOffsetEvaluator &ObjSizeEval,
60 BuilderTy &IRB, ScalarEvolution &SE) {
61 TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType());
62 LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
63 << " bytes\n");
64
65 SizeOffsetValue SizeOffset = ObjSizeEval.compute(Ptr);
66
67 if (!SizeOffset.bothKnown()) {
68 ++ChecksUnable;
69 return nullptr;
70 }
71
72 Value *Size = SizeOffset.Size;
73 Value *Offset = SizeOffset.Offset;
74 ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
75
76 Type *IndexTy = DL.getIndexType(Ptr->getType());
77 Value *NeededSizeVal = IRB.CreateTypeSize(IndexTy, NeededSize);
78
// Unsigned ranges from ScalarEvolution; used below to fold comparisons that
// can be proven never to fire.
79 auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
80 auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
81 auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal));
82
83 // Three checks are required to ensure safety:
84 // . Offset >= 0 (since the offset is given from the base ptr)
85 // . Size >= Offset (unsigned)
86 // . Size - Offset >= NeededSize (unsigned)
87 //
88 // Optimization: if Size is known to be >= 0 (signed), skip the 1st check.
89 // FIXME: add NSW/NUW here? -- we don't care if the subtraction overflows
90 Value *ObjSize = IRB.CreateSub(Size, Offset);
// Cmp2 and Cmp3 are the *failure* conditions of the 2nd and 3rd checks. Each
// is replaced by a constant false when the ranges already prove the check
// holds. (The fallback operand of the Cmp2 conditional is on a line missing
// from this listing.)
91 Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax())
92 ? ConstantInt::getFalse(Ptr->getContext())
94 Value *Cmp3 = SizeRange.sub(OffsetRange)
95 .getUnsignedMin()
96 .uge(NeededSizeRange.getUnsignedMax())
97 ? ConstantInt::getFalse(Ptr->getContext())
98 : IRB.CreateICmpULT(ObjSize, NeededSizeVal);
99 Value *Or = IRB.CreateOr(Cmp2, Cmp3);
// The negative-offset check (1st check) is only added when Size is neither a
// non-negative constant nor proven non-negative by its range.
100 if ((!SizeCI || SizeCI->getValue().slt(0)) &&
101 !SizeRange.getSignedMin().isNonNegative()) {
102 Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0));
103 Or = IRB.CreateOr(Cmp1, Or);
104 }
105
106 return Or;
107}
108
109static CallInst *InsertTrap(BuilderTy &IRB, bool DebugTrapBB) {
110 if (!DebugTrapBB)
111 return IRB.CreateIntrinsic(Intrinsic::trap, {}, {});
112 // FIXME: Ideally we would use the SanitizerHandler::OutOfBounds constant.
113 return IRB.CreateIntrinsic(
114 Intrinsic::ubsantrap, {},
115 ConstantInt::get(IRB.getInt8Ty(),
116 IRB.GetInsertBlock()->getParent()->size()));
117}
118
/// Emits a call to the handler named \p Name at \p IRB's insert point and
/// returns the call instruction.
///
/// The attribute set built here always contains `nounwind`, plus `noreturn`
/// when \p MayReturn is false. The callee's return type is void.
/// NOTE(review): the lines that declare `Callee` and attach the attribute set
/// are missing from this listing (presumably a getOrInsertFunction call on the
/// parent module) -- confirm against the real file.
119static CallInst *InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name) {
120 Function *Fn = IRB.GetInsertBlock()->getParent();
121 LLVMContext &Ctx = Fn->getContext();
122 llvm::AttrBuilder B(Ctx);
123 B.addAttribute(llvm::Attribute::NoUnwind);
124 if (!MayReturn)
125 B.addAttribute(llvm::Attribute::NoReturn);
127 Name,
129 Type::getVoidTy(Ctx));
130 return IRB.CreateCall(Callee);
131}
132
133/// Adds run-time bounds checks to memory accessing instructions.
134///
135/// \p Or is the condition that should guard the trap.
136///
137/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
/// It is invoked as GetTrapBB(IRB, Cont), where Cont is the block holding the
/// instructions that followed the split point.
138template <typename GetTrapBBT>
139static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) {
140 // Check whether the condition folded to a constant.
141 ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
142 if (C) {
// NOTE(review): this counter is bumped for a constant-true condition too, in
// which case ChecksAdded below is incremented as well.
143 ++ChecksSkipped;
144 // If zero (the access can never overflow), nothing to do.
145 if (!C->getZExtValue())
146 return;
147 }
148 ++ChecksAdded;
149
// SplitI is declared on a line missing from this listing. The block is split
// at SplitI and the terminator left in OldBB is erased so that a branch to the
// trap block can be appended instead.
151 BasicBlock *OldBB = SplitI->getParent();
152 BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
153 OldBB->getTerminator()->eraseFromParent();
154
155 BasicBlock *TrapBB = GetTrapBB(IRB, Cont);
156
157 if (C) {
158 // The condition is a non-zero constant (always fails): branch to the trap
// block unconditionally.
159 // FIXME: We should really handle this differently to bypass the splitting
160 // the block.
161 BranchInst::Create(TrapBB, OldBB);
162 return;
163 }
164
165 // Create the conditional branch: to TrapBB when Or is true, else to Cont.
166 BranchInst::Create(TrapBB, Cont, Or, OldBB);
167}
168
// Reporting configuration derived from a ReportingMode and a Merge flag.
// (The struct header, the Name member and the constructor header are on lines
// missing from this listing.)
//
// MayReturn: the failure handler may return; addBoundsChecking then branches
// from the trap block back to the continuation instead of ending it with
// `unreachable`.
170 bool MayReturn = false;
// UseTrap: emit a trap intrinsic (InsertTrap) rather than a call to the named
// handler (InsertCall).
171 bool UseTrap = false;
// MinRuntime: set for the two "_minimal" handler modes.
172 bool MinRuntime = false;
// MayMerge: copy of the Merge argument; when false, addBoundsChecking marks the
// trap call `nomerge`.
173 bool MayMerge = true;
175
// Translate the reporting mode into the flags above and the handler name.
// The "Abort" modes leave MayReturn false.
177 switch (Mode) {
178 case BoundsCheckingPass::ReportingMode::Trap:
179 UseTrap = true;
180 break;
181 case BoundsCheckingPass::ReportingMode::MinRuntime:
182 Name = "__ubsan_handle_local_out_of_bounds_minimal";
183 MinRuntime = true;
184 MayReturn = true;
185 break;
186 case BoundsCheckingPass::ReportingMode::MinRuntimeAbort:
187 Name = "__ubsan_handle_local_out_of_bounds_minimal_abort";
188 MinRuntime = true;
189 break;
190 case BoundsCheckingPass::ReportingMode::FullRuntime:
191 Name = "__ubsan_handle_local_out_of_bounds";
192 MayReturn = true;
193 break;
194 case BoundsCheckingPass::ReportingMode::FullRuntimeAbort:
195 Name = "__ubsan_handle_local_out_of_bounds_abort";
196 break;
197 }
198
199 MayMerge = Merge;
200 }
201};
202
204 ScalarEvolution &SE, const ReportingOpts &Opts) {
// Instruments the non-volatile loads, stores, cmpxchg and atomicrmw
// instructions of F with bounds checks. Returns true if at least one check
// condition was recorded, false otherwise (including when F carries the
// NoSanitizeBounds attribute).
// NOTE(review): the first signature line and the declarations of TrapInfo and
// of the IRB used in the final loop are on lines missing from this listing.
205 if (F.hasFnAttribute(Attribute::NoSanitizeBounds))
206 return false;
207
208 const DataLayout &DL = F.getDataLayout();
209 ObjectSizeOpts EvalOpts;
210 EvalOpts.RoundToAlign = true;
212 ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
213
214 // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
215 // touching instructions
// First pass: compute the failure condition for every candidate instruction
// and remember it. The control flow is only rewritten in the second loop
// below, after this iteration over F has finished.
217 for (Instruction &I : instructions(F)) {
218 Value *Or = nullptr;
219 BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
220 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
221 if (!LI->isVolatile())
222 Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
223 ObjSizeEval, IRB, SE);
224 } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
225 if (!SI->isVolatile())
226 Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
227 DL, TLI, ObjSizeEval, IRB, SE);
228 } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
229 if (!AI->isVolatile())
230 Or =
231 getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
232 DL, TLI, ObjSizeEval, IRB, SE);
233 } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
234 if (!AI->isVolatile())
235 Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(),
236 DL, TLI, ObjSizeEval, IRB, SE);
237 }
238 if (Or)
239 TrapInfo.push_back(std::make_pair(&I, Or));
240 }
241
242 // Create a trapping basic block on demand using a callback. Depending on
243 // flags, this will either create a single block for the entire function or
244 // will create a fresh block every time it is called.
245 BasicBlock *ReuseTrapBB = nullptr;
246 auto GetTrapBB = [&ReuseTrapBB, &Opts](BuilderTy &IRB, BasicBlock *Cont) {
247 Function *Fn = IRB.GetInsertBlock()->getParent();
248 auto DebugLoc = IRB.getCurrentDebugLocation();
250
251 // If a shared trap block has already been created for this function, hand
252 // it back instead of building a new one.
253 //
254 if (ReuseTrapBB)
255 return ReuseTrapBB;
256
257 BasicBlock *TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
258 IRB.SetInsertPoint(TrapBB);
259
// When merging is not allowed, the trap call is marked nomerge (and, in trap
// mode, InsertTrap emits ubsantrap instead of trap).
260 bool DebugTrapBB = !Opts.MayMerge;
261 CallInst *TrapCall = Opts.UseTrap
262 ? InsertTrap(IRB, DebugTrapBB)
263 : InsertCall(IRB, Opts.MayReturn, Opts.Name);
264 if (DebugTrapBB)
265 TrapCall->addFnAttr(llvm::Attribute::NoMerge);
266
267 TrapCall->setDoesNotThrow();
268 TrapCall->setDebugLoc(DebugLoc);
// A returning handler resumes at the continuation block; otherwise the trap
// block ends in `unreachable`.
269 if (Opts.MayReturn) {
270 IRB.CreateBr(Cont);
271 } else {
272 TrapCall->setDoesNotReturn();
273 IRB.CreateUnreachable();
274 }
275
// Only a block that never branches back to a specific continuation can be
// shared, and only when the single-trap flag is set and merging is allowed.
276 if (!Opts.MayReturn && SingleTrapBB && !DebugTrapBB)
277 ReuseTrapBB = TrapBB;
278
279 return TrapBB;
280 };
281
// Second pass: split blocks and insert the branches for each recorded check.
282 for (const auto &Entry : TrapInfo) {
283 Instruction *Inst = Entry.first;
285 insertBoundsCheck(Entry.second, IRB, GetTrapBB);
286 }
287
288 return !TrapInfo.empty();
289}
290
// Pass entry point body. (The function header and the return statement for the
// instrumented case are on lines missing from this listing.)
// Fetch the analyses that addBoundsChecking takes as arguments.
292 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
293 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
294
// addBoundsChecking returned false: report every analysis as preserved.
295 if (!addBoundsChecking(F, TLI, SE,
296 ReportingOpts(Options.Mode, Options.Merge)))
297 return PreservedAnalyses::all();
298
300}
301
303 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
// Writes this pass's parameter suffix to OS: "<" + a mode keyword, then
// ";merge" when Options.Merge is set, then ">".
// NOTE(review): the function header, the call whose arguments appear on the
// next line, and the `case` labels of the switch are on lines missing from
// this listing; presumably each keyword corresponds to one ReportingMode
// value -- confirm against the real file.
305 OS, MapClassName2PassName);
306 switch (Options.Mode) {
308 OS << "<trap";
309 break;
311 OS << "<min-rt";
312 break;
314 OS << "<min-rt-abort";
315 break;
317 OS << "<rt";
318 break;
320 OS << "<rt-abort";
321 break;
322 }
323 if (Options.Merge)
324 OS << ";merge";
325 OS << ">";
326}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB)
Adds run-time bounds checks to memory accessing instructions.
static CallInst * InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name)
static Value * getBoundsCheckCond(Value *Ptr, Value *InstVal, const DataLayout &DL, TargetLibraryInfo &TLI, ObjectSizeOffsetEvaluator &ObjSizeEval, BuilderTy &IRB, ScalarEvolution &SE)
Gets the conditions under which memory accessing instructions will overflow.
static CallInst * InsertTrap(BuilderTy &IRB, bool DebugTrapBB)
static cl::opt< bool > SingleTrapBB("bounds-checking-single-trap", cl::desc("Use one trap block per function"))
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ScalarEvolution &SE, const ReportingOpts &Opts)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(...)
Definition: Debug.h:106
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
R600 Clause Merge
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
raw_pwrite_stream & OS
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:577
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setDoesNotReturn()
Definition: InstrTypes.h:1917
void setDoesNotThrow()
Definition: InstrTypes.h:1924
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
Definition: InstrTypes.h:1482
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:873
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
size_t size() const
Definition: Function.h:858
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2289
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateTypeSize(Type *DstType, TypeSize Size)
Create an expression which evaluates to the number of units in Size at runtime.
Definition: IRBuilder.cpp:103
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2305
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:204
Evaluate the size and offset of an object pointed to by a Value*.
SizeOffsetValue compute(Value *V)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
bool empty() const
Definition: SmallVector.h:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getVoidTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ Or
Bitwise or logical OR of integers.
ReportingOpts(BoundsCheckingPass::ReportingMode Mode, bool Merge)
BoundsCheckingOptions(ReportingMode Mode, bool Merge)
Various options to control the behavior of getObjectSize.
Mode EvalMode
How we want to evaluate this object's size.
bool RoundToAlign
Whether to round the result up to the alignment of allocas, byval arguments, and global variables.
@ ExactUnderlyingSizeAndOffset
All branches must be known and have the same underlying size and offset to be merged.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69
bool bothKnown() const