LLVM 22.0.0git
BoundsChecking.cpp
Go to the documentation of this file.
1//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/Statistic.h"
11#include "llvm/ADT/StringRef.h"
12#include "llvm/ADT/Twine.h"
17#include "llvm/IR/BasicBlock.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/DataLayout.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/Value.h"
29#include "llvm/Support/Debug.h"
31#include <utility>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "bounds-checking"
36
// Command-line switch "bounds-checking-single-trap". It is consulted by the
// trap-block factory in addBoundsChecking when deciding whether a freshly
// created trap block may be remembered and handed out again for later checks.
37static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
38 cl::desc("Use one trap block per function"));
39
// Pass statistics:
//  - ChecksAdded:   bumped by insertBoundsCheck for every check it materializes.
//  - ChecksSkipped: bumped by insertBoundsCheck whenever the guard condition is
//                   a constant.
//  - ChecksUnable:  bumped by getBoundsCheckCond when the object's size/offset
//                   are not both known.
40STATISTIC(ChecksAdded, "Bounds checks added");
41STATISTIC(ChecksSkipped, "Bounds checks skipped");
42STATISTIC(ChecksUnable, "Bounds checks unable to add");
43
// IRBuilder specialization used throughout this file; it folds constants with
// TargetFolder and is positioned at (TheBB, IP) on construction.
//
// NOTE(review): this listing skips original lines 46 and 48 -- the
// constructor's declaration and the single statement in its body. The
// cross-reference index at the end of the listing gives the declaration as
// BuilderTy(BasicBlock *TheBB, BasicBlock::iterator IP, TargetFolder Folder);
// confirm both lines against the real source before editing this class.
44class BuilderTy : public IRBuilder<TargetFolder> {
45public:
    // Forwards the insertion block, insertion point and folder to the base.
47 : IRBuilder<TargetFolder>(TheBB, IP, Folder) {
49 }
50};
51
52/// Gets the conditions under which memory accessing instructions will overflow.
53///
54/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
55/// the result from the load or the value being stored. It is used to determine
56/// the size of memory block that is touched.
57///
58/// Returns the condition under which the access will overflow.
/// Returns nullptr, after incrementing ChecksUnable, when \p ObjSizeEval cannot
/// determine both the size of the underlying object and the offset of \p Ptr
/// within it.
///
/// \p SE is only used to obtain unsigned value ranges for the size, offset and
/// needed-size values so that comparisons can be decided statically.
59static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
60 const DataLayout &DL, TargetLibraryInfo &TLI,
61 ObjectSizeOffsetEvaluator &ObjSizeEval,
62 BuilderTy &IRB, ScalarEvolution &SE) {
    // Number of bytes the access touches: the store size of the accessed type.
63 TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType());
64 LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
65 << " bytes\n");
66
67 SizeOffsetValue SizeOffset = ObjSizeEval.compute(Ptr);
68
    // Without both a size and an offset there is nothing to compare against.
69 if (!SizeOffset.bothKnown()) {
70 ++ChecksUnable;
71 return nullptr;
72 }
73
74 Value *Size = SizeOffset.Size;
75 Value *Offset = SizeOffset.Offset;
    // NOTE(review): original line 76 is absent from this listing; SizeCI, which
    // is tested further down, is presumably declared there -- confirm.
77
    // Materialize the needed size as a value of the pointer's index type.
78 Type *IndexTy = DL.getIndexType(Ptr->getType());
79 Value *NeededSizeVal = IRB.CreateTypeSize(IndexTy, NeededSize);
80
81 auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
82 auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
83 auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal));
84
85 // three checks are required to ensure safety:
86 // . Offset >= 0 (since the offset is given from the base ptr)
87 // . Size >= Offset (unsigned)
88 // . Size - Offset >= NeededSize (unsigned)
89 //
90 // optimization: if Size >= 0 (signed), skip 1st check
91 // FIXME: add NSW/NUW here? -- we don't care if the subtraction overflows
    //
    // The values built below are the *failure* conditions (the negations of the
    // requirements listed above); they are OR-ed into the returned value.
    //
    // NOTE(review): original lines 94-95 and 99 are absent from this listing, so
    // the remaining arms of the two conditional expressions that initialize Cmp2
    // and Cmp3 are only partly visible. What is visible: each expression first
    // asks the SCEV ranges whether the requirement provably holds, and Cmp3's
    // fallback arm is the run-time compare ObjSize <u NeededSizeVal.
92 Value *ObjSize = IRB.CreateSub(Size, Offset);
93 Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax())
96 Value *Cmp3 = SizeRange.sub(OffsetRange)
97 .getUnsignedMin()
98 .uge(NeededSizeRange.getUnsignedMax())
100 : IRB.CreateICmpULT(ObjSize, NeededSizeVal);
101 Value *Or = IRB.CreateOr(Cmp2, Cmp3);
    // The signed "Offset < 0" test is added only when Size is not a known
    // non-negative constant and its range's signed minimum is not provably
    // non-negative.
102 if ((!SizeCI || SizeCI->getValue().slt(0)) &&
103 !SizeRange.getSignedMin().isNonNegative()) {
104 Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0));
105 Or = IRB.CreateOr(Cmp1, Or);
106 }
107
108 return Or;
109}
110
111static CallInst *InsertTrap(BuilderTy &IRB, bool DebugTrapBB,
112 std::optional<int8_t> GuardKind) {
113 if (!DebugTrapBB)
114 return IRB.CreateIntrinsic(Intrinsic::trap, {});
115
116 return IRB.CreateIntrinsic(
117 Intrinsic::ubsantrap,
118 ConstantInt::get(IRB.getInt8Ty(),
119 GuardKind.has_value()
120 ? GuardKind.value()
121 : IRB.GetInsertBlock()->getParent()->size()));
122}
123
// Emits, at the current insertion point of IRB, a call with no arguments to
// the function called Name and returns that call.
//
// The attribute list built here always carries NoUnwind and additionally
// NoReturn when MayReturn is false; it is passed, together with a void type,
// to the expression that produces Callee.
//
// NOTE(review): original line 131 (the start of the statement that defines
// Callee) is absent from this listing; the cross-reference index mentions
// Module::getOrInsertFunction, which is presumably what is called -- confirm.
124static CallInst *InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name) {
125 Function *Fn = IRB.GetInsertBlock()->getParent();
126 LLVMContext &Ctx = Fn->getContext();
127 llvm::AttrBuilder B(Ctx);
128 B.addAttribute(llvm::Attribute::NoUnwind);
129 if (!MayReturn)
130 B.addAttribute(llvm::Attribute::NoReturn);
132 Name,
133 llvm::AttributeList::get(Ctx, llvm::AttributeList::FunctionIndex, B),
134 Type::getVoidTy(Ctx));
135 return IRB.CreateCall(Callee);
136}
137
138/// Adds run-time bounds checks to memory accessing instructions.
139///
140/// \p Or is the condition that should guard the trap.
141///
142/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
/// It is invoked as GetTrapBB(IRB, Cont), where Cont is the block that holds
/// the code following the split point.
143template <typename GetTrapBBT>
144static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) {
145 // check if the comparison is always false
    // NOTE(review): original line 146, which defines C, is absent from this
    // listing; from its uses below C is a possibly-null constant view of Or.
147 if (C) {
148 ++ChecksSkipped;
149 // A constant zero means the check can never fire, so nothing to do.
150 if (!C->getZExtValue())
151 return;
152 }
    // NOTE(review): a non-zero constant is counted in ChecksSkipped above and
    // again in ChecksAdded here -- confirm that double counting is intended.
153 ++ChecksAdded;
154
    // NOTE(review): original line 155, which defines SplitI, is absent from
    // this listing.
    // Split the block at SplitI, then drop OldBB's terminator so that one of
    // the branches created below (with OldBB as insert position) replaces it.
156 BasicBlock *OldBB = SplitI->getParent();
157 BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
158 OldBB->getTerminator()->eraseFromParent();
159
160 BasicBlock *TrapBB = GetTrapBB(IRB, Cont);
161
162 if (C) {
163 // A constant reaching this point is non-zero (the zero case returned
    // above), so the check always fails: branch to the trap unconditionally.
164 // FIXME: We should really handle this differently to bypass the splitting
165 // the block.
166 BranchInst::Create(TrapBB, OldBB);
167 return;
168 }
169
170 // Create the conditional branch.
    // Control goes to TrapBB when Or is true and to Cont otherwise.
171 BranchInst::Create(TrapBB, Cont, Or, OldBB);
172}
173
// Builds the name of the runtime handler to call on a failed bounds check.
//
// Starting from "__ubsan_handle_local_out_of_bounds", the name gets "_minimal"
// appended when Opts.MinRuntime is set, then "_abort" when Opts.MayReturn is
// false; "_preserve" is appended only when the handler may return and
// Opts.HandlerPreserveAllRegs is set, so "_abort" and "_preserve" never occur
// together.
//
// NOTE(review): original line 175 (the declarator) is absent from this
// listing; the cross-reference index at the end gives it as
// getRuntimeCallName(const BoundsCheckingPass::Options::Runtime &Opts).
174static std::string
176 std::string Name = "__ubsan_handle_local_out_of_bounds";
177 if (Opts.MinRuntime)
178 Name += "_minimal";
179 if (!Opts.MayReturn)
180 Name += "_abort";
181 else if (Opts.HandlerPreserveAllRegs)
182 Name += "_preserve";
183 return Name;
184}
185
// Instruments the non-volatile loads, stores, compare-exchange and atomic RMW
// instructions of F with bounds checks and returns true if at least one check
// condition was recorded (false immediately when F carries the
// NoSanitizeBounds attribute).
//
// Works in two phases: first every instruction is visited and the guard
// condition for each instrumentable access is collected in TrapInfo; only
// afterwards are blocks split and branches to trap blocks inserted.
//
// NOTE(review): this listing skips original line 186 (the function header;
// the cross-reference index at the end gives it as
// static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ...))
// as well as lines 195, 200, 212, 245, 275 and 284. Gaps are flagged below.
187 ScalarEvolution &SE,
188 const BoundsCheckingPass::Options &Opts) {
189 if (F.hasFnAttribute(Attribute::NoSanitizeBounds))
190 return false;
191
192 const DataLayout &DL = F.getDataLayout();
193 ObjectSizeOpts EvalOpts;
194 EvalOpts.RoundToAlign = true;
    // NOTE(review): original line 195 is absent here (presumably one more
    // EvalOpts setting -- confirm).
196 ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
197
198 // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
199 // touching instructions
    // NOTE(review): original line 200, which declares TrapInfo, is absent here;
    // from its uses it is a sequence of (instruction, condition) pairs.
201 for (Instruction &I : instructions(F)) {
202 Value *Or = nullptr;
    // Any IR needed to compute the condition is built right before I.
203 BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
204 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
205 if (!LI->isVolatile())
206 Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
207 ObjSizeEval, IRB, SE);
208 } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
209 if (!SI->isVolatile())
210 Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
211 DL, TLI, ObjSizeEval, IRB, SE);
    // NOTE(review): original line 212 (the branch header that introduces AI
    // for the compare-exchange case) is absent here.
213 if (!AI->isVolatile())
214 Or =
215 getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
216 DL, TLI, ObjSizeEval, IRB, SE);
217 } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
218 if (!AI->isVolatile())
219 Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(),
220 DL, TLI, ObjSizeEval, IRB, SE);
221 }
222 if (Or) {
    // With a guard kind configured, the check only fires when the
    // allow_ubsan_check intrinsic for that kind also yields true.
223 if (Opts.GuardKind) {
224 llvm::Value *Allow = IRB.CreateIntrinsic(
225 IRB.getInt1Ty(), Intrinsic::allow_ubsan_check,
226 {llvm::ConstantInt::getSigned(IRB.getInt8Ty(), *Opts.GuardKind)});
227 Or = IRB.CreateAnd(Or, Allow);
228 }
229 TrapInfo.push_back(std::make_pair(&I, Or));
230 }
231 }
232
    // Name of the runtime handler; stays empty when no runtime is configured.
233 std::string Name;
234 if (Opts.Rt)
235 Name = getRuntimeCallName(*Opts.Rt);
236
237 // Create a trapping basic block on demand using a callback. Depending on
238 // flags, this will either create a single block for the entire function or
239 // will create a fresh block every time it is called.
240 BasicBlock *ReuseTrapBB = nullptr;
241 auto GetTrapBB = [&ReuseTrapBB, &Opts, &Name](BuilderTy &IRB,
242 BasicBlock *Cont) {
243 Function *Fn = IRB.GetInsertBlock()->getParent();
    // Captured now, before the builder is repositioned into the trap block, and
    // attached to the trap call further down.
244 auto DebugLoc = IRB.getCurrentDebugLocation();
    // NOTE(review): original line 245 is absent from this listing.
246
247 // Hand out the shared trap block if an earlier invocation stored one;
248 // otherwise fall through and build a new block.
249 //
250 if (ReuseTrapBB)
251 return ReuseTrapBB;
252
253 BasicBlock *TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
254 IRB.SetInsertPoint(TrapBB);
255
    // Without merging, each trap is treated as a distinct "debug" trap and its
    // call is marked NoMerge.
256 bool DebugTrapBB = !Opts.Merge;
257 CallInst *TrapCall = Opts.Rt ? InsertCall(IRB, Opts.Rt->MayReturn, Name)
258 : InsertTrap(IRB, DebugTrapBB, Opts.GuardKind);
259 if (DebugTrapBB)
260 TrapCall->addFnAttr(llvm::Attribute::NoMerge);
261
262 TrapCall->setDoesNotThrow();
263 TrapCall->setDebugLoc(DebugLoc);
264
    // A returning runtime handler resumes at Cont; every other trap block ends
    // in unreachable.
265 bool MayReturn = Opts.Rt && Opts.Rt->MayReturn;
266 if (MayReturn) {
267 IRB.CreateBr(Cont);
268 } else {
269 TrapCall->setDoesNotReturn();
270 IRB.CreateUnreachable();
271 }
272 // The preserve-all logic is somewhat duplicated in CGExpr.cpp for
273 // local-bounds. Make sure to change that too.
    // NOTE(review): original line 275 -- the statement controlled by the `if`
    // on the next line -- is absent from this listing, which makes the
    // following `if` look nested under it. The cross-reference index mentions
    // setCallingConv and CallingConv::PreserveAll; confirm against the source.
274 if (Opts.Rt && Opts.Rt->HandlerPreserveAllRegs && MayReturn)
    // Remember the block for reuse only when the trap cannot return, the
    // single-trap flag is on, and this is not a debug trap.
276 if (!MayReturn && SingleTrapBB && !DebugTrapBB)
277 ReuseTrapBB = TrapBB;
278
279 return TrapBB;
280 };
281
    // Second phase: materialize a check for every recorded pair.
282 for (const auto &Entry : TrapInfo) {
283 Instruction *Inst = Entry.first;
    // NOTE(review): original line 284, which declares the IRB passed below, is
    // absent from this listing (presumably a builder positioned at Inst).
285 insertBoundsCheck(Entry.second, IRB, GetTrapBB);
286 }
287
288 return !TrapInfo.empty();
289}
290
300
// Prints this pass's textual pipeline parameters to OS inside angle brackets:
// "rt" (prefixed with "min-" for the minimal runtime and suffixed with
// "-abort" when the handler may not return) or "trap" when no runtime is
// configured, followed by ";merge" and ";guard=<n>" when those options are set.
//
// NOTE(review): original lines 301 and 303 (the start of the function header
// and the start of the call that receives OS and MapClassName2PassName) are
// absent from this listing; the cross-reference index gives the signature as
// void printPipeline(raw_ostream &OS, function_ref<StringRef(StringRef)>).
// NOTE(review): Opts.Rt->HandlerPreserveAllRegs is not reflected in the
// printed text -- confirm whether that is intentional.
302 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
304 OS, MapClassName2PassName);
305 OS << "<";
306 if (Opts.Rt) {
307 if (Opts.Rt->MinRuntime)
308 OS << "min-";
309 OS << "rt";
310 if (!Opts.Rt->MayReturn)
311 OS << "-abort";
312 } else {
313 OS << "trap";
314 }
315 if (Opts.Merge)
316 OS << ";merge";
    // The guard kind is widened to int so it prints as a number, not a char.
317 if (Opts.GuardKind)
318 OS << ";guard=" << static_cast<int>(*Opts.GuardKind);
319 OS << ">";
320}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB)
Adds run-time bounds checks to memory accessing instructions.
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ScalarEvolution &SE, const BoundsCheckingPass::Options &Opts)
static CallInst * InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name)
static Value * getBoundsCheckCond(Value *Ptr, Value *InstVal, const DataLayout &DL, TargetLibraryInfo &TLI, ObjectSizeOffsetEvaluator &ObjSizeEval, BuilderTy &IRB, ScalarEvolution &SE)
Gets the conditions under which memory accessing instructions will overflow.
static std::string getRuntimeCallName(const BoundsCheckingPass::Options::Runtime &Opts)
static CallInst * InsertTrap(BuilderTy &IRB, bool DebugTrapBB, std::optional< int8_t > GuardKind)
static cl::opt< bool > SingleTrapBB("bounds-checking-single-trap", cl::desc("Use one trap block per function"))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
BuilderTy(BasicBlock *TheBB, BasicBlock::iterator IP, TargetFolder Folder)
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1131
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setCallingConv(CallingConv::ID CC)
void setDoesNotReturn()
void setDoesNotThrow()
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
A debug info location.
Definition DebugLoc.h:124
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
size_t size() const
Definition Function.h:856
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Module * getParent()
Get the module that this global value is contained inside of...
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2348
void SetNoSanitizeMetadata()
Set nosanitize metadata.
Definition IRBuilder.h:254
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition IRBuilder.h:547
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1551
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2364
LLVM_ABI Value * CreateTypeSize(Type *Ty, TypeSize Size)
Create an expression which evaluates to the number of units in Size at runtime.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
IRBuilder(LLVMContext &C, TargetFolder Folder, IRBuilderDefaultInserter Inserter, MDNode *FPMathTag=nullptr, ArrayRef< OperandBundleDef > OpBundles={})
Definition IRBuilder.h:2794
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition Module.cpp:206
Evaluate the size and offset of an object pointed to by a Value*.
LLVM_ABI SizeOffsetValue compute(Value *V)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetFolder - Create constants with target dependent folding.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
std::optional< Runtime > Rt
std::optional< int8_t > GuardKind
Various options to control the behavior of getObjectSize.
Mode EvalMode
How we want to evaluate this object's size.
bool RoundToAlign
Whether to round the result up to the alignment of allocas, byval arguments, and global variables.
@ ExactUnderlyingSizeAndOffset
All branches must be known and have the same underlying size and offset to be merged.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:69