LLVM 19.0.0git
PoisonChecking.cpp
Go to the documentation of this file.
1//===- PoisonChecking.cpp - -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implements a transform pass which instruments IR such that poison semantics
10// are made explicit. That is, it provides a (possibly partial) executable
11// semantics for every instruction w.r.t. poison as specified in the LLVM
12// LangRef. There are obvious parallels to the sanitizer tools, but this pass
13// is focused purely on the semantics of LLVM IR, not any particular source
14// language. If you're looking for something to see if your C/C++ contains
15// UB, this is not it.
16//
17// The rewritten semantics of each instruction will include the following
18// components:
19//
20// 1) The original instruction, unmodified.
21// 2) A propagation rule which translates dynamic information about the poison
22// state of each input to whether the dynamic output of the instruction
23// produces poison.
24// 3) A creation rule which validates any poison producing flags on the
25// instruction itself (e.g. checks for overflow on nsw).
26// 4) A check rule which traps (to a handler function) if this instruction must
27// execute undefined behavior given the poison state of it's inputs.
28//
29// This is a must analysis based transform; that is, the resulting code may
30// produce a false negative result (not report UB when actually exists
31// according to the LangRef spec), but should never produce a false positive
32// (report UB where it doesn't exist).
33//
34// Use cases for this pass include:
35// - Understanding (and testing!) the implications of the definition of poison
36// from the LangRef.
37// - Validating the output of a IR fuzzer to ensure that all programs produced
38// are well defined on the specific input used.
39// - Finding/confirming poison specific miscompiles by checking the poison
40// status of an input/IR pair is the same before and after an optimization
41// transform.
42// - Checking that a bugpoint reduction does not introduce UB which didn't
43// exist in the original program being reduced.
44//
45// The major sources of inaccuracy are currently:
46// - Most validation rules not yet implemented for instructions with poison
47// relavant flags. At the moment, only nsw/nuw on add/sub are supported.
48// - UB which is control dependent on a branch on poison is not yet
49// reported. Currently, only data flow dependence is modeled.
50// - Poison which is propagated through memory is not modeled. As such,
51// storing poison to memory and then reloading it will cause a false negative
52// as we consider the reloaded value to not be poisoned.
53// - Poison propagation across function boundaries is not modeled. At the
54// moment, all arguments and return values are assumed not to be poison.
55// - Undef is not modeled. In particular, the optimizer's freedom to pick
56// concrete values for undef bits so as to maximize potential for producing
57// poison is not modeled.
58//
59//===----------------------------------------------------------------------===//
60
62#include "llvm/ADT/DenseMap.h"
64#include "llvm/IR/IRBuilder.h"
65#include "llvm/IR/Module.h"
67
68using namespace llvm;
69
70#define DEBUG_TYPE "poison-checking"
71
72static cl::opt<bool>
73LocalCheck("poison-checking-function-local",
74 cl::init(false),
75 cl::desc("Check that returns are non-poison (for testing)"));
76
77
78static bool isConstantFalse(Value* V) {
79 assert(V->getType()->isIntegerTy(1));
80 if (auto *CI = dyn_cast<ConstantInt>(V))
81 return CI->isZero();
82 return false;
83}
84
86 if (Ops.size() == 0)
87 return B.getFalse();
88 unsigned i = 0;
89 for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
90 if (i == Ops.size())
91 return B.getFalse();
92 Value *Accum = Ops[i++];
93 for (Value *Op : llvm::drop_begin(Ops, i))
94 if (!isConstantFalse(Op))
95 Accum = B.CreateOr(Accum, Op);
96 return Accum;
97}
98
100 SmallVectorImpl<Value*> &Checks) {
101 assert(isa<BinaryOperator>(I));
102
103 IRBuilder<> B(&I);
104 Value *LHS = I.getOperand(0);
105 Value *RHS = I.getOperand(1);
106 switch (I.getOpcode()) {
107 default:
108 return;
109 case Instruction::Add: {
110 if (I.hasNoSignedWrap()) {
111 auto *OverflowOp =
112 B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
113 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
114 }
115 if (I.hasNoUnsignedWrap()) {
116 auto *OverflowOp =
117 B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
118 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
119 }
120 break;
121 }
122 case Instruction::Sub: {
123 if (I.hasNoSignedWrap()) {
124 auto *OverflowOp =
125 B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
126 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
127 }
128 if (I.hasNoUnsignedWrap()) {
129 auto *OverflowOp =
130 B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
131 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
132 }
133 break;
134 }
135 case Instruction::Mul: {
136 if (I.hasNoSignedWrap()) {
137 auto *OverflowOp =
138 B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
139 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
140 }
141 if (I.hasNoUnsignedWrap()) {
142 auto *OverflowOp =
143 B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
144 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
145 }
146 break;
147 }
148 case Instruction::UDiv: {
149 if (I.isExact()) {
150 auto *Check =
151 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
152 ConstantInt::get(LHS->getType(), 0));
153 Checks.push_back(Check);
154 }
155 break;
156 }
157 case Instruction::SDiv: {
158 if (I.isExact()) {
159 auto *Check =
160 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
161 ConstantInt::get(LHS->getType(), 0));
162 Checks.push_back(Check);
163 }
164 break;
165 }
166 case Instruction::AShr:
167 case Instruction::LShr:
168 case Instruction::Shl: {
169 Value *ShiftCheck =
170 B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
171 ConstantInt::get(RHS->getType(),
173 Checks.push_back(ShiftCheck);
174 break;
175 }
176 };
177}
178
179/// Given an instruction which can produce poison on non-poison inputs
180/// (i.e. canCreatePoison returns true), generate runtime checks to produce
181/// boolean indicators of when poison would result.
183 SmallVectorImpl<Value*> &Checks) {
184 IRBuilder<> B(&I);
185 if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
187
188 // Handle non-binops separately
189 switch (I.getOpcode()) {
190 default:
191 // Note there are a couple of missing cases here, once implemented, this
192 // should become an llvm_unreachable.
193 break;
194 case Instruction::ExtractElement: {
195 Value *Vec = I.getOperand(0);
196 auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
197 if (!VecVTy)
198 break;
199 Value *Idx = I.getOperand(1);
200 unsigned NumElts = VecVTy->getNumElements();
201 Value *Check =
202 B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
203 ConstantInt::get(Idx->getType(), NumElts));
204 Checks.push_back(Check);
205 break;
206 }
207 case Instruction::InsertElement: {
208 Value *Vec = I.getOperand(0);
209 auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
210 if (!VecVTy)
211 break;
212 Value *Idx = I.getOperand(2);
213 unsigned NumElts = VecVTy->getNumElements();
214 Value *Check =
215 B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
216 ConstantInt::get(Idx->getType(), NumElts));
217 Checks.push_back(Check);
218 break;
219 }
220 };
221}
222
224 auto Itr = ValToPoison.find(V);
225 if (Itr != ValToPoison.end())
226 return Itr->second;
227 if (isa<Constant>(V)) {
228 return ConstantInt::getFalse(V->getContext());
229 }
230 // Return false for unknwon values - this implements a non-strict mode where
231 // unhandled IR constructs are simply considered to never produce poison. At
232 // some point in the future, we probably want a "strict mode" for testing if
233 // nothing else.
234 return ConstantInt::getFalse(V->getContext());
235}
236
238 assert(Cond->getType()->isIntegerTy(1));
239 if (auto *CI = dyn_cast<ConstantInt>(Cond))
240 if (CI->isAllOnesValue())
241 return;
242
243 Module *M = B.GetInsertBlock()->getModule();
244 M->getOrInsertFunction("__poison_checker_assert",
245 Type::getVoidTy(M->getContext()),
246 Type::getInt1Ty(M->getContext()));
247 Function *TrapFunc = M->getFunction("__poison_checker_assert");
248 B.CreateCall(TrapFunc, Cond);
249}
250
252 assert(Cond->getType()->isIntegerTy(1));
253 CreateAssert(B, B.CreateNot(Cond));
254}
255
256static bool rewrite(Function &F) {
257 auto * const Int1Ty = Type::getInt1Ty(F.getContext());
258
259 DenseMap<Value *, Value *> ValToPoison;
260
261 for (BasicBlock &BB : F)
262 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
263 auto *OldPHI = cast<PHINode>(&*I);
264 auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
265 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
266 NewPHI->addIncoming(UndefValue::get(Int1Ty),
267 OldPHI->getIncomingBlock(i));
268 NewPHI->insertBefore(OldPHI);
269 ValToPoison[OldPHI] = NewPHI;
270 }
271
272 for (BasicBlock &BB : F)
273 for (Instruction &I : BB) {
274 if (isa<PHINode>(I)) continue;
275
276 IRBuilder<> B(cast<Instruction>(&I));
277
278 // Note: There are many more sources of documented UB, but this pass only
279 // attempts to find UB triggered by propagation of poison.
281 SmallPtrSet<const Value *, 4> SeenNonPoisonOps;
282 getGuaranteedNonPoisonOps(&I, NonPoisonOps);
283 for (const Value *Op : NonPoisonOps)
284 if (SeenNonPoisonOps.insert(Op).second)
286 getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
287
288 if (LocalCheck)
289 if (auto *RI = dyn_cast<ReturnInst>(&I))
290 if (RI->getNumOperands() != 0) {
291 Value *Op = RI->getOperand(0);
292 CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
293 }
294
296 for (const Use &U : I.operands()) {
297 if (ValToPoison.count(U) && propagatesPoison(U))
298 Checks.push_back(getPoisonFor(ValToPoison, U));
299 }
300
301 if (canCreatePoison(cast<Operator>(&I)))
302 generateCreationChecks(I, Checks);
303 ValToPoison[&I] = buildOrChain(B, Checks);
304 }
305
306 for (BasicBlock &BB : F)
307 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
308 auto *OldPHI = cast<PHINode>(&*I);
309 if (!ValToPoison.count(OldPHI))
310 continue; // skip the newly inserted phis
311 auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
312 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
313 auto *OldVal = OldPHI->getIncomingValue(i);
314 NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
315 }
316 }
317 return true;
318}
319
320
323 bool Changed = false;
324 for (auto &F : M)
325 Changed |= rewrite(F);
326
327 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
328}
329
333}
334
335/* Major TODO Items:
336 - Control dependent poison UB
337 - Strict mode - (i.e. must analyze every operand)
338 - Poison through memory
339 - Function ABIs
340 - Full coverage of intrinsics, etc.. (ouch)
341
342 Instructions w/Unclear Semantics:
343 - shufflevector - It would seem reasonable for an out of bounds mask element
344 to produce poison, but the LangRef does not state.
345 - all binary ops w/vector operands - The likely interpretation would be that
346 any element overflowing should produce poison for the entire result, but
347 the LangRef does not state.
348 - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
349 strange that only certian flags should be documented as producing poison.
350
351 Cases of clear poison semantics not yet implemented:
352 - Exact flags on ashr/lshr produce poison
353 - NSW/NUW flags on shl produce poison
354 - Inbounds flag on getelementptr produce poison
355 - fptosi/fptoui (out of bounds input) produce poison
356 - Scalable vector types for insertelement/extractelement
357 - Floating point binary ops w/fmf nnan/noinfs flags produce poison
358 */
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
#define Check(C,...)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
static void generateCreationChecksForBinOp(Instruction &I, SmallVectorImpl< Value * > &Checks)
static void CreateAssertNot(IRBuilder<> &B, Value *Cond)
static bool rewrite(Function &F)
static Value * getPoisonFor(DenseMap< Value *, Value * > &ValToPoison, Value *V)
static cl::opt< bool > LocalCheck("poison-checking-function-local", cl::init(false), cl::desc("Check that returns are non-poison (for testing)"))
static bool isConstantFalse(Value *V)
static Value * buildOrChain(IRBuilder<> &B, ArrayRef< Value * > Ops)
static void generateCreationChecks(Instruction &I, SmallVectorImpl< Value * > &Checks)
Given an instruction which can produce poison on non-poison inputs (i.e.
static void CreateAssert(IRBuilder<> &B, Value *Cond)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2671
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1833
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
bool canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
void getGuaranteedNonPoisonOps(const Instruction *I, SmallVectorImpl< const Value * > &Ops)
Insert operands of I into Ops such that I will trigger undefined behavior if I is executed and that o...
bool propagatesPoison(const Use &PoisonOp)
Return true if PoisonOp's user yields poison or raises UB if its operand PoisonOp is poison.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)