LLVM  12.0.0git
PoisonChecking.cpp
Go to the documentation of this file.
1 //===- PoisonChecking.cpp - -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements a transform pass which instruments IR such that poison semantics
10 // are made explicit. That is, it provides a (possibly partial) executable
11 // semantics for every instruction w.r.t. poison as specified in the LLVM
12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass
13 // is focused purely on the semantics of LLVM IR, not any particular source
14 // language. If you're looking for something to see if your C/C++ contains
15 // UB, this is not it.
16 //
17 // The rewritten semantics of each instruction will include the following
18 // components:
19 //
20 // 1) The original instruction, unmodified.
21 // 2) A propagation rule which translates dynamic information about the poison
22 // state of each input to whether the dynamic output of the instruction
23 // produces poison.
24 // 3) A creation rule which validates any poison producing flags on the
25 // instruction itself (e.g. checks for overflow on nsw).
26 // 4) A check rule which traps (to a handler function) if this instruction must
27 // execute undefined behavior given the poison state of it's inputs.
28 //
29 // This is a must analysis based transform; that is, the resulting code may
30 // produce a false negative result (not report UB when actually exists
31 // according to the LangRef spec), but should never produce a false positive
32 // (report UB where it doesn't exist).
33 //
34 // Use cases for this pass include:
35 // - Understanding (and testing!) the implications of the definition of poison
36 // from the LangRef.
37 // - Validating the output of a IR fuzzer to ensure that all programs produced
38 // are well defined on the specific input used.
39 // - Finding/confirming poison specific miscompiles by checking the poison
40 // status of an input/IR pair is the same before and after an optimization
41 // transform.
42 // - Checking that a bugpoint reduction does not introduce UB which didn't
43 // exist in the original program being reduced.
44 //
45 // The major sources of inaccuracy are currently:
46 // - Most validation rules not yet implemented for instructions with poison
47 // relavant flags. At the moment, only nsw/nuw on add/sub are supported.
48 // - UB which is control dependent on a branch on poison is not yet
49 // reported. Currently, only data flow dependence is modeled.
50 // - Poison which is propagated through memory is not modeled. As such,
51 // storing poison to memory and then reloading it will cause a false negative
52 // as we consider the reloaded value to not be poisoned.
53 // - Poison propagation across function boundaries is not modeled. At the
54 // moment, all arguments and return values are assumed not to be poison.
55 // - Undef is not modeled. In particular, the optimizer's freedom to pick
56 // concrete values for undef bits so as to maximize potential for producing
57 // poison is not modeled.
58 //
59 //===----------------------------------------------------------------------===//
60 
62 #include "llvm/ADT/DenseMap.h"
63 #include "llvm/ADT/Statistic.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/InstVisitor.h"
68 #include "llvm/IR/IntrinsicInst.h"
69 #include "llvm/IR/PatternMatch.h"
71 #include "llvm/Support/Debug.h"
72 
73 using namespace llvm;
74 
75 #define DEBUG_TYPE "poison-checking"
76 
77 static cl::opt<bool>
78 LocalCheck("poison-checking-function-local",
79  cl::init(false),
80  cl::desc("Check that returns are non-poison (for testing)"));
81 
82 
83 static bool isConstantFalse(Value* V) {
84  assert(V->getType()->isIntegerTy(1));
85  if (auto *CI = dyn_cast<ConstantInt>(V))
86  return CI->isZero();
87  return false;
88 }
89 
91  if (Ops.size() == 0)
92  return B.getFalse();
93  unsigned i = 0;
94  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
95  if (i == Ops.size())
96  return B.getFalse();
97  Value *Accum = Ops[i++];
98  for (; i < Ops.size(); i++)
99  if (!isConstantFalse(Ops[i]))
100  Accum = B.CreateOr(Accum, Ops[i]);
101  return Accum;
102 }
103 
105  SmallVectorImpl<Value*> &Checks) {
106  assert(isa<BinaryOperator>(I));
107 
108  IRBuilder<> B(&I);
109  Value *LHS = I.getOperand(0);
110  Value *RHS = I.getOperand(1);
111  switch (I.getOpcode()) {
112  default:
113  return;
114  case Instruction::Add: {
115  if (I.hasNoSignedWrap()) {
116  auto *OverflowOp =
117  B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
118  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
119  }
120  if (I.hasNoUnsignedWrap()) {
121  auto *OverflowOp =
122  B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
123  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
124  }
125  break;
126  }
127  case Instruction::Sub: {
128  if (I.hasNoSignedWrap()) {
129  auto *OverflowOp =
130  B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
131  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
132  }
133  if (I.hasNoUnsignedWrap()) {
134  auto *OverflowOp =
135  B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
136  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
137  }
138  break;
139  }
140  case Instruction::Mul: {
141  if (I.hasNoSignedWrap()) {
142  auto *OverflowOp =
143  B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
144  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
145  }
146  if (I.hasNoUnsignedWrap()) {
147  auto *OverflowOp =
148  B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
149  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
150  }
151  break;
152  }
153  case Instruction::UDiv: {
154  if (I.isExact()) {
155  auto *Check =
156  B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
157  ConstantInt::get(LHS->getType(), 0));
158  Checks.push_back(Check);
159  }
160  break;
161  }
162  case Instruction::SDiv: {
163  if (I.isExact()) {
164  auto *Check =
165  B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
166  ConstantInt::get(LHS->getType(), 0));
167  Checks.push_back(Check);
168  }
169  break;
170  }
171  case Instruction::AShr:
172  case Instruction::LShr:
173  case Instruction::Shl: {
174  Value *ShiftCheck =
175  B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
176  ConstantInt::get(RHS->getType(),
177  LHS->getType()->getScalarSizeInBits()));
178  Checks.push_back(ShiftCheck);
179  break;
180  }
181  };
182 }
183 
184 /// Given an instruction which can produce poison on non-poison inputs
185 /// (i.e. canCreatePoison returns true), generate runtime checks to produce
186 /// boolean indicators of when poison would result.
188  SmallVectorImpl<Value*> &Checks) {
189  IRBuilder<> B(&I);
190  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
192 
193  // Handle non-binops separately
194  switch (I.getOpcode()) {
195  default:
196  // Note there are a couple of missing cases here, once implemented, this
197  // should become an llvm_unreachable.
198  break;
199  case Instruction::ExtractElement: {
200  Value *Vec = I.getOperand(0);
201  auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
202  if (!VecVTy)
203  break;
204  Value *Idx = I.getOperand(1);
205  unsigned NumElts = VecVTy->getNumElements();
206  Value *Check =
207  B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
208  ConstantInt::get(Idx->getType(), NumElts));
209  Checks.push_back(Check);
210  break;
211  }
212  case Instruction::InsertElement: {
213  Value *Vec = I.getOperand(0);
214  auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
215  if (!VecVTy)
216  break;
217  Value *Idx = I.getOperand(2);
218  unsigned NumElts = VecVTy->getNumElements();
219  Value *Check =
220  B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
221  ConstantInt::get(Idx->getType(), NumElts));
222  Checks.push_back(Check);
223  break;
224  }
225  };
226 }
227 
229  auto Itr = ValToPoison.find(V);
230  if (Itr != ValToPoison.end())
231  return Itr->second;
232  if (isa<Constant>(V)) {
233  return ConstantInt::getFalse(V->getContext());
234  }
235  // Return false for unknwon values - this implements a non-strict mode where
236  // unhandled IR constructs are simply considered to never produce poison. At
237  // some point in the future, we probably want a "strict mode" for testing if
238  // nothing else.
239  return ConstantInt::getFalse(V->getContext());
240 }
241 
243  assert(Cond->getType()->isIntegerTy(1));
244  if (auto *CI = dyn_cast<ConstantInt>(Cond))
245  if (CI->isAllOnesValue())
246  return;
247 
248  Module *M = B.GetInsertBlock()->getModule();
249  M->getOrInsertFunction("__poison_checker_assert",
250  Type::getVoidTy(M->getContext()),
251  Type::getInt1Ty(M->getContext()));
252  Function *TrapFunc = M->getFunction("__poison_checker_assert");
253  B.CreateCall(TrapFunc, Cond);
254 }
255 
257  assert(Cond->getType()->isIntegerTy(1));
258  CreateAssert(B, B.CreateNot(Cond));
259 }
260 
261 static bool rewrite(Function &F) {
262  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
263 
264  DenseMap<Value *, Value *> ValToPoison;
265 
266  for (BasicBlock &BB : F)
267  for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
268  auto *OldPHI = cast<PHINode>(&*I);
269  auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
270  for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
271  NewPHI->addIncoming(UndefValue::get(Int1Ty),
272  OldPHI->getIncomingBlock(i));
273  NewPHI->insertBefore(OldPHI);
274  ValToPoison[OldPHI] = NewPHI;
275  }
276 
277  for (BasicBlock &BB : F)
278  for (Instruction &I : BB) {
279  if (isa<PHINode>(I)) continue;
280 
281  IRBuilder<> B(cast<Instruction>(&I));
282 
283  // Note: There are many more sources of documented UB, but this pass only
284  // attempts to find UB triggered by propagation of poison.
285  SmallPtrSet<const Value *, 4> NonPoisonOps;
286  getGuaranteedNonPoisonOps(&I, NonPoisonOps);
287  for (const Value *Op : NonPoisonOps)
288  CreateAssertNot(B, getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
289 
290  if (LocalCheck)
291  if (auto *RI = dyn_cast<ReturnInst>(&I))
292  if (RI->getNumOperands() != 0) {
293  Value *Op = RI->getOperand(0);
294  CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
295  }
296 
297  SmallVector<Value*, 4> Checks;
298  if (propagatesPoison(cast<Operator>(&I)))
299  for (Value *V : I.operands())
300  Checks.push_back(getPoisonFor(ValToPoison, V));
301 
302  if (canCreatePoison(cast<Operator>(&I)))
303  generateCreationChecks(I, Checks);
304  ValToPoison[&I] = buildOrChain(B, Checks);
305  }
306 
307  for (BasicBlock &BB : F)
308  for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
309  auto *OldPHI = cast<PHINode>(&*I);
310  if (!ValToPoison.count(OldPHI))
311  continue; // skip the newly inserted phis
312  auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
313  for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
314  auto *OldVal = OldPHI->getIncomingValue(i);
315  NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
316  }
317  }
318  return true;
319 }
320 
321 
323  ModuleAnalysisManager &AM) {
324  bool Changed = false;
325  for (auto &F : M)
326  Changed |= rewrite(F);
327 
328  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
329 }
330 
334 }
335 
336 /* Major TODO Items:
337  - Control dependent poison UB
338  - Strict mode - (i.e. must analyze every operand)
339  - Poison through memory
340  - Function ABIs
341  - Full coverage of intrinsics, etc.. (ouch)
342 
343  Instructions w/Unclear Semantics:
344  - shufflevector - It would seem reasonable for an out of bounds mask element
345  to produce poison, but the LangRef does not state.
346  - all binary ops w/vector operands - The likely interpretation would be that
347  any element overflowing should produce poison for the entire result, but
348  the LangRef does not state.
349  - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
350  strange that only certian flags should be documented as producing poison.
351 
352  Cases of clear poison semantics not yet implemented:
353  - Exact flags on ashr/lshr produce poison
354  - NSW/NUW flags on shl produce poison
355  - Inbounds flag on getelementptr produce poison
356  - fptosi/fptoui (out of bounds input) produce poison
357  - Scalable vector types for insertelement/extractelement
358  - Floating point binary ops w/fmf nnan/noinfs flags produce poison
359  */
static bool Check(DecodeStatus &Out, DecodeStatus In)
static void generateCreationChecksForBinOp(Instruction &I, SmallVectorImpl< Value * > &Checks)
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:822
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:194
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
static void generateCreationChecks(Instruction &I, SmallVectorImpl< Value * > &Checks)
Given an instruction which can produce poison on non-poison inputs (i.e.
bool propagatesPoison(const Operator *I)
Return true if I yields poison or raises UB if any of its operands is poison.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:868
static Value * getPoisonFor(DenseMap< Value *, Value * > &ValToPoison, Value *V)
F(f)
void getGuaranteedNonPoisonOps(const Instruction *I, SmallPtrSetImpl< const Value * > &Ops)
Insert operands of I into Ops such that I will trigger undefined behavior if I is executed and that o...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
unsigned greater or equal
Definition: InstrTypes.h:746
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
static Value * buildOrChain(IRBuilder<> &B, ArrayRef< Value * > Ops)
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:427
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
* if(!EatIfPresent(lltok::kw_thread_local)) return false
parseOptionalThreadLocal := /*empty
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:156
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:180
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1742
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
static bool isConstantFalse(Value *V)
Sum of integers.
static cl::opt< bool > LocalCheck("poison-checking-function-local", cl::init(false), cl::desc("Check that returns are non-poison (for testing)"))
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:442
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:147
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1116
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:867
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Product of integers.
static void CreateAssert(IRBuilder<> &B, Value *Cond)
#define I(x, y, z)
Definition: MD5.cpp:59
static void CreateAssertNot(IRBuilder<> &B, Value *Cond)
static bool rewrite(Function &F)
SmallVector< MachineOperand, 4 > Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:75
A container for analyses that lazily runs them and caches their results.
bool canCreatePoison(const Operator *Op)