LLVM  10.0.0svn
PoisonChecking.cpp
Go to the documentation of this file.
1 //===- PoisonChecking.cpp - -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements a transform pass which instruments IR such that poison semantics
10 // are made explicit. That is, it provides a (possibly partial) executable
11 // semantics for every instruction w.r.t. poison as specified in the LLVM
12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass
13 // is focused purely on the semantics of LLVM IR, not any particular source
14 // language. If you're looking for something to see if your C/C++ contains
15 // UB, this is not it.
16 //
17 // The rewritten semantics of each instruction will include the following
18 // components:
19 //
20 // 1) The original instruction, unmodified.
21 // 2) A propagation rule which translates dynamic information about the poison
22 // state of each input to whether the dynamic output of the instruction
23 // produces poison.
24 // 3) A flag validation rule which validates any poison producing flags on the
25 // instruction itself (e.g. checks for overflow on nsw).
26 // 4) A check rule which traps (to a handler function) if this instruction must
27 // execute undefined behavior given the poison state of it's inputs.
28 //
29 // At the moment, the UB detection is done in a best effort manner; that is,
30 // the resulting code may produce a false negative result (not report UB when
31 // it actually exists according to the LangRef spec), but should never produce
32 // a false positive (report UB where it doesn't exist). The intention is to
33 // eventually support a "strict" mode which never dynamically reports a false
34 // negative at the cost of rejecting some valid inputs to translation.
35 //
36 // Use cases for this pass include:
37 // - Understanding (and testing!) the implications of the definition of poison
38 // from the LangRef.
39 // - Validating the output of a IR fuzzer to ensure that all programs produced
40 // are well defined on the specific input used.
41 // - Finding/confirming poison specific miscompiles by checking the poison
42 // status of an input/IR pair is the same before and after an optimization
43 // transform.
44 // - Checking that a bugpoint reduction does not introduce UB which didn't
45 // exist in the original program being reduced.
46 //
47 // The major sources of inaccuracy are currently:
48 // - Most validation rules not yet implemented for instructions with poison
49 // relavant flags. At the moment, only nsw/nuw on add/sub are supported.
50 // - UB which is control dependent on a branch on poison is not yet
51 // reported. Currently, only data flow dependence is modeled.
52 // - Poison which is propagated through memory is not modeled. As such,
53 // storing poison to memory and then reloading it will cause a false negative
54 // as we consider the reloaded value to not be poisoned.
55 // - Poison propagation across function boundaries is not modeled. At the
56 // moment, all arguments and return values are assumed not to be poison.
57 // - Undef is not modeled. In particular, the optimizer's freedom to pick
58 // concrete values for undef bits so as to maximize potential for producing
59 // poison is not modeled.
60 //
61 //===----------------------------------------------------------------------===//
62 
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/Statistic.h"
68 #include "llvm/IR/InstVisitor.h"
69 #include "llvm/IR/IntrinsicInst.h"
70 #include "llvm/IR/IRBuilder.h"
71 #include "llvm/IR/PatternMatch.h"
72 #include "llvm/Support/Debug.h"
73 
74 using namespace llvm;
75 
76 #define DEBUG_TYPE "poison-checking"
77 
78 static cl::opt<bool>
79 LocalCheck("poison-checking-function-local",
80  cl::init(false),
81  cl::desc("Check that returns are non-poison (for testing)"));
82 
83 
84 static bool isConstantFalse(Value* V) {
85  assert(V->getType()->isIntegerTy(1));
86  if (auto *CI = dyn_cast<ConstantInt>(V))
87  return CI->isZero();
88  return false;
89 }
90 
92  if (Ops.size() == 0)
93  return B.getFalse();
94  unsigned i = 0;
95  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
96  if (i == Ops.size())
97  return B.getFalse();
98  Value *Accum = Ops[i++];
99  for (; i < Ops.size(); i++)
100  if (!isConstantFalse(Ops[i]))
101  Accum = B.CreateOr(Accum, Ops[i]);
102  return Accum;
103 }
104 
106  SmallVector<Value*, 2> &Checks) {
107  assert(isa<BinaryOperator>(I));
108 
109  IRBuilder<> B(&I);
110  Value *LHS = I.getOperand(0);
111  Value *RHS = I.getOperand(1);
112  switch (I.getOpcode()) {
113  default:
114  return;
115  case Instruction::Add: {
116  if (I.hasNoSignedWrap()) {
117  auto *OverflowOp =
118  B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
119  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
120  }
121  if (I.hasNoUnsignedWrap()) {
122  auto *OverflowOp =
123  B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
124  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
125  }
126  break;
127  }
128  case Instruction::Sub: {
129  if (I.hasNoSignedWrap()) {
130  auto *OverflowOp =
131  B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
132  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
133  }
134  if (I.hasNoUnsignedWrap()) {
135  auto *OverflowOp =
136  B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
137  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
138  }
139  break;
140  }
141  case Instruction::Mul: {
142  if (I.hasNoSignedWrap()) {
143  auto *OverflowOp =
144  B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
145  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
146  }
147  if (I.hasNoUnsignedWrap()) {
148  auto *OverflowOp =
149  B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
150  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
151  }
152  break;
153  }
154  case Instruction::UDiv: {
155  if (I.isExact()) {
156  auto *Check =
157  B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
158  ConstantInt::get(LHS->getType(), 0));
159  Checks.push_back(Check);
160  }
161  break;
162  }
163  case Instruction::SDiv: {
164  if (I.isExact()) {
165  auto *Check =
166  B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
167  ConstantInt::get(LHS->getType(), 0));
168  Checks.push_back(Check);
169  }
170  break;
171  }
172  case Instruction::AShr:
173  case Instruction::LShr:
174  case Instruction::Shl: {
175  Value *ShiftCheck =
177  ConstantInt::get(RHS->getType(),
178  LHS->getType()->getScalarSizeInBits()));
179  Checks.push_back(ShiftCheck);
180  break;
181  }
182  };
183 }
184 
186  IRBuilder<> B(&I);
187  SmallVector<Value*, 2> Checks;
188  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
189  generatePoisonChecksForBinOp(I, Checks);
190 
191  // Handle non-binops seperately
192  switch (I.getOpcode()) {
193  default:
194  break;
195  case Instruction::ExtractElement: {
196  Value *Vec = I.getOperand(0);
197  if (Vec->getType()->getVectorIsScalable())
198  break;
199  Value *Idx = I.getOperand(1);
200  unsigned NumElts = Vec->getType()->getVectorNumElements();
201  Value *Check =
203  ConstantInt::get(Idx->getType(), NumElts));
204  Checks.push_back(Check);
205  break;
206  }
207  case Instruction::InsertElement: {
208  Value *Vec = I.getOperand(0);
209  if (Vec->getType()->getVectorIsScalable())
210  break;
211  Value *Idx = I.getOperand(2);
212  unsigned NumElts = Vec->getType()->getVectorNumElements();
213  Value *Check =
215  ConstantInt::get(Idx->getType(), NumElts));
216  Checks.push_back(Check);
217  break;
218  }
219  };
220  return buildOrChain(B, Checks);
221 }
222 
224  auto Itr = ValToPoison.find(V);
225  if (Itr != ValToPoison.end())
226  return Itr->second;
227  if (isa<Constant>(V)) {
228  return ConstantInt::getFalse(V->getContext());
229  }
230  // Return false for unknwon values - this implements a non-strict mode where
231  // unhandled IR constructs are simply considered to never produce poison. At
232  // some point in the future, we probably want a "strict mode" for testing if
233  // nothing else.
234  return ConstantInt::getFalse(V->getContext());
235 }
236 
237 static void CreateAssert(IRBuilder<> &B, Value *Cond) {
238  assert(Cond->getType()->isIntegerTy(1));
239  if (auto *CI = dyn_cast<ConstantInt>(Cond))
240  if (CI->isAllOnesValue())
241  return;
242 
243  Module *M = B.GetInsertBlock()->getModule();
244  M->getOrInsertFunction("__poison_checker_assert",
247  Function *TrapFunc = M->getFunction("__poison_checker_assert");
248  B.CreateCall(TrapFunc, Cond);
249 }
250 
251 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
252  assert(Cond->getType()->isIntegerTy(1));
253  CreateAssert(B, B.CreateNot(Cond));
254 }
255 
256 static bool rewrite(Function &F) {
257  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
258 
259  DenseMap<Value *, Value *> ValToPoison;
260 
261  for (BasicBlock &BB : F)
262  for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
263  auto *OldPHI = cast<PHINode>(&*I);
264  auto *NewPHI = PHINode::Create(Int1Ty,
265  OldPHI->getNumIncomingValues());
266  for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
267  NewPHI->addIncoming(UndefValue::get(Int1Ty),
268  OldPHI->getIncomingBlock(i));
269  NewPHI->insertBefore(OldPHI);
270  ValToPoison[OldPHI] = NewPHI;
271  }
272 
273  for (BasicBlock &BB : F)
274  for (Instruction &I : BB) {
275  if (isa<PHINode>(I)) continue;
276 
277  IRBuilder<> B(cast<Instruction>(&I));
278 
279  // Note: There are many more sources of documented UB, but this pass only
280  // attempts to find UB triggered by propagation of poison.
281  if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
282  CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
283 
284  if (LocalCheck)
285  if (auto *RI = dyn_cast<ReturnInst>(&I))
286  if (RI->getNumOperands() != 0) {
287  Value *Op = RI->getOperand(0);
288  CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
289  }
290 
291  SmallVector<Value*, 4> Checks;
292  if (propagatesFullPoison(&I))
293  for (Value *V : I.operands())
294  Checks.push_back(getPoisonFor(ValToPoison, V));
295 
296  if (auto *Check = generatePoisonChecks(I))
297  Checks.push_back(Check);
298  ValToPoison[&I] = buildOrChain(B, Checks);
299  }
300 
301  for (BasicBlock &BB : F)
302  for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
303  auto *OldPHI = cast<PHINode>(&*I);
304  if (!ValToPoison.count(OldPHI))
305  continue; // skip the newly inserted phis
306  auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
307  for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
308  auto *OldVal = OldPHI->getIncomingValue(i);
309  NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
310  }
311  }
312  return true;
313 }
314 
315 
317  ModuleAnalysisManager &AM) {
318  bool Changed = false;
319  for (auto &F : M)
320  Changed |= rewrite(F);
321 
322  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
323 }
324 
328 }
329 
330 
331 /* Major TODO Items:
332  - Control dependent poison UB
333  - Strict mode - (i.e. must analyze every operand)
334  - Poison through memory
335  - Function ABIs
336  - Full coverage of intrinsics, etc.. (ouch)
337 
338  Instructions w/Unclear Semantics:
339  - shufflevector - It would seem reasonable for an out of bounds mask element
340  to produce poison, but the LangRef does not state.
341  - and/or - It would seem reasonable for poison to propagate from both
342  arguments, but LangRef doesn't state and propagatesFullPoison doesn't
343  include these two.
344  - all binary ops w/vector operands - The likely interpretation would be that
345  any element overflowing should produce poison for the entire result, but
346  the LangRef does not state.
347  - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
348  strange that only certian flags should be documented as producing poison.
349 
350  Cases of clear poison semantics not yet implemented:
351  - Exact flags on ashr/lshr produce poison
352  - NSW/NUW flags on shl produce poison
353  - Inbounds flag on getelementptr produce poison
354  - fptosi/fptoui (out of bounds input) produce poison
355  - Scalable vector types for insertelement/extractelement
356  - Floating point binary ops w/fmf nnan/noinfs flags produce poison
357  */
const Value * getGuaranteedNonFullPoisonOp(const Instruction *I)
Return either nullptr or an operand of I such that I will trigger undefined behavior if I is executed...
static bool Check(DecodeStatus &Out, DecodeStatus In)
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:616
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2211
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:177
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:66
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:743
F(f)
static void generatePoisonChecksForBinOp(Instruction &I, SmallVector< Value *, 2 > &Checks)
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
bool propagatesFullPoison(const Instruction *I)
Return true if this function can prove that I is guaranteed to yield full-poison (all bits poison) if...
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1523
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:140
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:245
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:126
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
Value * getOperand(unsigned i) const
Definition: User.h:169
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:157
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1294
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:165
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2351
static bool isConstantFalse(Value *V)
bool isExact() const
Determine whether the exact flag is set.
Value * CreateSRem(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1202
static Value * generatePoisonChecks(Instruction &I)
static Value * getPoisonFor(DenseMap< Value *, Value *> &ValToPoison, Value *V)
static cl::opt< bool > LocalCheck("poison-checking-function-local", cl::init(false), cl::desc("Check that returns are non-poison (for testing)"))
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:134
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
static Value * buildOrChain(IRBuilder<> &B, ArrayRef< Value *> Ops)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:143
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1197
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:174
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:566
static void CreateAssert(IRBuilder<> &B, Value *Cond)
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:328
unsigned greater or equal
Definition: InstrTypes.h:756
#define I(x, y, z)
Definition: MD5.cpp:58
static void CreateAssertNot(IRBuilder<> &B, Value *Cond)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2239
static bool rewrite(Function &F)
bool getVectorIsScalable() const
Definition: DerivedTypes.h:570
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
CallInst * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type. ...
Definition: IRBuilder.cpp:741
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:74
A container for analyses that lazily runs them and caches their results.