LLVM  6.0.0svn
Lint.cpp
Go to the documentation of this file.
1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass statically checks for common and easily-identified constructs
11 // which produce undefined or likely unintended behavior in LLVM IR.
12 //
13 // It is not a guarantee of correctness, in two ways. First, it isn't
14 // comprehensive. There are checks which could be done statically which are
15 // not yet implemented. Some of these are indicated by TODO comments, but
16 // those aren't comprehensive either. Second, many conditions cannot be
17 // checked statically. This pass does no dynamic instrumentation, so it
18 // can't check for all possible problems.
19 //
20 // Another limitation is that it assumes all code will be executed. A store
21 // through a null pointer in a basic block which is never reached is harmless,
22 // but this pass will warn about it anyway. This is the main reason why most
23 // of these checks live here instead of in the Verifier pass.
24 //
25 // Optimization passes may make conditions that this pass checks for more or
26 // less obvious. If an optimization pass appears to be introducing a warning,
27 // it may be that the optimization pass is merely exposing an existing
28 // condition in the code.
29 //
30 // This code may be run before instcombine. In many cases, instcombine checks
31 // for the same kinds of things and turns instructions with undefined behavior
32 // into unreachable (or equivalent). Because of this, this pass makes some
33 // effort to look through bitcasts and so on.
34 //
35 //===----------------------------------------------------------------------===//
36 
37 #include "llvm/Analysis/Lint.h"
38 #include "llvm/ADT/APInt.h"
39 #include "llvm/ADT/ArrayRef.h"
40 #include "llvm/ADT/SmallPtrSet.h"
41 #include "llvm/ADT/Twine.h"
46 #include "llvm/Analysis/Loads.h"
48 #include "llvm/Analysis/Passes.h"
51 #include "llvm/IR/Argument.h"
52 #include "llvm/IR/BasicBlock.h"
53 #include "llvm/IR/CallSite.h"
54 #include "llvm/IR/Constant.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
57 #include "llvm/IR/DerivedTypes.h"
58 #include "llvm/IR/Dominators.h"
59 #include "llvm/IR/Function.h"
60 #include "llvm/IR/GlobalVariable.h"
61 #include "llvm/IR/InstVisitor.h"
62 #include "llvm/IR/InstrTypes.h"
63 #include "llvm/IR/Instruction.h"
64 #include "llvm/IR/Instructions.h"
65 #include "llvm/IR/IntrinsicInst.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/Type.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/Pass.h"
71 #include "llvm/Support/Casting.h"
72 #include "llvm/Support/Debug.h"
73 #include "llvm/Support/KnownBits.h"
76 #include <cassert>
77 #include <cstdint>
78 #include <iterator>
79 #include <string>
80 
81 using namespace llvm;
82 
83 namespace {
84  namespace MemRef {
85  static const unsigned Read = 1;
86  static const unsigned Write = 2;
87  static const unsigned Callee = 4;
88  static const unsigned Branchee = 8;
89  } // end namespace MemRef
90 
91  class Lint : public FunctionPass, public InstVisitor<Lint> {
92  friend class InstVisitor<Lint>;
93 
94  void visitFunction(Function &F);
95 
96  void visitCallSite(CallSite CS);
97  void visitMemoryReference(Instruction &I, Value *Ptr,
98  uint64_t Size, unsigned Align,
99  Type *Ty, unsigned Flags);
100  void visitEHBeginCatch(IntrinsicInst *II);
101  void visitEHEndCatch(IntrinsicInst *II);
102 
103  void visitCallInst(CallInst &I);
104  void visitInvokeInst(InvokeInst &I);
105  void visitReturnInst(ReturnInst &I);
106  void visitLoadInst(LoadInst &I);
107  void visitStoreInst(StoreInst &I);
108  void visitXor(BinaryOperator &I);
109  void visitSub(BinaryOperator &I);
110  void visitLShr(BinaryOperator &I);
111  void visitAShr(BinaryOperator &I);
112  void visitShl(BinaryOperator &I);
113  void visitSDiv(BinaryOperator &I);
114  void visitUDiv(BinaryOperator &I);
115  void visitSRem(BinaryOperator &I);
116  void visitURem(BinaryOperator &I);
117  void visitAllocaInst(AllocaInst &I);
118  void visitVAArgInst(VAArgInst &I);
119  void visitIndirectBrInst(IndirectBrInst &I);
120  void visitExtractElementInst(ExtractElementInst &I);
121  void visitInsertElementInst(InsertElementInst &I);
122  void visitUnreachableInst(UnreachableInst &I);
123 
124  Value *findValue(Value *V, bool OffsetOk) const;
125  Value *findValueImpl(Value *V, bool OffsetOk,
126  SmallPtrSetImpl<Value *> &Visited) const;
127 
128  public:
129  Module *Mod;
130  const DataLayout *DL;
131  AliasAnalysis *AA;
132  AssumptionCache *AC;
133  DominatorTree *DT;
134  TargetLibraryInfo *TLI;
135 
136  std::string Messages;
137  raw_string_ostream MessagesStr;
138 
139  static char ID; // Pass identification, replacement for typeid
140  Lint() : FunctionPass(ID), MessagesStr(Messages) {
142  }
143 
144  bool runOnFunction(Function &F) override;
145 
146  void getAnalysisUsage(AnalysisUsage &AU) const override {
147  AU.setPreservesAll();
152  }
153  void print(raw_ostream &O, const Module *M) const override {}
154 
155  void WriteValues(ArrayRef<const Value *> Vs) {
156  for (const Value *V : Vs) {
157  if (!V)
158  continue;
159  if (isa<Instruction>(V)) {
160  MessagesStr << *V << '\n';
161  } else {
162  V->printAsOperand(MessagesStr, true, Mod);
163  MessagesStr << '\n';
164  }
165  }
166  }
167 
168  /// \brief A check failed, so printout out the condition and the message.
169  ///
170  /// This provides a nice place to put a breakpoint if you want to see why
171  /// something is not correct.
172  void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
173 
174  /// \brief A check failed (with values to print).
175  ///
176  /// This calls the Message-only version so that the above is easier to set
177  /// a breakpoint on.
178  template <typename T1, typename... Ts>
179  void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
180  CheckFailed(Message);
181  WriteValues({V1, Vs...});
182  }
183  };
184 } // end anonymous namespace
185 
186 char Lint::ID = 0;
187 INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
188  false, true)
193 INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
194  false, true)
195 
196 // Assert - We know that cond should be true, if not print an error message.
197 #define Assert(C, ...) \
198  do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false)
199 
200 // Lint::run - This is the main Analysis entry point for a
201 // function.
202 //
204  Mod = F.getParent();
205  DL = &F.getParent()->getDataLayout();
206  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
207  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
208  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
209  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
210  visit(F);
211  dbgs() << MessagesStr.str();
212  Messages.clear();
213  return false;
214 }
215 
216 void Lint::visitFunction(Function &F) {
217  // This isn't undefined behavior, it's just a little unusual, and it's a
218  // fairly common mistake to neglect to name a function.
219  Assert(F.hasName() || F.hasLocalLinkage(),
220  "Unusual: Unnamed function with non-local linkage", &F);
221 
222  // TODO: Check for irreducible control flow.
223 }
224 
225 void Lint::visitCallSite(CallSite CS) {
226  Instruction &I = *CS.getInstruction();
227  Value *Callee = CS.getCalledValue();
228 
229  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
231 
232  if (Function *F = dyn_cast<Function>(findValue(Callee,
233  /*OffsetOk=*/false))) {
234  Assert(CS.getCallingConv() == F->getCallingConv(),
235  "Undefined behavior: Caller and callee calling convention differ",
236  &I);
237 
238  FunctionType *FT = F->getFunctionType();
239  unsigned NumActualArgs = CS.arg_size();
240 
241  Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
242  : FT->getNumParams() == NumActualArgs,
243  "Undefined behavior: Call argument count mismatches callee "
244  "argument count",
245  &I);
246 
247  Assert(FT->getReturnType() == I.getType(),
248  "Undefined behavior: Call return type mismatches "
249  "callee return type",
250  &I);
251 
252  // Check argument types (in case the callee was casted) and attributes.
253  // TODO: Verify that caller and callee attributes are compatible.
254  Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
255  CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
256  for (; AI != AE; ++AI) {
257  Value *Actual = *AI;
258  if (PI != PE) {
259  Argument *Formal = &*PI++;
260  Assert(Formal->getType() == Actual->getType(),
261  "Undefined behavior: Call argument type mismatches "
262  "callee parameter type",
263  &I);
264 
265  // Check that noalias arguments don't alias other arguments. This is
266  // not fully precise because we don't know the sizes of the dereferenced
267  // memory regions.
268  if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
269  for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
270  if (AI != BI && (*BI)->getType()->isPointerTy()) {
271  AliasResult Result = AA->alias(*AI, *BI);
272  Assert(Result != MustAlias && Result != PartialAlias,
273  "Unusual: noalias argument aliases another argument", &I);
274  }
275 
276  // Check that an sret argument points to valid memory.
277  if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
278  Type *Ty =
279  cast<PointerType>(Formal->getType())->getElementType();
280  visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
281  DL->getABITypeAlignment(Ty), Ty,
282  MemRef::Read | MemRef::Write);
283  }
284  }
285  }
286  }
287 
288  if (CS.isCall()) {
289  const CallInst *CI = cast<CallInst>(CS.getInstruction());
290  if (CI->isTailCall()) {
291  const AttributeList &PAL = CI->getAttributes();
292  unsigned ArgNo = 0;
293  for (Value *Arg : CS.args()) {
294  // Skip ByVal arguments since they will be memcpy'd to the callee's
295  // stack anyway.
296  if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
297  continue;
298  Value *Obj = findValue(Arg, /*OffsetOk=*/true);
299  Assert(!isa<AllocaInst>(Obj),
300  "Undefined behavior: Call with \"tail\" keyword references "
301  "alloca",
302  &I);
303  }
304  }
305  }
306 
307 
308  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
309  switch (II->getIntrinsicID()) {
310  default: break;
311 
312  // TODO: Check more intrinsics
313 
314  case Intrinsic::memcpy: {
315  MemCpyInst *MCI = cast<MemCpyInst>(&I);
316  // TODO: If the size is known, use it.
317  visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
318  MCI->getAlignment(), nullptr, MemRef::Write);
319  visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
320  MCI->getAlignment(), nullptr, MemRef::Read);
321 
322  // Check that the memcpy arguments don't overlap. The AliasAnalysis API
323  // isn't expressive enough for what we really want to do. Known partial
324  // overlap is not distinguished from the case where nothing is known.
325  uint64_t Size = 0;
326  if (const ConstantInt *Len =
327  dyn_cast<ConstantInt>(findValue(MCI->getLength(),
328  /*OffsetOk=*/false)))
329  if (Len->getValue().isIntN(32))
330  Size = Len->getValue().getZExtValue();
331  Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
332  MustAlias,
333  "Undefined behavior: memcpy source and destination overlap", &I);
334  break;
335  }
336  case Intrinsic::memmove: {
337  MemMoveInst *MMI = cast<MemMoveInst>(&I);
338  // TODO: If the size is known, use it.
339  visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
340  MMI->getAlignment(), nullptr, MemRef::Write);
341  visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
342  MMI->getAlignment(), nullptr, MemRef::Read);
343  break;
344  }
345  case Intrinsic::memset: {
346  MemSetInst *MSI = cast<MemSetInst>(&I);
347  // TODO: If the size is known, use it.
348  visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
349  MSI->getAlignment(), nullptr, MemRef::Write);
350  break;
351  }
352 
353  case Intrinsic::vastart:
355  "Undefined behavior: va_start called in a non-varargs function",
356  &I);
357 
358  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
359  nullptr, MemRef::Read | MemRef::Write);
360  break;
361  case Intrinsic::vacopy:
362  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
363  nullptr, MemRef::Write);
364  visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0,
365  nullptr, MemRef::Read);
366  break;
367  case Intrinsic::vaend:
368  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
369  nullptr, MemRef::Read | MemRef::Write);
370  break;
371 
372  case Intrinsic::stackrestore:
373  // Stackrestore doesn't read or write memory, but it sets the
374  // stack pointer, which the compiler may read from or write to
375  // at any time, so check it for both readability and writeability.
376  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
377  nullptr, MemRef::Read | MemRef::Write);
378  break;
379  }
380 }
381 
382 void Lint::visitCallInst(CallInst &I) {
383  return visitCallSite(&I);
384 }
385 
386 void Lint::visitInvokeInst(InvokeInst &I) {
387  return visitCallSite(&I);
388 }
389 
390 void Lint::visitReturnInst(ReturnInst &I) {
391  Function *F = I.getParent()->getParent();
392  Assert(!F->doesNotReturn(),
393  "Unusual: Return statement in function with noreturn attribute", &I);
394 
395  if (Value *V = I.getReturnValue()) {
396  Value *Obj = findValue(V, /*OffsetOk=*/true);
397  Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
398  }
399 }
400 
401 // TODO: Check that the reference is in bounds.
402 // TODO: Check readnone/readonly function attributes.
403 void Lint::visitMemoryReference(Instruction &I,
404  Value *Ptr, uint64_t Size, unsigned Align,
405  Type *Ty, unsigned Flags) {
406  // If no memory is being referenced, it doesn't matter if the pointer
407  // is valid.
408  if (Size == 0)
409  return;
410 
411  Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
412  Assert(!isa<ConstantPointerNull>(UnderlyingObject),
413  "Undefined behavior: Null pointer dereference", &I);
414  Assert(!isa<UndefValue>(UnderlyingObject),
415  "Undefined behavior: Undef pointer dereference", &I);
416  Assert(!isa<ConstantInt>(UnderlyingObject) ||
417  !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
418  "Unusual: All-ones pointer dereference", &I);
419  Assert(!isa<ConstantInt>(UnderlyingObject) ||
420  !cast<ConstantInt>(UnderlyingObject)->isOne(),
421  "Unusual: Address one pointer dereference", &I);
422 
423  if (Flags & MemRef::Write) {
424  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
425  Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
426  &I);
427  Assert(!isa<Function>(UnderlyingObject) &&
428  !isa<BlockAddress>(UnderlyingObject),
429  "Undefined behavior: Write to text section", &I);
430  }
431  if (Flags & MemRef::Read) {
432  Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
433  &I);
434  Assert(!isa<BlockAddress>(UnderlyingObject),
435  "Undefined behavior: Load from block address", &I);
436  }
437  if (Flags & MemRef::Callee) {
438  Assert(!isa<BlockAddress>(UnderlyingObject),
439  "Undefined behavior: Call to block address", &I);
440  }
441  if (Flags & MemRef::Branchee) {
442  Assert(!isa<Constant>(UnderlyingObject) ||
443  isa<BlockAddress>(UnderlyingObject),
444  "Undefined behavior: Branch to non-blockaddress", &I);
445  }
446 
447  // Check for buffer overflows and misalignment.
448  // Only handles memory references that read/write something simple like an
449  // alloca instruction or a global variable.
450  int64_t Offset = 0;
451  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) {
452  // OK, so the access is to a constant offset from Ptr. Check that Ptr is
453  // something we can handle and if so extract the size of this base object
454  // along with its alignment.
455  uint64_t BaseSize = MemoryLocation::UnknownSize;
456  unsigned BaseAlign = 0;
457 
458  if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
459  Type *ATy = AI->getAllocatedType();
460  if (!AI->isArrayAllocation() && ATy->isSized())
461  BaseSize = DL->getTypeAllocSize(ATy);
462  BaseAlign = AI->getAlignment();
463  if (BaseAlign == 0 && ATy->isSized())
464  BaseAlign = DL->getABITypeAlignment(ATy);
465  } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
466  // If the global may be defined differently in another compilation unit
467  // then don't warn about funky memory accesses.
468  if (GV->hasDefinitiveInitializer()) {
469  Type *GTy = GV->getValueType();
470  if (GTy->isSized())
471  BaseSize = DL->getTypeAllocSize(GTy);
472  BaseAlign = GV->getAlignment();
473  if (BaseAlign == 0 && GTy->isSized())
474  BaseAlign = DL->getABITypeAlignment(GTy);
475  }
476  }
477 
478  // Accesses from before the start or after the end of the object are not
479  // defined.
481  BaseSize == MemoryLocation::UnknownSize ||
482  (Offset >= 0 && Offset + Size <= BaseSize),
483  "Undefined behavior: Buffer overflow", &I);
484 
485  // Accesses that say that the memory is more aligned than it is are not
486  // defined.
487  if (Align == 0 && Ty && Ty->isSized())
488  Align = DL->getABITypeAlignment(Ty);
489  Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
490  "Undefined behavior: Memory reference address is misaligned", &I);
491  }
492 }
493 
494 void Lint::visitLoadInst(LoadInst &I) {
495  visitMemoryReference(I, I.getPointerOperand(),
496  DL->getTypeStoreSize(I.getType()), I.getAlignment(),
497  I.getType(), MemRef::Read);
498 }
499 
500 void Lint::visitStoreInst(StoreInst &I) {
501  visitMemoryReference(I, I.getPointerOperand(),
502  DL->getTypeStoreSize(I.getOperand(0)->getType()),
503  I.getAlignment(),
504  I.getOperand(0)->getType(), MemRef::Write);
505 }
506 
507 void Lint::visitXor(BinaryOperator &I) {
508  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
509  "Undefined result: xor(undef, undef)", &I);
510 }
511 
512 void Lint::visitSub(BinaryOperator &I) {
513  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
514  "Undefined result: sub(undef, undef)", &I);
515 }
516 
517 void Lint::visitLShr(BinaryOperator &I) {
518  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
519  /*OffsetOk=*/false)))
520  Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
521  "Undefined result: Shift count out of range", &I);
522 }
523 
524 void Lint::visitAShr(BinaryOperator &I) {
525  if (ConstantInt *CI =
526  dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
527  Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
528  "Undefined result: Shift count out of range", &I);
529 }
530 
531 void Lint::visitShl(BinaryOperator &I) {
532  if (ConstantInt *CI =
533  dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
534  Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
535  "Undefined result: Shift count out of range", &I);
536 }
537 
538 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
539  AssumptionCache *AC) {
540  // Assume undef could be zero.
541  if (isa<UndefValue>(V))
542  return true;
543 
544  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
545  if (!VecTy) {
546  KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
547  return Known.isZero();
548  }
549 
550  // Per-component check doesn't work with zeroinitializer
551  Constant *C = dyn_cast<Constant>(V);
552  if (!C)
553  return false;
554 
555  if (C->isZeroValue())
556  return true;
557 
558  // For a vector, KnownZero will only be true if all values are zero, so check
559  // this per component
560  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
561  Constant *Elem = C->getAggregateElement(I);
562  if (isa<UndefValue>(Elem))
563  return true;
564 
565  KnownBits Known = computeKnownBits(Elem, DL);
566  if (Known.isZero())
567  return true;
568  }
569 
570  return false;
571 }
572 
573 void Lint::visitSDiv(BinaryOperator &I) {
574  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
575  "Undefined behavior: Division by zero", &I);
576 }
577 
578 void Lint::visitUDiv(BinaryOperator &I) {
579  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
580  "Undefined behavior: Division by zero", &I);
581 }
582 
583 void Lint::visitSRem(BinaryOperator &I) {
584  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
585  "Undefined behavior: Division by zero", &I);
586 }
587 
588 void Lint::visitURem(BinaryOperator &I) {
589  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
590  "Undefined behavior: Division by zero", &I);
591 }
592 
593 void Lint::visitAllocaInst(AllocaInst &I) {
594  if (isa<ConstantInt>(I.getArraySize()))
595  // This isn't undefined behavior, it's just an obvious pessimization.
597  "Pessimization: Static alloca outside of entry block", &I);
598 
599  // TODO: Check for an unusual size (MSB set?)
600 }
601 
602 void Lint::visitVAArgInst(VAArgInst &I) {
603  visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, 0,
604  nullptr, MemRef::Read | MemRef::Write);
605 }
606 
607 void Lint::visitIndirectBrInst(IndirectBrInst &I) {
608  visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, 0,
609  nullptr, MemRef::Branchee);
610 
611  Assert(I.getNumDestinations() != 0,
612  "Undefined behavior: indirectbr with no destinations", &I);
613 }
614 
615 void Lint::visitExtractElementInst(ExtractElementInst &I) {
616  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
617  /*OffsetOk=*/false)))
618  Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
619  "Undefined result: extractelement index out of range", &I);
620 }
621 
622 void Lint::visitInsertElementInst(InsertElementInst &I) {
623  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
624  /*OffsetOk=*/false)))
625  Assert(CI->getValue().ult(I.getType()->getNumElements()),
626  "Undefined result: insertelement index out of range", &I);
627 }
628 
629 void Lint::visitUnreachableInst(UnreachableInst &I) {
630  // This isn't undefined behavior, it's merely suspicious.
631  Assert(&I == &I.getParent()->front() ||
632  std::prev(I.getIterator())->mayHaveSideEffects(),
633  "Unusual: unreachable immediately preceded by instruction without "
634  "side effects",
635  &I);
636 }
637 
638 /// findValue - Look through bitcasts and simple memory reference patterns
639 /// to identify an equivalent, but more informative, value. If OffsetOk
640 /// is true, look through getelementptrs with non-zero offsets too.
641 ///
642 /// Most analysis passes don't require this logic, because instcombine
643 /// will simplify most of these kinds of things away. But it's a goal of
644 /// this Lint pass to be useful even on non-optimized IR.
645 Value *Lint::findValue(Value *V, bool OffsetOk) const {
646  SmallPtrSet<Value *, 4> Visited;
647  return findValueImpl(V, OffsetOk, Visited);
648 }
649 
650 /// findValueImpl - Implementation helper for findValue.
651 Value *Lint::findValueImpl(Value *V, bool OffsetOk,
652  SmallPtrSetImpl<Value *> &Visited) const {
653  // Detect self-referential values.
654  if (!Visited.insert(V).second)
655  return UndefValue::get(V->getType());
656 
657  // TODO: Look through sext or zext cast, when the result is known to
658  // be interpreted as signed or unsigned, respectively.
659  // TODO: Look through eliminable cast pairs.
660  // TODO: Look through calls with unique return values.
661  // TODO: Look through vector insert/extract/shuffle.
662  V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
663  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
664  BasicBlock::iterator BBI = L->getIterator();
665  BasicBlock *BB = L->getParent();
666  SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
667  for (;;) {
668  if (!VisitedBlocks.insert(BB).second)
669  break;
670  if (Value *U =
672  return findValueImpl(U, OffsetOk, Visited);
673  if (BBI != BB->begin()) break;
674  BB = BB->getUniquePredecessor();
675  if (!BB) break;
676  BBI = BB->end();
677  }
678  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
679  if (Value *W = PN->hasConstantValue())
680  if (W != V)
681  return findValueImpl(W, OffsetOk, Visited);
682  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
683  if (CI->isNoopCast(*DL))
684  return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
685  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
686  if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
687  Ex->getIndices()))
688  if (W != V)
689  return findValueImpl(W, OffsetOk, Visited);
690  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
691  // Same as above, but for ConstantExpr instead of Instruction.
692  if (Instruction::isCast(CE->getOpcode())) {
693  if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
694  CE->getOperand(0)->getType(), CE->getType(),
695  *DL))
696  return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
697  } else if (CE->getOpcode() == Instruction::ExtractValue) {
698  ArrayRef<unsigned> Indices = CE->getIndices();
699  if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
700  if (W != V)
701  return findValueImpl(W, OffsetOk, Visited);
702  }
703  }
704 
705  // As a last resort, try SimplifyInstruction or constant folding.
706  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
707  if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC}))
708  return findValueImpl(W, OffsetOk, Visited);
709  } else if (auto *C = dyn_cast<Constant>(V)) {
710  if (Value *W = ConstantFoldConstant(C, *DL, TLI))
711  if (W && W != V)
712  return findValueImpl(W, OffsetOk, Visited);
713  }
714 
715  return V;
716 }
717 
718 //===----------------------------------------------------------------------===//
719 // Implement the public interfaces to this file...
720 //===----------------------------------------------------------------------===//
721 
723  return new Lint();
724 }
725 
726 /// lintFunction - Check a function for errors, printing messages on stderr.
727 ///
728 void llvm::lintFunction(const Function &f) {
729  Function &F = const_cast<Function&>(f);
730  assert(!F.isDeclaration() && "Cannot lint external functions");
731 
733  Lint *V = new Lint();
734  FPM.add(V);
735  FPM.run(F);
736 }
737 
738 /// lintModule - Check a module for errors, printing messages on stderr.
739 ///
740 void llvm::lintModule(const Module &M) {
742  Lint *V = new Lint();
743  PM.add(V);
744  PM.run(const_cast<Module&>(M));
745 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:158
The two locations precisely alias each other.
Definition: AliasAnalysis.h:91
uint64_t CallInst * C
Return a value (possibly void), from a function.
User::op_iterator arg_iterator
The type of iterator to use when looping over actual arguments at this call site. ...
Definition: CallSite.h:213
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
void lintModule(const Module &M)
Check a module.
Definition: Lint.cpp:740
bool hasLocalLinkage() const
Definition: GlobalValue.h:427
This instruction extracts a struct member or array element value from an aggregate value...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
Base class for instruction visitors.
Definition: InstVisitor.h:81
unsigned arg_size() const
Definition: CallSite.h:219
CallingConv::ID getCallingConv() const
Get the calling convention of the call.
Definition: CallSite.h:312
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:262
The two locations alias, but only due to a partial overlap.
Definition: AliasAnalysis.h:89
This class represents a function call, abstracting a target machine&#39;s calling convention.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
void lintFunction(const Function &F)
lintFunction - Check a function for errors, printing messages on stderr.
Definition: Lint.cpp:728
An immutable pass that tracks lazily created AssumptionCache objects.
iterator_range< IterTy > args() const
Definition: CallSite.h:215
A cache of .assume calls within a function.
This class wraps the llvm.memset intrinsic.
arg_iterator arg_end()
Definition: Function.h:612
F(f)
An instruction for reading from memory.
Definition: Instructions.h:164
Value * getLength() const
#define Assert(C,...)
Definition: Lint.cpp:197
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
AttributeList getAttributes() const
Return the parameter attributes for this call.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void add(Pass *P) override
Add a pass to the queue of passes to run.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Implicit null checks
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
This class wraps the llvm.memmove intrinsic.
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AliasAnalysis *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:321
IterTy arg_end() const
Definition: CallSite.h:575
InstrTy * getInstruction() const
Definition: CallSite.h:92
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
This file implements a class to represent arbitrary precision integral constant values and operations...
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:100
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Attempt to fold the constant using the specified DataLayout.
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:862
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Definition: Lint.cpp:84
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:230
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset...
lint
Definition: Lint.cpp:193
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
An instruction for storing to memory.
Definition: Instructions.h:306
VectorType * getType() const
Overload to return most specific vector type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
amdgpu Simplify well known AMD library false Value * Callee
Value * getOperand(unsigned i) const
Definition: User.h:154
bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
Definition: Function.cpp:135
bool isCall() const
Return true if a CallInst is enclosed.
Definition: CallSite.h:87
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:277
bool isZeroValue() const
Return true if the value is negative zero or null value.
Definition: Constants.cpp:66
PassManager manages ModulePassManagers.
const BasicBlock & getEntryBlock() const
Definition: Function.h:572
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:602
static bool runOnFunction(Function &F, bool PostInlining)
This instruction inserts a single (scalar) element into a VectorType value.
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static bool isNoopCast(Instruction::CastOps Opcode, Type *SrcTy, Type *DstTy, const DataLayout &DL)
A no-op cast is one that can be effected without changing any bits.
This function has undefined behavior.
This is an important base class in LLVM.
Definition: Constant.h:42
bool hasStructRetAttr() const
Return true if this argument has the sret attribute.
Definition: Function.cpp:145
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
const Instruction & front() const
Definition: BasicBlock.h:264
Indirect Branch Instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:79
bool doesNotReturn() const
Determine if the function cannot return.
Definition: Function.h:444
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Value * getPointerOperand()
Definition: Instructions.h:270
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
arg_iterator arg_begin()
Definition: Function.h:603
self_iterator getIterator()
Definition: ilist_node.h:82
VectorType * getVectorOperandType() const
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:72
FunctionPassManager manages FunctionPasses and BasicBlockPassManagers.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1320
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:558
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
bool isCast() const
Definition: Instruction.h:131
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:93
bool run(Module &M)
run - Execute all of the passes scheduled for execution.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:3573
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) INITIALIZE_PASS_END(Lint
Value * FindInsertedValue(Value *V, ArrayRef< unsigned > idx_range, Instruction *InsertBefore=nullptr)
Given an aggregrate and an sequence of indices, see if the scalar value indexed is already around as ...
FunctionPass * createLintPass()
Create a lint pass.
Definition: Lint.cpp:722
Iterator for intrusive lists based on ilist_node.
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
ValTy * getArgument(unsigned ArgNo) const
Definition: CallSite.h:186
iterator end()
Definition: BasicBlock.h:254
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:194
IterTy arg_begin() const
Definition: CallSite.h:571
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
void initializeLintPass(PassRegistry &)
This class wraps the llvm.memcpy intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:145
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:57
void setPreservesAll()
Set by analyses that do not transform their input at all.
bool isTailCall() const
amdgpu Simplify well known AMD library false Value Value * Arg
Basic Alias true
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:538
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:226
This file provides utility analysis objects describing memory locations.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
This instruction extracts a single (scalar) element from a VectorType value.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:351
unsigned getAlignment() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:201
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:462
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:556
LLVM Value Representation.
Definition: Value.h:73
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
Invoke instruction.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Value * getPointerOperand()
Definition: Instructions.h:398
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Statically lint checks LLVM IR
Definition: Lint.cpp:193
#define T1
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:66
an instruction to allocate memory on the stack
Definition: Instructions.h:60