LLVM  9.0.0svn
Lint.cpp
Go to the documentation of this file.
1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass statically checks for common and easily-identified constructs
10 // which produce undefined or likely unintended behavior in LLVM IR.
11 //
12 // It is not a guarantee of correctness, in two ways. First, it isn't
13 // comprehensive. There are checks which could be done statically which are
14 // not yet implemented. Some of these are indicated by TODO comments, but
15 // those aren't comprehensive either. Second, many conditions cannot be
16 // checked statically. This pass does no dynamic instrumentation, so it
17 // can't check for all possible problems.
18 //
19 // Another limitation is that it assumes all code will be executed. A store
20 // through a null pointer in a basic block which is never reached is harmless,
21 // but this pass will warn about it anyway. This is the main reason why most
22 // of these checks live here instead of in the Verifier pass.
23 //
24 // Optimization passes may make conditions that this pass checks for more or
25 // less obvious. If an optimization pass appears to be introducing a warning,
26 // it may be that the optimization pass is merely exposing an existing
27 // condition in the code.
28 //
29 // This code may be run before instcombine. In many cases, instcombine checks
30 // for the same kinds of things and turns instructions with undefined behavior
31 // into unreachable (or equivalent). Because of this, this pass makes some
32 // effort to look through bitcasts and so on.
33 //
34 //===----------------------------------------------------------------------===//
35 
36 #include "llvm/Analysis/Lint.h"
37 #include "llvm/ADT/APInt.h"
38 #include "llvm/ADT/ArrayRef.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/Twine.h"
45 #include "llvm/Analysis/Loads.h"
47 #include "llvm/Analysis/Passes.h"
50 #include "llvm/IR/Argument.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/CallSite.h"
53 #include "llvm/IR/Constant.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/DataLayout.h"
56 #include "llvm/IR/DerivedTypes.h"
57 #include "llvm/IR/Dominators.h"
58 #include "llvm/IR/Function.h"
59 #include "llvm/IR/GlobalVariable.h"
60 #include "llvm/IR/InstVisitor.h"
61 #include "llvm/IR/InstrTypes.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/Instructions.h"
64 #include "llvm/IR/IntrinsicInst.h"
66 #include "llvm/IR/Module.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/Value.h"
69 #include "llvm/Pass.h"
70 #include "llvm/Support/Casting.h"
71 #include "llvm/Support/Debug.h"
72 #include "llvm/Support/KnownBits.h"
75 #include <cassert>
76 #include <cstdint>
77 #include <iterator>
78 #include <string>
79 
80 using namespace llvm;
81 
82 namespace {
83  namespace MemRef {
84  static const unsigned Read = 1;
85  static const unsigned Write = 2;
86  static const unsigned Callee = 4;
87  static const unsigned Branchee = 8;
88  } // end namespace MemRef
89 
90  class Lint : public FunctionPass, public InstVisitor<Lint> {
91  friend class InstVisitor<Lint>;
92 
93  void visitFunction(Function &F);
94 
95  void visitCallSite(CallSite CS);
96  void visitMemoryReference(Instruction &I, Value *Ptr,
97  uint64_t Size, unsigned Align,
98  Type *Ty, unsigned Flags);
99  void visitEHBeginCatch(IntrinsicInst *II);
100  void visitEHEndCatch(IntrinsicInst *II);
101 
102  void visitCallInst(CallInst &I);
103  void visitInvokeInst(InvokeInst &I);
104  void visitReturnInst(ReturnInst &I);
105  void visitLoadInst(LoadInst &I);
106  void visitStoreInst(StoreInst &I);
107  void visitXor(BinaryOperator &I);
108  void visitSub(BinaryOperator &I);
109  void visitLShr(BinaryOperator &I);
110  void visitAShr(BinaryOperator &I);
111  void visitShl(BinaryOperator &I);
112  void visitSDiv(BinaryOperator &I);
113  void visitUDiv(BinaryOperator &I);
114  void visitSRem(BinaryOperator &I);
115  void visitURem(BinaryOperator &I);
116  void visitAllocaInst(AllocaInst &I);
117  void visitVAArgInst(VAArgInst &I);
118  void visitIndirectBrInst(IndirectBrInst &I);
119  void visitExtractElementInst(ExtractElementInst &I);
120  void visitInsertElementInst(InsertElementInst &I);
121  void visitUnreachableInst(UnreachableInst &I);
122 
123  Value *findValue(Value *V, bool OffsetOk) const;
124  Value *findValueImpl(Value *V, bool OffsetOk,
125  SmallPtrSetImpl<Value *> &Visited) const;
126 
127  public:
128  Module *Mod;
129  const DataLayout *DL;
130  AliasAnalysis *AA;
131  AssumptionCache *AC;
132  DominatorTree *DT;
133  TargetLibraryInfo *TLI;
134 
135  std::string Messages;
136  raw_string_ostream MessagesStr;
137 
138  static char ID; // Pass identification, replacement for typeid
139  Lint() : FunctionPass(ID), MessagesStr(Messages) {
141  }
142 
143  bool runOnFunction(Function &F) override;
144 
145  void getAnalysisUsage(AnalysisUsage &AU) const override {
146  AU.setPreservesAll();
151  }
152  void print(raw_ostream &O, const Module *M) const override {}
153 
154  void WriteValues(ArrayRef<const Value *> Vs) {
155  for (const Value *V : Vs) {
156  if (!V)
157  continue;
158  if (isa<Instruction>(V)) {
159  MessagesStr << *V << '\n';
160  } else {
161  V->printAsOperand(MessagesStr, true, Mod);
162  MessagesStr << '\n';
163  }
164  }
165  }
166 
167  /// A check failed, so printout out the condition and the message.
168  ///
169  /// This provides a nice place to put a breakpoint if you want to see why
170  /// something is not correct.
171  void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
172 
173  /// A check failed (with values to print).
174  ///
175  /// This calls the Message-only version so that the above is easier to set
176  /// a breakpoint on.
177  template <typename T1, typename... Ts>
178  void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
179  CheckFailed(Message);
180  WriteValues({V1, Vs...});
181  }
182  };
183 } // end anonymous namespace
184 
185 char Lint::ID = 0;
186 INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
187  false, true)
192 INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
193  false, true)
194 
195 // Assert - We know that cond should be true, if not print an error message.
196 #define Assert(C, ...) \
197  do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false)
198 
199 // Lint::run - This is the main Analysis entry point for a
200 // function.
201 //
203  Mod = F.getParent();
204  DL = &F.getParent()->getDataLayout();
205  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
206  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
207  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
208  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
209  visit(F);
210  dbgs() << MessagesStr.str();
211  Messages.clear();
212  return false;
213 }
214 
215 void Lint::visitFunction(Function &F) {
216  // This isn't undefined behavior, it's just a little unusual, and it's a
217  // fairly common mistake to neglect to name a function.
218  Assert(F.hasName() || F.hasLocalLinkage(),
219  "Unusual: Unnamed function with non-local linkage", &F);
220 
221  // TODO: Check for irreducible control flow.
222 }
223 
224 void Lint::visitCallSite(CallSite CS) {
225  Instruction &I = *CS.getInstruction();
226  Value *Callee = CS.getCalledValue();
227 
228  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
229  MemRef::Callee);
230 
231  if (Function *F = dyn_cast<Function>(findValue(Callee,
232  /*OffsetOk=*/false))) {
233  Assert(CS.getCallingConv() == F->getCallingConv(),
234  "Undefined behavior: Caller and callee calling convention differ",
235  &I);
236 
237  FunctionType *FT = F->getFunctionType();
238  unsigned NumActualArgs = CS.arg_size();
239 
240  Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
241  : FT->getNumParams() == NumActualArgs,
242  "Undefined behavior: Call argument count mismatches callee "
243  "argument count",
244  &I);
245 
246  Assert(FT->getReturnType() == I.getType(),
247  "Undefined behavior: Call return type mismatches "
248  "callee return type",
249  &I);
250 
251  // Check argument types (in case the callee was casted) and attributes.
252  // TODO: Verify that caller and callee attributes are compatible.
253  Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
254  CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
255  for (; AI != AE; ++AI) {
256  Value *Actual = *AI;
257  if (PI != PE) {
258  Argument *Formal = &*PI++;
259  Assert(Formal->getType() == Actual->getType(),
260  "Undefined behavior: Call argument type mismatches "
261  "callee parameter type",
262  &I);
263 
264  // Check that noalias arguments don't alias other arguments. This is
265  // not fully precise because we don't know the sizes of the dereferenced
266  // memory regions.
267  if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
268  AttributeList PAL = CS.getAttributes();
269  unsigned ArgNo = 0;
270  for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
271  // Skip ByVal arguments since they will be memcpy'd to the callee's
272  // stack so we're not really passing the pointer anyway.
273  if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
274  continue;
275  if (AI != BI && (*BI)->getType()->isPointerTy()) {
276  AliasResult Result = AA->alias(*AI, *BI);
277  Assert(Result != MustAlias && Result != PartialAlias,
278  "Unusual: noalias argument aliases another argument", &I);
279  }
280  }
281  }
282 
283  // Check that an sret argument points to valid memory.
284  if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
285  Type *Ty =
286  cast<PointerType>(Formal->getType())->getElementType();
287  visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
288  DL->getABITypeAlignment(Ty), Ty,
289  MemRef::Read | MemRef::Write);
290  }
291  }
292  }
293  }
294 
295  if (CS.isCall()) {
296  const CallInst *CI = cast<CallInst>(CS.getInstruction());
297  if (CI->isTailCall()) {
298  const AttributeList &PAL = CI->getAttributes();
299  unsigned ArgNo = 0;
300  for (Value *Arg : CS.args()) {
301  // Skip ByVal arguments since they will be memcpy'd to the callee's
302  // stack anyway.
303  if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
304  continue;
305  Value *Obj = findValue(Arg, /*OffsetOk=*/true);
306  Assert(!isa<AllocaInst>(Obj),
307  "Undefined behavior: Call with \"tail\" keyword references "
308  "alloca",
309  &I);
310  }
311  }
312  }
313 
314 
315  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
316  switch (II->getIntrinsicID()) {
317  default: break;
318 
319  // TODO: Check more intrinsics
320 
321  case Intrinsic::memcpy: {
322  MemCpyInst *MCI = cast<MemCpyInst>(&I);
323  // TODO: If the size is known, use it.
324  visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
325  MCI->getDestAlignment(), nullptr, MemRef::Write);
326  visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
327  MCI->getSourceAlignment(), nullptr, MemRef::Read);
328 
329  // Check that the memcpy arguments don't overlap. The AliasAnalysis API
330  // isn't expressive enough for what we really want to do. Known partial
331  // overlap is not distinguished from the case where nothing is known.
332  auto Size = LocationSize::unknown();
333  if (const ConstantInt *Len =
334  dyn_cast<ConstantInt>(findValue(MCI->getLength(),
335  /*OffsetOk=*/false)))
336  if (Len->getValue().isIntN(32))
337  Size = LocationSize::precise(Len->getValue().getZExtValue());
338  Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
339  MustAlias,
340  "Undefined behavior: memcpy source and destination overlap", &I);
341  break;
342  }
343  case Intrinsic::memmove: {
344  MemMoveInst *MMI = cast<MemMoveInst>(&I);
345  // TODO: If the size is known, use it.
346  visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
347  MMI->getDestAlignment(), nullptr, MemRef::Write);
348  visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
349  MMI->getSourceAlignment(), nullptr, MemRef::Read);
350  break;
351  }
352  case Intrinsic::memset: {
353  MemSetInst *MSI = cast<MemSetInst>(&I);
354  // TODO: If the size is known, use it.
355  visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
356  MSI->getDestAlignment(), nullptr, MemRef::Write);
357  break;
358  }
359 
360  case Intrinsic::vastart:
362  "Undefined behavior: va_start called in a non-varargs function",
363  &I);
364 
365  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
366  nullptr, MemRef::Read | MemRef::Write);
367  break;
368  case Intrinsic::vacopy:
369  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
370  nullptr, MemRef::Write);
371  visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0,
372  nullptr, MemRef::Read);
373  break;
374  case Intrinsic::vaend:
375  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
376  nullptr, MemRef::Read | MemRef::Write);
377  break;
378 
379  case Intrinsic::stackrestore:
380  // Stackrestore doesn't read or write memory, but it sets the
381  // stack pointer, which the compiler may read from or write to
382  // at any time, so check it for both readability and writeability.
383  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
384  nullptr, MemRef::Read | MemRef::Write);
385  break;
386  }
387 }
388 
389 void Lint::visitCallInst(CallInst &I) {
390  return visitCallSite(&I);
391 }
392 
393 void Lint::visitInvokeInst(InvokeInst &I) {
394  return visitCallSite(&I);
395 }
396 
397 void Lint::visitReturnInst(ReturnInst &I) {
398  Function *F = I.getParent()->getParent();
399  Assert(!F->doesNotReturn(),
400  "Unusual: Return statement in function with noreturn attribute", &I);
401 
402  if (Value *V = I.getReturnValue()) {
403  Value *Obj = findValue(V, /*OffsetOk=*/true);
404  Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
405  }
406 }
407 
408 // TODO: Check that the reference is in bounds.
409 // TODO: Check readnone/readonly function attributes.
410 void Lint::visitMemoryReference(Instruction &I,
411  Value *Ptr, uint64_t Size, unsigned Align,
412  Type *Ty, unsigned Flags) {
413  // If no memory is being referenced, it doesn't matter if the pointer
414  // is valid.
415  if (Size == 0)
416  return;
417 
418  Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
419  Assert(!isa<ConstantPointerNull>(UnderlyingObject),
420  "Undefined behavior: Null pointer dereference", &I);
421  Assert(!isa<UndefValue>(UnderlyingObject),
422  "Undefined behavior: Undef pointer dereference", &I);
423  Assert(!isa<ConstantInt>(UnderlyingObject) ||
424  !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
425  "Unusual: All-ones pointer dereference", &I);
426  Assert(!isa<ConstantInt>(UnderlyingObject) ||
427  !cast<ConstantInt>(UnderlyingObject)->isOne(),
428  "Unusual: Address one pointer dereference", &I);
429 
430  if (Flags & MemRef::Write) {
431  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
432  Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
433  &I);
434  Assert(!isa<Function>(UnderlyingObject) &&
435  !isa<BlockAddress>(UnderlyingObject),
436  "Undefined behavior: Write to text section", &I);
437  }
438  if (Flags & MemRef::Read) {
439  Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
440  &I);
441  Assert(!isa<BlockAddress>(UnderlyingObject),
442  "Undefined behavior: Load from block address", &I);
443  }
444  if (Flags & MemRef::Callee) {
445  Assert(!isa<BlockAddress>(UnderlyingObject),
446  "Undefined behavior: Call to block address", &I);
447  }
448  if (Flags & MemRef::Branchee) {
449  Assert(!isa<Constant>(UnderlyingObject) ||
450  isa<BlockAddress>(UnderlyingObject),
451  "Undefined behavior: Branch to non-blockaddress", &I);
452  }
453 
454  // Check for buffer overflows and misalignment.
455  // Only handles memory references that read/write something simple like an
456  // alloca instruction or a global variable.
457  int64_t Offset = 0;
458  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) {
459  // OK, so the access is to a constant offset from Ptr. Check that Ptr is
460  // something we can handle and if so extract the size of this base object
461  // along with its alignment.
462  uint64_t BaseSize = MemoryLocation::UnknownSize;
463  unsigned BaseAlign = 0;
464 
465  if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
466  Type *ATy = AI->getAllocatedType();
467  if (!AI->isArrayAllocation() && ATy->isSized())
468  BaseSize = DL->getTypeAllocSize(ATy);
469  BaseAlign = AI->getAlignment();
470  if (BaseAlign == 0 && ATy->isSized())
471  BaseAlign = DL->getABITypeAlignment(ATy);
472  } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
473  // If the global may be defined differently in another compilation unit
474  // then don't warn about funky memory accesses.
475  if (GV->hasDefinitiveInitializer()) {
476  Type *GTy = GV->getValueType();
477  if (GTy->isSized())
478  BaseSize = DL->getTypeAllocSize(GTy);
479  BaseAlign = GV->getAlignment();
480  if (BaseAlign == 0 && GTy->isSized())
481  BaseAlign = DL->getABITypeAlignment(GTy);
482  }
483  }
484 
485  // Accesses from before the start or after the end of the object are not
486  // defined.
488  BaseSize == MemoryLocation::UnknownSize ||
489  (Offset >= 0 && Offset + Size <= BaseSize),
490  "Undefined behavior: Buffer overflow", &I);
491 
492  // Accesses that say that the memory is more aligned than it is are not
493  // defined.
494  if (Align == 0 && Ty && Ty->isSized())
495  Align = DL->getABITypeAlignment(Ty);
496  Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
497  "Undefined behavior: Memory reference address is misaligned", &I);
498  }
499 }
500 
501 void Lint::visitLoadInst(LoadInst &I) {
502  visitMemoryReference(I, I.getPointerOperand(),
503  DL->getTypeStoreSize(I.getType()), I.getAlignment(),
504  I.getType(), MemRef::Read);
505 }
506 
507 void Lint::visitStoreInst(StoreInst &I) {
508  visitMemoryReference(I, I.getPointerOperand(),
509  DL->getTypeStoreSize(I.getOperand(0)->getType()),
510  I.getAlignment(),
511  I.getOperand(0)->getType(), MemRef::Write);
512 }
513 
514 void Lint::visitXor(BinaryOperator &I) {
515  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
516  "Undefined result: xor(undef, undef)", &I);
517 }
518 
519 void Lint::visitSub(BinaryOperator &I) {
520  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
521  "Undefined result: sub(undef, undef)", &I);
522 }
523 
524 void Lint::visitLShr(BinaryOperator &I) {
525  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
526  /*OffsetOk=*/false)))
527  Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
528  "Undefined result: Shift count out of range", &I);
529 }
530 
531 void Lint::visitAShr(BinaryOperator &I) {
532  if (ConstantInt *CI =
533  dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
534  Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
535  "Undefined result: Shift count out of range", &I);
536 }
537 
538 void Lint::visitShl(BinaryOperator &I) {
539  if (ConstantInt *CI =
540  dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
541  Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
542  "Undefined result: Shift count out of range", &I);
543 }
544 
545 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
546  AssumptionCache *AC) {
547  // Assume undef could be zero.
548  if (isa<UndefValue>(V))
549  return true;
550 
551  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
552  if (!VecTy) {
553  KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
554  return Known.isZero();
555  }
556 
557  // Per-component check doesn't work with zeroinitializer
558  Constant *C = dyn_cast<Constant>(V);
559  if (!C)
560  return false;
561 
562  if (C->isZeroValue())
563  return true;
564 
565  // For a vector, KnownZero will only be true if all values are zero, so check
566  // this per component
567  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
568  Constant *Elem = C->getAggregateElement(I);
569  if (isa<UndefValue>(Elem))
570  return true;
571 
572  KnownBits Known = computeKnownBits(Elem, DL);
573  if (Known.isZero())
574  return true;
575  }
576 
577  return false;
578 }
579 
580 void Lint::visitSDiv(BinaryOperator &I) {
581  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
582  "Undefined behavior: Division by zero", &I);
583 }
584 
585 void Lint::visitUDiv(BinaryOperator &I) {
586  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
587  "Undefined behavior: Division by zero", &I);
588 }
589 
590 void Lint::visitSRem(BinaryOperator &I) {
591  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
592  "Undefined behavior: Division by zero", &I);
593 }
594 
595 void Lint::visitURem(BinaryOperator &I) {
596  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
597  "Undefined behavior: Division by zero", &I);
598 }
599 
600 void Lint::visitAllocaInst(AllocaInst &I) {
601  if (isa<ConstantInt>(I.getArraySize()))
602  // This isn't undefined behavior, it's just an obvious pessimization.
604  "Pessimization: Static alloca outside of entry block", &I);
605 
606  // TODO: Check for an unusual size (MSB set?)
607 }
608 
609 void Lint::visitVAArgInst(VAArgInst &I) {
610  visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, 0,
611  nullptr, MemRef::Read | MemRef::Write);
612 }
613 
614 void Lint::visitIndirectBrInst(IndirectBrInst &I) {
615  visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, 0,
616  nullptr, MemRef::Branchee);
617 
618  Assert(I.getNumDestinations() != 0,
619  "Undefined behavior: indirectbr with no destinations", &I);
620 }
621 
622 void Lint::visitExtractElementInst(ExtractElementInst &I) {
623  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
624  /*OffsetOk=*/false)))
625  Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
626  "Undefined result: extractelement index out of range", &I);
627 }
628 
629 void Lint::visitInsertElementInst(InsertElementInst &I) {
630  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
631  /*OffsetOk=*/false)))
632  Assert(CI->getValue().ult(I.getType()->getNumElements()),
633  "Undefined result: insertelement index out of range", &I);
634 }
635 
636 void Lint::visitUnreachableInst(UnreachableInst &I) {
637  // This isn't undefined behavior, it's merely suspicious.
638  Assert(&I == &I.getParent()->front() ||
639  std::prev(I.getIterator())->mayHaveSideEffects(),
640  "Unusual: unreachable immediately preceded by instruction without "
641  "side effects",
642  &I);
643 }
644 
645 /// findValue - Look through bitcasts and simple memory reference patterns
646 /// to identify an equivalent, but more informative, value. If OffsetOk
647 /// is true, look through getelementptrs with non-zero offsets too.
648 ///
649 /// Most analysis passes don't require this logic, because instcombine
650 /// will simplify most of these kinds of things away. But it's a goal of
651 /// this Lint pass to be useful even on non-optimized IR.
652 Value *Lint::findValue(Value *V, bool OffsetOk) const {
653  SmallPtrSet<Value *, 4> Visited;
654  return findValueImpl(V, OffsetOk, Visited);
655 }
656 
657 /// findValueImpl - Implementation helper for findValue.
658 Value *Lint::findValueImpl(Value *V, bool OffsetOk,
659  SmallPtrSetImpl<Value *> &Visited) const {
660  // Detect self-referential values.
661  if (!Visited.insert(V).second)
662  return UndefValue::get(V->getType());
663 
664  // TODO: Look through sext or zext cast, when the result is known to
665  // be interpreted as signed or unsigned, respectively.
666  // TODO: Look through eliminable cast pairs.
667  // TODO: Look through calls with unique return values.
668  // TODO: Look through vector insert/extract/shuffle.
669  V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
670  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
671  BasicBlock::iterator BBI = L->getIterator();
672  BasicBlock *BB = L->getParent();
673  SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
674  for (;;) {
675  if (!VisitedBlocks.insert(BB).second)
676  break;
677  if (Value *U =
679  return findValueImpl(U, OffsetOk, Visited);
680  if (BBI != BB->begin()) break;
681  BB = BB->getUniquePredecessor();
682  if (!BB) break;
683  BBI = BB->end();
684  }
685  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
686  if (Value *W = PN->hasConstantValue())
687  if (W != V)
688  return findValueImpl(W, OffsetOk, Visited);
689  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
690  if (CI->isNoopCast(*DL))
691  return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
692  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
693  if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
694  Ex->getIndices()))
695  if (W != V)
696  return findValueImpl(W, OffsetOk, Visited);
697  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
698  // Same as above, but for ConstantExpr instead of Instruction.
699  if (Instruction::isCast(CE->getOpcode())) {
700  if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
701  CE->getOperand(0)->getType(), CE->getType(),
702  *DL))
703  return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
704  } else if (CE->getOpcode() == Instruction::ExtractValue) {
705  ArrayRef<unsigned> Indices = CE->getIndices();
706  if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
707  if (W != V)
708  return findValueImpl(W, OffsetOk, Visited);
709  }
710  }
711 
712  // As a last resort, try SimplifyInstruction or constant folding.
713  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
714  if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC}))
715  return findValueImpl(W, OffsetOk, Visited);
716  } else if (auto *C = dyn_cast<Constant>(V)) {
717  if (Value *W = ConstantFoldConstant(C, *DL, TLI))
718  if (W && W != V)
719  return findValueImpl(W, OffsetOk, Visited);
720  }
721 
722  return V;
723 }
724 
725 //===----------------------------------------------------------------------===//
726 // Implement the public interfaces to this file...
727 //===----------------------------------------------------------------------===//
728 
730  return new Lint();
731 }
732 
733 /// lintFunction - Check a function for errors, printing messages on stderr.
734 ///
735 void llvm::lintFunction(const Function &f) {
736  Function &F = const_cast<Function&>(f);
737  assert(!F.isDeclaration() && "Cannot lint external functions");
738 
740  Lint *V = new Lint();
741  FPM.add(V);
742  FPM.run(F);
743 }
744 
745 /// lintModule - Check a module for errors, printing messages on stderr.
746 ///
747 void llvm::lintModule(const Module &M) {
749  Lint *V = new Lint();
750  PM.add(V);
751  PM.run(const_cast<Module&>(M));
752 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:176
IterTy arg_end() const
Definition: CallSite.h:583
uint64_t CallInst * C
Return a value (possibly void), from a function.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
void lintModule(const Module &M)
Check a module.
Definition: Lint.cpp:747
bool hasLocalLinkage() const
Definition: GlobalValue.h:435
This instruction extracts a struct member or array element value from an aggregate value...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
Base class for instruction visitors.
Definition: InstVisitor.h:80
IterTy arg_begin() const
Definition: CallSite.h:579
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:264
static constexpr LocationSize unknown()
This class represents a function call, abstracting a target machine&#39;s calling convention.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
void lintFunction(const Function &F)
lintFunction - Check a function for errors, printing messages on stderr.
Definition: Lint.cpp:735
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned getSourceAlignment() const
A cache of @llvm.assume calls within a function.
static LocationSize precise(uint64_t Value)
This class wraps the llvm.memset intrinsic.
arg_iterator arg_end()
Definition: Function.h:682
F(f)
block Block Frequency true
An instruction for reading from memory.
Definition: Instructions.h:167
Value * getLength() const
#define Assert(C,...)
Definition: Lint.cpp:196
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
CallingConv::ID getCallingConv() const
Get the calling convention of the call.
Definition: CallSite.h:320
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
void add(Pass *P) override
Add a pass to the queue of passes to run.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Implicit null checks
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
This class wraps the llvm.memmove intrinsic.
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AliasAnalysis *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:328
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:104
unsigned getDestAlignment() const
uint64_t getNumElements() const
Definition: DerivedTypes.h:390
This file implements a class to represent arbitrary precision integral constant values and operations...
InstrTy * getInstruction() const
Definition: CallSite.h:96
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Attempt to fold the constant using the specified DataLayout.
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:888
Class to represent function types.
Definition: DerivedTypes.h:102
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
Definition: Lint.cpp:83
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:246
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset...
lint
Definition: Lint.cpp:192
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
An instruction for storing to memory.
Definition: Instructions.h:320
VectorType * getType() const
Overload to return most specific vector type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
Definition: Function.cpp:133
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:344
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:78
bool isZeroValue() const
Return true if the value is negative zero or null value.
Definition: Constants.cpp:64
PassManager manages ModulePassManagers.
const BasicBlock & getEntryBlock() const
Definition: Function.h:642
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:609
static bool runOnFunction(Function &F, bool PostInlining)
This instruction inserts a single (scalar) element into a VectorType value.
bool hasName() const
Definition: Value.h:250
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
static bool isNoopCast(Instruction::CastOps Opcode, Type *SrcTy, Type *DstTy, const DataLayout &DL)
A no-op cast is one that can be effected without changing any bits.
This function has undefined behavior.
This is an important base class in LLVM.
Definition: Constant.h:41
bool hasStructRetAttr() const
Return true if this argument has the sret attribute.
Definition: Function.cpp:143
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
const Instruction & front() const
Definition: BasicBlock.h:280
Indirect Branch Instruction.
AttributeList getAttributes() const
Get the parameter attributes of the call.
Definition: CallSite.h:337
ValTy * getArgument(unsigned ArgNo) const
Definition: CallSite.h:193
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool doesNotReturn() const
Determine if the function cannot return.
Definition: Function.h:508
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Value * getPointerOperand()
Definition: Instructions.h:284
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
arg_iterator arg_begin()
Definition: Function.h:673
self_iterator getIterator()
Definition: ilist_node.h:81
VectorType * getVectorOperandType() const
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:71
FunctionPassManager manages FunctionPasses and BasicBlockPassManagers.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1424
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:529
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
bool isCast() const
Definition: Instruction.h:133
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:92
bool run(Module &M)
run - Execute all of the passes scheduled for execution.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4288
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
iterator_range< IterTy > args() const
Definition: CallSite.h:222
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) INITIALIZE_PASS_END(Lint
Value * FindInsertedValue(Value *V, ArrayRef< unsigned > idx_range, Instruction *InsertBefore=nullptr)
Given an aggregrate and an sequence of indices, see if the scalar value indexed is already around as ...
FunctionPass * createLintPass()
Create a lint pass.
Definition: Lint.cpp:729
The two locations precisely alias each other.
Definition: AliasAnalysis.h:90
Iterator for intrusive lists based on ilist_node.
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
iterator end()
Definition: BasicBlock.h:270
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
unsigned arg_size() const
Definition: CallSite.h:226
void initializeLintPass(PassRegistry &)
This class wraps the llvm.memcpy intrinsic.
User::op_iterator arg_iterator
The type of iterator to use when looping over actual arguments at this call site. ...
Definition: CallSite.h:220
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1280
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
The access may modify the value stored in memory.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:163
Class to represent vector types.
Definition: DerivedTypes.h:424
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
amdgpu Simplify well known AMD library false FunctionCallee Callee
void setPreservesAll()
Set by analyses that do not transform their input at all.
bool isTailCall() const
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:545
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:240
This file provides utility analysis objects describing memory locations.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
This instruction extracts a single (scalar) element from a VectorType value.
uint32_t Size
Definition: Profile.cpp:46
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:365
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:205
bool isCall() const
Return true if a CallInst is enclosed.
Definition: CallSite.h:87
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
Invoke instruction.
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it...
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
The two locations alias, but only due to a partial overlap.
Definition: AliasAnalysis.h:88
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Value * getPointerOperand()
Definition: Instructions.h:412
Statically lint checks LLVM IR
Definition: Lint.cpp:192
#define T1
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
an instruction to allocate memory on the stack
Definition: Instructions.h:59