LLVM  6.0.0svn
SafepointIRVerifier.cpp
Go to the documentation of this file.
1 //===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Run a sanity check on the IR to ensure that Safepoints - if they've been
11 // inserted - were inserted correctly. In particular, look for use of
12 // non-relocated values after a safepoint. It's primary use is to check the
13 // correctness of safepoint insertion immediately after insertion, but it can
14 // also be used to verify that later transforms have not found a way to break
15 // safepoint semenatics.
16 //
17 // In its current form, this verify checks a property which is sufficient, but
18 // not neccessary for correctness. There are some cases where an unrelocated
19 // pointer can be used after the safepoint. Consider this example:
20 //
21 // a = ...
22 // b = ...
23 // (a',b') = safepoint(a,b)
24 // c = cmp eq a b
25 // br c, ..., ....
26 //
27 // Because it is valid to reorder 'c' above the safepoint, this is legal. In
28 // practice, this is a somewhat uncommon transform, but CodeGenPrep does create
29 // idioms like this. The verifier knows about these cases and avoids reporting
30 // false positives.
31 //
32 //===----------------------------------------------------------------------===//
33 
34 #include "llvm/ADT/DenseSet.h"
35 #include "llvm/ADT/SetOperations.h"
36 #include "llvm/ADT/SetVector.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/Dominators.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Value.h"
46 #include "llvm/IR/Statepoint.h"
47 #include "llvm/Support/Debug.h"
50 
51 #define DEBUG_TYPE "safepoint-ir-verifier"
52 
53 using namespace llvm;
54 
55 /// This option is used for writing test cases. Instead of crashing the program
56 /// when verification fails, report a message to the console (for FileCheck
57 /// usage) and continue execution as if nothing happened.
58 static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only",
59  cl::init(false));
60 
61 static void Verify(const Function &F, const DominatorTree &DT);
62 
63 namespace {
64 struct SafepointIRVerifier : public FunctionPass {
65  static char ID; // Pass identification, replacement for typeid
66  DominatorTree DT;
67  SafepointIRVerifier() : FunctionPass(ID) {
69  }
70 
71  bool runOnFunction(Function &F) override {
72  DT.recalculate(F);
73  Verify(F, DT);
74  return false; // no modifications
75  }
76 
77  void getAnalysisUsage(AnalysisUsage &AU) const override {
78  AU.setPreservesAll();
79  }
80 
81  StringRef getPassName() const override { return "safepoint verifier"; }
82 };
83 } // namespace
84 
86  SafepointIRVerifier pass;
87  pass.runOnFunction(F);
88 }
89 
91 
93  return new SafepointIRVerifier();
94 }
95 
96 INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir",
97  "Safepoint IR Verifier", false, true)
98 INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir",
99  "Safepoint IR Verifier", false, true)
100 
101 static bool isGCPointerType(Type *T) {
102  if (auto *PT = dyn_cast<PointerType>(T))
103  // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
104  // GC managed heap. We know that a pointer into this heap needs to be
105  // updated and that no other pointer does.
106  return (1 == PT->getAddressSpace());
107  return false;
108 }
109 
110 static bool containsGCPtrType(Type *Ty) {
111  if (isGCPointerType(Ty))
112  return true;
113  if (VectorType *VT = dyn_cast<VectorType>(Ty))
114  return isGCPointerType(VT->getScalarType());
115  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
116  return containsGCPtrType(AT->getElementType());
117  if (StructType *ST = dyn_cast<StructType>(Ty))
118  return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
120  return false;
121 }
122 
123 // Debugging aid -- prints a [Begin, End) range of values.
124 template<typename IteratorTy>
125 static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) {
126  OS << "[ ";
127  while (Begin != End) {
128  OS << **Begin << " ";
129  ++Begin;
130  }
131  OS << "]";
132 }
133 
134 /// The verifier algorithm is phrased in terms of availability. The set of
135 /// values "available" at a given point in the control flow graph is the set of
136 /// correctly relocated value at that point, and is a subset of the set of
137 /// definitions dominating that point.
138 
139 /// State we compute and track per basic block.
141  // Set of values available coming in, before the phi nodes
143 
144  // Set of values available going out
146 
147  // AvailableOut minus AvailableIn.
148  // All elements are Instructions
150 
151  // True if this block contains a safepoint and thus AvailableIn does not
152  // contribute to AvailableOut.
153  bool Cleared = false;
154 };
155 
156 
157 /// Gather all the definitions dominating the start of BB into Result. This is
158 /// simply the Defs introduced by every dominating basic block and the function
159 /// arguments.
160 static void GatherDominatingDefs(const BasicBlock *BB,
161  DenseSet<const Value *> &Result,
162  const DominatorTree &DT,
164  DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)];
165 
166  while (DTN->getIDom()) {
167  DTN = DTN->getIDom();
168  const auto &Defs = BlockMap[DTN->getBlock()]->Contribution;
169  Result.insert(Defs.begin(), Defs.end());
170  // If this block is 'Cleared', then nothing LiveIn to this block can be
171  // available after this block completes. Note: This turns out to be
172  // really important for reducing memory consuption of the initial available
173  // sets and thus peak memory usage by this verifier.
174  if (BlockMap[DTN->getBlock()]->Cleared)
175  return;
176  }
177 
178  for (const Argument &A : BB->getParent()->args())
179  if (containsGCPtrType(A.getType()))
180  Result.insert(&A);
181 }
182 
183 /// Model the effect of an instruction on the set of available values.
184 static void TransferInstruction(const Instruction &I, bool &Cleared,
185  DenseSet<const Value *> &Available) {
186  if (isStatepoint(I)) {
187  Cleared = true;
188  Available.clear();
189  } else if (containsGCPtrType(I.getType()))
190  Available.insert(&I);
191 }
192 
193 /// Compute the AvailableOut set for BB, based on the
194 /// BasicBlockState BBS, which is the BasicBlockState for BB. FirstPass is set
195 /// when the verifier runs for the first time computing the AvailableOut set
196 /// for BB.
197 static void TransferBlock(const BasicBlock *BB,
198  BasicBlockState &BBS, bool FirstPass) {
199 
200  const DenseSet<const Value *> &AvailableIn = BBS.AvailableIn;
201  DenseSet<const Value *> &AvailableOut = BBS.AvailableOut;
202 
203  if (BBS.Cleared) {
204  // AvailableOut does not change no matter how the input changes, just
205  // leave it be. We need to force this calculation the first time so that
206  // we have a AvailableOut at all.
207  if (FirstPass) {
208  AvailableOut = BBS.Contribution;
209  }
210  } else {
211  // Otherwise, we need to reduce the AvailableOut set by things which are no
212  // longer in our AvailableIn
214  set_union(Temp, AvailableIn);
215  AvailableOut = std::move(Temp);
216  }
217 
218  DEBUG(dbgs() << "Transfered block " << BB->getName() << " from ";
219  PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end());
220  dbgs() << " to ";
221  PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end());
222  dbgs() << "\n";);
223 }
224 
225 /// A given derived pointer can have multiple base pointers through phi/selects.
226 /// This type indicates when the base pointer is exclusively constant
227 /// (ExclusivelySomeConstant), and if that constant is proven to be exclusively
228 /// null, we record that as ExclusivelyNull. In all other cases, the BaseType is
229 /// NonConstant.
230 enum BaseType {
231  NonConstant = 1, // Base pointers is not exclusively constant.
233  ExclusivelySomeConstant // Base pointers for a given derived pointer is from a
234  // set of constants, but they are not exclusively
235  // null.
236 };
237 
238 /// Return the baseType for Val which states whether Val is exclusively
239 /// derived from constant/null, or not exclusively derived from constant.
240 /// Val is exclusively derived off a constant base when all operands of phi and
241 /// selects are derived off a constant base.
242 static enum BaseType getBaseType(const Value *Val) {
243 
245  DenseSet<const Value *> Visited;
246  bool isExclusivelyDerivedFromNull = true;
247  Worklist.push_back(Val);
248  // Strip through all the bitcasts and geps to get base pointer. Also check for
249  // the exclusive value when there can be multiple base pointers (through phis
250  // or selects).
251  while(!Worklist.empty()) {
252  const Value *V = Worklist.pop_back_val();
253  if (!Visited.insert(V).second)
254  continue;
255 
256  if (const auto *CI = dyn_cast<CastInst>(V)) {
257  Worklist.push_back(CI->stripPointerCasts());
258  continue;
259  }
260  if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
261  Worklist.push_back(GEP->getPointerOperand());
262  continue;
263  }
264  // Push all the incoming values of phi node into the worklist for
265  // processing.
266  if (const auto *PN = dyn_cast<PHINode>(V)) {
267  for (Value *InV: PN->incoming_values())
268  Worklist.push_back(InV);
269  continue;
270  }
271  if (const auto *SI = dyn_cast<SelectInst>(V)) {
272  // Push in the true and false values
273  Worklist.push_back(SI->getTrueValue());
274  Worklist.push_back(SI->getFalseValue());
275  continue;
276  }
277  if (isa<Constant>(V)) {
278  // We found at least one base pointer which is non-null, so this derived
279  // pointer is not exclusively derived from null.
280  if (V != Constant::getNullValue(V->getType()))
281  isExclusivelyDerivedFromNull = false;
282  // Continue processing the remaining values to make sure it's exclusively
283  // constant.
284  continue;
285  }
286  // At this point, we know that the base pointer is not exclusively
287  // constant.
288  return BaseType::NonConstant;
289  }
290  // Now, we know that the base pointer is exclusively constant, but we need to
291  // differentiate between exclusive null constant and non-null constant.
292  return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull
294 }
295 
296 static void Verify(const Function &F, const DominatorTree &DT) {
299 
300  DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n");
301  if (PrintOnly)
302  dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n";
303 
304 
305  for (const BasicBlock &BB : F) {
306  BasicBlockState *BBS = new(BSAllocator.Allocate()) BasicBlockState;
307  for (const auto &I : BB)
309  BlockMap[&BB] = BBS;
310  }
311 
312  for (auto &BBI : BlockMap) {
313  GatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT, BlockMap);
314  TransferBlock(BBI.first, *BBI.second, true);
315  }
316 
318  for (auto &BBI : BlockMap)
319  Worklist.insert(BBI.first);
320 
321  // This loop iterates the AvailableIn and AvailableOut sets to a fixed point.
322  // The AvailableIn and AvailableOut sets decrease as we iterate.
323  while (!Worklist.empty()) {
324  const BasicBlock *BB = Worklist.pop_back_val();
325  BasicBlockState *BBS = BlockMap[BB];
326 
327  size_t OldInCount = BBS->AvailableIn.size();
328  for (const BasicBlock *PBB : predecessors(BB))
329  set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut);
330 
331  if (OldInCount == BBS->AvailableIn.size())
332  continue;
333 
334  assert(OldInCount > BBS->AvailableIn.size() && "invariant!");
335 
336  size_t OldOutCount = BBS->AvailableOut.size();
337  TransferBlock(BB, *BBS, false);
338  if (OldOutCount != BBS->AvailableOut.size()) {
339  assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
340  Worklist.insert(succ_begin(BB), succ_end(BB));
341  }
342  }
343 
344  // We now have all the information we need to decide if the use of a heap
345  // reference is legal or not, given our safepoint semantics.
346 
347  bool AnyInvalidUses = false;
348 
349  auto ReportInvalidUse = [&AnyInvalidUses](const Value &V,
350  const Instruction &I) {
351  errs() << "Illegal use of unrelocated value found!\n";
352  errs() << "Def: " << V << "\n";
353  errs() << "Use: " << I << "\n";
354  if (!PrintOnly)
355  abort();
356  AnyInvalidUses = true;
357  };
358 
359  auto isNotExclusivelyConstantDerived = [](const Value *V) {
360  return getBaseType(V) == BaseType::NonConstant;
361  };
362 
363  for (const BasicBlock &BB : F) {
364  // We destructively modify AvailableIn as we traverse the block instruction
365  // by instruction.
366  DenseSet<const Value *> &AvailableSet = BlockMap[&BB]->AvailableIn;
367  for (const Instruction &I : BB) {
368  if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
369  if (containsGCPtrType(PN->getType()))
370  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
371  const BasicBlock *InBB = PN->getIncomingBlock(i);
372  const Value *InValue = PN->getIncomingValue(i);
373 
374  if (isNotExclusivelyConstantDerived(InValue) &&
375  !BlockMap[InBB]->AvailableOut.count(InValue))
376  ReportInvalidUse(*InValue, *PN);
377  }
378  } else if (isa<CmpInst>(I) &&
379  containsGCPtrType(I.getOperand(0)->getType())) {
380  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
381  enum BaseType baseTyLHS = getBaseType(LHS),
382  baseTyRHS = getBaseType(RHS);
383 
384  // Returns true if LHS and RHS are unrelocated pointers and they are
385  // valid unrelocated uses.
386  auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS, &RHS] () {
387  // A cmp instruction has valid unrelocated pointer operands only if
388  // both operands are unrelocated pointers.
389  // In the comparison between two pointers, if one is an unrelocated
390  // use, the other *should be* an unrelocated use, for this
391  // instruction to contain valid unrelocated uses. This unrelocated
392  // use can be a null constant as well, or another unrelocated
393  // pointer.
394  if (AvailableSet.count(LHS) || AvailableSet.count(RHS))
395  return false;
396  // Constant pointers (that are not exclusively null) may have
397  // meaning in different VMs, so we cannot reorder the compare
398  // against constant pointers before the safepoint. In other words,
399  // comparison of an unrelocated use against a non-null constant
400  // maybe invalid.
401  if ((baseTyLHS == BaseType::ExclusivelySomeConstant &&
402  baseTyRHS == BaseType::NonConstant) ||
403  (baseTyLHS == BaseType::NonConstant &&
404  baseTyRHS == BaseType::ExclusivelySomeConstant))
405  return false;
406  // All other cases are valid cases enumerated below:
407  // 1. Comparison between an exlusively derived null pointer and a
408  // constant base pointer.
409  // 2. Comparison between an exlusively derived null pointer and a
410  // non-constant unrelocated base pointer.
411  // 3. Comparison between 2 unrelocated pointers.
412  return true;
413  };
414  if (!hasValidUnrelocatedUse()) {
415  // Print out all non-constant derived pointers that are unrelocated
416  // uses, which are invalid.
417  if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS))
418  ReportInvalidUse(*LHS, I);
419  if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS))
420  ReportInvalidUse(*RHS, I);
421  }
422  } else {
423  for (const Value *V : I.operands())
424  if (containsGCPtrType(V->getType()) &&
425  isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V))
426  ReportInvalidUse(*V, I);
427  }
428 
429  bool Cleared = false;
430  TransferInstruction(I, Cleared, AvailableSet);
431  (void)Cleared;
432  }
433  }
434 
435  if (PrintOnly && !AnyInvalidUses) {
436  dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName()
437  << "\n";
438  }
439 }
static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End)
Safe Stack instrumentation pass
Definition: SafeStack.cpp:846
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
LLVM_NODISCARD T pop_back_val()
Definition: SetVector.h:228
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void initializeSafepointIRVerifierPass(PassRegistry &)
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
static enum BaseType getBaseType(const Value *Val)
Return the baseType for Val which states whether Val is exclusively derived from constant/null, or not exclusively derived from constant.
F(f)
Hexagon Common GEP
DenseSet< const Value * > AvailableIn
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:207
static void TransferBlock(const BasicBlock *BB, BasicBlockState &BBS, bool FirstPass)
Compute the AvailableOut set for BB, based on the BasicBlockState BBS, which is the BasicBlockState f...
Class to represent struct types.
Definition: DerivedTypes.h:201
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:103
DenseSet< const Value * > AvailableOut
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
Class to represent array types.
Definition: DerivedTypes.h:369
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:106
static void Verify(const Function &F, const DominatorTree &DT)
The verifier algorithm is phrased in terms of availability.
NodeT * getBlock() const
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
void verifySafepointIR(Function &F)
Run the safepoint verifier over a single function. Crashes on failure.
FunctionPass * createSafepointIRVerifierPass()
Create an instance of the safepoint verifier pass which can be added to a pass pipeline to check for ...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:40
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
verify safepoint Safepoint IR static true bool isGCPointerType(Type *T)
DomTreeNodeBase * getIDom() const
static bool containsGCPtrType(Type *Ty)
Represent the analysis usage information of a pass.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:774
static const unsigned End
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
Definition: Allocator.h:434
DenseSet< const Value * > Contribution
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool verify(const TargetRegisterInfo &TRI) const
Check that information hold by this instance make sense for the given TRI.
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
static void GatherDominatingDefs(const BasicBlock *BB, DenseSet< const Value *> &Result, const DominatorTree &DT, DenseMap< const BasicBlock *, BasicBlockState *> &BlockMap)
Gather all the definitions dominating the start of BB into Result.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
size_type size() const
Definition: DenseSet.h:75
INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir", "Safepoint IR Verifier", false, true) INITIALIZE_PASS_END(SafepointIRVerifier
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:110
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:385
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Class to represent vector types.
Definition: DerivedTypes.h:393
void setPreservesAll()
Set by analyses that do not transform their input at all.
Basic Alias true
static cl::opt< bool > PrintOnly("safepoint-ir-verifier-print-only", cl::init(false))
This option is used for writing test cases.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
verify safepoint Safepoint IR Verifier
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
static void TransferInstruction(const Instruction &I, bool &Cleared, DenseSet< const Value *> &Available)
Model the effect of an instruction on the set of available values.
bool isStatepoint(ImmutableCallSite CS)
Definition: Statepoint.cpp:27
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
A vector that has set insertion semantics.
Definition: SetVector.h:41
verify safepoint ir
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
Definition: SetOperations.h:23
Statically lint checks LLVM IR
Definition: Lint.cpp:193
iterator_range< arg_iterator > args()
Definition: Function.h:621