LCOV - code coverage report
Current view: top level - lib/IR - SafepointIRVerifier.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 152 167 91.0 %
Date: 2017-09-14 15:23:50 Functions: 18 22 81.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Run a sanity check on the IR to ensure that Safepoints - if they've been
      11             : // inserted - were inserted correctly.  In particular, look for use of
      12             : // non-relocated values after a safepoint.  It's primary use is to check the
      13             : // correctness of safepoint insertion immediately after insertion, but it can
      14             : // also be used to verify that later transforms have not found a way to break
      15             : // safepoint semenatics.
      16             : //
      17             : // In its current form, this verify checks a property which is sufficient, but
      18             : // not neccessary for correctness.  There are some cases where an unrelocated
      19             : // pointer can be used after the safepoint.  Consider this example:
      20             : //
      21             : //    a = ...
      22             : //    b = ...
      23             : //    (a',b') = safepoint(a,b)
      24             : //    c = cmp eq a b
      25             : //    br c, ..., ....
      26             : //
      27             : // Because it is valid to reorder 'c' above the safepoint, this is legal.  In
      28             : // practice, this is a somewhat uncommon transform, but CodeGenPrep does create
      29             : // idioms like this.  The verifier knows about these cases and avoids reporting
      30             : // false positives.
      31             : //
      32             : //===----------------------------------------------------------------------===//
      33             : 
      34             : #include "llvm/ADT/DenseSet.h"
      35             : #include "llvm/ADT/SetOperations.h"
      36             : #include "llvm/ADT/SetVector.h"
      37             : #include "llvm/IR/BasicBlock.h"
      38             : #include "llvm/IR/Dominators.h"
      39             : #include "llvm/IR/Function.h"
      40             : #include "llvm/IR/Instructions.h"
      41             : #include "llvm/IR/Intrinsics.h"
      42             : #include "llvm/IR/IntrinsicInst.h"
      43             : #include "llvm/IR/Module.h"
      44             : #include "llvm/IR/Value.h"
      45             : #include "llvm/IR/SafepointIRVerifier.h"
      46             : #include "llvm/IR/Statepoint.h"
      47             : #include "llvm/Support/Debug.h"
      48             : #include "llvm/Support/CommandLine.h"
      49             : #include "llvm/Support/raw_ostream.h"
      50             : 
      51             : #define DEBUG_TYPE "safepoint-ir-verifier"
      52             : 
      53             : using namespace llvm;
      54             : 
      55             : /// This option is used for writing test cases.  Instead of crashing the program
      56             : /// when verification fails, report a message to the console (for FileCheck
      57             : /// usage) and continue execution as if nothing happened.
      58       72306 : static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only",
      59      144612 :                                cl::init(false));
      60             : 
      61             : static void Verify(const Function &F, const DominatorTree &DT);
      62             : 
      63             : namespace {
      64          15 : struct SafepointIRVerifier : public FunctionPass {
      65             :   static char ID; // Pass identification, replacement for typeid
      66             :   DominatorTree DT;
      67          15 :   SafepointIRVerifier() : FunctionPass(ID) {
      68           5 :     initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry());
      69           5 :   }
      70             : 
      71          18 :   bool runOnFunction(Function &F) override {
      72          36 :     DT.recalculate(F);
      73          18 :     Verify(F, DT);
      74          18 :     return false; // no modifications
      75             :   }
      76             : 
      77           5 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      78           5 :     AU.setPreservesAll();
      79           5 :   }
      80             : 
      81           0 :   StringRef getPassName() const override { return "safepoint verifier"; }
      82             : };
      83             : } // namespace
      84             : 
      85           0 : void llvm::verifySafepointIR(Function &F) {
      86           0 :   SafepointIRVerifier pass;
      87           0 :   pass.runOnFunction(F);
      88           0 : }
      89             : 
      90             : char SafepointIRVerifier::ID = 0;
      91             : 
      92           0 : FunctionPass *llvm::createSafepointIRVerifierPass() {
      93           0 :   return new SafepointIRVerifier();
      94             : }
      95             : 
      96       25765 : INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir",
      97             :                       "Safepoint IR Verifier", false, true)
      98      128840 : INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir",
      99             :                     "Safepoint IR Verifier", false, true)
     100             : 
     101             : static bool isGCPointerType(Type *T) {
     102         253 :   if (auto *PT = dyn_cast<PointerType>(T))
     103             :     // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
     104             :     // GC managed heap.  We know that a pointer into this heap needs to be
     105             :     // updated and that no other pointer does.
     106         253 :     return (1 == PT->getAddressSpace());
     107             :   return false;
     108             : }
     109             : 
     110         646 : static bool containsGCPtrType(Type *Ty) {
     111         253 :   if (isGCPointerType(Ty))
     112             :     return true;
     113           0 :   if (VectorType *VT = dyn_cast<VectorType>(Ty))
     114           0 :     return isGCPointerType(VT->getScalarType());
     115           0 :   if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
     116           0 :     return containsGCPtrType(AT->getElementType());
     117           0 :   if (StructType *ST = dyn_cast<StructType>(Ty))
     118           0 :     return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
     119           0 :                        containsGCPtrType);
     120             :   return false;
     121             : }
     122             : 
     123             : // Debugging aid -- prints a [Begin, End) range of values.
     124             : template<typename IteratorTy>
     125             : static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) {
     126             :   OS << "[ ";
     127             :   while (Begin != End) {
     128             :     OS << **Begin << " ";
     129             :     ++Begin;
     130             :   }
     131             :   OS << "]";
     132             : }
     133             : 
     134             : /// The verifier algorithm is phrased in terms of availability.  The set of
     135             : /// values "available" at a given point in the control flow graph is the set of
     136             : /// correctly relocated value at that point, and is a subset of the set of
     137             : /// definitions dominating that point.
     138             : 
     139             : /// State we compute and track per basic block.
     140         312 : struct BasicBlockState {
     141             :   // Set of values available coming in, before the phi nodes
     142             :   DenseSet<const Value *> AvailableIn;
     143             : 
     144             :   // Set of values available going out
     145             :   DenseSet<const Value *> AvailableOut;
     146             : 
     147             :   // AvailableOut minus AvailableIn.
     148             :   // All elements are Instructions
     149             :   DenseSet<const Value *> Contribution;
     150             : 
     151             :   // True if this block contains a safepoint and thus AvailableIn does not
     152             :   // contribute to AvailableOut.
     153             :   bool Cleared = false;
     154             : };
     155             : 
     156             : 
     157             : /// Gather all the definitions dominating the start of BB into Result.  This is
     158             : /// simply the Defs introduced by every dominating basic block and the function
     159             : /// arguments.
     160          39 : static void GatherDominatingDefs(const BasicBlock *BB,
     161             :                                  DenseSet<const Value *> &Result,
     162             :                                  const DominatorTree &DT,
     163             :                     DenseMap<const BasicBlock *, BasicBlockState *> &BlockMap) {
     164          78 :   DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)];
     165             : 
     166          62 :   while (DTN->getIDom()) {
     167          23 :     DTN = DTN->getIDom();
     168          46 :     const auto &Defs = BlockMap[DTN->getBlock()]->Contribution;
     169          69 :     Result.insert(Defs.begin(), Defs.end());
     170             :     // If this block is 'Cleared', then nothing LiveIn to this block can be
     171             :     // available after this block completes.  Note: This turns out to be 
     172             :     // really important for reducing memory consuption of the initial available
     173             :     // sets and thus peak memory usage by this verifier.
     174          46 :     if (BlockMap[DTN->getBlock()]->Cleared)
     175             :       return;
     176             :   }
     177             : 
     178         110 :   for (const Argument &A : BB->getParent()->args())
     179          71 :     if (containsGCPtrType(A.getType()))
     180          76 :       Result.insert(&A);
     181             : }
     182             : 
     183             : /// Model the effect of an instruction on the set of available values.
     184         230 : static void TransferInstruction(const Instruction &I, bool &Cleared,
     185             :                               DenseSet<const Value *> &Available) {
     186         230 :   if (isStatepoint(I)) {
     187          34 :     Cleared = true;
     188          34 :     Available.clear();
     189         196 :   } else if (containsGCPtrType(I.getType()))
     190         204 :     Available.insert(&I);
     191         230 : }
     192             : 
     193             : /// Compute the AvailableOut set for BB, based on the
     194             : /// BasicBlockState BBS, which is the BasicBlockState for BB. FirstPass is set
     195             : /// when the verifier runs for the first time computing the AvailableOut set
     196             : /// for BB.
     197          45 : static void TransferBlock(const BasicBlock *BB,
     198             :                           BasicBlockState &BBS, bool FirstPass) {
     199             : 
     200          45 :   const DenseSet<const Value *> &AvailableIn = BBS.AvailableIn; 
     201          45 :   DenseSet<const Value *> &AvailableOut  = BBS.AvailableOut;
     202             : 
     203          45 :   if (BBS.Cleared) {
     204             :     // AvailableOut does not change no matter how the input changes, just
     205             :     // leave it be.  We need to force this calculation the first time so that
     206             :     // we have a AvailableOut at all.
     207          18 :     if (FirstPass) {
     208          17 :       AvailableOut = BBS.Contribution;
     209             :     }
     210             :   } else {
     211             :     // Otherwise, we need to reduce the AvailableOut set by things which are no
     212             :     // longer in our AvailableIn
     213          81 :     DenseSet<const Value *> Temp = BBS.Contribution;
     214          27 :     set_union(Temp, AvailableIn);
     215          54 :     AvailableOut = std::move(Temp);
     216             :   }
     217             : 
     218             :   DEBUG(dbgs() << "Transfered block " << BB->getName() << " from ";
     219             :         PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end());
     220             :         dbgs() << " to ";
     221             :         PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end());
     222             :         dbgs() << "\n";);
     223          45 : }
     224             : 
     225             : /// A given derived pointer can have multiple base pointers through phi/selects.
     226             : /// This type indicates when the base pointer is exclusively constant
     227             : /// (ExclusivelySomeConstant), and if that constant is proven to be exclusively
     228             : /// null, we record that as ExclusivelyNull. In all other cases, the BaseType is
     229             : /// NonConstant.
     230             : enum BaseType {
     231             :   NonConstant = 1, // Base pointers is not exclusively constant.
     232             :   ExclusivelyNull,
     233             :   ExclusivelySomeConstant // Base pointers for a given derived pointer is from a
     234             :                           // set of constants, but they are not exclusively
     235             :                           // null.
     236             : };
     237             : 
     238             : /// Return the baseType for Val which states whether Val is exclusively
     239             : /// derived from constant/null, or not exclusively derived from constant.
     240             : /// Val is exclusively derived off a constant base when all operands of phi and
     241             : /// selects are derived off a constant base.
     242          89 : static enum BaseType getBaseType(const Value *Val) {
     243             : 
     244         178 :   SmallVector<const Value *, 32> Worklist;
     245         178 :   DenseSet<const Value *> Visited;
     246          89 :   bool isExclusivelyDerivedFromNull = true;
     247          89 :   Worklist.push_back(Val);
     248             :   // Strip through all the bitcasts and geps to get base pointer. Also check for
     249             :   // the exclusive value when there can be multiple base pointers (through phis
     250             :   // or selects).
     251         329 :   while(!Worklist.empty()) {
     252         174 :     const Value *V = Worklist.pop_back_val();
     253         174 :     if (!Visited.insert(V).second)
     254         122 :       continue;
     255             : 
     256         214 :     if (const auto *CI = dyn_cast<CastInst>(V)) {
     257          21 :       Worklist.push_back(CI->stripPointerCasts());
     258          21 :       continue;
     259             :     }
     260         211 :     if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
     261          30 :       Worklist.push_back(GEP->getPointerOperand());
     262          30 :       continue;
     263             :     }
     264             :     // Push all the incoming values of phi node into the worklist for
     265             :     // processing.
     266         146 :     if (const auto *PN = dyn_cast<PHINode>(V)) {
     267          75 :       for (Value *InV: PN->incoming_values())
     268          50 :         Worklist.push_back(InV);
     269          25 :       continue;
     270             :     }
     271         100 :     if (const auto *SI = dyn_cast<SelectInst>(V)) {
     272             :       // Push in the true and false values
     273           2 :       Worklist.push_back(SI->getTrueValue());
     274           2 :       Worklist.push_back(SI->getFalseValue());
     275           2 :       continue;
     276             :     }
     277         134 :     if (isa<Constant>(V)) {
     278             :       // We found at least one base pointer which is non-null, so this derived
     279             :       // pointer is not exclusively derived from null.
     280          40 :       if (V != Constant::getNullValue(V->getType()))
     281          25 :         isExclusivelyDerivedFromNull = false;
     282             :       // Continue processing the remaining values to make sure it's exclusively
     283             :       // constant.
     284          40 :       continue;
     285             :     }
     286             :     // At this point, we know that the base pointer is not exclusively
     287             :     // constant.
     288          54 :     return BaseType::NonConstant;
     289             :   }
     290             :   // Now, we know that the base pointer is exclusively constant, but we need to
     291             :   // differentiate between exclusive null constant and non-null constant.
     292          35 :   return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull
     293             :                                       : BaseType::ExclusivelySomeConstant;
     294             : }
     295             : 
     296          18 : static void Verify(const Function &F, const DominatorTree &DT) {
     297          36 :   SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
     298          36 :   DenseMap<const BasicBlock *, BasicBlockState *> BlockMap;
     299             :  
     300             :   DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n");
     301          18 :   if (PrintOnly)
     302          18 :     dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n";
     303             : 
     304             : 
     305          93 :   for (const BasicBlock &BB : F) {
     306          39 :     BasicBlockState *BBS = new(BSAllocator.Allocate()) BasicBlockState;
     307         232 :     for (const auto &I : BB)
     308         115 :       TransferInstruction(I, BBS->Cleared, BBS->Contribution);
     309          78 :     BlockMap[&BB] = BBS;
     310             :   }
     311             : 
     312          93 :   for (auto &BBI : BlockMap) {
     313          39 :     GatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT, BlockMap);
     314          39 :     TransferBlock(BBI.first, *BBI.second, true);
     315             :   }
     316             : 
     317          36 :   SetVector<const BasicBlock *> Worklist;
     318          93 :   for (auto &BBI : BlockMap)
     319          39 :     Worklist.insert(BBI.first);
     320             : 
     321             :   // This loop iterates the AvailableIn and AvailableOut sets to a fixed point.
     322             :   // The AvailableIn and AvailableOut sets decrease as we iterate.
     323          59 :   while (!Worklist.empty()) {
     324          41 :     const BasicBlock *BB = Worklist.pop_back_val();
     325          41 :     BasicBlockState *BBS = BlockMap[BB];
     326             : 
     327          82 :     size_t OldInCount = BBS->AvailableIn.size();
     328         189 :     for (const BasicBlock *PBB : predecessors(BB))
     329          33 :       set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut);
     330             : 
     331          82 :     if (OldInCount == BBS->AvailableIn.size())
     332          35 :       continue;
     333             : 
     334             :     assert(OldInCount > BBS->AvailableIn.size() && "invariant!");
     335             : 
     336          12 :     size_t OldOutCount = BBS->AvailableOut.size();
     337           6 :     TransferBlock(BB, *BBS, false);
     338          12 :     if (OldOutCount != BBS->AvailableOut.size()) {
     339             :       assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
     340          10 :       Worklist.insert(succ_begin(BB), succ_end(BB));
     341             :     }
     342             :   }
     343             : 
     344             :   // We now have all the information we need to decide if the use of a heap
     345             :   // reference is legal or not, given our safepoint semantics.
     346             : 
     347          18 :   bool AnyInvalidUses = false;
     348             : 
     349             :   auto ReportInvalidUse = [&AnyInvalidUses](const Value &V,
     350          16 :                                             const Instruction &I) {
     351           8 :     errs() << "Illegal use of unrelocated value found!\n";
     352          16 :     errs() << "Def: " << V << "\n";
     353          16 :     errs() << "Use: " << I << "\n";
     354           8 :     if (!PrintOnly)
     355           0 :       abort();
     356           8 :     AnyInvalidUses = true;
     357          26 :   };
     358             : 
     359             :   auto isNotExclusivelyConstantDerived = [](const Value *V) {
     360          77 :     return getBaseType(V) == BaseType::NonConstant;
     361             :   };
     362             : 
     363          93 :   for (const BasicBlock &BB : F) {
     364             :     // We destructively modify AvailableIn as we traverse the block instruction
     365             :     // by instruction.
     366          78 :     DenseSet<const Value *> &AvailableSet = BlockMap[&BB]->AvailableIn;
     367         232 :     for (const Instruction &I : BB) {
     368          15 :       if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
     369          15 :         if (containsGCPtrType(PN->getType()))
     370          60 :           for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     371          30 :             const BasicBlock *InBB = PN->getIncomingBlock(i);
     372          30 :             const Value *InValue = PN->getIncomingValue(i);
     373             : 
     374          60 :             if (isNotExclusivelyConstantDerived(InValue) &&
     375          44 :                 !BlockMap[InBB]->AvailableOut.count(InValue))
     376           3 :               ReportInvalidUse(*InValue, *PN);
     377             :           }
     378           6 :       } else if (isa<CmpInst>(I) &&
     379          12 :                  containsGCPtrType(I.getOperand(0)->getType())) {
     380          18 :         Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
     381           6 :         enum BaseType baseTyLHS = getBaseType(LHS),
     382           6 :                       baseTyRHS = getBaseType(RHS);
     383             : 
     384             :         // Returns true if LHS and RHS are unrelocated pointers and they are
     385             :         // valid unrelocated uses.
     386          34 :         auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS, &RHS] () {
     387             :             // A cmp instruction has valid unrelocated pointer operands only if
     388             :             // both operands are unrelocated pointers.
     389             :             // In the comparison between two pointers, if one is an unrelocated
     390             :             // use, the other *should be* an unrelocated use, for this
     391             :             // instruction to contain valid unrelocated uses. This unrelocated
     392             :             // use can be a null constant as well, or another unrelocated
     393             :             // pointer.
     394          36 :             if (AvailableSet.count(LHS) || AvailableSet.count(RHS))
     395             :               return false;
     396             :             // Constant pointers (that are not exclusively null) may have
     397             :             // meaning in different VMs, so we cannot reorder the compare
     398             :             // against constant pointers before the safepoint. In other words,
     399             :             // comparison of an unrelocated use against a non-null constant
     400             :             // maybe invalid.
     401          10 :             if ((baseTyLHS == BaseType::ExclusivelySomeConstant &&
     402           9 :                  baseTyRHS == BaseType::NonConstant) ||
     403           8 :                 (baseTyLHS == BaseType::NonConstant &&
     404           4 :                  baseTyRHS == BaseType::ExclusivelySomeConstant))
     405             :               return false;
     406             :             // All other cases are valid cases enumerated below:
     407             :             // 1. Comparison between an exlusively derived null pointer and a
     408             :             // constant base pointer.
     409             :             // 2. Comparison between an exlusively derived null pointer and a
     410             :             // non-constant unrelocated base pointer.
     411             :             // 3. Comparison between 2 unrelocated pointers.
     412           3 :             return true;
     413           6 :         };
     414           6 :         if (!hasValidUnrelocatedUse()) {
     415             :           // Print out all non-constant derived pointers that are unrelocated
     416             :           // uses, which are invalid.
     417           5 :           if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS))
     418           2 :             ReportInvalidUse(*LHS, I);
     419           5 :           if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS))
     420           1 :             ReportInvalidUse(*RHS, I);
     421             :         }
     422             :       } else {
     423         546 :         for (const Value *V : I.operands())
     424         405 :           if (containsGCPtrType(V->getType()) &&
     425         453 :               isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V))
     426           2 :             ReportInvalidUse(*V, I);
     427             :       }
     428             : 
     429         115 :       bool Cleared = false;
     430         115 :       TransferInstruction(I, Cleared, AvailableSet);
     431             :       (void)Cleared;
     432             :     }
     433             :   }
     434             : 
     435          18 :   if (PrintOnly && !AnyInvalidUses) {
     436          10 :     dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName()
     437          10 :            << "\n";
     438             :   }
     439      216936 : }

Generated by: LCOV version 1.13