LCOV - code coverage report
Current view: top level - lib/IR - SafepointIRVerifier.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 181 226 80.1 %
Date: 2018-10-20 13:21:21 Functions: 19 28 67.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Run a sanity check on the IR to ensure that Safepoints - if they've been
      11             : // inserted - were inserted correctly.  In particular, look for use of
      12             : // non-relocated values after a safepoint.  It's primary use is to check the
      13             : // correctness of safepoint insertion immediately after insertion, but it can
      14             : // also be used to verify that later transforms have not found a way to break
      15             : // safepoint semenatics.
      16             : //
      17             : // In its current form, this verify checks a property which is sufficient, but
      18             : // not neccessary for correctness.  There are some cases where an unrelocated
      19             : // pointer can be used after the safepoint.  Consider this example:
      20             : //
      21             : //    a = ...
      22             : //    b = ...
      23             : //    (a',b') = safepoint(a,b)
      24             : //    c = cmp eq a b
      25             : //    br c, ..., ....
      26             : //
      27             : // Because it is valid to reorder 'c' above the safepoint, this is legal.  In
      28             : // practice, this is a somewhat uncommon transform, but CodeGenPrep does create
      29             : // idioms like this.  The verifier knows about these cases and avoids reporting
      30             : // false positives.
      31             : //
      32             : //===----------------------------------------------------------------------===//
      33             : 
      34             : #include "llvm/ADT/DenseSet.h"
      35             : #include "llvm/ADT/PostOrderIterator.h"
      36             : #include "llvm/ADT/SetOperations.h"
      37             : #include "llvm/ADT/SetVector.h"
      38             : #include "llvm/IR/BasicBlock.h"
      39             : #include "llvm/IR/Dominators.h"
      40             : #include "llvm/IR/Function.h"
      41             : #include "llvm/IR/Instructions.h"
      42             : #include "llvm/IR/Intrinsics.h"
      43             : #include "llvm/IR/IntrinsicInst.h"
      44             : #include "llvm/IR/Module.h"
      45             : #include "llvm/IR/Value.h"
      46             : #include "llvm/IR/SafepointIRVerifier.h"
      47             : #include "llvm/IR/Statepoint.h"
      48             : #include "llvm/Support/Debug.h"
      49             : #include "llvm/Support/CommandLine.h"
      50             : #include "llvm/Support/raw_ostream.h"
      51             : 
      52             : #define DEBUG_TYPE "safepoint-ir-verifier"
      53             : 
      54             : using namespace llvm;
      55             : 
      56             : /// This option is used for writing test cases.  Instead of crashing the program
      57             : /// when verification fails, report a message to the console (for FileCheck
      58             : /// usage) and continue execution as if nothing happened.
      59             : static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only",
      60             :                                cl::init(false));
      61             : 
      62             : namespace {
      63             : 
      64             : /// This CFG Deadness finds dead blocks and edges. Algorithm starts with a set
      65             : /// of blocks unreachable from entry then propagates deadness using foldable
      66             : /// conditional branches without modifying CFG. So GVN does but it changes CFG
      67             : /// by splitting critical edges. In most cases passes rely on SimplifyCFG to
      68             : /// clean up dead blocks, but in some cases, like verification or loop passes
      69             : /// it's not possible.
      70          34 : class CFGDeadness {
      71             :   const DominatorTree *DT = nullptr;
      72             :   SetVector<const BasicBlock *> DeadBlocks;
      73             :   SetVector<const Use *> DeadEdges; // Contains all dead edges from live blocks.
      74             : 
      75             : public:
      76             :   /// Return the edge that coresponds to the predecessor.
      77         136 :   static const Use& getEdge(const_pred_iterator &PredIt) {
      78             :     auto &PU = PredIt.getUse();
      79         136 :     return PU.getUser()->getOperandUse(PU.getOperandNo());
      80             :   }
      81             : 
      82             :   /// Return true if there is at least one live edge that corresponds to the
      83             :   /// basic block InBB listed in the phi node.
      84          76 :   bool hasLiveIncomingEdge(const PHINode *PN, const BasicBlock *InBB) const {
      85             :     assert(!isDeadBlock(InBB) && "block must be live");
      86          76 :     const BasicBlock* BB = PN->getParent();
      87             :     bool Listed = false;
      88         113 :     for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
      89         113 :       if (InBB == *PredIt) {
      90          76 :         if (!isDeadEdge(&getEdge(PredIt)))
      91          76 :           return true;
      92             :         Listed = true;
      93             :       }
      94             :     }
      95             :     (void)Listed;
      96             :     assert(Listed && "basic block is not found among incoming blocks");
      97           0 :     return false;
      98             :   }
      99             : 
     100             : 
     101             :   bool isDeadBlock(const BasicBlock *BB) const {
     102             :     return DeadBlocks.count(BB);
     103             :   }
     104             : 
     105             :   bool isDeadEdge(const Use *U) const {
     106             :     assert(dyn_cast<Instruction>(U->getUser())->isTerminator() &&
     107             :            "edge must be operand of terminator");
     108             :     assert(cast_or_null<BasicBlock>(U->get()) &&
     109             :            "edge must refer to basic block");
     110             :     assert(!isDeadBlock(dyn_cast<Instruction>(U->getUser())->getParent()) &&
     111             :            "isDeadEdge() must be applied to edge from live block");
     112             :     return DeadEdges.count(U);
     113             :   }
     114             : 
     115           0 :   bool hasLiveIncomingEdges(const BasicBlock *BB) const {
     116             :     // Check if all incoming edges are dead.
     117           0 :     for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
     118             :       auto &PU = PredIt.getUse();
     119           0 :       const Use &U = PU.getUser()->getOperandUse(PU.getOperandNo());
     120             :       if (!isDeadBlock(*PredIt) && !isDeadEdge(&U))
     121           0 :         return true; // Found a live edge.
     122             :     }
     123           0 :     return false;
     124             :   }
     125             : 
     126          34 :   void processFunction(const Function &F, const DominatorTree &DT) {
     127          34 :     this->DT = &DT;
     128             : 
     129             :     // Start with all blocks unreachable from entry.
     130         111 :     for (const BasicBlock &BB : F)
     131          77 :       if (!DT.isReachableFromEntry(&BB))
     132           2 :         DeadBlocks.insert(&BB);
     133             : 
     134             :     // Top-down walk of the dominator tree
     135             :     ReversePostOrderTraversal<const Function *> RPOT(&F);
     136         109 :     for (const BasicBlock *BB : RPOT) {
     137          75 :       const Instruction *TI = BB->getTerminator();
     138             :       assert(TI && "blocks must be well formed");
     139             : 
     140             :       // For conditional branches, we can perform simple conditional propagation on
     141             :       // the condition value itself.
     142             :       const BranchInst *BI = dyn_cast<BranchInst>(TI);
     143          42 :       if (!BI || !BI->isConditional() || !isa<Constant>(BI->getCondition()))
     144             :         continue;
     145             : 
     146             :       // If a branch has two identical successors, we cannot declare either dead.
     147          10 :       if (BI->getSuccessor(0) == BI->getSuccessor(1))
     148             :         continue;
     149             : 
     150             :       ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
     151             :       if (!Cond)
     152             :         continue;
     153             : 
     154           0 :       addDeadEdge(BI->getOperandUse(Cond->getZExtValue() ? 1 : 2));
     155             :     }
     156          34 :   }
     157             : 
     158             : protected:
     159           0 :   void addDeadBlock(const BasicBlock *BB) {
     160             :     SmallVector<const BasicBlock *, 4> NewDead;
     161             :     SmallSetVector<const BasicBlock *, 4> DF;
     162             : 
     163           0 :     NewDead.push_back(BB);
     164           0 :     while (!NewDead.empty()) {
     165             :       const BasicBlock *D = NewDead.pop_back_val();
     166             :       if (isDeadBlock(D))
     167           0 :         continue;
     168             : 
     169             :       // All blocks dominated by D are dead.
     170             :       SmallVector<BasicBlock *, 8> Dom;
     171           0 :       DT->getDescendants(const_cast<BasicBlock*>(D), Dom);
     172             :       // Do not need to mark all in and out edges dead
     173             :       // because BB is marked dead and this is enough
     174             :       // to run further.
     175           0 :       DeadBlocks.insert(Dom.begin(), Dom.end());
     176             : 
     177             :       // Figure out the dominance-frontier(D).
     178           0 :       for (BasicBlock *B : Dom)
     179           0 :         for (BasicBlock *S : successors(B))
     180           0 :           if (!isDeadBlock(S) && !hasLiveIncomingEdges(S))
     181           0 :             NewDead.push_back(S);
     182             :     }
     183           0 :   }
     184             : 
     185           0 :   void addDeadEdge(const Use &DeadEdge) {
     186           0 :     if (!DeadEdges.insert(&DeadEdge))
     187             :       return;
     188             : 
     189           0 :     BasicBlock *BB = cast_or_null<BasicBlock>(DeadEdge.get());
     190           0 :     if (hasLiveIncomingEdges(BB))
     191             :       return;
     192             : 
     193           0 :     addDeadBlock(BB);
     194             :   }
     195             : };
     196             : } // namespace
     197             : 
     198             : static void Verify(const Function &F, const DominatorTree &DT,
     199             :                    const CFGDeadness &CD);
     200             : 
     201             : namespace {
     202             : 
     203             : struct SafepointIRVerifier : public FunctionPass {
     204             :   static char ID; // Pass identification, replacement for typeid
     205           8 :   SafepointIRVerifier() : FunctionPass(ID) {
     206           8 :     initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry());
     207             :   }
     208             : 
     209          34 :   bool runOnFunction(Function &F) override {
     210          34 :     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     211          34 :     CFGDeadness CD;
     212          34 :     CD.processFunction(F, DT);
     213          34 :     Verify(F, DT, CD);
     214          34 :     return false; // no modifications
     215             :   }
     216             : 
     217           8 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     218           8 :     AU.addRequiredID(DominatorTreeWrapperPass::ID);
     219             :     AU.setPreservesAll();
     220           8 :   }
     221             : 
     222           0 :   StringRef getPassName() const override { return "safepoint verifier"; }
     223             : };
     224             : } // namespace
     225             : 
     226           0 : void llvm::verifySafepointIR(Function &F) {
     227             :   SafepointIRVerifier pass;
     228           0 :   pass.runOnFunction(F);
     229           0 : }
     230             : 
     231             : char SafepointIRVerifier::ID = 0;
     232             : 
     233           0 : FunctionPass *llvm::createSafepointIRVerifierPass() {
     234           0 :   return new SafepointIRVerifier();
     235             : }
     236             : 
     237       32127 : INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir",
     238             :                       "Safepoint IR Verifier", false, false)
     239       32127 : INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
     240       64262 : INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir",
     241             :                     "Safepoint IR Verifier", false, false)
     242             : 
     243             : static bool isGCPointerType(Type *T) {
     244             :   if (auto *PT = dyn_cast<PointerType>(T))
     245             :     // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
     246             :     // GC managed heap.  We know that a pointer into this heap needs to be
     247             :     // updated and that no other pointer does.
     248           0 :     return (1 == PT->getAddressSpace());
     249             :   return false;
     250             : }
     251             : 
     252        1560 : static bool containsGCPtrType(Type *Ty) {
     253         663 :   if (isGCPointerType(Ty))
     254             :     return true;
     255             :   if (VectorType *VT = dyn_cast<VectorType>(Ty))
     256             :     return isGCPointerType(VT->getScalarType());
     257             :   if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
     258           0 :     return containsGCPtrType(AT->getElementType());
     259             :   if (StructType *ST = dyn_cast<StructType>(Ty))
     260             :     return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
     261             :                        containsGCPtrType);
     262             :   return false;
     263             : }
     264             : 
     265             : // Debugging aid -- prints a [Begin, End) range of values.
     266             : template<typename IteratorTy>
     267             : static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) {
     268             :   OS << "[ ";
     269             :   while (Begin != End) {
     270             :     OS << **Begin << " ";
     271             :     ++Begin;
     272             :   }
     273             :   OS << "]";
     274             : }
     275             : 
     276             : /// The verifier algorithm is phrased in terms of availability.  The set of
     277             : /// values "available" at a given point in the control flow graph is the set of
     278             : /// correctly relocated value at that point, and is a subset of the set of
     279             : /// definitions dominating that point.
     280             : 
     281             : using AvailableValueSet = DenseSet<const Value *>;
     282             : 
     283             : /// State we compute and track per basic block.
     284             : struct BasicBlockState {
     285             :   // Set of values available coming in, before the phi nodes
     286             :   AvailableValueSet AvailableIn;
     287             : 
     288             :   // Set of values available going out
     289             :   AvailableValueSet AvailableOut;
     290             : 
     291             :   // AvailableOut minus AvailableIn.
     292             :   // All elements are Instructions
     293             :   AvailableValueSet Contribution;
     294             : 
     295             :   // True if this block contains a safepoint and thus AvailableIn does not
     296             :   // contribute to AvailableOut.
     297             :   bool Cleared = false;
     298             : };
     299             : 
     300             : /// A given derived pointer can have multiple base pointers through phi/selects.
     301             : /// This type indicates when the base pointer is exclusively constant
     302             : /// (ExclusivelySomeConstant), and if that constant is proven to be exclusively
     303             : /// null, we record that as ExclusivelyNull. In all other cases, the BaseType is
     304             : /// NonConstant.
     305             : enum BaseType {
     306             :   NonConstant = 1, // Base pointers is not exclusively constant.
     307             :   ExclusivelyNull,
     308             :   ExclusivelySomeConstant // Base pointers for a given derived pointer is from a
     309             :                           // set of constants, but they are not exclusively
     310             :                           // null.
     311             : };
     312             : 
     313             : /// Return the baseType for Val which states whether Val is exclusively
     314             : /// derived from constant/null, or not exclusively derived from constant.
     315             : /// Val is exclusively derived off a constant base when all operands of phi and
     316             : /// selects are derived off a constant base.
     317         247 : static enum BaseType getBaseType(const Value *Val) {
     318             : 
     319             :   SmallVector<const Value *, 32> Worklist;
     320             :   DenseSet<const Value *> Visited;
     321             :   bool isExclusivelyDerivedFromNull = true;
     322         247 :   Worklist.push_back(Val);
     323             :   // Strip through all the bitcasts and geps to get base pointer. Also check for
     324             :   // the exclusive value when there can be multiple base pointers (through phis
     325             :   // or selects).
     326         510 :   while(!Worklist.empty()) {
     327         454 :     const Value *V = Worklist.pop_back_val();
     328         454 :     if (!Visited.insert(V).second)
     329         263 :       continue;
     330             : 
     331         452 :     if (const auto *CI = dyn_cast<CastInst>(V)) {
     332          50 :       Worklist.push_back(CI->stripPointerCasts());
     333          50 :       continue;
     334             :     }
     335             :     if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
     336         105 :       Worklist.push_back(GEP->getPointerOperand());
     337         105 :       continue;
     338             :     }
     339             :     // Push all the incoming values of phi node into the worklist for
     340             :     // processing.
     341             :     if (const auto *PN = dyn_cast<PHINode>(V)) {
     342         126 :       for (Value *InV: PN->incoming_values())
     343          84 :         Worklist.push_back(InV);
     344             :       continue;
     345             :     }
     346             :     if (const auto *SI = dyn_cast<SelectInst>(V)) {
     347             :       // Push in the true and false values
     348           2 :       Worklist.push_back(SI->getTrueValue());
     349           2 :       Worklist.push_back(SI->getFalseValue());
     350           2 :       continue;
     351             :     }
     352         253 :     if (isa<Constant>(V)) {
     353             :       // We found at least one base pointer which is non-null, so this derived
     354             :       // pointer is not exclusively derived from null.
     355          62 :       if (V != Constant::getNullValue(V->getType()))
     356             :         isExclusivelyDerivedFromNull = false;
     357             :       // Continue processing the remaining values to make sure it's exclusively
     358             :       // constant.
     359          62 :       continue;
     360             :     }
     361             :     // At this point, we know that the base pointer is not exclusively
     362             :     // constant.
     363         191 :     return BaseType::NonConstant;
     364             :   }
     365             :   // Now, we know that the base pointer is exclusively constant, but we need to
     366             :   // differentiate between exclusive null constant and non-null constant.
     367          56 :   return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull
     368             :                                       : BaseType::ExclusivelySomeConstant;
     369             : }
     370             : 
     371             : static bool isNotExclusivelyConstantDerived(const Value *V) {
     372         209 :   return getBaseType(V) == BaseType::NonConstant;
     373             : }
     374             : 
     375             : namespace {
     376             : class InstructionVerifier;
     377             : 
     378             : /// Builds BasicBlockState for each BB of the function.
     379             : /// It can traverse function for verification and provides all required
     380             : /// information.
     381             : ///
     382             : /// GC pointer may be in one of three states: relocated, unrelocated and
     383             : /// poisoned.
     384             : /// Relocated pointer may be used without any restrictions.
     385             : /// Unrelocated pointer cannot be dereferenced, passed as argument to any call
     386             : /// or returned. Unrelocated pointer may be safely compared against another
     387             : /// unrelocated pointer or against a pointer exclusively derived from null.
     388             : /// Poisoned pointers are produced when we somehow derive pointer from relocated
     389             : /// and unrelocated pointers (e.g. phi, select). This pointers may be safely
     390             : /// used in a very limited number of situations. Currently the only way to use
     391             : /// it is comparison against constant exclusively derived from null. All
     392             : /// limitations arise due to their undefined state: this pointers should be
     393             : /// treated as relocated and unrelocated simultaneously.
     394             : /// Rules of deriving:
     395             : /// R + U = P - that's where the poisoned pointers come from
     396             : /// P + X = P
     397             : /// U + U = U
     398             : /// R + R = R
     399             : /// X + C = X
     400             : /// Where "+" - any operation that somehow derive pointer, U - unrelocated,
     401             : /// R - relocated and P - poisoned, C - constant, X - U or R or P or C or
     402             : /// nothing (in case when "+" is unary operation).
     403             : /// Deriving of pointers by itself is always safe.
     404             : /// NOTE: when we are making decision on the status of instruction's result:
     405             : /// a) for phi we need to check status of each input *at the end of
     406             : ///    corresponding predecessor BB*.
     407             : /// b) for other instructions we need to check status of each input *at the
     408             : ///    current point*.
     409             : ///
     410             : /// FIXME: This works fairly well except one case
     411             : ///     bb1:
     412             : ///     p = *some GC-ptr def*
     413             : ///     p1 = gep p, offset
     414             : ///         /     |
     415             : ///        /      |
     416             : ///    bb2:       |
     417             : ///    safepoint  |
     418             : ///        \      |
     419             : ///         \     |
     420             : ///      bb3:
     421             : ///      p2 = phi [p, bb2] [p1, bb1]
     422             : ///      p3 = phi [p, bb2] [p, bb1]
     423             : ///      here p and p1 is unrelocated
     424             : ///           p2 and p3 is poisoned (though they shouldn't be)
     425             : ///
     426             : /// This leads to some weird results:
     427             : ///      cmp eq p, p2 - illegal instruction (false-positive)
     428             : ///      cmp eq p1, p2 - illegal instruction (false-positive)
     429             : ///      cmp eq p, p3 - illegal instruction (false-positive)
     430             : ///      cmp eq p, p1 - ok
     431             : /// To fix this we need to introduce conception of generations and be able to
     432             : /// check if two values belong to one generation or not. This way p2 will be
     433             : /// considered to be unrelocated and no false alarm will happen.
     434             : class GCPtrTracker {
     435             :   const Function &F;
     436             :   const CFGDeadness &CD;
     437             :   SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
     438             :   DenseMap<const BasicBlock *, BasicBlockState *> BlockMap;
     439             :   // This set contains defs of unrelocated pointers that are proved to be legal
     440             :   // and don't need verification.
     441             :   DenseSet<const Instruction *> ValidUnrelocatedDefs;
     442             :   // This set contains poisoned defs. They can be safely ignored during
     443             :   // verification too.
     444             :   DenseSet<const Value *> PoisonedDefs;
     445             : 
     446             : public:
     447             :   GCPtrTracker(const Function &F, const DominatorTree &DT,
     448             :                const CFGDeadness &CD);
     449             : 
     450           0 :   bool hasLiveIncomingEdge(const PHINode *PN, const BasicBlock *InBB) const {
     451           0 :     return CD.hasLiveIncomingEdge(PN, InBB);
     452             :   }
     453             : 
     454             :   BasicBlockState *getBasicBlockState(const BasicBlock *BB);
     455             :   const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const;
     456             : 
     457             :   bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); }
     458             : 
     459             :   /// Traverse each BB of the function and call
     460             :   /// InstructionVerifier::verifyInstruction for each possibly invalid
     461             :   /// instruction.
     462             :   /// It destructively modifies GCPtrTracker so it's passed via rvalue reference
     463             :   /// in order to prohibit further usages of GCPtrTracker as it'll be in
     464             :   /// inconsistent state.
     465             :   static void verifyFunction(GCPtrTracker &&Tracker,
     466             :                              InstructionVerifier &Verifier);
     467             : 
     468             :   /// Returns true for reachable and live blocks.
     469             :   bool isMapped(const BasicBlock *BB) const {
     470          54 :     return BlockMap.find(BB) != BlockMap.end();
     471             :   }
     472             : 
     473             : private:
     474             :   /// Returns true if the instruction may be safely skipped during verification.
     475             :   bool instructionMayBeSkipped(const Instruction *I) const;
     476             : 
     477             :   /// Iterates over all BBs from BlockMap and recalculates AvailableIn/Out for
     478             :   /// each of them until it converges.
     479             :   void recalculateBBsStates();
     480             : 
     481             :   /// Remove from Contribution all defs that legally produce unrelocated
     482             :   /// pointers and saves them to ValidUnrelocatedDefs.
     483             :   /// Though Contribution should belong to BBS it is passed separately with
     484             :   /// different const-modifier in order to emphasize (and guarantee) that only
     485             :   /// Contribution will be changed.
     486             :   /// Returns true if Contribution was changed otherwise false.
     487             :   bool removeValidUnrelocatedDefs(const BasicBlock *BB,
     488             :                                   const BasicBlockState *BBS,
     489             :                                   AvailableValueSet &Contribution);
     490             : 
     491             :   /// Gather all the definitions dominating the start of BB into Result. This is
     492             :   /// simply the defs introduced by every dominating basic block and the
     493             :   /// function arguments.
     494             :   void gatherDominatingDefs(const BasicBlock *BB, AvailableValueSet &Result,
     495             :                             const DominatorTree &DT);
     496             : 
     497             :   /// Compute the AvailableOut set for BB, based on the BasicBlockState BBS,
     498             :   /// which is the BasicBlockState for BB.
     499             :   /// ContributionChanged is set when the verifier runs for the first time
     500             :   /// (in this case Contribution was changed from 'empty' to its initial state)
     501             :   /// or when Contribution of this BB was changed since last computation.
     502             :   static void transferBlock(const BasicBlock *BB, BasicBlockState &BBS,
     503             :                             bool ContributionChanged);
     504             : 
     505             :   /// Model the effect of an instruction on the set of available values.
     506             :   static void transferInstruction(const Instruction &I, bool &Cleared,
     507             :                                   AvailableValueSet &Available);
     508             : };
     509             : 
     510             : /// It is a visitor for GCPtrTracker::verifyFunction. It decides if the
     511             : /// instruction (which uses heap reference) is legal or not, given our safepoint
     512             : /// semantics.
     513             : class InstructionVerifier {
     514             :   bool AnyInvalidUses = false;
     515             : 
     516             : public:
     517             :   void verifyInstruction(const GCPtrTracker *Tracker, const Instruction &I,
     518             :                          const AvailableValueSet &AvailableSet);
     519             : 
     520           0 :   bool hasAnyInvalidUses() const { return AnyInvalidUses; }
     521             : 
     522             : private:
     523             :   void reportInvalidUse(const Value &V, const Instruction &I);
     524             : };
     525             : } // end anonymous namespace
     526             : 
     527          34 : GCPtrTracker::GCPtrTracker(const Function &F, const DominatorTree &DT,
     528          68 :                            const CFGDeadness &CD) : F(F), CD(CD) {
     529             :   // Calculate Contribution of each live BB.
     530             :   // Allocate BB states for live blocks.
     531         111 :   for (const BasicBlock &BB : F)
     532             :     if (!CD.isDeadBlock(&BB)) {
     533          75 :       BasicBlockState *BBS = new (BSAllocator.Allocate()) BasicBlockState;
     534         309 :       for (const auto &I : BB)
     535         234 :         transferInstruction(I, BBS->Cleared, BBS->Contribution);
     536          75 :       BlockMap[&BB] = BBS;
     537             :     }
     538             : 
     539             :   // Initialize AvailableIn/Out sets of each BB using only information about
     540             :   // dominating BBs.
     541         109 :   for (auto &BBI : BlockMap) {
     542          75 :     gatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT);
     543          75 :     transferBlock(BBI.first, *BBI.second, true);
     544             :   }
     545             : 
     546             :   // Simulate the flow of defs through the CFG and recalculate AvailableIn/Out
     547             :   // sets of each BB until it converges. If any def is proved to be an
     548             :   // unrelocated pointer, it will be removed from all BBSs.
     549          34 :   recalculateBBsStates();
     550          34 : }
     551             : 
     552             : BasicBlockState *GCPtrTracker::getBasicBlockState(const BasicBlock *BB) {
     553         284 :   auto it = BlockMap.find(BB);
     554         284 :   return it != BlockMap.end() ? it->second : nullptr;
     555             : }
     556             : 
     557             : const BasicBlockState *GCPtrTracker::getBasicBlockState(
     558             :     const BasicBlock *BB) const {
     559             :   return const_cast<GCPtrTracker *>(this)->getBasicBlockState(BB);
     560             : }
     561             : 
     562         234 : bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const {
     563             :   // Poisoned defs are skipped since they are always safe by itself by
     564             :   // definition (for details see comment to this class).
     565         234 :   return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I);
     566             : }
     567             : 
     568          34 : void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker,
     569             :                                   InstructionVerifier &Verifier) {
     570             :   // We need RPO here to a) report always the first error b) report errors in
     571             :   // same order from run to run.
     572          34 :   ReversePostOrderTraversal<const Function *> RPOT(&Tracker.F);
     573         109 :   for (const BasicBlock *BB : RPOT) {
     574             :     BasicBlockState *BBS = Tracker.getBasicBlockState(BB);
     575          75 :     if (!BBS)
     576           0 :       continue;
     577             : 
     578             :     // We destructively modify AvailableIn as we traverse the block instruction
     579             :     // by instruction.
     580          75 :     AvailableValueSet &AvailableSet = BBS->AvailableIn;
     581         309 :     for (const Instruction &I : *BB) {
     582         234 :       if (Tracker.instructionMayBeSkipped(&I))
     583          32 :         continue; // This instruction shouldn't be added to AvailableSet.
     584             : 
     585         202 :       Verifier.verifyInstruction(&Tracker, I, AvailableSet);
     586             : 
     587             :       // Model the effect of current instruction on AvailableSet to keep the set
     588             :       // relevant at each point of BB.
     589         202 :       bool Cleared = false;
     590         202 :       transferInstruction(I, Cleared, AvailableSet);
     591             :       (void)Cleared;
     592             :     }
     593             :   }
     594          34 : }
     595             : 
     596          34 : void GCPtrTracker::recalculateBBsStates() {
     597          34 :   SetVector<const BasicBlock *> Worklist;
     598             :   // TODO: This order is suboptimal, it's better to replace it with priority
     599             :   // queue where priority is RPO number of BB.
     600         109 :   for (auto &BBI : BlockMap)
     601          75 :     Worklist.insert(BBI.first);
     602             : 
     603             :   // This loop iterates the AvailableIn/Out sets until it converges.
     604             :   // The AvailableIn and AvailableOut sets decrease as we iterate.
     605         113 :   while (!Worklist.empty()) {
     606             :     const BasicBlock *BB = Worklist.pop_back_val();
     607             :     BasicBlockState *BBS = getBasicBlockState(BB);
     608          79 :     if (!BBS)
     609           0 :       continue; // Ignore dead successors.
     610             : 
     611             :     size_t OldInCount = BBS->AvailableIn.size();
     612         140 :     for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
     613             :       const BasicBlock *PBB = *PredIt;
     614             :       BasicBlockState *PBBS = getBasicBlockState(PBB);
     615          60 :       if (PBBS && !CD.isDeadEdge(&CFGDeadness::getEdge(PredIt)))
     616          60 :         set_intersect(BBS->AvailableIn, PBBS->AvailableOut);
     617             :     }
     618             : 
     619             :     assert(OldInCount >= BBS->AvailableIn.size() && "invariant!");
     620             : 
     621             :     bool InputsChanged = OldInCount != BBS->AvailableIn.size();
     622             :     bool ContributionChanged =
     623          79 :         removeValidUnrelocatedDefs(BB, BBS, BBS->Contribution);
     624          79 :     if (!InputsChanged && !ContributionChanged)
     625             :       continue;
     626             : 
     627             :     size_t OldOutCount = BBS->AvailableOut.size();
     628          22 :     transferBlock(BB, *BBS, ContributionChanged);
     629          22 :     if (OldOutCount != BBS->AvailableOut.size()) {
     630             :       assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
     631          20 :       Worklist.insert(succ_begin(BB), succ_end(BB));
     632             :     }
     633             :   }
     634          34 : }
     635             : 
     636          79 : bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB,
     637             :                                               const BasicBlockState *BBS,
     638             :                                               AvailableValueSet &Contribution) {
     639             :   assert(&BBS->Contribution == &Contribution &&
     640             :          "Passed Contribution should be from the passed BasicBlockState!");
     641             :   AvailableValueSet AvailableSet = BBS->AvailableIn;
     642             :   bool ContributionChanged = false;
     643             :   // For explanation why instructions are processed this way see
     644             :   // "Rules of deriving" in the comment to this class.
     645         334 :   for (const Instruction &I : *BB) {
     646             :     bool ValidUnrelocatedPointerDef = false;
     647             :     bool PoisonedPointerDef = false;
     648             :     // TODO: `select` instructions should be handled here too.
     649             :     if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
     650          27 :       if (containsGCPtrType(PN->getType())) {
     651             :         // If both is true, output is poisoned.
     652             :         bool HasRelocatedInputs = false;
     653             :         bool HasUnrelocatedInputs = false;
     654          79 :         for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     655          54 :           const BasicBlock *InBB = PN->getIncomingBlock(i);
     656         107 :           if (!isMapped(InBB) ||
     657          53 :               !CD.hasLiveIncomingEdge(PN, InBB))
     658           1 :             continue; // Skip dead block or dead edge.
     659             : 
     660             :           const Value *InValue = PN->getIncomingValue(i);
     661             : 
     662          53 :           if (isNotExclusivelyConstantDerived(InValue)) {
     663             :             if (isValuePoisoned(InValue)) {
     664             :               // If any of inputs is poisoned, output is always poisoned too.
     665             :               HasRelocatedInputs = true;
     666             :               HasUnrelocatedInputs = true;
     667           2 :               break;
     668             :             }
     669          42 :             if (BlockMap[InBB]->AvailableOut.count(InValue))
     670             :               HasRelocatedInputs = true;
     671             :             else
     672             :               HasUnrelocatedInputs = true;
     673             :           }
     674             :         }
     675          25 :         if (HasUnrelocatedInputs) {
     676          10 :           if (HasRelocatedInputs)
     677             :             PoisonedPointerDef = true;
     678             :           else
     679             :             ValidUnrelocatedPointerDef = true;
     680             :         }
     681             :       }
     682         284 :     } else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
     683          56 :                containsGCPtrType(I.getType())) {
     684             :       // GEP/bitcast of unrelocated pointer is legal by itself but this def
     685             :       // shouldn't appear in any AvailableSet.
     686         174 :       for (const Value *V : I.operands())
     687         140 :         if (containsGCPtrType(V->getType()) &&
     688          84 :             isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) {
     689             :           if (isValuePoisoned(V))
     690             :             PoisonedPointerDef = true;
     691             :           else
     692             :             ValidUnrelocatedPointerDef = true;
     693             :           break;
     694             :         }
     695             :     }
     696             :     assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) &&
     697             :            "Value cannot be both unrelocated and poisoned!");
     698             :     if (ValidUnrelocatedPointerDef) {
     699             :       // Remove def of unrelocated pointer from Contribution of this BB and
     700             :       // trigger update of all its successors.
     701          23 :       Contribution.erase(&I);
     702          23 :       PoisonedDefs.erase(&I);
     703          23 :       ValidUnrelocatedDefs.insert(&I);
     704             :       LLVM_DEBUG(dbgs() << "Removing urelocated " << I
     705             :                         << " from Contribution of " << BB->getName() << "\n");
     706             :       ContributionChanged = true;
     707         232 :     } else if (PoisonedPointerDef) {
     708             :       // Mark pointer as poisoned, remove its def from Contribution and trigger
     709             :       // update of all successors.
     710           9 :       Contribution.erase(&I);
     711           9 :       PoisonedDefs.insert(&I);
     712             :       LLVM_DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of "
     713             :                         << BB->getName() << "\n");
     714             :       ContributionChanged = true;
     715             :     } else {
     716         223 :       bool Cleared = false;
     717         223 :       transferInstruction(I, Cleared, AvailableSet);
     718             :       (void)Cleared;
     719             :     }
     720             :   }
     721          79 :   return ContributionChanged;
     722             : }
     723             : 
     724          75 : void GCPtrTracker::gatherDominatingDefs(const BasicBlock *BB,
     725             :                                         AvailableValueSet &Result,
     726             :                                         const DominatorTree &DT) {
     727             :   DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)];
     728             : 
     729             :   assert(DTN && "Unreachable blocks are ignored");
     730         117 :   while (DTN->getIDom()) {
     731             :     DTN = DTN->getIDom();
     732          45 :     auto BBS = getBasicBlockState(DTN->getBlock());
     733             :     assert(BBS && "immediate dominator cannot be dead for a live block");
     734             :     const auto &Defs = BBS->Contribution;
     735          45 :     Result.insert(Defs.begin(), Defs.end());
     736             :     // If this block is 'Cleared', then nothing LiveIn to this block can be
     737             :     // available after this block completes.  Note: This turns out to be
     738             :     // really important for reducing memory consuption of the initial available
     739             :     // sets and thus peak memory usage by this verifier.
     740          45 :     if (BBS->Cleared)
     741             :       return;
     742             :   }
     743             : 
     744         193 :   for (const Argument &A : BB->getParent()->args())
     745         121 :     if (containsGCPtrType(A.getType()))
     746          79 :       Result.insert(&A);
     747             : }
     748             : 
     749          97 : void GCPtrTracker::transferBlock(const BasicBlock *BB, BasicBlockState &BBS,
     750             :                                  bool ContributionChanged) {
     751          97 :   const AvailableValueSet &AvailableIn = BBS.AvailableIn;
     752             :   AvailableValueSet &AvailableOut = BBS.AvailableOut;
     753             : 
     754          97 :   if (BBS.Cleared) {
     755             :     // AvailableOut will change only when Contribution changed.
     756          44 :     if (ContributionChanged)
     757             :       AvailableOut = BBS.Contribution;
     758             :   } else {
     759             :     // Otherwise, we need to reduce the AvailableOut set by things which are no
     760             :     // longer in our AvailableIn
     761             :     AvailableValueSet Temp = BBS.Contribution;
     762          53 :     set_union(Temp, AvailableIn);
     763             :     AvailableOut = std::move(Temp);
     764             :   }
     765             : 
     766             :   LLVM_DEBUG(dbgs() << "Transfered block " << BB->getName() << " from ";
     767             :              PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end());
     768             :              dbgs() << " to ";
     769             :              PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end());
     770             :              dbgs() << "\n";);
     771          97 : }
     772             : 
     773         659 : void GCPtrTracker::transferInstruction(const Instruction &I, bool &Cleared,
     774             :                                        AvailableValueSet &Available) {
     775         659 :   if (isStatepoint(I)) {
     776         112 :     Cleared = true;
     777             :     Available.clear();
     778         547 :   } else if (containsGCPtrType(I.getType()))
     779         242 :     Available.insert(&I);
     780         659 : }
     781             : 
     782         202 : void InstructionVerifier::verifyInstruction(
     783             :     const GCPtrTracker *Tracker, const Instruction &I,
     784             :     const AvailableValueSet &AvailableSet) {
     785             :   if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
     786          12 :     if (containsGCPtrType(PN->getType()))
     787          36 :       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     788             :         const BasicBlock *InBB = PN->getIncomingBlock(i);
     789             :         const BasicBlockState *InBBS = Tracker->getBasicBlockState(InBB);
     790          46 :         if (!InBBS ||
     791          23 :             !Tracker->hasLiveIncomingEdge(PN, InBB))
     792           1 :           continue; // Skip dead block or dead edge.
     793             : 
     794             :         const Value *InValue = PN->getIncomingValue(i);
     795             : 
     796          23 :         if (isNotExclusivelyConstantDerived(InValue) &&
     797             :             !InBBS->AvailableOut.count(InValue))
     798           0 :           reportInvalidUse(*InValue, *PN);
     799             :       }
     800          19 :   } else if (isa<CmpInst>(I) &&
     801          38 :              containsGCPtrType(I.getOperand(0)->getType())) {
     802          19 :     Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
     803          19 :     enum BaseType baseTyLHS = getBaseType(LHS),
     804          19 :                   baseTyRHS = getBaseType(RHS);
     805             : 
     806             :     // Returns true if LHS and RHS are unrelocated pointers and they are
     807             :     // valid unrelocated uses.
     808             :     auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS,
     809             :                                    &LHS, &RHS] () {
     810             :         // A cmp instruction has valid unrelocated pointer operands only if
     811             :         // both operands are unrelocated pointers.
     812             :         // In the comparison between two pointers, if one is an unrelocated
     813             :         // use, the other *should be* an unrelocated use, for this
     814             :         // instruction to contain valid unrelocated uses. This unrelocated
     815             :         // use can be a null constant as well, or another unrelocated
     816             :         // pointer.
     817             :         if (AvailableSet.count(LHS) || AvailableSet.count(RHS))
     818             :           return false;
     819             :         // Constant pointers (that are not exclusively null) may have
     820             :         // meaning in different VMs, so we cannot reorder the compare
     821             :         // against constant pointers before the safepoint. In other words,
     822             :         // comparison of an unrelocated use against a non-null constant
     823             :         // maybe invalid.
     824             :         if ((baseTyLHS == BaseType::ExclusivelySomeConstant &&
     825             :              baseTyRHS == BaseType::NonConstant) ||
     826             :             (baseTyLHS == BaseType::NonConstant &&
     827             :              baseTyRHS == BaseType::ExclusivelySomeConstant))
     828             :           return false;
     829             : 
     830             :         // If one of pointers is poisoned and other is not exclusively derived
     831             :         // from null it is an invalid expression: it produces poisoned result
     832             :         // and unless we want to track all defs (not only gc pointers) the only
     833             :         // option is to prohibit such instructions.
     834             :         if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) ||
     835             :             (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull))
     836             :             return false;
     837             : 
     838             :         // All other cases are valid cases enumerated below:
     839             :         // 1. Comparison between an exclusively derived null pointer and a
     840             :         // constant base pointer.
     841             :         // 2. Comparison between an exclusively derived null pointer and a
     842             :         // non-constant unrelocated base pointer.
     843             :         // 3. Comparison between 2 unrelocated pointers.
     844             :         // 4. Comparison between a pointer exclusively derived from null and a
     845             :         // non-constant poisoned pointer.
     846             :         return true;
     847          19 :     };
     848          19 :     if (!hasValidUnrelocatedUse()) {
     849             :       // Print out all non-constant derived pointers that are unrelocated
     850             :       // uses, which are invalid.
     851           9 :       if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS))
     852           7 :         reportInvalidUse(*LHS, I);
     853           9 :       if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS))
     854           3 :         reportInvalidUse(*RHS, I);
     855             :     }
     856             :   } else {
     857        1036 :     for (const Value *V : I.operands())
     858         771 :       if (containsGCPtrType(V->getType()) &&
     859         694 :           isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V))
     860           8 :         reportInvalidUse(*V, I);
     861             :   }
     862         202 : }
     863             : 
     864           0 : void InstructionVerifier::reportInvalidUse(const Value &V,
     865             :                                            const Instruction &I) {
     866           0 :   errs() << "Illegal use of unrelocated value found!\n";
     867           0 :   errs() << "Def: " << V << "\n";
     868           0 :   errs() << "Use: " << I << "\n";
     869           0 :   if (!PrintOnly)
     870           0 :     abort();
     871           0 :   AnyInvalidUses = true;
     872           0 : }
     873             : 
     874          34 : static void Verify(const Function &F, const DominatorTree &DT,
     875             :                    const CFGDeadness &CD) {
     876             :   LLVM_DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName()
     877             :                     << "\n");
     878          34 :   if (PrintOnly)
     879          34 :     dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n";
     880             : 
     881          68 :   GCPtrTracker Tracker(F, DT, CD);
     882             : 
     883             :   // We now have all the information we need to decide if the use of a heap
     884             :   // reference is legal or not, given our safepoint semantics.
     885             : 
     886          34 :   InstructionVerifier Verifier;
     887          34 :   GCPtrTracker::verifyFunction(std::move(Tracker), Verifier);
     888             : 
     889          34 :   if (PrintOnly && !Verifier.hasAnyInvalidUses()) {
     890          18 :     dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName()
     891          18 :            << "\n";
     892             :   }
     893          34 : }

Generated by: LCOV version 1.13