Line data Source code
1 : //===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // Run a sanity check on the IR to ensure that Safepoints - if they've been
11 : // inserted - were inserted correctly. In particular, look for use of
12 : // non-relocated values after a safepoint. Its primary use is to check the
13 : // correctness of safepoint insertion immediately after insertion, but it can
14 : // also be used to verify that later transforms have not found a way to break
15 : // safepoint semantics.
16 : //
17 : // In its current form, this verifier checks a property which is sufficient, but
18 : // not necessary for correctness. There are some cases where an unrelocated
19 : // pointer can be used after the safepoint. Consider this example:
20 : //
21 : // a = ...
22 : // b = ...
23 : // (a',b') = safepoint(a,b)
24 : // c = cmp eq a b
25 : // br c, ..., ....
26 : //
27 : // Because it is valid to reorder 'c' above the safepoint, this is legal. In
28 : // practice, this is a somewhat uncommon transform, but CodeGenPrep does create
29 : // idioms like this. The verifier knows about these cases and avoids reporting
30 : // false positives.
31 : //
32 : //===----------------------------------------------------------------------===//
33 :
34 : #include "llvm/ADT/DenseSet.h"
35 : #include "llvm/ADT/PostOrderIterator.h"
36 : #include "llvm/ADT/SetOperations.h"
37 : #include "llvm/ADT/SetVector.h"
38 : #include "llvm/IR/BasicBlock.h"
39 : #include "llvm/IR/Dominators.h"
40 : #include "llvm/IR/Function.h"
41 : #include "llvm/IR/Instructions.h"
42 : #include "llvm/IR/Intrinsics.h"
43 : #include "llvm/IR/IntrinsicInst.h"
44 : #include "llvm/IR/Module.h"
45 : #include "llvm/IR/Value.h"
46 : #include "llvm/IR/SafepointIRVerifier.h"
47 : #include "llvm/IR/Statepoint.h"
48 : #include "llvm/Support/Debug.h"
49 : #include "llvm/Support/CommandLine.h"
50 : #include "llvm/Support/raw_ostream.h"
51 :
52 : #define DEBUG_TYPE "safepoint-ir-verifier"
53 :
54 : using namespace llvm;
55 :
56 : /// This option is used for writing test cases. Instead of crashing the program
57 : /// when verification fails, report a message to the console (for FileCheck
58 : /// usage) and continue execution as if nothing happened.
59 : static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only",
60 : cl::init(false));
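
// Typical usage of this mode in a lit-style test (a sketch; the exact opt
// invocation may differ between LLVM versions):
//
//   opt -safepoint-ir-verifier-print-only -verify-safepoint-ir -S %s 2>&1 \
//     | FileCheck %s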
61 :
62 : namespace {
63 :
64 : /// This CFGDeadness analysis finds dead blocks and edges. The algorithm starts
65 : /// with the set of blocks unreachable from entry and then propagates deadness
66 : /// through foldable conditional branches without modifying the CFG. GVN does a
67 : /// similar propagation, but it changes the CFG by splitting critical edges. In
68 : /// most cases passes rely on SimplifyCFG to clean up dead blocks, but in some
69 : /// cases, such as verification or loop passes, that is not possible.
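///
/// For illustration, a sketch (not taken from this file) of a block that stays
/// in the CFG but is provably dead because it is reachable only through a
/// conditional branch on a constant condition:
///
///   entry:
///     br i1 false, label %dead, label %live
///   dead:                          ; edge entry->dead is recorded as dead and
///     ...                          ; %dead itself becomes a dead block
///   live:
///     ...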
70 34 : class CFGDeadness {
71 : const DominatorTree *DT = nullptr;
72 : SetVector<const BasicBlock *> DeadBlocks;
73 : SetVector<const Use *> DeadEdges; // Contains all dead edges from live blocks.
74 :
75 : public:
76 :   /// Return the edge that corresponds to the predecessor.
77 136 : static const Use& getEdge(const_pred_iterator &PredIt) {
78 : auto &PU = PredIt.getUse();
79 136 : return PU.getUser()->getOperandUse(PU.getOperandNo());
80 : }
81 :
82 : /// Return true if there is at least one live edge that corresponds to the
83 : /// basic block InBB listed in the phi node.
84 76 : bool hasLiveIncomingEdge(const PHINode *PN, const BasicBlock *InBB) const {
85 : assert(!isDeadBlock(InBB) && "block must be live");
86 76 : const BasicBlock* BB = PN->getParent();
87 : bool Listed = false;
88 113 : for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
89 113 : if (InBB == *PredIt) {
90 76 : if (!isDeadEdge(&getEdge(PredIt)))
91 76 : return true;
92 : Listed = true;
93 : }
94 : }
95 : (void)Listed;
96 : assert(Listed && "basic block is not found among incoming blocks");
97 0 : return false;
98 : }
99 :
100 :
101 : bool isDeadBlock(const BasicBlock *BB) const {
102 : return DeadBlocks.count(BB);
103 : }
104 :
105 : bool isDeadEdge(const Use *U) const {
106 : assert(dyn_cast<Instruction>(U->getUser())->isTerminator() &&
107 : "edge must be operand of terminator");
108 : assert(cast_or_null<BasicBlock>(U->get()) &&
109 : "edge must refer to basic block");
110 : assert(!isDeadBlock(dyn_cast<Instruction>(U->getUser())->getParent()) &&
111 : "isDeadEdge() must be applied to edge from live block");
112 : return DeadEdges.count(U);
113 : }
114 :
115 0 : bool hasLiveIncomingEdges(const BasicBlock *BB) const {
116 : // Check if all incoming edges are dead.
117 0 : for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
118 : auto &PU = PredIt.getUse();
119 0 : const Use &U = PU.getUser()->getOperandUse(PU.getOperandNo());
120 : if (!isDeadBlock(*PredIt) && !isDeadEdge(&U))
121 0 : return true; // Found a live edge.
122 : }
123 0 : return false;
124 : }
125 :
126 34 : void processFunction(const Function &F, const DominatorTree &DT) {
127 34 : this->DT = &DT;
128 :
129 : // Start with all blocks unreachable from entry.
130 111 : for (const BasicBlock &BB : F)
131 77 : if (!DT.isReachableFromEntry(&BB))
132 2 : DeadBlocks.insert(&BB);
133 :
134 :     // Top-down walk of the blocks, in reverse post-order.
135 : ReversePostOrderTraversal<const Function *> RPOT(&F);
136 109 : for (const BasicBlock *BB : RPOT) {
137 75 : const Instruction *TI = BB->getTerminator();
138 : assert(TI && "blocks must be well formed");
139 :
140 : // For conditional branches, we can perform simple conditional propagation on
141 : // the condition value itself.
142 : const BranchInst *BI = dyn_cast<BranchInst>(TI);
143 42 : if (!BI || !BI->isConditional() || !isa<Constant>(BI->getCondition()))
144 : continue;
145 :
146 : // If a branch has two identical successors, we cannot declare either dead.
147 10 : if (BI->getSuccessor(0) == BI->getSuccessor(1))
148 : continue;
149 :
150 : ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
151 : if (!Cond)
152 : continue;
153 :
154 0 : addDeadEdge(BI->getOperandUse(Cond->getZExtValue() ? 1 : 2));
155 : }
156 34 : }
157 :
158 : protected:
159 0 : void addDeadBlock(const BasicBlock *BB) {
160 : SmallVector<const BasicBlock *, 4> NewDead;
161 : SmallSetVector<const BasicBlock *, 4> DF;
162 :
163 0 : NewDead.push_back(BB);
164 0 : while (!NewDead.empty()) {
165 : const BasicBlock *D = NewDead.pop_back_val();
166 : if (isDeadBlock(D))
167 0 : continue;
168 :
169 : // All blocks dominated by D are dead.
170 : SmallVector<BasicBlock *, 8> Dom;
171 0 : DT->getDescendants(const_cast<BasicBlock*>(D), Dom);
172 :       // There is no need to also mark all in and out edges dead: marking
173 :       // the block itself dead is enough for the rest of the analysis to
174 :       // proceed.
175 0 : DeadBlocks.insert(Dom.begin(), Dom.end());
176 :
177 : // Figure out the dominance-frontier(D).
178 0 : for (BasicBlock *B : Dom)
179 0 : for (BasicBlock *S : successors(B))
180 0 : if (!isDeadBlock(S) && !hasLiveIncomingEdges(S))
181 0 : NewDead.push_back(S);
182 : }
183 0 : }
184 :
185 0 : void addDeadEdge(const Use &DeadEdge) {
186 0 : if (!DeadEdges.insert(&DeadEdge))
187 : return;
188 :
189 0 : BasicBlock *BB = cast_or_null<BasicBlock>(DeadEdge.get());
190 0 : if (hasLiveIncomingEdges(BB))
191 : return;
192 :
193 0 : addDeadBlock(BB);
194 : }
195 : };
196 : } // namespace
197 :
198 : static void Verify(const Function &F, const DominatorTree &DT,
199 : const CFGDeadness &CD);
200 :
201 : namespace {
202 :
203 : struct SafepointIRVerifier : public FunctionPass {
204 : static char ID; // Pass identification, replacement for typeid
205 8 : SafepointIRVerifier() : FunctionPass(ID) {
206 8 : initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry());
207 : }
208 :
209 34 : bool runOnFunction(Function &F) override {
210 34 : auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
211 34 : CFGDeadness CD;
212 34 : CD.processFunction(F, DT);
213 34 : Verify(F, DT, CD);
214 34 : return false; // no modifications
215 : }
216 :
217 8 : void getAnalysisUsage(AnalysisUsage &AU) const override {
218 8 : AU.addRequiredID(DominatorTreeWrapperPass::ID);
219 : AU.setPreservesAll();
220 8 : }
221 :
222 0 : StringRef getPassName() const override { return "safepoint verifier"; }
223 : };
224 : } // namespace
225 :
226 0 : void llvm::verifySafepointIR(Function &F) {
227 : SafepointIRVerifier pass;
228 0 : pass.runOnFunction(F);
229 0 : }
230 :
231 : char SafepointIRVerifier::ID = 0;
232 :
233 0 : FunctionPass *llvm::createSafepointIRVerifierPass() {
234 0 : return new SafepointIRVerifier();
235 : }
236 :
237 32127 : INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir",
238 : "Safepoint IR Verifier", false, false)
239 32127 : INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
240 64262 : INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir",
241 : "Safepoint IR Verifier", false, false)
242 :
243 : static bool isGCPointerType(Type *T) {
244 : if (auto *PT = dyn_cast<PointerType>(T))
245 : // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
246 : // GC managed heap. We know that a pointer into this heap needs to be
247 : // updated and that no other pointer does.
248 0 : return (1 == PT->getAddressSpace());
249 : return false;
250 : }
251 :
252 1560 : static bool containsGCPtrType(Type *Ty) {
253 663 : if (isGCPointerType(Ty))
254 : return true;
255 : if (VectorType *VT = dyn_cast<VectorType>(Ty))
256 : return isGCPointerType(VT->getScalarType());
257 : if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
258 0 : return containsGCPtrType(AT->getElementType());
259 : if (StructType *ST = dyn_cast<StructType>(Ty))
260 : return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
261 : containsGCPtrType);
262 : return false;
263 : }
264 :
265 : // Debugging aid -- prints a [Begin, End) range of values.
266 : template<typename IteratorTy>
267 : static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) {
268 : OS << "[ ";
269 : while (Begin != End) {
270 : OS << **Begin << " ";
271 : ++Begin;
272 : }
273 : OS << "]";
274 : }
275 :
276 : /// The verifier algorithm is phrased in terms of availability. The set of
277 : /// values "available" at a given point in the control flow graph is the set of
278 : /// correctly relocated values at that point, and is a subset of the set of
279 : /// definitions dominating that point.
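///
/// For example, in the pseudo-notation used in the file header (a sketch, not
/// a real test case):
///
///   a = ...               ; 'a' is available here
///   (a') = safepoint(a)   ; the safepoint clears the available set
///   use(a')               ; ok: a' is the relocated copy, which is available
///   use(a)                ; flagged: 'a' is no longer available at this point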
280 :
281 : using AvailableValueSet = DenseSet<const Value *>;
282 :
283 : /// State we compute and track per basic block.
284 : struct BasicBlockState {
285 : // Set of values available coming in, before the phi nodes
286 : AvailableValueSet AvailableIn;
287 :
288 : // Set of values available going out
289 : AvailableValueSet AvailableOut;
290 :
291 : // AvailableOut minus AvailableIn.
292 : // All elements are Instructions
293 : AvailableValueSet Contribution;
294 :
295 : // True if this block contains a safepoint and thus AvailableIn does not
296 : // contribute to AvailableOut.
297 : bool Cleared = false;
298 : };
299 :
300 : /// A given derived pointer can have multiple base pointers through phi/selects.
301 : /// This type indicates when the base pointer is exclusively constant
302 : /// (ExclusivelySomeConstant), and if that constant is proven to be exclusively
303 : /// null, we record that as ExclusivelyNull. In all other cases, the BaseType is
304 : /// NonConstant.
305 : enum BaseType {
306 :   NonConstant = 1, // Base pointer is not exclusively constant.
307 : ExclusivelyNull,
308 :   ExclusivelySomeConstant // Base pointers for a given derived pointer come from
309 :                           // a set of constants, but they are not exclusively
310 :                           // null.
311 : };
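
// For illustration (a sketch, not from this file), assuming addrspace(1) is
// the GC-managed heap, @g is a constant global in that address space and %p
// is a non-constant derived pointer:
//
//   select i1 %c, i8 addrspace(1)* null, i8 addrspace(1)* null  ; ExclusivelyNull
//   select i1 %c, i8 addrspace(1)* null, i8 addrspace(1)* @g    ; ExclusivelySomeConstant
//   select i1 %c, i8 addrspace(1)* null, i8 addrspace(1)* %p    ; NonConstant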
312 :
313 : /// Return the baseType for Val which states whether Val is exclusively
314 : /// derived from constant/null, or not exclusively derived from constant.
315 : /// Val is exclusively derived from a constant base when all operands of phis
316 : /// and selects are derived from a constant base.
317 247 : static enum BaseType getBaseType(const Value *Val) {
318 :
319 : SmallVector<const Value *, 32> Worklist;
320 : DenseSet<const Value *> Visited;
321 : bool isExclusivelyDerivedFromNull = true;
322 247 : Worklist.push_back(Val);
323 : // Strip through all the bitcasts and geps to get base pointer. Also check for
324 : // the exclusive value when there can be multiple base pointers (through phis
325 : // or selects).
326 510 : while(!Worklist.empty()) {
327 454 : const Value *V = Worklist.pop_back_val();
328 454 : if (!Visited.insert(V).second)
329 263 : continue;
330 :
331 452 : if (const auto *CI = dyn_cast<CastInst>(V)) {
332 50 : Worklist.push_back(CI->stripPointerCasts());
333 50 : continue;
334 : }
335 : if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
336 105 : Worklist.push_back(GEP->getPointerOperand());
337 105 : continue;
338 : }
339 : // Push all the incoming values of phi node into the worklist for
340 : // processing.
341 : if (const auto *PN = dyn_cast<PHINode>(V)) {
342 126 : for (Value *InV: PN->incoming_values())
343 84 : Worklist.push_back(InV);
344 : continue;
345 : }
346 : if (const auto *SI = dyn_cast<SelectInst>(V)) {
347 : // Push in the true and false values
348 2 : Worklist.push_back(SI->getTrueValue());
349 2 : Worklist.push_back(SI->getFalseValue());
350 2 : continue;
351 : }
352 253 : if (isa<Constant>(V)) {
353 : // We found at least one base pointer which is non-null, so this derived
354 : // pointer is not exclusively derived from null.
355 62 : if (V != Constant::getNullValue(V->getType()))
356 : isExclusivelyDerivedFromNull = false;
357 : // Continue processing the remaining values to make sure it's exclusively
358 : // constant.
359 62 : continue;
360 : }
361 : // At this point, we know that the base pointer is not exclusively
362 : // constant.
363 191 : return BaseType::NonConstant;
364 : }
365 : // Now, we know that the base pointer is exclusively constant, but we need to
366 : // differentiate between exclusive null constant and non-null constant.
367 56 : return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull
368 : : BaseType::ExclusivelySomeConstant;
369 : }
370 :
371 : static bool isNotExclusivelyConstantDerived(const Value *V) {
372 209 : return getBaseType(V) == BaseType::NonConstant;
373 : }
374 :
375 : namespace {
376 : class InstructionVerifier;
377 :
378 : /// Builds BasicBlockState for each BB of the function.
379 : /// It can traverse function for verification and provides all required
380 : /// information.
381 : ///
382 : /// A GC pointer may be in one of three states: relocated, unrelocated and
383 : /// poisoned.
384 : /// A relocated pointer may be used without any restrictions.
385 : /// An unrelocated pointer cannot be dereferenced, passed as an argument to any
386 : /// call or returned. An unrelocated pointer may be safely compared against
387 : /// another unrelocated pointer or a pointer exclusively derived from null.
388 : /// Poisoned pointers are produced when we somehow derive a pointer from both
389 : /// relocated and unrelocated pointers (e.g. through a phi or select). These
390 : /// pointers may be safely used in a very limited number of situations;
391 : /// currently the only valid use is a comparison against a constant exclusively
392 : /// derived from null. All limitations arise from their undefined state: these
393 : /// pointers should be treated as relocated and unrelocated simultaneously.
394 : /// Rules of deriving:
395 : /// R + U = P - that's where the poisoned pointers come from
396 : /// P + X = P
397 : /// U + U = U
398 : /// R + R = R
399 : /// X + C = X
400 : /// where "+" is any operation that somehow derives a pointer, U is unrelocated,
401 : /// R is relocated, P is poisoned, C is constant, and X is U, R, P, C or nothing
402 : /// (in the case when "+" is a unary operation).
403 : /// Deriving a pointer by itself is always safe.
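/// For example (a sketch): if %u is unrelocated and %r is relocated, then
///   %p = phi [%u, ...], [%r, ...]   ; R + U = P, so %p is poisoned
///   %q = getelementptr %u, ...      ; U + C = U, so %q stays unrelocated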
404 : /// NOTE: when we are making a decision on the status of an instruction's result:
405 : /// a) for a phi we need to check the status of each input *at the end of the
406 : ///    corresponding predecessor BB*.
407 : /// b) for other instructions we need to check the status of each input *at the
408 : ///    current point*.
409 : ///
410 : /// FIXME: This works fairly well except for one case:
411 : /// bb1:
412 : /// p = *some GC-ptr def*
413 : /// p1 = gep p, offset
414 : /// / |
415 : /// / |
416 : /// bb2: |
417 : /// safepoint |
418 : /// \ |
419 : /// \ |
420 : /// bb3:
421 : /// p2 = phi [p, bb2] [p1, bb1]
422 : /// p3 = phi [p, bb2] [p, bb1]
423 : ///   here p and p1 are unrelocated
424 : ///        p2 and p3 are poisoned (though they shouldn't be)
425 : ///
426 : /// This leads to some weird results:
427 : /// cmp eq p, p2 - illegal instruction (false-positive)
428 : /// cmp eq p1, p2 - illegal instruction (false-positive)
429 : /// cmp eq p, p3 - illegal instruction (false-positive)
430 : /// cmp eq p, p1 - ok
431 : /// To fix this we need to introduce the concept of generations and be able to
432 : /// check if two values belong to one generation or not. This way p2 will be
433 : /// considered to be unrelocated and no false alarm will happen.
434 : class GCPtrTracker {
435 : const Function &F;
436 : const CFGDeadness &CD;
437 : SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
438 : DenseMap<const BasicBlock *, BasicBlockState *> BlockMap;
439 : // This set contains defs of unrelocated pointers that are proved to be legal
440 : // and don't need verification.
441 : DenseSet<const Instruction *> ValidUnrelocatedDefs;
442 : // This set contains poisoned defs. They can be safely ignored during
443 : // verification too.
444 : DenseSet<const Value *> PoisonedDefs;
445 :
446 : public:
447 : GCPtrTracker(const Function &F, const DominatorTree &DT,
448 : const CFGDeadness &CD);
449 :
450 0 : bool hasLiveIncomingEdge(const PHINode *PN, const BasicBlock *InBB) const {
451 0 : return CD.hasLiveIncomingEdge(PN, InBB);
452 : }
453 :
454 : BasicBlockState *getBasicBlockState(const BasicBlock *BB);
455 : const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const;
456 :
457 : bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); }
458 :
459 : /// Traverse each BB of the function and call
460 : /// InstructionVerifier::verifyInstruction for each possibly invalid
461 : /// instruction.
462 : /// It destructively modifies GCPtrTracker so it's passed via rvalue reference
463 :   /// in order to prohibit further use of the GCPtrTracker, as it will be left in
464 :   /// an inconsistent state.
465 : static void verifyFunction(GCPtrTracker &&Tracker,
466 : InstructionVerifier &Verifier);
467 :
468 : /// Returns true for reachable and live blocks.
469 : bool isMapped(const BasicBlock *BB) const {
470 54 : return BlockMap.find(BB) != BlockMap.end();
471 : }
472 :
473 : private:
474 : /// Returns true if the instruction may be safely skipped during verification.
475 : bool instructionMayBeSkipped(const Instruction *I) const;
476 :
477 : /// Iterates over all BBs from BlockMap and recalculates AvailableIn/Out for
478 : /// each of them until it converges.
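  /// In set terms this is a standard forward data-flow fixed point (a sketch of
  /// what recalculateBBsStates computes):
  ///   AvailableIn(BB)  = intersection of AvailableOut(P) over all live
  ///                      predecessors P of BB
  ///   AvailableOut(BB) = result of transferBlock(BB, AvailableIn(BB))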
479 : void recalculateBBsStates();
480 :
481 : /// Remove from Contribution all defs that legally produce unrelocated
482 : /// pointers and saves them to ValidUnrelocatedDefs.
483 :   /// Though Contribution should belong to BBS, it is passed separately with a
484 :   /// different const-modifier in order to emphasize (and guarantee) that only
485 : /// Contribution will be changed.
486 : /// Returns true if Contribution was changed otherwise false.
487 : bool removeValidUnrelocatedDefs(const BasicBlock *BB,
488 : const BasicBlockState *BBS,
489 : AvailableValueSet &Contribution);
490 :
491 : /// Gather all the definitions dominating the start of BB into Result. This is
492 : /// simply the defs introduced by every dominating basic block and the
493 : /// function arguments.
494 : void gatherDominatingDefs(const BasicBlock *BB, AvailableValueSet &Result,
495 : const DominatorTree &DT);
496 :
497 : /// Compute the AvailableOut set for BB, based on the BasicBlockState BBS,
498 : /// which is the BasicBlockState for BB.
499 : /// ContributionChanged is set when the verifier runs for the first time
500 : /// (in this case Contribution was changed from 'empty' to its initial state)
501 : /// or when Contribution of this BB was changed since last computation.
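  /// In set terms (a sketch of the computation): AvailableOut is Contribution
  /// if the block is Cleared, and the union of Contribution and AvailableIn
  /// otherwise.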
502 : static void transferBlock(const BasicBlock *BB, BasicBlockState &BBS,
503 : bool ContributionChanged);
504 :
505 : /// Model the effect of an instruction on the set of available values.
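  /// (A statepoint call clears the whole set; any other def whose type contains
  /// a GC pointer is added to the set.)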
506 : static void transferInstruction(const Instruction &I, bool &Cleared,
507 : AvailableValueSet &Available);
508 : };
509 :
510 : /// It is a visitor for GCPtrTracker::verifyFunction. It decides if the
511 : /// instruction (which uses heap reference) is legal or not, given our safepoint
512 : /// semantics.
513 : class InstructionVerifier {
514 : bool AnyInvalidUses = false;
515 :
516 : public:
517 : void verifyInstruction(const GCPtrTracker *Tracker, const Instruction &I,
518 : const AvailableValueSet &AvailableSet);
519 :
520 0 : bool hasAnyInvalidUses() const { return AnyInvalidUses; }
521 :
522 : private:
523 : void reportInvalidUse(const Value &V, const Instruction &I);
524 : };
525 : } // end anonymous namespace
526 :
527 34 : GCPtrTracker::GCPtrTracker(const Function &F, const DominatorTree &DT,
528 68 : const CFGDeadness &CD) : F(F), CD(CD) {
529 : // Calculate Contribution of each live BB.
530 : // Allocate BB states for live blocks.
531 111 : for (const BasicBlock &BB : F)
532 : if (!CD.isDeadBlock(&BB)) {
533 75 : BasicBlockState *BBS = new (BSAllocator.Allocate()) BasicBlockState;
534 309 : for (const auto &I : BB)
535 234 : transferInstruction(I, BBS->Cleared, BBS->Contribution);
536 75 : BlockMap[&BB] = BBS;
537 : }
538 :
539 : // Initialize AvailableIn/Out sets of each BB using only information about
540 : // dominating BBs.
541 109 : for (auto &BBI : BlockMap) {
542 75 : gatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT);
543 75 : transferBlock(BBI.first, *BBI.second, true);
544 : }
545 :
546 : // Simulate the flow of defs through the CFG and recalculate AvailableIn/Out
547 : // sets of each BB until it converges. If any def is proved to be an
548 : // unrelocated pointer, it will be removed from all BBSs.
549 34 : recalculateBBsStates();
550 34 : }
551 :
552 : BasicBlockState *GCPtrTracker::getBasicBlockState(const BasicBlock *BB) {
553 284 : auto it = BlockMap.find(BB);
554 284 : return it != BlockMap.end() ? it->second : nullptr;
555 : }
556 :
557 : const BasicBlockState *GCPtrTracker::getBasicBlockState(
558 : const BasicBlock *BB) const {
559 : return const_cast<GCPtrTracker *>(this)->getBasicBlockState(BB);
560 : }
561 :
562 234 : bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const {
563 :   // Poisoned defs are skipped since they are always safe by themselves by
564 :   // definition (for details see the comment on this class).
565 234 : return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I);
566 : }
567 :
568 34 : void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker,
569 : InstructionVerifier &Verifier) {
570 :   // We need RPO here to a) always report the first error and b) report errors
571 :   // in the same order from run to run.
572 34 : ReversePostOrderTraversal<const Function *> RPOT(&Tracker.F);
573 109 : for (const BasicBlock *BB : RPOT) {
574 : BasicBlockState *BBS = Tracker.getBasicBlockState(BB);
575 75 : if (!BBS)
576 0 : continue;
577 :
578 : // We destructively modify AvailableIn as we traverse the block instruction
579 : // by instruction.
580 75 : AvailableValueSet &AvailableSet = BBS->AvailableIn;
581 309 : for (const Instruction &I : *BB) {
582 234 : if (Tracker.instructionMayBeSkipped(&I))
583 32 : continue; // This instruction shouldn't be added to AvailableSet.
584 :
585 202 : Verifier.verifyInstruction(&Tracker, I, AvailableSet);
586 :
587 : // Model the effect of current instruction on AvailableSet to keep the set
588 : // relevant at each point of BB.
589 202 : bool Cleared = false;
590 202 : transferInstruction(I, Cleared, AvailableSet);
591 : (void)Cleared;
592 : }
593 : }
594 34 : }
595 :
596 34 : void GCPtrTracker::recalculateBBsStates() {
597 34 : SetVector<const BasicBlock *> Worklist;
598 :   // TODO: This order is suboptimal; it would be better to use a priority queue
599 :   // where the priority is the RPO number of the BB.
600 109 : for (auto &BBI : BlockMap)
601 75 : Worklist.insert(BBI.first);
602 :
603 : // This loop iterates the AvailableIn/Out sets until it converges.
604 : // The AvailableIn and AvailableOut sets decrease as we iterate.
605 113 : while (!Worklist.empty()) {
606 : const BasicBlock *BB = Worklist.pop_back_val();
607 : BasicBlockState *BBS = getBasicBlockState(BB);
608 79 : if (!BBS)
609 0 : continue; // Ignore dead successors.
610 :
611 : size_t OldInCount = BBS->AvailableIn.size();
612 140 : for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
613 : const BasicBlock *PBB = *PredIt;
614 : BasicBlockState *PBBS = getBasicBlockState(PBB);
615 60 : if (PBBS && !CD.isDeadEdge(&CFGDeadness::getEdge(PredIt)))
616 60 : set_intersect(BBS->AvailableIn, PBBS->AvailableOut);
617 : }
618 :
619 : assert(OldInCount >= BBS->AvailableIn.size() && "invariant!");
620 :
621 : bool InputsChanged = OldInCount != BBS->AvailableIn.size();
622 : bool ContributionChanged =
623 79 : removeValidUnrelocatedDefs(BB, BBS, BBS->Contribution);
624 79 : if (!InputsChanged && !ContributionChanged)
625 : continue;
626 :
627 : size_t OldOutCount = BBS->AvailableOut.size();
628 22 : transferBlock(BB, *BBS, ContributionChanged);
629 22 : if (OldOutCount != BBS->AvailableOut.size()) {
630 : assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
631 20 : Worklist.insert(succ_begin(BB), succ_end(BB));
632 : }
633 : }
634 34 : }
635 :
636 79 : bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB,
637 : const BasicBlockState *BBS,
638 : AvailableValueSet &Contribution) {
639 : assert(&BBS->Contribution == &Contribution &&
640 : "Passed Contribution should be from the passed BasicBlockState!");
641 : AvailableValueSet AvailableSet = BBS->AvailableIn;
642 : bool ContributionChanged = false;
643 :   // For an explanation of why instructions are processed this way, see
644 :   // "Rules of deriving" in the comment on this class.
645 334 : for (const Instruction &I : *BB) {
646 : bool ValidUnrelocatedPointerDef = false;
647 : bool PoisonedPointerDef = false;
648 : // TODO: `select` instructions should be handled here too.
649 : if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
650 27 : if (containsGCPtrType(PN->getType())) {
651 :       // If both are true, the output is poisoned.
652 : bool HasRelocatedInputs = false;
653 : bool HasUnrelocatedInputs = false;
654 79 : for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
655 54 : const BasicBlock *InBB = PN->getIncomingBlock(i);
656 107 : if (!isMapped(InBB) ||
657 53 : !CD.hasLiveIncomingEdge(PN, InBB))
658 1 : continue; // Skip dead block or dead edge.
659 :
660 : const Value *InValue = PN->getIncomingValue(i);
661 :
662 53 : if (isNotExclusivelyConstantDerived(InValue)) {
663 : if (isValuePoisoned(InValue)) {
664 :             // If any of the inputs is poisoned, the output is always poisoned too.
665 : HasRelocatedInputs = true;
666 : HasUnrelocatedInputs = true;
667 2 : break;
668 : }
669 42 : if (BlockMap[InBB]->AvailableOut.count(InValue))
670 : HasRelocatedInputs = true;
671 : else
672 : HasUnrelocatedInputs = true;
673 : }
674 : }
675 25 : if (HasUnrelocatedInputs) {
676 10 : if (HasRelocatedInputs)
677 : PoisonedPointerDef = true;
678 : else
679 : ValidUnrelocatedPointerDef = true;
680 : }
681 : }
682 284 : } else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
683 56 : containsGCPtrType(I.getType())) {
684 :       // A GEP/bitcast of an unrelocated pointer is legal by itself, but this def
685 :       // shouldn't appear in any AvailableSet.
686 174 : for (const Value *V : I.operands())
687 140 : if (containsGCPtrType(V->getType()) &&
688 84 : isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) {
689 : if (isValuePoisoned(V))
690 : PoisonedPointerDef = true;
691 : else
692 : ValidUnrelocatedPointerDef = true;
693 : break;
694 : }
695 : }
696 : assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) &&
697 : "Value cannot be both unrelocated and poisoned!");
698 : if (ValidUnrelocatedPointerDef) {
699 : // Remove def of unrelocated pointer from Contribution of this BB and
700 : // trigger update of all its successors.
701 23 : Contribution.erase(&I);
702 23 : PoisonedDefs.erase(&I);
703 23 : ValidUnrelocatedDefs.insert(&I);
704 : LLVM_DEBUG(dbgs() << "Removing urelocated " << I
705 : << " from Contribution of " << BB->getName() << "\n");
706 : ContributionChanged = true;
707 232 : } else if (PoisonedPointerDef) {
708 : // Mark pointer as poisoned, remove its def from Contribution and trigger
709 : // update of all successors.
710 9 : Contribution.erase(&I);
711 9 : PoisonedDefs.insert(&I);
712 : LLVM_DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of "
713 : << BB->getName() << "\n");
714 : ContributionChanged = true;
715 : } else {
716 223 : bool Cleared = false;
717 223 : transferInstruction(I, Cleared, AvailableSet);
718 : (void)Cleared;
719 : }
720 : }
721 79 : return ContributionChanged;
722 : }
723 :
724 75 : void GCPtrTracker::gatherDominatingDefs(const BasicBlock *BB,
725 : AvailableValueSet &Result,
726 : const DominatorTree &DT) {
727 : DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)];
728 :
729 : assert(DTN && "Unreachable blocks are ignored");
730 117 : while (DTN->getIDom()) {
731 : DTN = DTN->getIDom();
732 45 : auto BBS = getBasicBlockState(DTN->getBlock());
733 : assert(BBS && "immediate dominator cannot be dead for a live block");
734 : const auto &Defs = BBS->Contribution;
735 45 : Result.insert(Defs.begin(), Defs.end());
736 : // If this block is 'Cleared', then nothing LiveIn to this block can be
737 : // available after this block completes. Note: This turns out to be
738 :     // really important for reducing memory consumption of the initial available
739 : // sets and thus peak memory usage by this verifier.
740 45 : if (BBS->Cleared)
741 : return;
742 : }
743 :
744 193 : for (const Argument &A : BB->getParent()->args())
745 121 : if (containsGCPtrType(A.getType()))
746 79 : Result.insert(&A);
747 : }
748 :
749 97 : void GCPtrTracker::transferBlock(const BasicBlock *BB, BasicBlockState &BBS,
750 : bool ContributionChanged) {
751 97 : const AvailableValueSet &AvailableIn = BBS.AvailableIn;
752 : AvailableValueSet &AvailableOut = BBS.AvailableOut;
753 :
754 97 : if (BBS.Cleared) {
755 : // AvailableOut will change only when Contribution changed.
756 44 : if (ContributionChanged)
757 : AvailableOut = BBS.Contribution;
758 : } else {
759 : // Otherwise, we need to reduce the AvailableOut set by things which are no
760 : // longer in our AvailableIn
761 : AvailableValueSet Temp = BBS.Contribution;
762 53 : set_union(Temp, AvailableIn);
763 : AvailableOut = std::move(Temp);
764 : }
765 :
766 : LLVM_DEBUG(dbgs() << "Transfered block " << BB->getName() << " from ";
767 : PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end());
768 : dbgs() << " to ";
769 : PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end());
770 : dbgs() << "\n";);
771 97 : }
772 :
773 659 : void GCPtrTracker::transferInstruction(const Instruction &I, bool &Cleared,
774 : AvailableValueSet &Available) {
775 659 : if (isStatepoint(I)) {
776 112 : Cleared = true;
777 : Available.clear();
778 547 : } else if (containsGCPtrType(I.getType()))
779 242 : Available.insert(&I);
780 659 : }
781 :
782 202 : void InstructionVerifier::verifyInstruction(
783 : const GCPtrTracker *Tracker, const Instruction &I,
784 : const AvailableValueSet &AvailableSet) {
785 : if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
786 12 : if (containsGCPtrType(PN->getType()))
787 36 : for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
788 : const BasicBlock *InBB = PN->getIncomingBlock(i);
789 : const BasicBlockState *InBBS = Tracker->getBasicBlockState(InBB);
790 46 : if (!InBBS ||
791 23 : !Tracker->hasLiveIncomingEdge(PN, InBB))
792 1 : continue; // Skip dead block or dead edge.
793 :
794 : const Value *InValue = PN->getIncomingValue(i);
795 :
796 23 : if (isNotExclusivelyConstantDerived(InValue) &&
797 : !InBBS->AvailableOut.count(InValue))
798 0 : reportInvalidUse(*InValue, *PN);
799 : }
800 19 : } else if (isa<CmpInst>(I) &&
801 38 : containsGCPtrType(I.getOperand(0)->getType())) {
802 19 : Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
803 19 : enum BaseType baseTyLHS = getBaseType(LHS),
804 19 : baseTyRHS = getBaseType(RHS);
805 :
806 : // Returns true if LHS and RHS are unrelocated pointers and they are
807 : // valid unrelocated uses.
808 : auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS,
809 : &LHS, &RHS] () {
810 : // A cmp instruction has valid unrelocated pointer operands only if
811 : // both operands are unrelocated pointers.
812 : // In the comparison between two pointers, if one is an unrelocated
813 : // use, the other *should be* an unrelocated use, for this
814 : // instruction to contain valid unrelocated uses. This unrelocated
815 : // use can be a null constant as well, or another unrelocated
816 : // pointer.
817 : if (AvailableSet.count(LHS) || AvailableSet.count(RHS))
818 : return false;
819 : // Constant pointers (that are not exclusively null) may have
820 : // meaning in different VMs, so we cannot reorder the compare
821 : // against constant pointers before the safepoint. In other words,
822 : // comparison of an unrelocated use against a non-null constant
823 :       // may be invalid.
824 : if ((baseTyLHS == BaseType::ExclusivelySomeConstant &&
825 : baseTyRHS == BaseType::NonConstant) ||
826 : (baseTyLHS == BaseType::NonConstant &&
827 : baseTyRHS == BaseType::ExclusivelySomeConstant))
828 : return false;
829 :
830 :       // If one of the pointers is poisoned and the other is not exclusively
831 :       // derived from null, it is an invalid expression: it produces a poisoned
832 :       // result, and unless we want to track all defs (not only gc pointers) the
833 :       // only option is to prohibit such instructions.
834 : if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) ||
835 : (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull))
836 : return false;
837 :
838 : // All other cases are valid cases enumerated below:
839 : // 1. Comparison between an exclusively derived null pointer and a
840 : // constant base pointer.
841 : // 2. Comparison between an exclusively derived null pointer and a
842 : // non-constant unrelocated base pointer.
843 : // 3. Comparison between 2 unrelocated pointers.
844 : // 4. Comparison between a pointer exclusively derived from null and a
845 : // non-constant poisoned pointer.
846 : return true;
847 19 : };
848 19 : if (!hasValidUnrelocatedUse()) {
849 : // Print out all non-constant derived pointers that are unrelocated
850 : // uses, which are invalid.
851 9 : if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS))
852 7 : reportInvalidUse(*LHS, I);
853 9 : if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS))
854 3 : reportInvalidUse(*RHS, I);
855 : }
856 : } else {
857 1036 : for (const Value *V : I.operands())
858 771 : if (containsGCPtrType(V->getType()) &&
859 694 : isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V))
860 8 : reportInvalidUse(*V, I);
861 : }
862 202 : }
863 :
864 0 : void InstructionVerifier::reportInvalidUse(const Value &V,
865 : const Instruction &I) {
866 0 : errs() << "Illegal use of unrelocated value found!\n";
867 0 : errs() << "Def: " << V << "\n";
868 0 : errs() << "Use: " << I << "\n";
869 0 : if (!PrintOnly)
870 0 : abort();
871 0 : AnyInvalidUses = true;
872 0 : }
873 :
874 34 : static void Verify(const Function &F, const DominatorTree &DT,
875 : const CFGDeadness &CD) {
876 : LLVM_DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName()
877 : << "\n");
878 34 : if (PrintOnly)
879 34 : dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n";
880 :
881 68 : GCPtrTracker Tracker(F, DT, CD);
882 :
883 : // We now have all the information we need to decide if the use of a heap
884 : // reference is legal or not, given our safepoint semantics.
885 :
886 34 : InstructionVerifier Verifier;
887 34 : GCPtrTracker::verifyFunction(std::move(Tracker), Verifier);
888 :
889 34 : if (PrintOnly && !Verifier.hasAnyInvalidUses()) {
890 18 : dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName()
891 18 : << "\n";
892 : }
893 34 : }