LLVM 23.0.0git
GenericUniformityImpl.h
Go to the documentation of this file.
1//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This template implementation resides in a separate file so that it
10// does not get injected into every .cpp file that includes the
11// generic header.
12//
13// DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
14//
15// This file should only be included by files that implement a
16// specialization of the relvant templates. Currently these are:
17// - UniformityAnalysis.cpp
18//
19// Note: The DEBUG_TYPE macro should be defined before using this
20// file so that any use of LLVM_DEBUG is associated with the
21// including file rather than this file.
22//
23//===----------------------------------------------------------------------===//
24///
25/// \file
26/// \brief Implementation of uniformity analysis.
27///
28/// The algorithm is a fixed point iteration that starts with the assumption
29/// that all control flow and all values are uniform. Starting from sources of
30/// divergence (whose discovery must be implemented by a CFG- or even
31/// target-specific derived class), divergence of values is propagated from
32/// definition to uses in a straight-forward way. The main complexity lies in
33/// the propagation of the impact of divergent control flow on the divergence of
34/// values (sync dependencies).
35///
36/// NOTE: In general, no interface exists for a transform to update
37/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
38/// transitive dependence, but it also does not provide an interface for
39/// updating itself. Given that, transforms should not preserve uniformity in
40/// their getAnalysisUsage() callback.
41///
42//===----------------------------------------------------------------------===//
43
44#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
45#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
46
48
49#include "llvm/ADT/DenseSet.h"
50#include "llvm/ADT/STLExtras.h"
55
56#define DEBUG_TYPE "uniformity"
57
58namespace llvm {
59
60// Forward decl from llvm/CodeGen/MachineInstr.h
61class MachineInstr;
62
63/// Construct a specially modified post-order traversal of cycles.
64///
65/// The ModifiedPO is contructed using a virtually modified CFG as follows:
66///
67/// 1. The successors of pre-entry nodes (predecessors of an cycle
68/// entry that are outside the cycle) are replaced by the
69/// successors of the successors of the header.
70/// 2. Successors of the cycle header are replaced by the exit blocks
71/// of the cycle.
72///
73/// Effectively, we produce a depth-first numbering with the following
74/// properties:
75///
76/// 1. Nodes after a cycle are numbered earlier than the cycle header.
77/// 2. The header is numbered earlier than the nodes in the cycle.
78/// 3. The numbering of the nodes within the cycle forms an interval
79/// starting with the header.
80///
81/// Effectively, the virtual modification arranges the nodes in a
82/// cycle as a DAG with the header as the sole leaf, and successors of
83/// the header as the roots. A reverse traversal of this numbering has
84/// the following invariant on the unmodified original CFG:
85///
86/// Each node is visited after all its predecessors, except if that
87/// predecessor is the cycle header.
88///
89template <typename ContextT> class ModifiedPostOrder {
90public:
91 using BlockT = typename ContextT::BlockT;
92 using FunctionT = typename ContextT::FunctionT;
93 using DominatorTreeT = typename ContextT::DominatorTreeT;
94
96 using CycleT = typename CycleInfoT::CycleT;
97 using const_iterator = typename std::vector<BlockT *>::const_iterator;
98
99 ModifiedPostOrder(const ContextT &C) : Context(C) {}
100
101 bool empty() const { return m_order.empty(); }
102 size_t size() const { return m_order.size(); }
103
104 void clear() { m_order.clear(); }
105 void compute(const CycleInfoT &CI);
106
107 unsigned count(BlockT *BB) const { return POIndex.count(BB); }
108 const BlockT *operator[](size_t idx) const { return m_order[idx]; }
109
110 void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) {
111 POIndex[&BB] = m_order.size();
112 m_order.push_back(&BB);
113 LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
114 << "): " << Context.print(&BB) << "\n");
116 ReducibleCycleHeaders.insert(&BB);
117 }
118
119 unsigned getIndex(const BlockT *BB) const {
120 assert(POIndex.count(BB));
121 return POIndex.lookup(BB);
122 }
123
124 bool isReducibleCycleHeader(const BlockT *BB) const {
125 return ReducibleCycleHeaders.contains(BB);
126 }
127
128private:
131 SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
132 const ContextT &Context;
133
134 void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
136
137 void computeStackPO(SmallVectorImpl<const BlockT *> &Stack,
138 const CycleInfoT &CI, const CycleT *Cycle,
140};
141
142template <typename> class DivergencePropagator;
143
144/// \class GenericSyncDependenceAnalysis
145///
146/// \brief Locate join blocks for disjoint paths starting at a divergent branch.
147///
148/// An analysis per divergent branch that returns the set of basic
149/// blocks whose phi nodes become divergent due to divergent control.
150/// These are the blocks that are reachable by two disjoint paths from
151/// the branch, or cycle exits reachable along a path that is disjoint
152/// from a path to the cycle latch.
153
154// --- Above line is not a doxygen comment; intentionally left blank ---
155//
156// Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
157//
158// The SyncDependenceAnalysis is used in the UniformityAnalysis to model
159// control-induced divergence in phi nodes.
160//
161// -- Reference --
162// The algorithm is an extension of Section 5 of
163//
164// An abstract interpretation for SPMD divergence
165// on reducible control flow graphs.
166// Julian Rosemann, Simon Moll and Sebastian Hack
167// POPL '21
168//
169//
170// -- Sync dependence --
171// Sync dependence characterizes the control flow aspect of the
172// propagation of branch divergence. For example,
173//
174// %cond = icmp slt i32 %tid, 10
175// br i1 %cond, label %then, label %else
176// then:
177// br label %merge
178// else:
179// br label %merge
180// merge:
181// %a = phi i32 [ 0, %then ], [ 1, %else ]
182//
183// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
184// because %tid is not on its use-def chains, %a is sync dependent on %tid
185// because the branch "br i1 %cond" depends on %tid and affects which value %a
186// is assigned to.
187//
188//
189// -- Reduction to SSA construction --
190// There are two disjoint paths from A to X, if a certain variant of SSA
191// construction places a phi node in X under the following set-up scheme.
192//
193// This variant of SSA construction ignores incoming undef values.
194// That is paths from the entry without a definition do not result in
195// phi nodes.
196//
197// entry
198// / \
199// A \
200// / \ Y
201// B C /
202// \ / \ /
203// D E
204// \ /
205// F
206//
207// Assume that A contains a divergent branch. We are interested
208// in the set of all blocks where each block is reachable from A
209// via two disjoint paths. This would be the set {D, F} in this
210// case.
211// To generally reduce this query to SSA construction we introduce
212// a virtual variable x and assign to x different values in each
213// successor block of A.
214//
215// entry
216// / \
217// A \
218// / \ Y
219// x = 0 x = 1 /
220// \ / \ /
221// D E
222// \ /
223// F
224//
225// Our flavor of SSA construction for x will construct the following
226//
227// entry
228// / \
229// A \
230// / \ Y
231// x0 = 0 x1 = 1 /
232// \ / \ /
233// x2 = phi E
234// \ /
235// x3 = phi
236//
237// The blocks D and F contain phi nodes and are thus each reachable
238// by two disjoins paths from A.
239//
240// -- Remarks --
241// * In case of cycle exits we need to check for temporal divergence.
242// To this end, we check whether the definition of x differs between the
243// cycle exit and the cycle header (_after_ SSA construction).
244//
245// * In the presence of irreducible control flow, the fixed point is
246// reached only after multiple iterations. This is because labels
247// reaching the header of a cycle must be repropagated through the
248// cycle. This is true even in a reducible cycle, since the labels
249// may have been produced by a nested irreducible cycle.
250//
251// * Note that SyncDependenceAnalysis is not concerned with the points
252// of convergence in an irreducible cycle. It's only purpose is to
253// identify join blocks. The "diverged entry" criterion is
254// separately applied on join blocks to determine if an entire
255// irreducible cycle is assumed to be divergent.
256//
257// * Relevant related work:
258// A simple algorithm for global data flow analysis problems.
259// Matthew S. Hecht and Jeffrey D. Ullman.
260// SIAM Journal on Computing, 4(4):519–532, December 1975.
261//
262template <typename ContextT> class GenericSyncDependenceAnalysis {
263public:
264 using BlockT = typename ContextT::BlockT;
265 using DominatorTreeT = typename ContextT::DominatorTreeT;
266 using FunctionT = typename ContextT::FunctionT;
267 using ValueRefT = typename ContextT::ValueRefT;
268 using InstructionT = typename ContextT::InstructionT;
269
271 using CycleT = typename CycleInfoT::CycleT;
272
275
276 // * if BlockLabels[B] == C then C is the dominating definition at
277 // block B
278 // * if BlockLabels[B] == nullptr then we haven't seen B yet
279 // * if BlockLabels[B] == B then:
280 // - B is a join point of disjoint paths from X, or,
281 // - B is an immediate successor of X (initial value), or,
282 // - B is X
284
285 /// Information discovered by the sync dependence analysis for each
286 /// divergent branch.
288 // Join points of diverged paths.
290 // Divergent cycle exits
292 // Labels assigned to blocks on diverged paths.
294 };
295
297
298 GenericSyncDependenceAnalysis(const ContextT &Context,
299 const DominatorTreeT &DT, const CycleInfoT &CI);
300
301 /// \brief Computes divergent join points and cycle exits caused by branch
302 /// divergence in \p Term.
303 ///
304 /// This returns a pair of sets:
305 /// * The set of blocks which are reachable by disjoint paths from
306 /// \p Term.
307 /// * The set also contains cycle exits if there two disjoint paths:
308 /// one from \p Term to the cycle exit and another from \p Term to
309 /// the cycle header.
310 const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
311
312private:
313 static inline DivergenceDescriptor EmptyDivergenceDesc;
314
315 ModifiedPO CyclePO;
316
317 const DominatorTreeT &DT;
318 const CycleInfoT &CI;
319
321 CachedControlDivDescs;
322};
323
324/// \brief Analysis that identifies uniform values in a data-parallel
325/// execution.
326///
327/// This analysis propagates divergence in a data-parallel context
328/// from sources of divergence to all users. It can be instantiated
329/// for an IR that provides a suitable SSAContext.
330template <typename ContextT> class GenericUniformityAnalysisImpl {
331public:
332 using BlockT = typename ContextT::BlockT;
333 using FunctionT = typename ContextT::FunctionT;
334 using ValueRefT = typename ContextT::ValueRefT;
335 using ConstValueRefT = typename ContextT::ConstValueRefT;
336 using UseT = typename ContextT::UseT;
337 using InstructionT = typename ContextT::InstructionT;
338 using DominatorTreeT = typename ContextT::DominatorTreeT;
339
341 using CycleT = typename CycleInfoT::CycleT;
342
345 typename SyncDependenceAnalysisT::DivergenceDescriptor;
346 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
347
349 std::tuple<ConstValueRefT, InstructionT *, const CycleT *>;
350
353 : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
354 TTI(TTI), DT(DT), SDA(Context, DT, CI) {}
355
357
358 const FunctionT &getFunction() const { return F; }
359
360 /// \brief Mark \p UniVal as a value that is always uniform.
361 void addUniformOverride(const InstructionT &Instr);
362
363 /// \brief Examine \p I for divergent outputs and add to the worklist.
364 void markDivergent(const InstructionT &I);
365
366 /// \brief Mark \p DivVal as a divergent value.
367 /// \returns Whether the tracked divergence state of \p DivVal changed.
368 bool markDivergent(ConstValueRefT DivVal);
369
370 /// \brief Mark outputs of \p Instr as divergent.
371 /// \returns Whether the tracked divergence state of any output has changed.
372 bool markDefsDivergent(const InstructionT &Instr);
373
374 /// \brief Propagate divergence to all instructions in the region.
375 /// Divergence is seeded by calls to \p markDivergent.
376 void compute();
377
378 /// \brief Whether any value was marked or analyzed to be divergent.
379 bool hasDivergence() const { return !DivergentValues.empty(); }
380
381 /// \brief Whether \p Val will always return a uniform value regardless of its
382 /// operands
383 bool isAlwaysUniform(const InstructionT &Instr) const;
384
385 bool hasDivergentDefs(const InstructionT &I) const;
386
387 bool isDivergent(const InstructionT &I) const {
388 if (I.isTerminator()) {
389 return DivergentTermBlocks.contains(I.getParent());
390 }
391 return hasDivergentDefs(I);
392 };
393
394 /// \brief Whether \p Val is divergent at its definition.
395 bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
396
397 bool isDivergentUse(const UseT &U) const;
398
399 bool hasDivergentTerminator(const BlockT &B) const {
400 return DivergentTermBlocks.contains(&B);
401 }
402
403 void print(raw_ostream &out) const;
404
406
408 const CycleT *);
409
410protected:
411 const ContextT &Context;
412 const FunctionT &F;
414 const TargetTransformInfo *TTI = nullptr;
415
416 // Detected/marked divergent values.
419
420 // Internal worklist for divergence propagation.
421 std::vector<const InstructionT *> Worklist;
422
423 /// \brief Mark \p Term as divergent and push all Instructions that become
424 /// divergent as a result on the worklist.
425 void analyzeControlDivergence(const InstructionT &Term);
426
427private:
428 const DominatorTreeT &DT;
429
430 // Recognized cycles with divergent exits.
431 SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
432
433 // Cycles assumed to be divergent.
434 //
435 // We don't use a set here because every insertion needs an explicit
436 // traversal of all existing members.
437 SmallVector<const CycleT *> AssumedDivergent;
438
439 // The SDA links divergent branches to divergent control-flow joins.
441
442 // Set of known-uniform values.
444
445 /// \brief Mark all nodes in \p JoinBlock as divergent and push them on
446 /// the worklist.
447 void taintAndPushAllDefs(const BlockT &JoinBlock);
448
449 /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
450 /// the worklist.
451 void taintAndPushPhiNodes(const BlockT &JoinBlock);
452
453 /// \brief Identify all Instructions that become divergent because \p DivExit
454 /// is a divergent cycle exit of \p DivCycle. Mark those instructions as
455 /// divergent and push them on the worklist.
456 void propagateCycleExitDivergence(const BlockT &DivExit,
457 const CycleT &DivCycle);
458
459 /// Mark as divergent all external uses of values defined in \p DefCycle.
460 void analyzeCycleExitDivergence(const CycleT &DefCycle);
461
462 /// \brief Mark as divergent all uses of \p I that are outside \p DefCycle.
463 void propagateTemporalDivergence(const InstructionT &I,
464 const CycleT &DefCycle);
465
466 /// \brief Push all users of \p Val (in the region) to the worklist.
467 void pushUsers(const InstructionT &I);
468 void pushUsers(ConstValueRefT V);
469
470 bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
471
472 /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
473 bool isTemporalDivergent(const BlockT &ObservingBlock,
474 const InstructionT &Def) const;
475};
476
477template <typename ImplT>
481
482/// Compute divergence starting with a divergent branch.
483template <typename ContextT> class DivergencePropagator {
484public:
485 using BlockT = typename ContextT::BlockT;
486 using DominatorTreeT = typename ContextT::DominatorTreeT;
487 using FunctionT = typename ContextT::FunctionT;
488 using ValueRefT = typename ContextT::ValueRefT;
489
491 using CycleT = typename CycleInfoT::CycleT;
492
496 typename SyncDependenceAnalysisT::DivergenceDescriptor;
497 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
498
503 const ContextT &Context;
504
505 // Track blocks that receive a new label. Every time we relabel a
506 // cycle header, we another pass over the modified post-order in
507 // order to propagate the header label. The bit vector also allows
508 // us to skip labels that have not changed.
510
511 // divergent join and cycle exit descriptor.
512 std::unique_ptr<DivergenceDescriptorT> DivDesc;
514
520
522 Out << "Propagator::BlockLabels {\n";
523 for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
524 const auto *Block = CyclePOT[BlockIdx];
525 const auto *Label = BlockLabels[Block];
526 Out << Context.print(Block) << "(" << BlockIdx << ") : ";
527 if (!Label) {
528 Out << "<null>\n";
529 } else {
530 Out << Context.print(Label) << "\n";
531 }
532 }
533 Out << "}\n";
534 }
535
536 // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
537 // causes a divergent join.
538 bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
539 const auto *OldLabel = BlockLabels[&SuccBlock];
540
541 LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
542 << "\tpushed label: " << Context.print(&PushedLabel)
543 << "\n"
544 << "\told label: " << Context.print(OldLabel) << "\n");
545
546 // Early exit if there is no change in the label.
547 if (OldLabel == &PushedLabel)
548 return false;
549
550 if (OldLabel != &SuccBlock) {
551 auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
552 // Assigning a new label, mark this in FreshLabels.
553 LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
554 FreshLabels.set(SuccIdx);
555 }
556
557 // This is not a join if the succ was previously unlabeled.
558 if (!OldLabel) {
559 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
560 << "\n");
561 BlockLabels[&SuccBlock] = &PushedLabel;
562 return false;
563 }
564
565 // This is a new join. Label the join block as itself, and not as
566 // the pushed label.
567 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
568 BlockLabels[&SuccBlock] = &SuccBlock;
569
570 return true;
571 }
572
573 // visiting a virtual cycle exit edge from the cycle header --> temporal
574 // divergence on join
575 bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
576 if (!computeJoin(ExitBlock, Label))
577 return false;
578
579 // Identified a divergent cycle exit
580 DivDesc->CycleDivBlocks.insert(&ExitBlock);
581 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
582 << "\n");
583 return true;
584 }
585
586 // process \p SuccBlock with reaching definition \p Label
587 bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
588 if (!computeJoin(SuccBlock, Label))
589 return false;
590
591 // Divergent, disjoint paths join.
592 DivDesc->JoinDivBlocks.insert(&SuccBlock);
593 LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
594 << "\n");
595 return true;
596 }
597
598 std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
600
601 LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
602 << Context.print(&DivTermBlock) << "\n");
603
604 int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
605 auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
606
607 // Locate the largest ancestor cycle that is not reducible and does not
608 // contain a reducible ancestor. This is done with a lambda that is defined
609 // and invoked in the same statement.
610 const CycleT *IrreducibleAncestor = [](const CycleT *C) -> const CycleT * {
611 if (!C)
612 return nullptr;
613 if (C->isReducible())
614 return nullptr;
615 while (const CycleT *P = C->getParentCycle()) {
616 if (P->isReducible())
617 return C;
618 C = P;
619 }
620 assert(!C->getParentCycle());
621 assert(!C->isReducible());
622 return C;
623 }(DivTermCycle);
624
625 // Bootstrap with branch targets
626 for (const auto *SuccBlock : successors(&DivTermBlock)) {
627 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
628 // If DivTerm exits the cycle immediately, computeJoin() might
629 // not reach SuccBlock with a different label. We need to
630 // check for this exit now.
631 DivDesc->CycleDivBlocks.insert(SuccBlock);
632 LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
633 << Context.print(SuccBlock) << "\n");
634 }
635 visitEdge(*SuccBlock, *SuccBlock);
636 }
637
638 // Technically propagation can continue until it reaches the last node.
639 //
640 // For efficiency, propagation can stop if FreshLabels.count()==1. But
641 // For irreducible cycles, let propagation continue until it reaches
642 // out of irreducible cycles (see code for details.)
643 while (true) {
644 auto BlockIdx = FreshLabels.find_last();
645 if (BlockIdx == -1)
646 break;
647
648 const auto *Block = CyclePOT[BlockIdx];
649 // If no irreducible cycle, stop if freshLable.count() = 1 and Block
650 // is the IPD. If it is in any irreducible cycle, continue propagation.
651 if (FreshLabels.count() == 1 &&
652 (!IrreducibleAncestor || !IrreducibleAncestor->contains(Block)))
653 break;
654
655 LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
656
657 FreshLabels.reset(BlockIdx);
658 if (BlockIdx == DivTermIdx) {
659 LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
660 continue;
661 }
662
663 LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
664 << BlockIdx << "\n");
665
666 const auto *Label = BlockLabels[Block];
667 assert(Label);
668
669 // If the current block is the header of a reducible cycle, then the label
670 // should be propagated to the cycle exits. If this cycle contains the
671 // branch, then those exits are divergent exits. This is true for any DFS.
672 //
673 // If some DFS has a reducible cycle C with header H, then for
674 // any other DFS, H is the header of a cycle C' that is a
675 // superset of C.
676 //
677 // - For a divergent branch inside the subgraph C, any join node inside
678 // C is either H, or some node encountered by paths within C, without
679 // passing through H.
680 //
681 // - For a divergent branch outside the subgraph C, H is the only node
682 // in C reachable from multiple paths since it is the only entry to C.
683 LLVM_DEBUG(dbgs() << "Check for reducible cycle: " << Context.print(Block)
684 << '\n');
685 if (CyclePOT.isReducibleCycleHeader(Block)) {
686 const auto *BlockCycle = CI.getCycle(Block);
687 LLVM_DEBUG(dbgs() << BlockCycle->print(Context) << '\n');
688 SmallVector<BlockT *, 4> BlockCycleExits;
689 BlockCycle->getExitBlocks(BlockCycleExits);
690 bool BranchIsInside = BlockCycle->contains(&DivTermBlock);
691 for (auto *BlockCycleExit : BlockCycleExits) {
692 if (BranchIsInside)
693 visitCycleExitEdge(*BlockCycleExit, *Label);
694 else
695 visitEdge(*BlockCycleExit, *Label);
696 }
697 } else {
698 for (const auto *SuccBlock : successors(Block))
699 visitEdge(*SuccBlock, *Label);
700 }
701 }
702
703 LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
704
705 // Check every cycle containing DivTermBlock for exit divergence.
706 // A cycle has exit divergence if the label of an exit block does
707 // not match the label of its header.
708 for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
709 Cycle = Cycle->getParentCycle()) {
710 if (Cycle->isReducible()) {
711 // The exit divergence of a reducible cycle is recorded while
712 // propagating labels.
713 continue;
714 }
716 Cycle->getExitBlocks(Exits);
717 auto *Header = Cycle->getHeader();
718 auto *HeaderLabel = BlockLabels[Header];
719 for (const auto *Exit : Exits) {
720 if (BlockLabels[Exit] != HeaderLabel) {
721 // Identified a divergent cycle exit
722 DivDesc->CycleDivBlocks.insert(Exit);
723 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
724 << "\n");
725 }
726 }
727 }
728
729 return std::move(DivDesc);
730 }
731};
732
733template <typename ContextT>
735 const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
736 : CyclePO(Context), DT(DT), CI(CI) {
737 CyclePO.compute(CI);
738}
739
740template <typename ContextT>
742 const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
743 // trivial case
744 if (succ_size(DivTermBlock) <= 1) {
745 return EmptyDivergenceDesc;
746 }
747
748 // already available in cache?
749 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
750 if (ItCached != CachedControlDivDescs.end())
751 return *ItCached->second;
752
753 // compute all join points
754 DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
755 auto DivDesc = Propagator.computeJoinPoints();
756
757 auto printBlockSet = [&](ConstBlockSet &Blocks) {
758 return Printable([&](raw_ostream &Out) {
759 Out << "[";
760 ListSeparator LS;
761 for (const auto *BB : Blocks) {
762 Out << LS << CI.getSSAContext().print(BB);
763 }
764 Out << "]\n";
765 });
766 };
767
769 dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
770 << "):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
771 << " CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
772 << "\n");
773 (void)printBlockSet;
774
775 auto ItInserted =
776 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
777 assert(ItInserted.second);
778 return *ItInserted.first->second;
779}
780
781template <typename ContextT>
783 const InstructionT &I) {
784 if (isAlwaysUniform(I))
785 return;
786 bool Marked = false;
787 if (I.isTerminator()) {
788 Marked = DivergentTermBlocks.insert(I.getParent()).second;
789 if (Marked) {
790 LLVM_DEBUG(dbgs() << "marked divergent term block: "
791 << Context.print(I.getParent()) << "\n");
792 }
793 } else {
794 Marked = markDefsDivergent(I);
795 }
796
797 if (Marked)
798 Worklist.push_back(&I);
799}
800
801template <typename ContextT>
803 ConstValueRefT Val) {
804 if (DivergentValues.insert(Val).second) {
805 LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
806 return true;
807 }
808 return false;
809}
810
811template <typename ContextT>
813 const InstructionT &Instr) {
814 UniformOverrides.insert(&Instr);
815}
816
817// Mark as divergent all external uses of values defined in \p DefCycle.
818//
819// A value V defined by a block B inside \p DefCycle may be used outside the
820// cycle only if the use is a PHI in some exit block, or B dominates some exit
821// block. Thus, we check uses as follows:
822//
823// - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
824// - For every block B inside \p DefCycle that dominates at least one exit
825// block, check all uses outside \p DefCycle.
826//
827// FIXME: This function does not distinguish between divergent and uniform
828// exits. For each divergent exit, only the values that are live at that exit
829// need to be propagated as divergent at their use outside the cycle.
830template <typename ContextT>
831void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
832 const CycleT &DefCycle) {
834 DefCycle.getExitBlocks(Exits);
835 for (auto *Exit : Exits) {
836 for (auto &Phi : Exit->phis()) {
837 if (usesValueFromCycle(Phi, DefCycle)) {
838 markDivergent(Phi);
839 }
840 }
841 }
842
843 for (auto *BB : DefCycle.blocks()) {
844 if (!llvm::any_of(Exits,
845 [&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
846 continue;
847 for (auto &II : *BB) {
848 propagateTemporalDivergence(II, DefCycle);
849 }
850 }
851}
852
853template <typename ContextT>
854void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
855 const BlockT &DivExit, const CycleT &InnerDivCycle) {
856 LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
857 << "\n");
858 auto *DivCycle = &InnerDivCycle;
859 auto *OuterDivCycle = DivCycle;
860 auto *ExitLevelCycle = CI.getCycle(&DivExit);
861 const unsigned CycleExitDepth =
862 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
863
864 // Find outer-most cycle that does not contain \p DivExit
865 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
866 LLVM_DEBUG(dbgs() << " Found exiting cycle: "
867 << Context.print(DivCycle->getHeader()) << "\n");
868 OuterDivCycle = DivCycle;
869 DivCycle = DivCycle->getParentCycle();
870 }
871 LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
872 << Context.print(OuterDivCycle->getHeader()) << "\n");
873
874 if (!DivergentExitCycles.insert(OuterDivCycle).second)
875 return;
876
877 // Exit divergence does not matter if the cycle itself is assumed to
878 // be divergent.
879 for (const auto *C : AssumedDivergent) {
880 if (C->contains(OuterDivCycle))
881 return;
882 }
883
884 analyzeCycleExitDivergence(*OuterDivCycle);
885}
886
887template <typename ContextT>
888void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
889 const BlockT &BB) {
890 LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
891 for (const auto &I : instrs(BB)) {
892 // Terminators do not produce values; they are divergent only if
893 // the condition is divergent. That is handled when the divergent
894 // condition is placed in the worklist.
895 if (I.isTerminator())
896 break;
897
898 markDivergent(I);
899 }
900}
901
902/// Mark divergent phi nodes in a join block
903template <typename ContextT>
904void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
905 const BlockT &JoinBlock) {
906 LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
907 << "\n");
908 for (const auto &Phi : JoinBlock.phis()) {
909 // FIXME: The non-undef value is not constant per se; it just happens to be
910 // uniform and may not dominate this PHI. So assuming that the same value
911 // reaches along all incoming edges may itself be undefined behaviour. This
912 // particular interpretation of the undef value was added to
913 // DivergenceAnalysis in the following review:
914 //
915 // https://reviews.llvm.org/D19013
916 if (ContextT::isConstantOrUndefValuePhi(Phi))
917 continue;
918 markDivergent(Phi);
919 }
920}
921
922/// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
923///
924/// \return true iff \p Candidate was added to \p Cycles.
925template <typename CycleT>
927 CycleT *Candidate) {
928 if (llvm::any_of(Cycles,
929 [Candidate](CycleT *C) { return C->contains(Candidate); }))
930 return false;
931 Cycles.push_back(Candidate);
932 return true;
933}
934
935/// Return the outermost cycle made divergent by branch outside it.
936///
937/// If two paths that diverged outside an irreducible cycle join
938/// inside that cycle, then that whole cycle is assumed to be
939/// divergent. This does not apply if the cycle is reducible.
940template <typename CycleT, typename BlockT>
941static const CycleT *getExtDivCycle(const CycleT *Cycle,
942 const BlockT *DivTermBlock,
943 const BlockT *JoinBlock) {
944 assert(Cycle);
945 assert(Cycle->contains(JoinBlock));
946
947 if (Cycle->contains(DivTermBlock))
948 return nullptr;
949
950 const auto *OriginalCycle = Cycle;
951 const auto *Parent = Cycle->getParentCycle();
952 while (Parent && !Parent->contains(DivTermBlock)) {
953 Cycle = Parent;
954 Parent = Cycle->getParentCycle();
955 }
956
957 // If the original cycle is not the outermost cycle, then the outermost cycle
958 // is irreducible. If the outermost cycle were reducible, then external
959 // diverged paths would not reach the original inner cycle.
960 (void)OriginalCycle;
961 assert(Cycle == OriginalCycle || !Cycle->isReducible());
962
963 if (Cycle->isReducible()) {
964 assert(Cycle->getHeader() == JoinBlock);
965 return nullptr;
966 }
967
968 LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
969 return Cycle;
970}
971
972/// Return the outermost cycle made divergent by branch inside it.
973///
974/// This checks the "diverged entry" criterion defined in the
975/// docs/ConvergenceAnalysis.html.
976template <typename ContextT, typename CycleT, typename BlockT,
977 typename DominatorTreeT>
978static const CycleT *
979getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
980 const BlockT *JoinBlock, const DominatorTreeT &DT,
981 ContextT &Context) {
982 LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
983 << " for internal branch " << Context.print(DivTermBlock)
984 << "\n");
985 if (DT.properlyDominates(DivTermBlock, JoinBlock))
986 return nullptr;
987
988 // Find the smallest common cycle, if one exists.
989 assert(Cycle && Cycle->contains(JoinBlock));
990 while (Cycle && !Cycle->contains(DivTermBlock)) {
991 Cycle = Cycle->getParentCycle();
992 }
993 if (!Cycle || Cycle->isReducible())
994 return nullptr;
995
996 if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
997 return nullptr;
998
999 LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader())
1000 << " does not dominate join\n");
1001
1002 const auto *Parent = Cycle->getParentCycle();
1003 while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
1004 LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader())
1005 << " does not dominate join\n");
1006 Cycle = Parent;
1007 Parent = Parent->getParentCycle();
1008 }
1009
1010 LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n");
1011 return Cycle;
1012}
1013
1014template <typename ContextT, typename CycleT, typename BlockT,
1015 typename DominatorTreeT>
1016static const CycleT *
1017getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1018 const BlockT *JoinBlock, const DominatorTreeT &DT,
1019 ContextT &Context) {
1020 if (!Cycle)
1021 return nullptr;
1022
1023 // First try to expand Cycle to the largest that contains JoinBlock
1024 // but not DivTermBlock.
1025 const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);
1026
1027 // Continue expanding to the largest cycle that contains both.
1028 const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);
1029
1030 if (Int)
1031 return Int;
1032 return Ext;
1033}
1034
1035template <typename ContextT>
1036bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1037 const BlockT &ObservingBlock, const InstructionT &Def) const {
1038 const BlockT *DefBlock = Def.getParent();
1039 for (const CycleT *Cycle = CI.getCycle(DefBlock);
1040 Cycle && !Cycle->contains(&ObservingBlock);
1041 Cycle = Cycle->getParentCycle()) {
1042 if (DivergentExitCycles.contains(Cycle)) {
1043 return true;
1044 }
1045 }
1046 return false;
1047}
1048
1049template <typename ContextT>
1051 const InstructionT &Term) {
1052 const auto *DivTermBlock = Term.getParent();
1053 DivergentTermBlocks.insert(DivTermBlock);
1054 LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
1055 << "\n");
1056
1057 // Don't propagate divergence from unreachable blocks.
1058 if (!DT.isReachableFromEntry(DivTermBlock))
1059 return;
1060
1061 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1063
1064 // Iterate over all blocks now reachable by a disjoint path join
1065 for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1066 const auto *Cycle = CI.getCycle(JoinBlock);
1067 LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
1068 << "\n");
1069 if (const auto *Outermost = getOutermostDivergentCycle(
1070 Cycle, DivTermBlock, JoinBlock, DT, Context)) {
1071 LLVM_DEBUG(dbgs() << "found divergent cycle\n");
1072 DivCycles.push_back(Outermost);
1073 continue;
1074 }
1075 taintAndPushPhiNodes(*JoinBlock);
1076 }
1077
1078 // Sort by order of decreasing depth. This allows later cycles to be skipped
1079 // because they are already contained in earlier ones.
1080 llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
1081 return A->getDepth() > B->getDepth();
1082 });
1083
1084 // Cycles that are assumed divergent due to the diverged entry
1085 // criterion potentially contain temporal divergence depending on
1086 // the DFS chosen. Conservatively, all values produced in such a
1087 // cycle are assumed divergent. "Cycle invariant" values may be
1088 // assumed uniform, but that requires further analysis.
1089 for (auto *C : DivCycles) {
1090 if (!insertIfNotContained(AssumedDivergent, C))
1091 continue;
1092 LLVM_DEBUG(dbgs() << "process divergent cycle\n");
1093 for (const BlockT *BB : C->blocks()) {
1094 taintAndPushAllDefs(*BB);
1095 }
1096 }
1097
1098 const auto *BranchCycle = CI.getCycle(DivTermBlock);
1099 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1100 for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1101 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1102 }
1103}
1104
1105template <typename ContextT>
1107 // Initialize worklist.
1108 auto DivValuesCopy = DivergentValues;
1109 for (const auto DivVal : DivValuesCopy) {
1110 assert(isDivergent(DivVal) && "Worklist invariant violated!");
1111 pushUsers(DivVal);
1112 }
1113
1114 // All values on the Worklist are divergent.
1115 // Their users may not have been updated yet.
1116 while (!Worklist.empty()) {
1117 const InstructionT *I = Worklist.back();
1118 Worklist.pop_back();
1119
1120 LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
1121
1122 if (I->isTerminator()) {
1124 continue;
1125 }
1126
1127 // propagate value divergence to users
1128 assert(isDivergent(*I) && "Worklist invariant violated!");
1129 pushUsers(*I);
1130 }
1131}
1132
1133template <typename ContextT>
1139
1140template <typename ContextT>
1142 const InstructionT &Instr) const {
1143 return UniformOverrides.contains(&Instr);
1144}
1145
1146template <typename ContextT>
1148 const DominatorTreeT &DT, const CycleInfoT &CI,
1149 const TargetTransformInfo *TTI) {
1150 DA.reset(new ImplT{DT, CI, TTI});
1151}
1152
1153template <typename ContextT>
1155 bool haveDivergentArgs = false;
1156
1157 // When we print Value, LLVM IR instruction, we want to print extra new line.
1158 // In LLVM IR print function for Value does not print new line at the end.
1159 // In MIR print for MachineInstr prints new line at the end.
1160 constexpr bool IsMIR = std::is_same<InstructionT, MachineInstr>::value;
1161 std::string NewLine = IsMIR ? "" : "\n";
1162
1163 // Control flow instructions may be divergent even if their inputs are
1164 // uniform. Thus, although exceedingly rare, it is possible to have a program
1165 // with no divergent values but with divergent control structures.
1166 if (DivergentValues.empty() && DivergentTermBlocks.empty() &&
1167 DivergentExitCycles.empty()) {
1168 OS << "ALL VALUES UNIFORM\n";
1169 return;
1170 }
1171
1172 for (const auto &entry : DivergentValues) {
1173 const BlockT *parent = Context.getDefBlock(entry);
1174 if (!parent) {
1175 if (!haveDivergentArgs) {
1176 OS << "DIVERGENT ARGUMENTS:\n";
1177 haveDivergentArgs = true;
1178 }
1179 OS << " DIVERGENT: " << Context.print(entry) << '\n';
1180 }
1181 }
1182
1183 if (!AssumedDivergent.empty()) {
1184 OS << "CYCLES ASSUMED DIVERGENT:\n";
1185 for (const CycleT *cycle : AssumedDivergent) {
1186 OS << " " << cycle->print(Context) << '\n';
1187 }
1188 }
1189
1190 if (!DivergentExitCycles.empty()) {
1191 OS << "CYCLES WITH DIVERGENT EXIT:\n";
1192 for (const CycleT *cycle : DivergentExitCycles) {
1193 OS << " " << cycle->print(Context) << '\n';
1194 }
1195 }
1196
1197 if (!TemporalDivergenceList.empty()) {
1198 OS << "\nTEMPORAL DIVERGENCE LIST:\n";
1199
1200 for (auto [Val, UseInst, Cycle] : TemporalDivergenceList) {
1201 OS << "Value :" << Context.print(Val) << NewLine
1202 << "Used by :" << Context.print(UseInst) << NewLine
1203 << "Outside cycle :" << Cycle->print(Context) << "\n\n";
1204 }
1205 }
1206
1207 for (auto &block : F) {
1208 OS << "\nBLOCK " << Context.print(&block) << '\n';
1209
1210 OS << "DEFINITIONS\n";
1212 Context.appendBlockDefs(defs, block);
1213 for (auto value : defs) {
1214 if (isDivergent(value))
1215 OS << " DIVERGENT: ";
1216 else
1217 OS << " ";
1218 OS << Context.print(value) << NewLine;
1219 }
1220
1221 OS << "TERMINATORS\n";
1223 Context.appendBlockTerms(terms, block);
1224 bool divergentTerminators = hasDivergentTerminator(block);
1225 for (auto *T : terms) {
1226 if (divergentTerminators)
1227 OS << " DIVERGENT: ";
1228 else
1229 OS << " ";
1230 OS << Context.print(T) << NewLine;
1231 }
1232
1233 OS << "END BLOCK\n";
1234 }
1235}
1236
1237template <typename ContextT>
1241 return make_range(DA->TemporalDivergenceList.begin(),
1242 DA->TemporalDivergenceList.end());
1243}
1244
1245template <typename ContextT>
1247 return DA->hasDivergence();
1248}
1249
1250template <typename ContextT>
1251const typename ContextT::FunctionT &
1253 return DA->getFunction();
1254}
1255
1256/// Whether \p V is divergent at its definition.
1257template <typename ContextT>
1259 return DA->isDivergent(V);
1260}
1261
1262template <typename ContextT>
1264 return DA->isDivergent(*I);
1265}
1266
1267template <typename ContextT>
1269 return DA->isDivergentUse(U);
1270}
1271
1272template <typename ContextT>
1274 return DA->hasDivergentTerminator(B);
1275}
1276
1277/// \brief T helper function for printing.
1278template <typename ContextT>
1280 DA->print(out);
1281}
1282
1283template <typename ContextT>
1284void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
1285 SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
1286 const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {
1287 LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
1288 while (!Stack.empty()) {
1289 auto *NextBB = Stack.back();
1290 if (Finalized.count(NextBB)) {
1291 Stack.pop_back();
1292 continue;
1293 }
1294 LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB)
1295 << "\n");
1296 auto *NestedCycle = CI.getCycle(NextBB);
1297 if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
1298 LLVM_DEBUG(dbgs() << " found a cycle\n");
1299 while (NestedCycle->getParentCycle() != Cycle)
1300 NestedCycle = NestedCycle->getParentCycle();
1301
1302 SmallVector<BlockT *, 3> NestedExits;
1303 NestedCycle->getExitBlocks(NestedExits);
1304 bool PushedNodes = false;
1305 for (auto *NestedExitBB : NestedExits) {
1306 LLVM_DEBUG(dbgs() << " examine exit: "
1307 << CI.getSSAContext().print(NestedExitBB) << "\n");
1308 if (Cycle && !Cycle->contains(NestedExitBB))
1309 continue;
1310 if (Finalized.count(NestedExitBB))
1311 continue;
1312 PushedNodes = true;
1313 Stack.push_back(NestedExitBB);
1314 LLVM_DEBUG(dbgs() << " pushed exit: "
1315 << CI.getSSAContext().print(NestedExitBB) << "\n");
1316 }
1317 if (!PushedNodes) {
1318 // All loop exits finalized -> finish this node
1319 Stack.pop_back();
1320 computeCyclePO(CI, NestedCycle, Finalized);
1321 }
1322 continue;
1323 }
1324
1325 LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n");
1326 // DAG-style
1327 bool PushedNodes = false;
1328 for (auto *SuccBB : successors(NextBB)) {
1329 LLVM_DEBUG(dbgs() << " examine succ: "
1330 << CI.getSSAContext().print(SuccBB) << "\n");
1331 if (Cycle && !Cycle->contains(SuccBB))
1332 continue;
1333 if (Finalized.count(SuccBB))
1334 continue;
1335 PushedNodes = true;
1336 Stack.push_back(SuccBB);
1337 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB)
1338 << "\n");
1339 }
1340 if (!PushedNodes) {
1341 // Never push nodes twice
1342 LLVM_DEBUG(dbgs() << " finishing node: "
1343 << CI.getSSAContext().print(NextBB) << "\n");
1344 Stack.pop_back();
1345 Finalized.insert(NextBB);
1346 appendBlock(*NextBB);
1347 }
1348 }
1349 LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
1350}
1351
1352template <typename ContextT>
1353void ModifiedPostOrder<ContextT>::computeCyclePO(
1354 const CycleInfoT &CI, const CycleT *Cycle,
1356 LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
1358 auto *CycleHeader = Cycle->getHeader();
1359
1360 LLVM_DEBUG(dbgs() << " noted header: "
1361 << CI.getSSAContext().print(CycleHeader) << "\n");
1362 assert(!Finalized.count(CycleHeader));
1363 Finalized.insert(CycleHeader);
1364
1365 // Visit the header last
1366 LLVM_DEBUG(dbgs() << " finishing header: "
1367 << CI.getSSAContext().print(CycleHeader) << "\n");
1368 appendBlock(*CycleHeader, Cycle->isReducible());
1369
1370 // Initialize with immediate successors
1371 for (auto *BB : successors(CycleHeader)) {
1372 LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB)
1373 << "\n");
1374 if (!Cycle->contains(BB))
1375 continue;
1376 if (BB == CycleHeader)
1377 continue;
1378 if (!Finalized.count(BB)) {
1379 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB)
1380 << "\n");
1381 Stack.push_back(BB);
1382 }
1383 }
1384
1385 // Compute PO inside region
1386 computeStackPO(Stack, CI, Cycle, Finalized);
1387
1388 LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
1389}
1390
1391/// \brief Generically compute the modified post order.
1392template <typename ContextT>
1396 auto *F = CI.getFunction();
1397 Stack.reserve(24); // FIXME made-up number
1398 Stack.push_back(&F->front());
1399 computeStackPO(Stack, CI, nullptr, Finalized);
1400}
1401
1402} // namespace llvm
1403
1404#undef DEBUG_TYPE
1405
1406#endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseSet and SmallDenseSet classes.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SparseBitVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
unify loop Fixup each natural loop to have a single exit block
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Compute divergence starting with a divergent branch.
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
typename ContextT::DominatorTreeT DominatorTreeT
bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel)
std::unique_ptr< DivergenceDescriptorT > DivDesc
void printDefs(raw_ostream &Out)
typename ContextT::FunctionT FunctionT
GenericCycleInfo< ContextT > CycleInfoT
ModifiedPostOrder< ContextT > ModifiedPO
std::unique_ptr< DivergenceDescriptorT > computeJoinPoints()
bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label)
typename ContextT::ValueRefT ValueRefT
typename ContextT::BlockT BlockT
typename CycleInfoT::CycleT CycleT
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, const CycleInfoT &CI, const BlockT &DivTermBlock)
bool visitEdge(const BlockT &SuccBlock, const BlockT &Label)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
Cycle information for a function.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Locate join blocks for disjoint paths starting at a divergent branch.
GenericSyncDependenceAnalysis(const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
ModifiedPostOrder< ContextT > ModifiedPO
DivergencePropagator< ContextT > DivergencePropagatorT
DenseMap< const BlockT *, const BlockT * > BlockLabelMap
SmallPtrSet< const BlockT *, 4 > ConstBlockSet
typename ContextT::DominatorTreeT DominatorTreeT
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::FunctionT FunctionT
typename ContextT::InstructionT InstructionT
typename ContextT::ValueRefT ValueRefT
const DivergenceDescriptor & getJoinBlocks(const BlockT *DivTermBlock)
Computes divergent join points and cycle exits caused by branch divergence in Term.
SmallVector< TemporalDivergenceTuple, 8 > TemporalDivergenceList
bool isAlwaysUniform(const InstructionT &Instr) const
Whether Val will always return a uniform value regardless of its operands.
typename ContextT::ValueRefT ValueRefT
void analyzeControlDivergence(const InstructionT &Term)
Mark Term as divergent and push all Instructions that become divergent as a result on the worklist.
bool isDivergent(ConstValueRefT V) const
Whether Val is divergent at its definition.
bool isDivergentUse(const UseT &U) const
bool hasDivergentDefs(const InstructionT &I) const
typename ContextT::InstructionT InstructionT
DenseSet< ConstValueRefT > DivergentValues
typename ContextT::ConstValueRefT ConstValueRefT
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
bool hasDivergence() const
Whether any value was marked or analyzed to be divergent.
typename ContextT::DominatorTreeT DominatorTreeT
void compute()
Propagate divergence to all instructions in the region.
bool hasDivergentTerminator(const BlockT &B) const
GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI, const TargetTransformInfo *TTI)
bool markDefsDivergent(const InstructionT &Instr)
Mark outputs of Instr as divergent.
typename ContextT::FunctionT FunctionT
bool isDivergent(const InstructionT &I) const
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
std::vector< const InstructionT * > Worklist
void markDivergent(const InstructionT &I)
Examine I for divergent outputs and add to the worklist.
SmallPtrSet< const BlockT *, 32 > DivergentTermBlocks
GenericCycleInfo< ContextT > CycleInfoT
void addUniformOverride(const InstructionT &Instr)
Mark UniVal as a value that is always uniform.
void recordTemporalDivergence(ConstValueRefT, const InstructionT *, const CycleT *)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
bool hasDivergentTerminator(const BlockT &B)
bool isDivergentUse(const UseT &U) const
Whether U is divergent.
bool isDivergent(ConstValueRefT V) const
Whether V is divergent at its definition.
void print(raw_ostream &Out) const
T helper function for printing.
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
typename ContextT::ConstValueRefT ConstValueRefT
typename ContextT::BlockT BlockT
typename ContextT::InstructionT InstructionT
bool hasDivergence() const
Whether any divergence was detected.
const FunctionT & getFunction() const
The GPU kernel this analysis result is for.
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::DominatorTreeT DominatorTreeT
iterator_range< TemporalDivergenceTuple * > getTemporalDivergenceList() const
A helper class to return the specified delimiter string after the first invocation of operator String...
Construct a specially modified post-order traversal of cycles.
typename ContextT::FunctionT FunctionT
const BlockT * operator[](size_t idx) const
typename CycleInfoT::CycleT CycleT
bool isReducibleCycleHeader(const BlockT *BB) const
ModifiedPostOrder(const ContextT &C)
unsigned count(BlockT *BB) const
void compute(const CycleInfoT &CI)
Generically compute the modified post order.
GenericCycleInfo< ContextT > CycleInfoT
void appendBlock(const BlockT &BB, bool isReducibleCycleHeader=false)
unsigned getIndex(const BlockT *BB) const
typename std::vector< BlockT * >::const_iterator const_iterator
typename ContextT::DominatorTreeT DominatorTreeT
typename ContextT::BlockT BlockT
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
static const CycleT * getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Return the outermost cycle made divergent by branch inside it.
static const CycleT * getExtDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock)
Return the outermost cycle made divergent by branch outside it.
auto successors(const MachineBasicBlock *BB)
static bool insertIfNotContained(SmallVector< CycleT * > &Cycles, CycleT *Candidate)
Add Candidate to Cycles if it is not already contained in Cycles.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
TargetTransformInfo TTI
auto instrs(const MachineBasicBlock &BB)
static const CycleT * getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Information discovered by the sync dependence analysis for each divergent branch.