LLVM 23.0.0git
VPlanPredicator.cpp
Go to the documentation of this file.
1//===-- VPlanPredicator.cpp - VPlan predicator ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements predication for VPlans.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPRecipeBuilder.h"
15#include "VPlan.h"
16#include "VPlanCFG.h"
17#include "VPlanDominatorTree.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanTransforms.h"
20#include "VPlanUtils.h"
22
23using namespace llvm;
24using namespace VPlanPatternMatch;
25
26namespace {
27class VPPredicator {
28 /// Builder to construct recipes to compute masks.
29 VPBuilder Builder;
30
31 /// Dominator tree for the VPlan.
32 VPDominatorTree VPDT;
33
34 /// Post-dominator tree for the VPlan.
35 VPPostDominatorTree VPPDT;
36
37 /// When we if-convert we need to create edge masks. We have to cache values
38 /// so that we don't end up with exponential recursion/IR.
39 using EdgeMaskCacheTy =
40 DenseMap<std::pair<const VPBasicBlock *, const VPBasicBlock *>,
41 VPValue *>;
42 using BlockMaskCacheTy = DenseMap<const VPBasicBlock *, VPValue *>;
43 EdgeMaskCacheTy EdgeMaskCache;
44
45 BlockMaskCacheTy BlockMaskCache;
46
47 /// Create an edge mask for every destination of cases and/or default.
48 void createSwitchEdgeMasks(const VPInstruction *SI);
49
50 /// Computes and return the predicate of the edge between \p Src and \p Dst,
51 /// possibly inserting new recipes at \p Dst (using Builder's insertion point)
52 VPValue *createEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst);
53
54 /// Record \p Mask as the *entry* mask of \p VPBB, which is expected to not
55 /// already have a mask.
56 void setBlockInMask(const VPBasicBlock *VPBB, VPValue *Mask) {
57 // TODO: Include the masks as operands in the predicated VPlan directly to
58 // avoid keeping the map of masks beyond the predication transform.
59 assert(!getBlockInMask(VPBB) && "Mask already set");
60 BlockMaskCache[VPBB] = Mask;
61 }
62
63 /// Record \p Mask as the mask of the edge from \p Src to \p Dst. The edge is
64 /// expected to not have a mask already.
65 VPValue *setEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst,
66 VPValue *Mask) {
67 assert(Src != Dst && "Src and Dst must be different");
68 assert(!getEdgeMask(Src, Dst) && "Mask already set");
69 return EdgeMaskCache[{Src, Dst}] = Mask;
70 }
71
72public:
73 VPPredicator(VPlan &Plan) : VPDT(Plan), VPPDT(Plan) {}
74
75 /// Returns the *entry* mask for \p VPBB.
76 VPValue *getBlockInMask(const VPBasicBlock *VPBB) const {
77 return BlockMaskCache.lookup(VPBB);
78 }
79
80 /// Returns the precomputed predicate of the edge from \p Src to \p Dst.
81 VPValue *getEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
82 return EdgeMaskCache.lookup({Src, Dst});
83 }
84
85 /// Compute the predicate of \p VPBB.
86 void createBlockInMask(VPBasicBlock *VPBB);
87
88 /// Convert phi recipes in \p VPBB to VPBlendRecipes.
89 void convertPhisToBlends(VPBasicBlock *VPBB);
90};
91} // namespace
92
93VPValue *VPPredicator::createEdgeMask(const VPBasicBlock *Src,
94 const VPBasicBlock *Dst) {
95 assert(is_contained(Dst->getPredecessors(), Src) && "Invalid edge");
96
97 // Look for cached value.
98 VPValue *EdgeMask = getEdgeMask(Src, Dst);
99 if (EdgeMask)
100 return EdgeMask;
101
102 VPValue *SrcMask = getBlockInMask(Src);
103
104 // If there's a single successor, there's no terminator recipe.
105 if (Src->getNumSuccessors() == 1)
106 return setEdgeMask(Src, Dst, SrcMask);
107
108 auto *Term = cast<VPInstruction>(Src->getTerminator());
109 if (Term->getOpcode() == Instruction::Switch) {
110 createSwitchEdgeMasks(Term);
111 return getEdgeMask(Src, Dst);
112 }
113
114 assert(Term->getOpcode() == VPInstruction::BranchOnCond &&
115 "Unsupported terminator");
116 if (Src->getSuccessors()[0] == Src->getSuccessors()[1])
117 return setEdgeMask(Src, Dst, SrcMask);
118
119 EdgeMask = Term->getOperand(0);
120 assert(EdgeMask && "No Edge Mask found for condition");
121
122 if (Src->getSuccessors()[0] != Dst)
123 EdgeMask = Builder.createNot(EdgeMask, Term->getDebugLoc());
124
125 if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
126 // The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
127 // is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
128 // instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
129 EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, Term->getDebugLoc());
130 }
131
132 return setEdgeMask(Src, Dst, EdgeMask);
133}
134
135void VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
136 // Start inserting after the block's phis, which be replaced by blends later.
137 Builder.setInsertPoint(VPBB, VPBB->getFirstNonPhi());
138
139 // Reuse the mask of the immediate dominator if the VPBB post-dominates the
140 // immediate dominator.
141 auto *IDom = VPDT.getNode(VPBB)->getIDom();
142 assert(IDom && "Block in loop must have immediate dominator");
143 auto *IDomBB = cast<VPBasicBlock>(IDom->getBlock());
144 if (VPPDT.properlyDominates(VPBB, IDomBB)) {
145 setBlockInMask(VPBB, getBlockInMask(IDomBB));
146 return;
147 }
148 // All-one mask is modelled as no-mask following the convention for masked
149 // load/store/gather/scatter. Initialize BlockMask to no-mask.
150 VPValue *BlockMask = nullptr;
151 // This is the block mask. We OR all unique incoming edges.
152 for (auto *Predecessor : SetVector<VPBlockBase *>(
153 VPBB->getPredecessors().begin(), VPBB->getPredecessors().end())) {
154 VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
155 if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
156 // too.
157 setBlockInMask(VPBB, EdgeMask);
158 return;
159 }
160
161 if (!BlockMask) { // BlockMask has its initial nullptr value.
162 BlockMask = EdgeMask;
163 continue;
164 }
165
166 BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
167 }
168
169 setBlockInMask(VPBB, BlockMask);
170}
171
172void VPPredicator::createSwitchEdgeMasks(const VPInstruction *SI) {
173 const VPBasicBlock *Src = SI->getParent();
174
175 // Create masks where SI is a switch. We create masks for all edges from SI's
176 // parent block at the same time. This is more efficient, as we can create and
177 // collect compares for all cases once.
178 VPValue *Cond = SI->getOperand(0);
179 VPBasicBlock *DefaultDst = cast<VPBasicBlock>(Src->getSuccessors()[0]);
180 MapVector<VPBasicBlock *, SmallVector<VPValue *>> Dst2Compares;
181 for (const auto &[Idx, Succ] : enumerate(drop_begin(Src->getSuccessors()))) {
182 VPBasicBlock *Dst = cast<VPBasicBlock>(Succ);
183 assert(!getEdgeMask(Src, Dst) && "Edge masks already created");
184 // Cases whose destination is the same as default are redundant and can
185 // be ignored - they will get there anyhow.
186 if (Dst == DefaultDst)
187 continue;
188 auto &Compares = Dst2Compares[Dst];
189 VPValue *V = SI->getOperand(Idx + 1);
190 Compares.push_back(Builder.createICmp(CmpInst::ICMP_EQ, Cond, V));
191 }
192
193 // We need to handle 2 separate cases below for all entries in Dst2Compares,
194 // which excludes destinations matching the default destination.
195 VPValue *SrcMask = getBlockInMask(Src);
196 VPValue *DefaultMask = nullptr;
197 for (const auto &[Dst, Conds] : Dst2Compares) {
198 // 1. Dst is not the default destination. Dst is reached if any of the
199 // cases with destination == Dst are taken. Join the conditions for each
200 // case whose destination == Dst using an OR.
201 VPValue *Mask = Conds[0];
202 for (VPValue *V : drop_begin(Conds))
203 Mask = Builder.createOr(Mask, V);
204 if (SrcMask)
205 Mask = Builder.createLogicalAnd(SrcMask, Mask);
206 setEdgeMask(Src, Dst, Mask);
207
208 // 2. Create the mask for the default destination, which is reached if
209 // none of the cases with destination != default destination are taken.
210 // Join the conditions for each case where the destination is != Dst using
211 // an OR and negate it.
212 DefaultMask = DefaultMask ? Builder.createOr(DefaultMask, Mask) : Mask;
213 }
214
215 if (DefaultMask) {
216 DefaultMask = Builder.createNot(DefaultMask);
217 if (SrcMask)
218 DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
219 } else {
220 // There are no destinations other than the default destination, so this is
221 // an unconditional branch.
222 DefaultMask = SrcMask;
223 }
224 setEdgeMask(Src, DefaultDst, DefaultMask);
225}
226
227void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
229 for (VPRecipeBase &R : VPBB->phis())
230 Phis.push_back(cast<VPPhi>(&R));
231 for (VPPhi *PhiR : Phis) {
232 // The non-header Phi is converted into a Blend recipe below,
233 // so we don't have to worry about the insertion order and we can just use
234 // the builder. At this point we generate the predication tree. There may
235 // be duplications since this is a simple recursive scan, but future
236 // optimizations will clean it up.
237
238 auto NotPoison = make_filter_range(PhiR->incoming_values(), [](VPValue *V) {
239 return !match(V, m_Poison());
240 });
241 if (all_equal(NotPoison)) {
242 PhiR->replaceAllUsesWith(NotPoison.empty() ? PhiR->getIncomingValue(0)
243 : *NotPoison.begin());
244 PhiR->eraseFromParent();
245 continue;
246 }
247
248 SmallVector<VPValue *, 2> OperandsWithMask;
249 for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
250 OperandsWithMask.push_back(InVPV);
251 OperandsWithMask.push_back(createEdgeMask(InVPBB, VPBB));
252 }
253 PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
254 auto *Blend =
255 new VPBlendRecipe(IRPhi, OperandsWithMask, *PhiR, PhiR->getDebugLoc());
256 Builder.insert(Blend);
257 PhiR->replaceAllUsesWith(Blend);
258 PhiR->eraseFromParent();
259 }
260}
261
263 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
264 // Scan the body of the loop in a topological order to visit each basic block
265 // after having visited its predecessor basic blocks.
266 VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
268 Header);
269 VPPredicator Predicator(Plan);
270 for (VPBlockBase *VPB : RPOT) {
271 // Non-outer regions with VPBBs only are supported at the moment.
272 auto *VPBB = cast<VPBasicBlock>(VPB);
273 // Introduce the mask for VPBB, which may introduce needed edge masks, and
274 // convert all phi recipes of VPBB to blend recipes unless VPBB is the
275 // header.
276 if (VPBB != Header) {
277 Predicator.createBlockInMask(VPBB);
278 Predicator.convertPhisToBlends(VPBB);
279 }
280
281 VPValue *BlockMask = Predicator.getBlockInMask(VPBB);
282 if (!BlockMask)
283 continue;
284
285 // Mask all VPInstructions in the block.
286 for (VPRecipeBase &R : *VPBB) {
287 if (auto *VPI = dyn_cast<VPInstruction>(&R))
288 VPI->addMask(BlockMask);
289 }
290 }
291
292 // Linearize the blocks of the loop into one serial chain.
293 VPBlockBase *PrevVPBB = nullptr;
295 auto Successors = to_vector(VPBB->getSuccessors());
296 if (Successors.size() > 1)
298
299 // Flatten the CFG in the loop. To do so, first disconnect VPBB from its
300 // successors. Then connect VPBB to the previously visited VPBB.
301 for (auto *Succ : Successors)
303 if (PrevVPBB)
304 VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
305
306 PrevVPBB = VPBB;
307 }
308}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Early If Predicator
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file provides utility VPlan to VPlan transformations.
This file contains the declarations of the Vectorization Plan base classes:
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
DomTreeNodeBase * getIDom() const
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
void push_back(const T &Elt)
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4237
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4325
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:232
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:644
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:82
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:205
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:199
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:269
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:221
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:239
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void insert(VPRecipeBase *R)
Insert R at the current insertion point.
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:388
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4425
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4555
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1038
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto cast_or_null(const Y &Val)
Definition Casting.h:714
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:552
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2166
static void introduceMasksAndLinearize(VPlan &Plan)
Predicate and linearize the control-flow in the only loop region of Plan.