LLVM 20.0.0git
BottomUpVec.cpp
Go to the documentation of this file.
//===- BottomUpVec.cpp - A bottom-up vectorizer pass ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

19
20namespace llvm {
21
22static cl::opt<unsigned>
23 OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(0), cl::Hidden,
24 cl::desc("Override the vector register size in bits, "
25 "which is otherwise found by querying TTI."));
26static cl::opt<bool>
27 AllowNonPow2("sbvec-allow-non-pow2", cl::init(false), cl::Hidden,
28 cl::desc("Allow non-power-of-2 vectorization."));
29
30namespace sandboxir {
31
33 : FunctionPass("bottom-up-vec"),
34 RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {}
35
37 unsigned OpIdx) {
39 for (Value *BndlV : Bndl) {
40 auto *BndlI = cast<Instruction>(BndlV);
41 Operands.push_back(BndlI->getOperand(OpIdx));
42 }
43 return Operands;
44}
45
48 // TODO: Use the VecUtils function for getting the bottom instr once it lands.
49 auto *BotI = cast<Instruction>(
50 *std::max_element(Instrs.begin(), Instrs.end(), [](auto *V1, auto *V2) {
51 return cast<Instruction>(V1)->comesBefore(cast<Instruction>(V2));
52 }));
53 // If Bndl contains Arguments or Constants, use the beginning of the BB.
54 return std::next(BotI->getIterator());
55}
56
57Value *BottomUpVec::createVectorInstr(ArrayRef<Value *> Bndl,
59 Change = true;
60 assert(all_of(Bndl, [](auto *V) { return isa<Instruction>(V); }) &&
61 "Expect Instructions!");
62 auto &Ctx = Bndl[0]->getContext();
63
64 Type *ScalarTy = VecUtils::getElementType(Utils::getExpectedType(Bndl[0]));
65 auto *VecTy = VecUtils::getWideType(ScalarTy, VecUtils::getNumLanes(Bndl));
66
68
69 auto Opcode = cast<Instruction>(Bndl[0])->getOpcode();
70 switch (Opcode) {
71 case Instruction::Opcode::ZExt:
72 case Instruction::Opcode::SExt:
73 case Instruction::Opcode::FPToUI:
74 case Instruction::Opcode::FPToSI:
75 case Instruction::Opcode::FPExt:
76 case Instruction::Opcode::PtrToInt:
77 case Instruction::Opcode::IntToPtr:
78 case Instruction::Opcode::SIToFP:
79 case Instruction::Opcode::UIToFP:
80 case Instruction::Opcode::Trunc:
81 case Instruction::Opcode::FPTrunc:
82 case Instruction::Opcode::BitCast: {
83 assert(Operands.size() == 1u && "Casts are unary!");
84 return CastInst::create(VecTy, Opcode, Operands[0], WhereIt, Ctx, "VCast");
85 }
86 case Instruction::Opcode::FCmp:
87 case Instruction::Opcode::ICmp: {
88 auto Pred = cast<CmpInst>(Bndl[0])->getPredicate();
90 [Pred](auto *SBV) {
91 return cast<CmpInst>(SBV)->getPredicate() == Pred;
92 }) &&
93 "Expected same predicate across bundle.");
94 return CmpInst::create(Pred, Operands[0], Operands[1], WhereIt, Ctx,
95 "VCmp");
96 }
97 case Instruction::Opcode::Select: {
98 return SelectInst::create(Operands[0], Operands[1], Operands[2], WhereIt,
99 Ctx, "Vec");
100 }
101 case Instruction::Opcode::FNeg: {
102 auto *UOp0 = cast<UnaryOperator>(Bndl[0]);
103 auto OpC = UOp0->getOpcode();
104 return UnaryOperator::createWithCopiedFlags(OpC, Operands[0], UOp0, WhereIt,
105 Ctx, "Vec");
106 }
107 case Instruction::Opcode::Add:
108 case Instruction::Opcode::FAdd:
109 case Instruction::Opcode::Sub:
110 case Instruction::Opcode::FSub:
111 case Instruction::Opcode::Mul:
112 case Instruction::Opcode::FMul:
113 case Instruction::Opcode::UDiv:
114 case Instruction::Opcode::SDiv:
115 case Instruction::Opcode::FDiv:
116 case Instruction::Opcode::URem:
117 case Instruction::Opcode::SRem:
118 case Instruction::Opcode::FRem:
119 case Instruction::Opcode::Shl:
120 case Instruction::Opcode::LShr:
121 case Instruction::Opcode::AShr:
122 case Instruction::Opcode::And:
123 case Instruction::Opcode::Or:
124 case Instruction::Opcode::Xor: {
125 auto *BinOp0 = cast<BinaryOperator>(Bndl[0]);
126 auto *LHS = Operands[0];
127 auto *RHS = Operands[1];
128 return BinaryOperator::createWithCopiedFlags(BinOp0->getOpcode(), LHS, RHS,
129 BinOp0, WhereIt, Ctx, "Vec");
130 }
131 case Instruction::Opcode::Load: {
132 auto *Ld0 = cast<LoadInst>(Bndl[0]);
133 Value *Ptr = Ld0->getPointerOperand();
134 return LoadInst::create(VecTy, Ptr, Ld0->getAlign(), WhereIt, Ctx, "VecL");
135 }
136 case Instruction::Opcode::Store: {
137 auto Align = cast<StoreInst>(Bndl[0])->getAlign();
138 Value *Val = Operands[0];
139 Value *Ptr = Operands[1];
140 return StoreInst::create(Val, Ptr, Align, WhereIt, Ctx);
141 }
142 case Instruction::Opcode::Br:
143 case Instruction::Opcode::Ret:
144 case Instruction::Opcode::PHI:
145 case Instruction::Opcode::AddrSpaceCast:
146 case Instruction::Opcode::Call:
147 case Instruction::Opcode::GetElementPtr:
148 llvm_unreachable("Unimplemented");
149 break;
150 default:
151 llvm_unreachable("Unimplemented");
152 break;
153 }
154 llvm_unreachable("Missing switch case!");
155 // TODO: Propagate debug info.
156}
157
158void BottomUpVec::tryEraseDeadInstrs() {
159 // Visiting the dead instructions bottom-to-top.
160 SmallVector<Instruction *> SortedDeadInstrCandidates(
161 DeadInstrCandidates.begin(), DeadInstrCandidates.end());
162 sort(SortedDeadInstrCandidates,
163 [](Instruction *I1, Instruction *I2) { return I1->comesBefore(I2); });
164 for (Instruction *I : reverse(SortedDeadInstrCandidates)) {
165 if (I->hasNUses(0))
166 I->eraseFromParent();
167 }
168 DeadInstrCandidates.clear();
169}
170
171Value *BottomUpVec::createPack(ArrayRef<Value *> ToPack) {
173
174 Type *ScalarTy = VecUtils::getCommonScalarType(ToPack);
175 unsigned Lanes = VecUtils::getNumLanes(ToPack);
176 Type *VecTy = VecUtils::getWideType(ScalarTy, Lanes);
177
178 // Create a series of pack instructions.
179 Value *LastInsert = PoisonValue::get(VecTy);
180
181 Context &Ctx = ToPack[0]->getContext();
182
183 unsigned InsertIdx = 0;
184 for (Value *Elm : ToPack) {
185 // An element can be either scalar or vector. We need to generate different
186 // IR for each case.
187 if (Elm->getType()->isVectorTy()) {
188 unsigned NumElms =
189 cast<FixedVectorType>(Elm->getType())->getNumElements();
190 for (auto ExtrLane : seq<int>(0, NumElms)) {
191 // We generate extract-insert pairs, for each lane in `Elm`.
192 Constant *ExtrLaneC =
194 // This may return a Constant if Elm is a Constant.
195 auto *ExtrI =
196 ExtractElementInst::create(Elm, ExtrLaneC, WhereIt, Ctx, "VPack");
197 if (!isa<Constant>(ExtrI))
198 WhereIt = std::next(cast<Instruction>(ExtrI)->getIterator());
199 Constant *InsertLaneC =
200 ConstantInt::getSigned(Type::getInt32Ty(Ctx), InsertIdx++);
201 // This may also return a Constant if ExtrI is a Constant.
202 auto *InsertI = InsertElementInst::create(
203 LastInsert, ExtrI, InsertLaneC, WhereIt, Ctx, "VPack");
204 if (!isa<Constant>(InsertI)) {
205 LastInsert = InsertI;
206 WhereIt = std::next(cast<Instruction>(LastInsert)->getIterator());
207 }
208 }
209 } else {
210 Constant *InsertLaneC =
211 ConstantInt::getSigned(Type::getInt32Ty(Ctx), InsertIdx++);
212 // This may be folded into a Constant if LastInsert is a Constant. In
213 // that case we only collect the last constant.
214 LastInsert = InsertElementInst::create(LastInsert, Elm, InsertLaneC,
215 WhereIt, Ctx, "Pack");
216 if (auto *NewI = dyn_cast<Instruction>(LastInsert))
217 WhereIt = std::next(NewI->getIterator());
218 }
219 }
220 return LastInsert;
221}
222
223void BottomUpVec::collectPotentiallyDeadInstrs(ArrayRef<Value *> Bndl) {
224 for (Value *V : Bndl)
225 DeadInstrCandidates.insert(cast<Instruction>(V));
226 // Also collect the GEPs of vectorized loads and stores.
227 auto Opcode = cast<Instruction>(Bndl[0])->getOpcode();
228 switch (Opcode) {
229 case Instruction::Opcode::Load: {
230 for (Value *V : drop_begin(Bndl))
231 if (auto *Ptr =
232 dyn_cast<Instruction>(cast<LoadInst>(V)->getPointerOperand()))
233 DeadInstrCandidates.insert(Ptr);
234 break;
235 }
236 case Instruction::Opcode::Store: {
237 for (Value *V : drop_begin(Bndl))
238 if (auto *Ptr =
239 dyn_cast<Instruction>(cast<StoreInst>(V)->getPointerOperand()))
240 DeadInstrCandidates.insert(Ptr);
241 break;
242 }
243 default:
244 break;
245 }
246}
247
248Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl, unsigned Depth) {
249 Value *NewVec = nullptr;
250 const auto &LegalityRes = Legality->canVectorize(Bndl);
251 switch (LegalityRes.getSubclassID()) {
253 auto *I = cast<Instruction>(Bndl[0]);
254 SmallVector<Value *, 2> VecOperands;
255 switch (I->getOpcode()) {
256 case Instruction::Opcode::Load:
257 // Don't recurse towards the pointer operand.
258 VecOperands.push_back(cast<LoadInst>(I)->getPointerOperand());
259 break;
260 case Instruction::Opcode::Store: {
261 // Don't recurse towards the pointer operand.
262 auto *VecOp = vectorizeRec(getOperand(Bndl, 0), Depth + 1);
263 VecOperands.push_back(VecOp);
264 VecOperands.push_back(cast<StoreInst>(I)->getPointerOperand());
265 break;
266 }
267 default:
268 // Visit all operands.
269 for (auto OpIdx : seq<unsigned>(I->getNumOperands())) {
270 auto *VecOp = vectorizeRec(getOperand(Bndl, OpIdx), Depth + 1);
271 VecOperands.push_back(VecOp);
272 }
273 break;
274 }
275 NewVec = createVectorInstr(Bndl, VecOperands);
276
277 // Collect any potentially dead scalar instructions, including the original
278 // scalars and pointer operands of loads/stores.
279 if (NewVec != nullptr)
280 collectPotentiallyDeadInstrs(Bndl);
281 break;
282 }
284 // If we can't vectorize the seeds then just return.
285 if (Depth == 0)
286 return nullptr;
287 NewVec = createPack(Bndl);
288 break;
289 }
290 }
291 return NewVec;
292}
293
294bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
295 DeadInstrCandidates.clear();
296 Legality->clear();
297 vectorizeRec(Bndl, /*Depth=*/0);
298 tryEraseDeadInstrs();
299 return Change;
300}
301
303 Legality = std::make_unique<LegalityAnalysis>(
304 A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(),
305 F.getContext());
306 Change = false;
307 const auto &DL = F.getParent()->getDataLayout();
308 unsigned VecRegBits =
311 : A.getTTI()
313 .getFixedValue();
314
315 // TODO: Start from innermost BBs first
316 for (auto &BB : F) {
317 SeedCollector SC(&BB, A.getScalarEvolution());
318 for (SeedBundle &Seeds : SC.getStoreSeeds()) {
319 unsigned ElmBits =
321 Seeds[Seeds.getFirstUnusedElementIdx()])),
322 DL);
323
324 auto DivideBy2 = [](unsigned Num) {
325 auto Floor = VecUtils::getFloorPowerOf2(Num);
326 if (Floor == Num)
327 return Floor / 2;
328 return Floor;
329 };
330 // Try to create the largest vector supported by the target. If it fails
331 // reduce the vector size by half.
332 for (unsigned SliceElms = std::min(VecRegBits / ElmBits,
333 Seeds.getNumUnusedBits() / ElmBits);
334 SliceElms >= 2u; SliceElms = DivideBy2(SliceElms)) {
335 if (Seeds.allUsed())
336 break;
337 // Keep trying offsets after FirstUnusedElementIdx, until we vectorize
338 // the slice. This could be quite expensive, so we enforce a limit.
339 for (unsigned Offset = Seeds.getFirstUnusedElementIdx(),
340 OE = Seeds.size();
341 Offset + 1 < OE; Offset += 1) {
342 // Seeds are getting used as we vectorize, so skip them.
343 if (Seeds.isUsed(Offset))
344 continue;
345 if (Seeds.allUsed())
346 break;
347
348 auto SeedSlice =
349 Seeds.getSlice(Offset, SliceElms * ElmBits, !AllowNonPow2);
350 if (SeedSlice.empty())
351 continue;
352
353 assert(SeedSlice.size() >= 2 && "Should have been rejected!");
354
355 // TODO: If vectorization succeeds, run the RegionPassManager on the
356 // resulting region.
357
358 // TODO: Refactor to remove the unnecessary copy to SeedSliceVals.
359 SmallVector<Value *> SeedSliceVals(SeedSlice.begin(),
360 SeedSlice.end());
361 Change |= tryVectorize(SeedSliceVals);
362 }
363 }
364 }
365 }
366 return Change;
367}
368
369} // namespace sandboxir
370} // namespace llvm
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
iterator begin() const
Definition: ArrayRef.h:156
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
LLVM Value Representation.
Definition: Value.h:74
static Value * createWithCopiedFlags(Instruction::Opcode Op, Value *LHS, Value *RHS, Value *CopyFrom, InsertPosition Pos, Context &Ctx, const Twine &Name="")
BottomUpVec(StringRef Pipeline)
Definition: BottomUpVec.cpp:32
bool runOnFunction(Function &F, const Analyses &A) final
\Returns true if it modifies F.
static Value * create(Type *DestTy, Opcode Op, Value *Operand, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static CmpInst * create(Predicate Pred, Value *S1, Value *S2, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constant.cpp:57
static Value * create(Value *Vec, Value *Idx, InsertPosition Pos, Context &Ctx, const Twine &Name="")
A pass that runs on a sandbox::Function.
Definition: Pass.h:75
static Value * create(Value *Vec, Value *NewElt, Value *Idx, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static LoadInst * create(Type *Ty, Value *Ptr, MaybeAlign Align, InsertPosition Pos, bool IsVolatile, Context &Ctx, const Twine &Name="")
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constant.cpp:238
A set of candidate Instructions for vectorizing together.
Definition: SeedCollector.h:27
static Value * create(Value *Cond, Value *True, Value *False, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static StoreInst * create(Value *V, Value *Ptr, MaybeAlign Align, InsertPosition Pos, bool IsVolatile, Context &Ctx)
static Type * getInt32Ty(Context &Ctx)
static Value * createWithCopiedFlags(Instruction::Opcode Op, Value *OpV, Value *CopyFrom, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static unsigned getNumBits(Type *Ty, const DataLayout &DL)
\Returns the number of bits of Ty.
Definition: Utils.h:64
static Type * getExpectedType(const Value *V)
\Returns the expected type of Value V.
Definition: Utils.h:30
A SandboxIR Value has users. This is the base class.
Definition: Value.h:63
static Type * getCommonScalarType(ArrayRef< Value * > Bndl)
Similar to tryGetCommonScalarType() but will assert that there is a common type.
Definition: VecUtils.h:129
static unsigned getNumLanes(Type *Ty)
\Returns the number of vector lanes of Ty or 1 if not a vector.
Definition: VecUtils.h:72
static Type * getWideType(Type *ElemTy, unsigned NumElts)
\Returns <NumElts x ElemTy>.
Definition: VecUtils.h:95
static Type * getElementType(Type *Ty)
Returns Ty if scalar or its element type if vector.
Definition: VecUtils.h:32
static unsigned getFloorPowerOf2(unsigned Num)
\Returns the first integer power of 2 that is <= Num.
Definition: VecUtils.h:137
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:53
@ Widen
ā€¨Collect scalar values.
static BasicBlock::iterator getInsertPointAfterInstrs(ArrayRef< Value * > Instrs)
Definition: BottomUpVec.cpp:47
static SmallVector< Value *, 4 > getOperand(ArrayRef< Value * > Bndl, unsigned OpIdx)
Definition: BottomUpVec.cpp:36
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
static cl::opt< unsigned > OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(0), cl::Hidden, cl::desc("Override the vector register size in bits, " "which is otherwise found by querying TTI."))
static cl::opt< bool > AllowNonPow2("sbvec-allow-non-pow2", cl::init(false), cl::Hidden, cl::desc("Allow non-power-of-2 vectorization."))