//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass converts vector operations into scalar operations (or, optionally,
// operations on smaller vector widths), in order to expose optimization
// opportunities on the individual scalar operations.
// It is mainly intended for targets that do not have vector units, but it
// may also be useful for revectorizing code to different vector widths.
//
//===----------------------------------------------------------------------===//
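
// For illustration (a sketch, not part of the header above): with full
// scalarization, a vector add such as
//
//   %sum = add <2 x i32> %a, %b
//
// is rewritten into per-element operations, roughly
//
//   %a.i0 = extractelement <2 x i32> %a, i64 0
//   %b.i0 = extractelement <2 x i32> %b, i64 0
//   %sum.i0 = add i32 %a.i0, %b.i0
//   %a.i1 = extractelement <2 x i32> %a, i64 1
//   %b.i1 = extractelement <2 x i32> %b, i64 1
//   %sum.i1 = add i32 %a.i1, %b.i1
//
// with the full vector rebuilt via insertelements only if a vector use remains.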

#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "scalarizer"

// Advance Itr past any PHI nodes and debug intrinsics, so it is a valid
// insertion point for new non-PHI instructions.
static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
  BasicBlock *BB = Itr->getParent();
  if (isa<PHINode>(Itr))
    Itr = BB->getFirstInsertionPt();
  if (Itr != BB->end())
    Itr = skipDebugIntrinsics(Itr);
  return Itr;
}

// Used to store the scattered form of a vector.
using ValueVector = SmallVector<Value *, 8>;

// Used to map a vector Value and associated type to its scattered form.
// The associated type is only non-null for pointer values that are "scattered"
// when used as pointer operands to load or store.
//
// We use std::map because we want iterators to persist across insertion and
// because the values are relatively large.
using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;

// Lists Instructions that have been replaced with scalar implementations,
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;

namespace {

struct VectorSplit {
  // The type of the vector.
  FixedVectorType *VecTy = nullptr;

  // The number of elements packed in a fragment (other than the remainder).
  unsigned NumPacked = 0;

  // The number of fragments (scalars or smaller vectors) into which the vector
  // shall be split.
  unsigned NumFragments = 0;

  // The type of each complete fragment.
  Type *SplitTy = nullptr;

  // The type of the remainder (last) fragment; null if all fragments are
  // complete.
  Type *RemainderTy = nullptr;

  Type *getFragmentType(unsigned I) const {
    return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
  }
};
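
// Worked example (illustrative, assuming ScalarizeMinBits=32): splitting a
// <7 x i16> gives NumPacked=2, NumFragments=4, SplitTy=<2 x i16> and
// RemainderTy=i16, so getFragmentType returns <2 x i16> for fragments 0-2
// and i16 for fragment 3.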

// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
public:
  Scatterer() = default;

  // Scatter V into the fragments described by VS. If new instructions are
  // needed, insert them before BBI in BB. If Cache is nonnull, use it to
  // cache the results.
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            const VectorSplit &VS, ValueVector *cachePtr = nullptr);

  // Return component I, creating a new Value for it if necessary.
  Value *operator[](unsigned I);

  // Return the number of components.
  unsigned size() const { return VS.NumFragments; }

private:
  BasicBlock *BB;
  BasicBlock::iterator BBI;
  Value *V;
  VectorSplit VS;
  bool IsPointer;
  ValueVector *CachePtr;
  ValueVector Tmp;
};

// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
// called Name that compares X and Y in the same way as FCI.
struct FCmpSplitter {
  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
  }

  FCmpInst &FCI;
};

// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
// called Name that compares X and Y in the same way as ICI.
struct ICmpSplitter {
  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
  }

  ICmpInst &ICI;
};

// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
// a unary operator like UO called Name with operand X.
struct UnarySplitter {
  UnarySplitter(UnaryOperator &uo) : UO(uo) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
    return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
  }

  UnaryOperator &UO;
};

// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
// a binary operator like BO called Name with operands X and Y.
struct BinarySplitter {
  BinarySplitter(BinaryOperator &bo) : BO(bo) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
  }

  BinaryOperator &BO;
};

// Information about a load or store that we're scalarizing.
struct VectorLayout {
  VectorLayout() = default;

  // Return the alignment of fragment Frag.
  Align getFragmentAlign(unsigned Frag) {
    return commonAlignment(VecAlign, Frag * SplitSize);
  }

  // The split of the underlying vector type.
  VectorSplit VS;

  // The alignment of the vector.
  Align VecAlign;

  // The size of each (non-remainder) fragment in bytes.
  uint64_t SplitSize = 0;
};
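
// For example (illustrative): a vector aligned to 16 bytes and split into
// 8-byte fragments yields getFragmentAlign(0) == 16 and
// getFragmentAlign(1) == commonAlignment(16, 8) == 8.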
} // namespace

static bool isStructOfMatchingFixedVectors(Type *Ty) {
  if (!isa<StructType>(Ty))
    return false;
  unsigned StructSize = Ty->getNumContainedTypes();
  if (StructSize < 1)
    return false;
  FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(0));
  if (!VecTy)
    return false;
  unsigned VecSize = VecTy->getNumElements();
  for (unsigned I = 1; I < StructSize; I++) {
    VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(I));
    if (!VecTy || VecSize != VecTy->getNumElements())
      return false;
  }
  return true;
}
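
// For example (illustrative): {<2 x float>, <2 x i32>} matches (all members
// are fixed vectors with the same element count), while
// {<2 x float>, <4 x i32>} and {float, float} do not.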

/// Concatenate the given fragments to a single vector value of the type
/// described in @p VS.
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();
  SmallVector<int> ExtendMask;
  SmallVector<int> InsertMask;

  if (VS.NumPacked > 1) {
    // Prepare the shufflevector masks once and re-use them for all
    // fragments.
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  Value *Res = PoisonValue::get(VS.VecTy);
  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
      else
        NumPacked = 1;
    }

    if (NumPacked == 1) {
      Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                        Name + ".upto" + Twine(I));
    } else {
      Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
      if (I == 0) {
        Res = Fragment;
      } else {
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;
        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
                                          Name + ".upto" + Twine(I));
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
      }
    }
  }

  return Res;
}
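
// Worked example (illustrative): concatenating two <2 x i32> fragments into a
// <4 x i32>. ExtendMask is <0, 1, -1, -1>, so each fragment is first widened
// to <4 x i32>. Fragment 0 becomes the initial result; for fragment 1 the
// InsertMask is temporarily set to <0, 1, 4, 5>, selecting lanes 0-1 from the
// accumulated result and lanes 0-1 of the widened fragment.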

namespace {
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
  ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
                    ScalarizerPassOptions Options)
      : DT(DT), TTI(TTI),
        ScalarizeVariableInsertExtract(Options.ScalarizeVariableInsertExtract),
        ScalarizeLoadStore(Options.ScalarizeLoadStore),
        ScalarizeMinBits(Options.ScalarizeMinBits) {}

  bool visit(Function &F);

  // InstVisitor methods. They return true if the instruction was scalarized,
  // false if nothing changed.
  bool visitInstruction(Instruction &I) { return false; }
  bool visitSelectInst(SelectInst &SI);
  bool visitICmpInst(ICmpInst &ICI);
  bool visitFCmpInst(FCmpInst &FCI);
  bool visitUnaryOperator(UnaryOperator &UO);
  bool visitBinaryOperator(BinaryOperator &BO);
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
  bool visitCastInst(CastInst &CI);
  bool visitBitCastInst(BitCastInst &BCI);
  bool visitInsertElementInst(InsertElementInst &IEI);
  bool visitExtractElementInst(ExtractElementInst &EEI);
  bool visitExtractValueInst(ExtractValueInst &EVI);
  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
  bool visitPHINode(PHINode &PHI);
  bool visitLoadInst(LoadInst &LI);
  bool visitStoreInst(StoreInst &SI);
  bool visitCallInst(CallInst &ICI);
  bool visitFreezeInst(FreezeInst &FI);

private:
  Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
  void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
  void replaceUses(Instruction *Op, Value *CV);
  bool canTransferMetadata(unsigned Kind);
  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
  std::optional<VectorSplit> getVectorSplit(Type *Ty);
  std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
                                              const DataLayout &DL);
  bool finish();

  template<typename T> bool splitUnary(Instruction &, const T &);
  template<typename T> bool splitBinary(Instruction &, const T &);

  bool splitCall(CallInst &CI);

  ScatterMap Scattered;
  GatherList Gathered;
  bool Scalarized;

  SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;

  DominatorTree *DT;
  const TargetTransformInfo *TTI;

  const bool ScalarizeVariableInsertExtract;
  const bool ScalarizeLoadStore;
  const unsigned ScalarizeMinBits;
};

class ScalarizerLegacyPass : public FunctionPass {
public:
  static char ID;
  ScalarizerPassOptions Options;
  ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
  ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
  bool runOnFunction(Function &F) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
    : FunctionPass(ID), Options(Options) {}

void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
}

char ScalarizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
                      "Scalarize vector operations", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
                    "Scalarize vector operations", false, false)

Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                     const VectorSplit &VS, ValueVector *cachePtr)
    : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
  IsPointer = V->getType()->isPointerTy();
  if (!CachePtr) {
    Tmp.resize(VS.NumFragments, nullptr);
  } else {
    assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
            IsPointer) &&
           "Inconsistent vector sizes");
    if (VS.NumFragments > CachePtr->size())
      CachePtr->resize(VS.NumFragments, nullptr);
  }
}

// Return fragment Frag, creating a new Value for it if necessary.
Value *Scatterer::operator[](unsigned Frag) {
  ValueVector &CV = CachePtr ? *CachePtr : Tmp;
  // Try to reuse a previous value.
  if (CV[Frag])
    return CV[Frag];
  IRBuilder<> Builder(BB, BBI);
  if (IsPointer) {
    if (Frag == 0)
      CV[Frag] = V;
    else
      CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
                                            V->getName() + ".i" + Twine(Frag));
    return CV[Frag];
  }

  Type *FragmentTy = VS.getFragmentType(Frag);

  if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
    SmallVector<int> Mask;
    for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
      Mask.push_back(Frag * VS.NumPacked + J);
    CV[Frag] =
        Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
                                    V->getName() + ".i" + Twine(Frag));
  } else {
    // Search through a chain of InsertElementInsts looking for element Frag.
    // Record other elements in the cache. The new V is still suitable
    // for all uncached indices.
    while (true) {
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
      if (!Insert)
        break;
      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
      if (!Idx)
        break;
      unsigned J = Idx->getZExtValue();
      V = Insert->getOperand(0);
      if (Frag * VS.NumPacked == J) {
        CV[Frag] = Insert->getOperand(1);
        return CV[Frag];
      }

      if (VS.NumPacked == 1 && !CV[J]) {
        // Only cache the first entry we find for each index we're not actively
        // searching for. This prevents us from going too far up the chain and
        // caching incorrect entries.
        CV[J] = Insert->getOperand(1);
      }
    }
    CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
                                            V->getName() + ".i" + Twine(Frag));
  }

  return CV[Frag];
}

bool ScalarizerLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  const TargetTransformInfo *TTI =
      &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  ScalarizerVisitor Impl(DT, TTI, Options);
  return Impl.visit(F);
}

FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) {
  return new ScalarizerLegacyPass(Options);
}

bool ScalarizerVisitor::visit(Function &F) {
  assert(Gathered.empty() && Scattered.empty());

  Scalarized = false;

  // To ensure we replace gathered components correctly, we need to do an
  // ordered traversal of the basic blocks in the function.
  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
  for (BasicBlock *BB : RPOT) {
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
      Instruction *I = &*II;
      bool Done = InstVisitor::visit(I);
      ++II;
      if (Done && I->getType()->isVoidTy()) {
        I->eraseFromParent();
        Scalarized = true;
      }
    }
  }
  return finish();
}

// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
                                     const VectorSplit &VS) {
  if (Argument *VArg = dyn_cast<Argument>(V)) {
    // Put the scattered form of arguments in the entry block,
    // so that it can be used everywhere.
    Function *F = VArg->getParent();
    BasicBlock *BB = &F->getEntryBlock();
    return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
  }
  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
    // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
    // nodes in predecessors. If those predecessors are unreachable from entry,
    // then the IR in those blocks could have unexpected properties resulting
    // in infinite loops in Scatterer::operator[]. By simply treating values
    // originating from instructions in unreachable blocks as undef we do not
    // need to analyse them further.
    if (!DT->isReachableFromEntry(VOp->getParent()))
      return Scatterer(Point->getParent(), Point->getIterator(),
                       PoisonValue::get(V->getType()), VS);
    // Put the scattered form of an instruction directly after the
    // instruction, skipping over PHI nodes and debug intrinsics.
    BasicBlock *BB = VOp->getParent();
    return Scatterer(
        BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V,
        VS, &Scattered[{V, VS.SplitTy}]);
  }
  // In the fallback case, just put the scattered form before Point and
  // keep the result local to Point.
  return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
}

// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  transferMetadataAndIRFlags(Op, CV);

  // If we already have a scattered form of Op (created from ExtractElements
  // of Op itself), replace them with the new form.
  ValueVector &SV = Scattered[{Op, VS.SplitTy}];
  if (!SV.empty()) {
    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      Value *V = SV[I];
      if (V == nullptr || SV[I] == CV[I])
        continue;

      Instruction *Old = cast<Instruction>(V);
      if (isa<Instruction>(CV[I]))
        CV[I]->takeName(Old);
      Old->replaceAllUsesWith(CV[I]);
      PotentiallyDeadInstrs.emplace_back(Old);
    }
  }
  SV = CV;
  Gathered.push_back(GatherList::value_type(Op, &SV));
}

// Replace Op with CV and collect Op as a potentially dead instruction.
void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
  if (CV != Op) {
    Op->replaceAllUsesWith(CV);
    PotentiallyDeadInstrs.emplace_back(Op);
    Scalarized = true;
  }
}

// Return true if it is safe to transfer the given metadata tag from
// vector to scalar instructions.
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
  return (Tag == LLVMContext::MD_tbaa
          || Tag == LLVMContext::MD_fpmath
          || Tag == LLVMContext::MD_tbaa_struct
          || Tag == LLVMContext::MD_invariant_load
          || Tag == LLVMContext::MD_alias_scope
          || Tag == LLVMContext::MD_noalias
          || Tag == LLVMContext::MD_mem_parallel_loop_access
          || Tag == LLVMContext::MD_access_group);
}

// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
                                                   const ValueVector &CV) {
  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
  Op->getAllMetadataOtherThanDebugLoc(MDs);
  for (Value *V : CV) {
    if (Instruction *New = dyn_cast<Instruction>(V)) {
      for (const auto &MD : MDs)
        if (canTransferMetadata(MD.first))
          New->setMetadata(MD.first, MD.second);
      New->copyIRFlags(Op);
      if (Op->getDebugLoc() && !New->getDebugLoc())
        New->setDebugLoc(Op->getDebugLoc());
    }
  }
}

// Determine how Ty is split, if at all.
std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
  VectorSplit Split;
  Split.VecTy = dyn_cast<FixedVectorType>(Ty);
  if (!Split.VecTy)
    return {};

  unsigned NumElems = Split.VecTy->getNumElements();
  Type *ElemTy = Split.VecTy->getElementType();

  if (NumElems == 1 || ElemTy->isPointerTy() ||
      2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
    Split.NumPacked = 1;
    Split.NumFragments = NumElems;
    Split.SplitTy = ElemTy;
  } else {
    Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
    if (Split.NumPacked >= NumElems)
      return {};

    Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
    Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);

    unsigned RemainderElems = NumElems % Split.NumPacked;
    if (RemainderElems > 1)
      Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
    else if (RemainderElems == 1)
      Split.RemainderTy = ElemTy;
  }

  return Split;
}
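
// Boundary behavior (illustrative): vectors of pointers and single-element
// vectors are always fully scalarized (NumPacked == 1), and a type such as
// <2 x i32> with ScalarizeMinBits=64 is not split at all, since the whole
// vector already fits in one fragment (NumPacked would be >= NumElems).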

// Try to compute the layout of vector type Ty, returning std::nullopt on
// failure. Alignment is the alignment of the vector access being scalarized.
std::optional<VectorLayout>
ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
                                   const DataLayout &DL) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty);
  if (!VS)
    return {};

  VectorLayout Layout;
  Layout.VS = *VS;
  // Check that we're dealing with full-byte fragments.
  if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
      (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
    return {};
  Layout.VecAlign = Alignment;
  Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
  return Layout;
}

// Scalarize one-operand instruction I, using Split(Builder, X, Name)
// to create an instruction like I with operand X and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = VS;
  } else {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)
      return false;
  }

  IRBuilder<> Builder(&I);
  Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
  assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
    Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
  gather(&I, Res, *VS);
  return true;
}

// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = VS;
  } else {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)
      return false;
  }

  IRBuilder<> Builder(&I);
  Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
  Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
  assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
  assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
    Value *Op0 = VOp0[Frag];
    Value *Op1 = VOp1[Frag];
    Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
  }
  gather(&I, Res, *VS);
  return true;
}

/// If this is a call to a vector-typed intrinsic function, split the call into
/// a scalar call per fragment, if possible for the intrinsic.
bool ScalarizerVisitor::splitCall(CallInst &CI) {
  Type *CallType = CI.getType();
  bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType);
  std::optional<VectorSplit> VS;
  if (AreAllVectorsOfMatchingSize)
    VS = getVectorSplit(CallType->getContainedType(0));
  else
    VS = getVectorSplit(CallType);
  if (!VS)
    return false;

  Function *F = CI.getCalledFunction();
  if (!F)
    return false;

  Intrinsic::ID ID = F->getIntrinsicID();

  if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID, TTI))
    return false;

  unsigned NumArgs = CI.arg_size();

  ValueVector ScalarOperands(NumArgs);
  SmallVector<Scatterer, 8> Scattered(NumArgs);
  SmallVector<int> OverloadIdx(NumArgs, -1);

  SmallVector<llvm::Type *, 3> Tys;
  // Add return type if intrinsic is overloaded on it.
  if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1, TTI))
    Tys.push_back(VS->SplitTy);

  if (AreAllVectorsOfMatchingSize) {
    for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
      std::optional<VectorSplit> CurrVS =
          getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I)));
      // It is possible for VectorSplit.NumPacked >= NumElems. If that happens,
      // a VectorSplit is not returned and we bail out of handling this call.
      // The secondary bailout case is if NumPacked does not match. This can
      // happen if ScalarizeMinBits is not set to the default, which means that
      // intrinsics like frexp are only scalarized when the struct elements
      // have the same bitness.
      if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
        return false;
      if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I, TTI))
        Tys.push_back(CurrVS->SplitTy);
    }
  }
  // Assumes that any vector type has the same number of elements as the return
  // vector type, which is true for all current intrinsics.
  for (unsigned I = 0; I != NumArgs; ++I) {
    Value *OpI = CI.getOperand(I);
    if ([[maybe_unused]] auto *OpVecTy =
            dyn_cast<FixedVectorType>(OpI->getType())) {
      assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
      std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // The natural split of the operand doesn't match the result. This
        // could happen if the vector elements are different and the
        // ScalarizeMinBits option is used.
        //
        // We could in principle handle this case as well, at the cost of
        // complicating the scattering machinery to support multiple scattering
        // granularities for a single value.
        return false;
      }

      Scattered[I] = scatter(&CI, OpI, *OpVS);
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I, TTI)) {
        OverloadIdx[I] = Tys.size();
        Tys.push_back(OpVS->SplitTy);
      }
    } else {
      ScalarOperands[I] = OpI;
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I, TTI))
        Tys.push_back(OpI->getType());
    }
  }

  ValueVector Res(VS->NumFragments);
  ValueVector ScalarCallOps(NumArgs);

  Function *NewIntrin =
      Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
  IRBuilder<> Builder(&CI);

  // Perform actual scalarization, taking care to preserve any scalar operands.
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
    ScalarCallOps.clear();

    if (IsRemainder)
      Tys[0] = VS->RemainderTy;

    for (unsigned J = 0; J != NumArgs; ++J) {
      if (isVectorIntrinsicWithScalarOpAtArg(ID, J, TTI)) {
        ScalarCallOps.push_back(ScalarOperands[J]);
      } else {
        ScalarCallOps.push_back(Scattered[J][I]);
        if (IsRemainder && OverloadIdx[J] >= 0)
          Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
      }
    }

    if (IsRemainder)
      NewIntrin = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);

    Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
                                CI.getName() + ".i" + Twine(I));
  }

  gather(&CI, Res, *VS);
  return true;
}
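
// For example (illustrative): a call to llvm.powi.v4f32.i32 has a scalar
// exponent operand (isVectorIntrinsicWithScalarOpAtArg), so every fragment
// call reuses the same i32 exponent while the <4 x float> operand is
// scattered per fragment.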

bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
  std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> CondVS;
  if (isa<FixedVectorType>(SI.getCondition()->getType())) {
    CondVS = getVectorSplit(SI.getCondition()->getType());
    if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
      // This happens when ScalarizeMinBits is used.
      return false;
    }
  }

  IRBuilder<> Builder(&SI);
  Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
  Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
  assert(VOp1.size() == VS->NumFragments && "Mismatched select");
  assert(VOp2.size() == VS->NumFragments && "Mismatched select");
  ValueVector Res;
  Res.resize(VS->NumFragments);

  if (CondVS) {
    Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
    assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op0 = VOp0[I];
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  } else {
    Value *Op0 = SI.getOperand(0);
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  }
  gather(&SI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
  return splitBinary(ICI, ICmpSplitter(ICI));
}

bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
  return splitBinary(FCI, FCmpSplitter(FCI));
}

bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
  return splitUnary(UO, UnarySplitter(UO));
}

bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
  return splitBinary(BO, BinarySplitter(BO));
}

bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&GEPI);
  unsigned NumIndices = GEPI.getNumIndices();

  // The base pointer and indices might be scalar even if it's a vector GEP.
  SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
  SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};

  for (unsigned I = 0; I < 1 + NumIndices; ++I) {
    if (auto *VecTy =
            dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
      std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // This can happen when ScalarizeMinBits is used.
        return false;
      }
      ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
    } else {
      ScalarOps[I] = GEPI.getOperand(I);
    }
  }

  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    SmallVector<Value *, 8> SplitOps;
    SplitOps.resize(1 + NumIndices);
    for (unsigned J = 0; J < 1 + NumIndices; ++J) {
      if (ScalarOps[J])
        SplitOps[J] = ScalarOps[J];
      else
        SplitOps[J] = ScatterOps[J][I];
    }
    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
                               ArrayRef(SplitOps).drop_front(),
                               GEPI.getName() + ".i" + Twine(I));
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
        NewGEPI->setIsInBounds();
  }
  gather(&GEPI, Res, *VS);
  return true;
}
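
// For example (illustrative): a fully scalarized
//   getelementptr i32, ptr %base, <2 x i64> %ofs
// becomes two scalar GEPs, one per lane, with the shared scalar base reused
// and only the vector index scattered.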

bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
  std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
  if (!DestVS)
    return false;

  std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
  if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
    return false;

  IRBuilder<> Builder(&CI);
  Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
  assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
  ValueVector Res;
  Res.resize(DestVS->NumFragments);
  for (unsigned I = 0; I < DestVS->NumFragments; ++I)
    Res[I] =
        Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
                           CI.getName() + ".i" + Twine(I));
  gather(&CI, Res, *DestVS);
  return true;
}

bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
  std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
  std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
  if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
    return false;

  const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();

  // Vectors of pointers are always fully scalarized.
  assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));

  IRBuilder<> Builder(&BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
  ValueVector Res;
  Res.resize(DstVS->NumFragments);

  unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
  unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();

  if (isPointerTy || DstSplitBits == SrcSplitBits) {
    assert(DstVS->NumFragments == SrcVS->NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else if (SrcSplitBits % DstSplitBits == 0) {
    // Convert each source fragment to the same-sized destination vector and
    // then scatter the result to the destination.
    VectorSplit MidVS;
    MidVS.NumPacked = DstVS->NumPacked;
    MidVS.NumFragments = SrcSplitBits / DstSplitBits;
    MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = DstVS->SplitTy;

    unsigned ResI = 0;
    for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
      Value *V = Op0[I];

      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      Instruction *VI;
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);

      V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");

      Scatterer Mid = scatter(&BCI, V, MidVS);
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        Res[ResI++] = Mid[J];
    }
  } else if (DstSplitBits % SrcSplitBits == 0) {
    // Gather enough source fragments to make up a destination fragment and
    // then convert to the destination type.
    VectorSplit MidVS;
    MidVS.NumFragments = DstSplitBits / SrcSplitBits;
    MidVS.NumPacked = SrcVS->NumPacked;
    MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = SrcVS->SplitTy;

    unsigned SrcI = 0;
    SmallVector<Value *, 8> ConcatOps;
    ConcatOps.resize(MidVS.NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        ConcatOps[J] = Op0[SrcI++];
      Value *V = concatenate(Builder, ConcatOps, MidVS,
                             BCI.getName() + ".i" + Twine(I));
      Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else {
    return false;
  }

  gather(&BCI, Res, *DstVS);
  return true;
}
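
// For example (illustrative, ScalarizeMinBits=32): bitcasting <4 x i32> to
// <8 x i16> takes the equal-split-size path, since the source splits into
// four i32 fragments and the destination into four <2 x i16> fragments, both
// 32 bits wide; each i32 fragment is bitcast directly to <2 x i16>.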

bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
  std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&IEI);
  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
  Value *NewElt = IEI.getOperand(1);
  Value *InsIdx = IEI.getOperand(2);

  ValueVector Res;
  Res.resize(VS->NumFragments);

  if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      if (I == Fragment) {
        bool IsPacked = VS->NumPacked > 1;
        if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
            !VS->RemainderTy->isVectorTy())
          IsPacked = false;
        if (IsPacked) {
          Res[I] =
              Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
        } else {
          Res[I] = NewElt;
        }
      } else {
        Res[I] = Op0[I];
      }
    }
  } else {
    // Never split a variable insertelement that isn't fully scalarized.
    if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
      return false;

    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *ShouldReplace =
          Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
                               InsIdx->getName() + ".is." + Twine(I));
      Value *OldElt = Op0[I];
      Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
                                    IEI.getName() + ".i" + Twine(I));
    }
  }

  gather(&IEI, Res, *VS);
  return true;
}
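
// For a variable index (illustrative), each lane I of the result is computed
// as
//   %c = icmp eq i32 %idx, I
//   %res.iI = select i1 %c, %newelt, %old.iI
// so the inserted element lands in whichever lane the runtime index names.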

bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
  Value *Op = EVI.getOperand(0);
  Type *OpTy = Op->getType();
  ValueVector Res;
  if (!isStructOfMatchingFixedVectors(OpTy))
    return false;
  if (CallInst *CI = dyn_cast<CallInst>(Op)) {
    Function *F = CI->getCalledFunction();
    if (!F)
      return false;
    Intrinsic::ID ID = F->getIntrinsicID();
    if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID, TTI))
      return false;
    // Note: falling through means Op is a `CallInst` to an intrinsic that
    // `isTriviallyScalarizable` accepts.
  } else
    return false;
  Type *VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
  std::optional<VectorSplit> VS = getVectorSplit(VecType);
  if (!VS)
    return false;
  for (unsigned I = 1; I < OpTy->getNumContainedTypes(); I++) {
    std::optional<VectorSplit> CurrVS =
        getVectorSplit(cast<FixedVectorType>(OpTy->getContainedType(I)));
    // It is possible for VectorSplit.NumPacked >= NumElems. If that happens,
    // a VectorSplit is not returned and we bail out of handling this call.
    // The secondary bailout case is if NumPacked does not match. This can
    // happen if ScalarizeMinBits is not set to the default, which means that
    // intrinsics like frexp are only scalarized when the struct elements
    // have the same bitness.
    if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
      return false;
  }
  IRBuilder<> Builder(&EVI);
  Scatterer Op0 = scatter(&EVI, Op, *VS);
  assert(!EVI.getIndices().empty() && "Make sure an index exists");
  // Note: for our use case we only care about the top-level index.
  unsigned Index = EVI.getIndices()[0];
  for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
    Value *ResElem = Builder.CreateExtractValue(
        Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));
    Res.push_back(ResElem);
  }

  Type *ActualVecType = cast<FixedVectorType>(OpTy->getContainedType(Index));
  std::optional<VectorSplit> AVS = getVectorSplit(ActualVecType);
  gather(&EVI, Res, *AVS);
  return true;
}

bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
  std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&EEI);
  Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
  Value *ExtIdx = EEI.getOperand(1);

  if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    Value *Res = Op0[Fragment];
    bool IsPacked = VS->NumPacked > 1;
    if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
        !VS->RemainderTy->isVectorTy())
      IsPacked = false;
    if (IsPacked)
      Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
    replaceUses(&EEI, Res);
    return true;
  }

  // Never split a variable extractelement that isn't fully scalarized.
  if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
    return false;

  Value *Res = PoisonValue::get(VS->VecTy->getElementType());
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Value *ShouldExtract =
        Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
                             ExtIdx->getName() + ".is." + Twine(I));
    Value *Elt = Op0[I];
    Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
                               EEI.getName() + ".upto" + Twine(I));
  }
  replaceUses(&EEI, Res);
  return true;
}

bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
  std::optional<VectorSplit> VSOp =
      getVectorSplit(SVI.getOperand(0)->getType());
  if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
    return false;

  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
  ValueVector Res;
  Res.resize(VS->NumFragments);

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    int Selector = SVI.getMaskValue(I);
    if (Selector < 0)
      Res[I] = PoisonValue::get(VS->VecTy->getElementType());
    else if (unsigned(Selector) < Op0.size())
      Res[I] = Op0[Selector];
    else
      Res[I] = Op1[Selector - Op0.size()];
  }
  gather(&SVI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
  std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&PHI);
  ValueVector Res;
  Res.resize(VS->NumFragments);

  unsigned NumOps = PHI.getNumOperands();
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
                               PHI.getName() + ".i" + Twine(I));
  }

  for (unsigned I = 0; I < NumOps; ++I) {
    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
    for (unsigned J = 0; J < VS->NumFragments; ++J)
      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
  }
  gather(&PHI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!LI.isSimple())
    return false;

  std::optional<VectorLayout> Layout = getVectorLayout(
      LI.getType(), LI.getAlign(), LI.getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&LI);
  Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
  ValueVector Res;
  Res.resize(Layout->VS.NumFragments);

  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
                                       Align(Layout->getFragmentAlign(I)),
                                       LI.getName() + ".i" + Twine(I));
  }
  gather(&LI, Res, Layout->VS);
  return true;
}
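
// For example (illustrative, ScalarizeMinBits=64): a simple
//   load <4 x float>, ptr %p, align 16
// becomes two <2 x float> loads at byte offsets 0 and 8, with alignments 16
// and 8 as computed by getFragmentAlign.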

bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!SI.isSimple())
    return false;

  Value *FullValue = SI.getValueOperand();
  std::optional<VectorLayout> Layout = getVectorLayout(
      FullValue->getType(), SI.getAlign(), SI.getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&SI);
  Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
  Scatterer VVal = scatter(&SI, FullValue, Layout->VS);

  ValueVector Stores;
  Stores.resize(Layout->VS.NumFragments);
  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Value *Val = VVal[I];
    Value *Ptr = VPtr[I];
    Stores[I] =
        Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
  }
  transferMetadataAndIRFlags(&SI, Stores);
  return true;
}

bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}

bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
  return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
    return Builder.CreateFreeze(Op, Name);
  });
}

// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
bool ScalarizerVisitor::finish() {
  // The presence of data in Gathered or Scattered indicates changes
  // made to the Function.
  if (Gathered.empty() && Scattered.empty() && !Scalarized)
    return false;
  for (const auto &GMI : Gathered) {
    Instruction *Op = GMI.first;
    ValueVector &CV = *GMI.second;
    if (!Op->use_empty()) {
      // The value is still needed, so recreate it using a series of
      // insertelements and/or shufflevectors.
      Value *Res;
      if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
        BasicBlock *BB = Op->getParent();
        IRBuilder<> Builder(Op);
        if (isa<PHINode>(Op))
          Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());

        VectorSplit VS = *getVectorSplit(Ty);
        assert(VS.NumFragments == CV.size());

        Res = concatenate(Builder, CV, VS, Op->getName());

        Res->takeName(Op);
      } else if (auto *Ty = dyn_cast<StructType>(Op->getType())) {
        BasicBlock *BB = Op->getParent();
        IRBuilder<> Builder(Op);
        if (isa<PHINode>(Op))
          Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());

        // Iterate over each element in the struct.
        unsigned NumOfStructElements = Ty->getNumElements();
        SmallVector<ValueVector, 4> ElemCV(NumOfStructElements);
        for (unsigned I = 0; I < NumOfStructElements; ++I) {
          for (auto *CVelem : CV) {
            Value *Elem = Builder.CreateExtractValue(
                CVelem, I, Op->getName() + ".elem" + Twine(I));
            ElemCV[I].push_back(Elem);
          }
        }
        Res = PoisonValue::get(Ty);
        for (unsigned I = 0; I < NumOfStructElements; ++I) {
          Type *ElemTy = Ty->getElementType(I);
          assert(isa<FixedVectorType>(ElemTy) &&
                 "Only Structs of all FixedVectorType supported");
          VectorSplit VS = *getVectorSplit(ElemTy);
          assert(VS.NumFragments == CV.size());

          Value *ConcatenatedVector =
              concatenate(Builder, ElemCV[I], VS, Op->getName());
          Res = Builder.CreateInsertValue(Res, ConcatenatedVector, I,
                                          Op->getName() + ".insert");
        }
      } else {
        assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
        Res = CV[0];
        if (Op == Res)
          continue;
      }
      Op->replaceAllUsesWith(Res);
    }
    PotentiallyDeadInstrs.emplace_back(Op);
  }
  Gathered.clear();
  Scattered.clear();
  Scalarized = false;

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);

  return true;
}

PreservedAnalyses ScalarizerPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
  ScalarizerVisitor Impl(DT, TTI, Options);
  bool Changed = Impl.visit(F);
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  return Changed ? PA : PreservedAnalyses::all();
}