LLVM 20.0.0git
Scalarizer.cpp
Go to the documentation of this file.
1//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts vector operations into scalar operations (or, optionally,
10// operations on smaller vector widths), in order to expose optimization
11// opportunities on the individual scalar operations.
12// It is mainly intended for targets that do not have vector units, but it
13// may also be useful for revectorizing code to different vector widths.
14//
15//===----------------------------------------------------------------------===//
16
20#include "llvm/ADT/Twine.h"
23#include "llvm/IR/Argument.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstVisitor.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
35#include "llvm/IR/Intrinsics.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Module.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/Value.h"
43#include <cassert>
44#include <cstdint>
45#include <iterator>
46#include <map>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "scalarizer"
52
53namespace {
54
55BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
56 BasicBlock *BB = Itr->getParent();
57 if (isa<PHINode>(Itr))
58 Itr = BB->getFirstInsertionPt();
59 if (Itr != BB->end())
60 Itr = skipDebugIntrinsics(Itr);
61 return Itr;
62}
63
64// Used to store the scattered form of a vector.
65using ValueVector = SmallVector<Value *, 8>;
66
67// Used to map a vector Value and associated type to its scattered form.
68// The associated type is only non-null for pointer values that are "scattered"
69// when used as pointer operands to load or store.
70//
71// We use std::map because we want iterators to persist across insertion and
72// because the values are relatively large.
73using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
74
75// Lists Instructions that have been replaced with scalar implementations,
76// along with a pointer to their scattered forms.
78
// Describes how a vector type is divided into fragments: either individual
// scalars (NumPacked == 1) or smaller sub-vectors of NumPacked elements,
// with an optional shorter remainder fragment at the end.
struct VectorSplit {
  // The type of the vector.
  FixedVectorType *VecTy = nullptr;

  // The number of elements packed in a fragment (other than the remainder).
  unsigned NumPacked = 0;

  // The number of fragments (scalars or smaller vectors) into which the vector
  // shall be split.
  unsigned NumFragments = 0;

  // The type of each complete fragment.
  Type *SplitTy = nullptr;

  // The type of the remainder (last) fragment; null if all fragments are
  // complete.
  Type *RemainderTy = nullptr;

  // Return the type of fragment I: the remainder type for the final fragment
  // when the element count does not divide evenly, otherwise SplitTy.
  Type *getFragmentType(unsigned I) const {
    return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
  }
};
101
102// Provides a very limited vector-like interface for lazily accessing one
103// component of a scattered vector or vector pointer.
// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
public:
  Scatterer() = default;

  // Scatter V into Size components. If new instructions are needed,
  // insert them before BBI in BB. If Cache is nonnull, use it to cache
  // the results.
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            const VectorSplit &VS, ValueVector *cachePtr = nullptr);

  // Return component I, creating a new Value for it if necessary.
  Value *operator[](unsigned I);

  // Return the number of components.
  unsigned size() const { return VS.NumFragments; }

private:
  // Block in which any newly created component instructions are inserted.
  BasicBlock *BB;
  // The vector (or pointer-to-vector) value being scattered.
  Value *V;
  // How V's type is divided into fragments.
  VectorSplit VS;
  // True if V is a pointer operand being scattered rather than a vector value.
  bool IsPointer;
  // Externally provided component cache, or null to use Tmp instead.
  ValueVector *CachePtr;
  // Local component storage used when no external cache was supplied.
  ValueVector Tmp;
};
129
130// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
131// called Name that compares X and Y in the same way as FCI.
132struct FCmpSplitter {
133 FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
134
135 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
136 const Twine &Name) const {
137 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
138 }
139
140 FCmpInst &FCI;
141};
142
143// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
144// called Name that compares X and Y in the same way as ICI.
145struct ICmpSplitter {
146 ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
147
148 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
149 const Twine &Name) const {
150 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
151 }
152
153 ICmpInst &ICI;
154};
155
156// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
157// a unary operator like UO called Name with operand X.
158struct UnarySplitter {
159 UnarySplitter(UnaryOperator &uo) : UO(uo) {}
160
161 Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
162 return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
163 }
164
165 UnaryOperator &UO;
166};
167
168// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
169// a binary operator like BO called Name with operands X and Y.
170struct BinarySplitter {
171 BinarySplitter(BinaryOperator &bo) : BO(bo) {}
172
173 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
174 const Twine &Name) const {
175 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
176 }
177
178 BinaryOperator &BO;
179};
180
181// Information about a load or store that we're scalarizing.
182struct VectorLayout {
183 VectorLayout() = default;
184
185 // Return the alignment of fragment Frag.
186 Align getFragmentAlign(unsigned Frag) {
187 return commonAlignment(VecAlign, Frag * SplitSize);
188 }
189
190 // The split of the underlying vector type.
191 VectorSplit VS;
192
193 // The alignment of the vector.
194 Align VecAlign;
195
196 // The size of each (non-remainder) fragment in bytes.
197 uint64_t SplitSize = 0;
198};
199
200static bool isStructOfMatchingFixedVectors(Type *Ty) {
201 if (!isa<StructType>(Ty))
202 return false;
203 unsigned StructSize = Ty->getNumContainedTypes();
204 if (StructSize < 1)
205 return false;
206 FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(0));
207 if (!VecTy)
208 return false;
209 unsigned VecSize = VecTy->getNumElements();
210 for (unsigned I = 1; I < StructSize; I++) {
211 VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(I));
212 if (!VecTy || VecSize != VecTy->getNumElements())
213 return false;
214 }
215 return true;
216}
217
/// Concatenate the given fragments to a single vector value of the type
/// described in @p VS.
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();
  SmallVector<int> ExtendMask;
  SmallVector<int> InsertMask;

  if (VS.NumPacked > 1) {
    // Prepare the shufflevector masks once and re-use them for all
    // fragments.
    //
    // ExtendMask widens one fragment to the full vector width, selecting the
    // fragment's NumPacked elements and poison (-1) elsewhere.
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    // InsertMask starts as the identity; the loop below temporarily retargets
    // the lanes of the current fragment to the second shuffle operand.
    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  Value *Res = PoisonValue::get(VS.VecTy);
  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    // The last fragment may carry fewer elements than VS.NumPacked.
    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
      else
        NumPacked = 1;
    }

    if (NumPacked == 1) {
      // Scalar fragment: a plain insertelement suffices.
      Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                        Name + ".upto" + Twine(I));
    } else {
      // Widen the fragment to the full vector width, then blend it into the
      // accumulated result via InsertMask.
      Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
      if (I == 0) {
        Res = Fragment;
      } else {
        // Point this fragment's lanes at the second shuffle operand
        // (indices >= NumElements select from Fragment).
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;
        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
                                          Name + ".upto" + Twine(I));
        // Restore InsertMask to the identity for the next iteration.
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
      }
    }
  }

  return Res;
}
270
// The worker for the scalarizer pass: visits every instruction of a function
// and rewrites vector operations into per-fragment scalar (or narrower
// vector) operations. Visitor methods return true when they scalarized the
// visited instruction.
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
  ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
      : DT(DT), TTI(TTI),
        ScalarizeVariableInsertExtract(Options.ScalarizeVariableInsertExtract),
        ScalarizeLoadStore(Options.ScalarizeLoadStore),
        ScalarizeMinBits(Options.ScalarizeMinBits) {}

  // Drive the visitation over F in reverse post-order; returns true if any
  // change was made.
  bool visit(Function &F);

  // InstVisitor methods. They return true if the instruction was scalarized,
  // false if nothing changed.
  bool visitInstruction(Instruction &I) { return false; }
  bool visitSelectInst(SelectInst &SI);
  bool visitICmpInst(ICmpInst &ICI);
  bool visitFCmpInst(FCmpInst &FCI);
  bool visitCastInst(CastInst &CI);
  bool visitBitCastInst(BitCastInst &BCI);
  bool visitPHINode(PHINode &PHI);
  bool visitLoadInst(LoadInst &LI);
  bool visitStoreInst(StoreInst &SI);
  bool visitCallInst(CallInst &ICI);
  bool visitFreezeInst(FreezeInst &FI);

private:
  // Return a scattered form of V usable at Point, caching components where
  // it is safe to do so.
  Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
  // Record CV as the scattered replacement of Op; the gathered (vector)
  // form is materialized lazily in finish().
  void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
  void replaceUses(Instruction *Op, Value *CV);
  // True if metadata of the given kind may be copied from a vector
  // instruction onto its scalar fragments.
  bool canTransferMetadata(unsigned Kind);
  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
  std::optional<VectorSplit> getVectorSplit(Type *Ty);
  std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
                                              const DataLayout &DL);
  bool finish();

  template<typename T> bool splitUnary(Instruction &, const T &);
  template<typename T> bool splitBinary(Instruction &, const T &);

  bool splitCall(CallInst &CI);

  // Cached scattered forms, keyed by (value, fragment type).
  ScatterMap Scattered;
  // Instructions replaced by scalar forms, pending final gathering.
  GatherList Gathered;
  // Whether any change has been made in the current function.
  bool Scalarized;

  // Replaced instructions that may now be dead; cleaned up in finish().
  SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;

  DominatorTree *DT;

  // Pass options, captured by value at construction.
  const bool ScalarizeVariableInsertExtract;
  const bool ScalarizeLoadStore;
  const unsigned ScalarizeMinBits;
};
332
// Legacy pass manager wrapper around ScalarizerVisitor.
class ScalarizerLegacyPass : public FunctionPass {
public:
  static char ID;
  // Default-construct with default pass options.
  ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
  // Construct with explicit pass options.
  ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
  bool runOnFunction(Function &F) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
342
343} // end anonymous namespace
344
345ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
347
348void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
352}
353
354char ScalarizerLegacyPass::ID = 0;
355INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
356 "Scalarize vector operations", false, false)
358INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
360
361Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
362 const VectorSplit &VS, ValueVector *cachePtr)
363 : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
364 IsPointer = V->getType()->isPointerTy();
365 if (!CachePtr) {
366 Tmp.resize(VS.NumFragments, nullptr);
367 } else {
368 assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
369 IsPointer) &&
370 "Inconsistent vector sizes");
371 if (VS.NumFragments > CachePtr->size())
372 CachePtr->resize(VS.NumFragments, nullptr);
373 }
374}
375
// Return fragment Frag, creating a new Value for it if necessary.
Value *Scatterer::operator[](unsigned Frag) {
  ValueVector &CV = CachePtr ? *CachePtr : Tmp;
  // Try to reuse a previous value.
  if (CV[Frag])
    return CV[Frag];
  IRBuilder<> Builder(BB, BBI);
  if (IsPointer) {
    // For a pointer operand, fragment 0 is the pointer itself; later
    // fragments are GEPs stepping Frag fragment-types past it.
    if (Frag == 0)
      CV[Frag] = V;
    else
      CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
                                            V->getName() + ".i" + Twine(Frag));
    return CV[Frag];
  }

  Type *FragmentTy = VS.getFragmentType(Frag);

  if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
    // Sub-vector fragment: extract the fragment's contiguous lane range with
    // a shufflevector.
    for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
      Mask.push_back(Frag * VS.NumPacked + J);
    CV[Frag] =
        Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
                                    V->getName() + ".i" + Twine(Frag));
  } else {
    // Search through a chain of InsertElementInsts looking for element Frag.
    // Record other elements in the cache. The new V is still suitable
    // for all uncached indices.
    while (true) {
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
      if (!Insert)
        break;
      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
      if (!Idx)
        break;
      unsigned J = Idx->getZExtValue();
      V = Insert->getOperand(0);
      if (Frag * VS.NumPacked == J) {
        // Found the insert that defines this element; reuse its operand.
        CV[Frag] = Insert->getOperand(1);
        return CV[Frag];
      }

      if (VS.NumPacked == 1 && !CV[J]) {
        // Only cache the first entry we find for each index we're not actively
        // searching for. This prevents us from going too far up the chain and
        // caching incorrect entries.
        CV[J] = Insert->getOperand(1);
      }
    }
    // Fall back to an explicit extractelement from (the possibly rewritten) V.
    CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
                                            V->getName() + ".i" + Twine(Frag));
  }

  return CV[Frag];
}
432
433bool ScalarizerLegacyPass::runOnFunction(Function &F) {
434 if (skipFunction(F))
435 return false;
436
437 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
438 const TargetTransformInfo *TTI =
439 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
440 ScalarizerVisitor Impl(DT, TTI, Options);
441 return Impl.visit(F);
442}
443
445 return new ScalarizerLegacyPass(Options);
446}
447
448bool ScalarizerVisitor::visit(Function &F) {
449 assert(Gathered.empty() && Scattered.empty());
450
451 Scalarized = false;
452
453 // To ensure we replace gathered components correctly we need to do an ordered
454 // traversal of the basic blocks in the function.
455 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
456 for (BasicBlock *BB : RPOT) {
457 for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
458 Instruction *I = &*II;
459 bool Done = InstVisitor::visit(I);
460 ++II;
461 if (Done && I->getType()->isVoidTy())
462 I->eraseFromParent();
463 }
464 }
465 return finish();
466}
467
468// Return a scattered form of V that can be accessed by Point. V must be a
469// vector or a pointer to a vector.
470Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
471 const VectorSplit &VS) {
472 if (Argument *VArg = dyn_cast<Argument>(V)) {
473 // Put the scattered form of arguments in the entry block,
474 // so that it can be used everywhere.
475 Function *F = VArg->getParent();
476 BasicBlock *BB = &F->getEntryBlock();
477 return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
478 }
479 if (Instruction *VOp = dyn_cast<Instruction>(V)) {
480 // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
481 // nodes in predecessors. If those predecessors are unreachable from entry,
482 // then the IR in those blocks could have unexpected properties resulting in
483 // infinite loops in Scatterer::operator[]. By simply treating values
484 // originating from instructions in unreachable blocks as undef we do not
485 // need to analyse them further.
486 if (!DT->isReachableFromEntry(VOp->getParent()))
487 return Scatterer(Point->getParent(), Point->getIterator(),
488 PoisonValue::get(V->getType()), VS);
489 // Put the scattered form of an instruction directly after the
490 // instruction, skipping over PHI nodes and debug intrinsics.
491 BasicBlock *BB = VOp->getParent();
492 return Scatterer(
493 BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
494 &Scattered[{V, VS.SplitTy}]);
495 }
496 // In the fallback case, just put the scattered before Point and
497 // keep the result local to Point.
498 return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
499}
500
501// Replace Op with the gathered form of the components in CV. Defer the
502// deletion of Op and creation of the gathered form to the end of the pass,
503// so that we can avoid creating the gathered form if all uses of Op are
504// replaced with uses of CV.
505void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
506 const VectorSplit &VS) {
507 transferMetadataAndIRFlags(Op, CV);
508
509 // If we already have a scattered form of Op (created from ExtractElements
510 // of Op itself), replace them with the new form.
511 ValueVector &SV = Scattered[{Op, VS.SplitTy}];
512 if (!SV.empty()) {
513 for (unsigned I = 0, E = SV.size(); I != E; ++I) {
514 Value *V = SV[I];
515 if (V == nullptr || SV[I] == CV[I])
516 continue;
517
518 Instruction *Old = cast<Instruction>(V);
519 if (isa<Instruction>(CV[I]))
520 CV[I]->takeName(Old);
521 Old->replaceAllUsesWith(CV[I]);
522 PotentiallyDeadInstrs.emplace_back(Old);
523 }
524 }
525 SV = CV;
526 Gathered.push_back(GatherList::value_type(Op, &SV));
527}
528
529// Replace Op with CV and collect Op has a potentially dead instruction.
530void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
531 if (CV != Op) {
532 Op->replaceAllUsesWith(CV);
533 PotentiallyDeadInstrs.emplace_back(Op);
534 Scalarized = true;
535 }
536}
537
538// Return true if it is safe to transfer the given metadata tag from
539// vector to scalar instructions.
540bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
541 return (Tag == LLVMContext::MD_tbaa
542 || Tag == LLVMContext::MD_fpmath
543 || Tag == LLVMContext::MD_tbaa_struct
544 || Tag == LLVMContext::MD_invariant_load
545 || Tag == LLVMContext::MD_alias_scope
546 || Tag == LLVMContext::MD_noalias
547 || Tag == LLVMContext::MD_mem_parallel_loop_access
548 || Tag == LLVMContext::MD_access_group);
549}
550
// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
                                                   const ValueVector &CV) {
  Op->getAllMetadataOtherThanDebugLoc(MDs);
  for (Value *V : CV) {
    // Constants and other non-instruction components carry no metadata.
    if (Instruction *New = dyn_cast<Instruction>(V)) {
      // Copy only the metadata kinds that remain valid on the fragments.
      for (const auto &MD : MDs)
        if (canTransferMetadata(MD.first))
          New->setMetadata(MD.first, MD.second);
      New->copyIRFlags(Op);
      // Preserve the original debug location unless the fragment already
      // has one.
      if (Op->getDebugLoc() && !New->getDebugLoc())
        New->setDebugLoc(Op->getDebugLoc());
    }
  }
}
568
569// Determine how Ty is split, if at all.
570std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
571 VectorSplit Split;
572 Split.VecTy = dyn_cast<FixedVectorType>(Ty);
573 if (!Split.VecTy)
574 return {};
575
576 unsigned NumElems = Split.VecTy->getNumElements();
577 Type *ElemTy = Split.VecTy->getElementType();
578
579 if (NumElems == 1 || ElemTy->isPointerTy() ||
580 2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
581 Split.NumPacked = 1;
582 Split.NumFragments = NumElems;
583 Split.SplitTy = ElemTy;
584 } else {
585 Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
586 if (Split.NumPacked >= NumElems)
587 return {};
588
589 Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
590 Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);
591
592 unsigned RemainderElems = NumElems % Split.NumPacked;
593 if (RemainderElems > 1)
594 Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
595 else if (RemainderElems == 1)
596 Split.RemainderTy = ElemTy;
597 }
598
599 return Split;
600}
601
602// Try to fill in Layout from Ty, returning true on success. Alignment is
603// the alignment of the vector, or std::nullopt if the ABI default should be
604// used.
605std::optional<VectorLayout>
606ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
607 const DataLayout &DL) {
608 std::optional<VectorSplit> VS = getVectorSplit(Ty);
609 if (!VS)
610 return {};
611
612 VectorLayout Layout;
613 Layout.VS = *VS;
614 // Check that we're dealing with full-byte fragments.
615 if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
616 (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
617 return {};
618 Layout.VecAlign = Alignment;
619 Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
620 return Layout;
621}
622
623// Scalarize one-operand instruction I, using Split(Builder, X, Name)
624// to create an instruction like I with operand X and name Name.
625template<typename Splitter>
626bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
627 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
628 if (!VS)
629 return false;
630
631 std::optional<VectorSplit> OpVS;
632 if (I.getOperand(0)->getType() == I.getType()) {
633 OpVS = VS;
634 } else {
635 OpVS = getVectorSplit(I.getOperand(0)->getType());
636 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
637 return false;
638 }
639
640 IRBuilder<> Builder(&I);
641 Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
642 assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
643 ValueVector Res;
644 Res.resize(VS->NumFragments);
645 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
646 Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
647 gather(&I, Res, *VS);
648 return true;
649}
650
651// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
652// to create an instruction like I with operands X and Y and name Name.
653template<typename Splitter>
654bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
655 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
656 if (!VS)
657 return false;
658
659 std::optional<VectorSplit> OpVS;
660 if (I.getOperand(0)->getType() == I.getType()) {
661 OpVS = VS;
662 } else {
663 OpVS = getVectorSplit(I.getOperand(0)->getType());
664 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
665 return false;
666 }
667
668 IRBuilder<> Builder(&I);
669 Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
670 Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
671 assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
672 assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
673 ValueVector Res;
674 Res.resize(VS->NumFragments);
675 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
676 Value *Op0 = VOp0[Frag];
677 Value *Op1 = VOp1[Frag];
678 Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
679 }
680 gather(&I, Res, *VS);
681 return true;
682}
683
/// If a call to a vector typed intrinsic function, split into a scalar call per
/// element if possible for the intrinsic.
bool ScalarizerVisitor::splitCall(CallInst &CI) {
  Type *CallType = CI.getType();
  // Struct-of-vectors returns (e.g. from frexp-like intrinsics) are split
  // according to their first member; isStructOfMatchingFixedVectors has
  // already ensured all members agree on the element count.
  bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType);
  std::optional<VectorSplit> VS;
  if (AreAllVectorsOfMatchingSize)
    VS = getVectorSplit(CallType->getContainedType(0));
  else
    VS = getVectorSplit(CallType);
  if (!VS)
    return false;

  if (!F)
    return false;

  Intrinsic::ID ID = F->getIntrinsicID();

    return false;

  // unsigned NumElems = VT->getNumElements();
  unsigned NumArgs = CI.arg_size();

  // Per-argument state: unscattered scalar operands, scattered vector
  // operands, and (for overloaded operands) the index of their slot in the
  // overload type list so the remainder iteration can retarget it.
  ValueVector ScalarOperands(NumArgs);
  SmallVector<Scatterer, 8> Scattered(NumArgs);
  SmallVector<int> OverloadIdx(NumArgs, -1);

  // Add return type if intrinsic is overloaded on it.
    Tys.push_back(VS->SplitTy);

  if (AreAllVectorsOfMatchingSize) {
    for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
      std::optional<VectorSplit> CurrVS =
          getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I)));
      // This case does not seem to happen, but it is possible for
      // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit
      // is not returned and we will bailout of handling this call.
      // The secondary bailout case is if NumPacked does not match.
      // This can happen if ScalarizeMinBits is not set to the default.
      // This means with certain ScalarizeMinBits intrinsics like frexp
      // will only scalarize when the struct elements have the same bitness.
      if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
        return false;
      Tys.push_back(CurrVS->SplitTy);
    }
  }
  // Assumes that any vector type has the same number of elements as the return
  // vector type, which is true for all current intrinsics.
  for (unsigned I = 0; I != NumArgs; ++I) {
    Value *OpI = CI.getOperand(I);
    if ([[maybe_unused]] auto *OpVecTy =
            dyn_cast<FixedVectorType>(OpI->getType())) {
      assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
      std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // The natural split of the operand doesn't match the result. This could
        // happen if the vector elements are different and the ScalarizeMinBits
        // option is used.
        //
        // We could in principle handle this case as well, at the cost of
        // complicating the scattering machinery to support multiple scattering
        // granularities for a single value.
        return false;
      }

      Scattered[I] = scatter(&CI, OpI, *OpVS);
        OverloadIdx[I] = Tys.size();
        Tys.push_back(OpVS->SplitTy);
      }
    } else {
      // Scalar operand: passed through unchanged to every fragment call.
      ScalarOperands[I] = OpI;
        Tys.push_back(OpI->getType());
    }
  }

  ValueVector Res(VS->NumFragments);
  ValueVector ScalarCallOps(NumArgs);

  Function *NewIntrin =
      Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
  IRBuilder<> Builder(&CI);

  // Perform actual scalarization, taking care to preserve any scalar operands.
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    // The final fragment may be narrower; its overload types must be
    // retargeted to the remainder types before declaring the intrinsic.
    bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
    ScalarCallOps.clear();

    if (IsRemainder)
      Tys[0] = VS->RemainderTy;

    for (unsigned J = 0; J != NumArgs; ++J) {
        ScalarCallOps.push_back(ScalarOperands[J]);
      } else {
        ScalarCallOps.push_back(Scattered[J][I]);
        if (IsRemainder && OverloadIdx[J] >= 0)
          Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
      }
    }

    // Re-declare the intrinsic with the remainder overload types.
    if (IsRemainder)
      NewIntrin = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);

    Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
                                CI.getName() + ".i" + Twine(I));
  }

  gather(&CI, Res, *VS);
  return true;
}
801
802bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
803 std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
804 if (!VS)
805 return false;
806
807 std::optional<VectorSplit> CondVS;
808 if (isa<FixedVectorType>(SI.getCondition()->getType())) {
809 CondVS = getVectorSplit(SI.getCondition()->getType());
810 if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
811 // This happens when ScalarizeMinBits is used.
812 return false;
813 }
814 }
815
816 IRBuilder<> Builder(&SI);
817 Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
818 Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
819 assert(VOp1.size() == VS->NumFragments && "Mismatched select");
820 assert(VOp2.size() == VS->NumFragments && "Mismatched select");
821 ValueVector Res;
822 Res.resize(VS->NumFragments);
823
824 if (CondVS) {
825 Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
826 assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
827 for (unsigned I = 0; I < VS->NumFragments; ++I) {
828 Value *Op0 = VOp0[I];
829 Value *Op1 = VOp1[I];
830 Value *Op2 = VOp2[I];
831 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
832 SI.getName() + ".i" + Twine(I));
833 }
834 } else {
835 Value *Op0 = SI.getOperand(0);
836 for (unsigned I = 0; I < VS->NumFragments; ++I) {
837 Value *Op1 = VOp1[I];
838 Value *Op2 = VOp2[I];
839 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
840 SI.getName() + ".i" + Twine(I));
841 }
842 }
843 gather(&SI, Res, *VS);
844 return true;
845}
846
// Integer compares scalarize like any other two-operand instruction.
bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
  return splitBinary(ICI, ICmpSplitter(ICI));
}
850
// Floating-point compares scalarize like any other two-operand instruction.
bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
  return splitBinary(FCI, FCmpSplitter(FCI));
}
854
// Unary operators (e.g. fneg) scalarize via the generic one-operand splitter.
bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
  return splitUnary(UO, UnarySplitter(UO));
}
858
// Binary operators scalarize via the generic two-operand splitter.
bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
  return splitBinary(BO, BinarySplitter(BO));
}
862
// Scalarize a vector GEP: emit one GEP per fragment, reusing scalar base
// pointers/indices unchanged and scattering any vector operands.
bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&GEPI);
  unsigned NumIndices = GEPI.getNumIndices();

  // The base pointer and indices might be scalar even if it's a vector GEP.
  // Slot 0 holds the base pointer, slots 1..NumIndices the indices; for each
  // slot exactly one of ScalarOps/ScatterOps is populated.
  SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
  SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};

  for (unsigned I = 0; I < 1 + NumIndices; ++I) {
    if (auto *VecTy =
            dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
      std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // This can happen when ScalarizeMinBits is used.
        return false;
      }
      ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
    } else {
      ScalarOps[I] = GEPI.getOperand(I);
    }
  }

  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    // Assemble this fragment's operand list from the scalar/scattered slots.
    SplitOps.resize(1 + NumIndices);
    for (unsigned J = 0; J < 1 + NumIndices; ++J) {
      if (ScalarOps[J])
        SplitOps[J] = ScalarOps[J];
      else
        SplitOps[J] = ScatterOps[J][I];
    }
    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
                               ArrayRef(SplitOps).drop_front(),
                               GEPI.getName() + ".i" + Twine(I));
    // Preserve the inbounds flag on each fragment GEP.
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
        NewGEPI->setIsInBounds();
  }
  gather(&GEPI, Res, *VS);
  return true;
}
910
911bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
912 std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
913 if (!DestVS)
914 return false;
915
916 std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
917 if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
918 return false;
919
920 IRBuilder<> Builder(&CI);
921 Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
922 assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
923 ValueVector Res;
924 Res.resize(DestVS->NumFragments);
925 for (unsigned I = 0; I < DestVS->NumFragments; ++I)
926 Res[I] =
927 Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
928 CI.getName() + ".i" + Twine(I));
929 gather(&CI, Res, *DestVS);
930 return true;
931}
932
// Scalarize a bitcast between vector types. Handles three shapes: equal-sized
// fragments (direct per-fragment bitcast), wide source fragments split into
// several destination fragments, and several source fragments concatenated
// into one destination fragment.
bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
  std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
  std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
  if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
    return false;

  const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();

  // Vectors of pointers are always fully scalarized.
  assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));

  IRBuilder<> Builder(&BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
  ValueVector Res;
  Res.resize(DstVS->NumFragments);

  unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
  unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();

  if (isPointerTy || DstSplitBits == SrcSplitBits) {
    // Fragments correspond one-to-one; bitcast each directly.
    assert(DstVS->NumFragments == SrcVS->NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else if (SrcSplitBits % DstSplitBits == 0) {
    // Convert each source fragment to the same-sized destination vector and
    // then scatter the result to the destination.
    VectorSplit MidVS;
    MidVS.NumPacked = DstVS->NumPacked;
    MidVS.NumFragments = SrcSplitBits / DstSplitBits;
    MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = DstVS->SplitTy;

    unsigned ResI = 0;
    for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
      Value *V = Op0[I];

      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);

      V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");

      // Re-scatter the intermediate vector into destination-sized pieces.
      Scatterer Mid = scatter(&BCI, V, MidVS);
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        Res[ResI++] = Mid[J];
    }
  } else if (DstSplitBits % SrcSplitBits == 0) {
    // Gather enough source fragments to make up a destination fragment and
    // then convert to the destination type.
    VectorSplit MidVS;
    MidVS.NumFragments = DstSplitBits / SrcSplitBits;
    MidVS.NumPacked = SrcVS->NumPacked;
    MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = SrcVS->SplitTy;

    unsigned SrcI = 0;
    SmallVector<Value *, 8> ConcatOps;
    ConcatOps.resize(MidVS.NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        ConcatOps[J] = Op0[SrcI++];
      Value *V = concatenate(Builder, ConcatOps, MidVS,
                             BCI.getName() + ".i" + Twine(I));
      Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else {
    // Fragment sizes are incommensurable; leave the bitcast alone.
    return false;
  }

  gather(&BCI, Res, *DstVS);
1011 return true;
1012}
1013
1014bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
1015 std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
1016 if (!VS)
1017 return false;
1018
1019 IRBuilder<> Builder(&IEI);
1020 Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
1021 Value *NewElt = IEI.getOperand(1);
1022 Value *InsIdx = IEI.getOperand(2);
1023
1024 ValueVector Res;
1025 Res.resize(VS->NumFragments);
1026
1027 if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
1028 unsigned Idx = CI->getZExtValue();
1029 unsigned Fragment = Idx / VS->NumPacked;
1030 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1031 if (I == Fragment) {
1032 bool IsPacked = VS->NumPacked > 1;
1033 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1034 !VS->RemainderTy->isVectorTy())
1035 IsPacked = false;
1036 if (IsPacked) {
1037 Res[I] =
1038 Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
1039 } else {
1040 Res[I] = NewElt;
1041 }
1042 } else {
1043 Res[I] = Op0[I];
1044 }
1045 }
1046 } else {
1047 // Never split a variable insertelement that isn't fully scalarized.
1048 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1049 return false;
1050
1051 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1052 Value *ShouldReplace =
1053 Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
1054 InsIdx->getName() + ".is." + Twine(I));
1055 Value *OldElt = Op0[I];
1056 Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
1057 IEI.getName() + ".i" + Twine(I));
1058 }
1059 }
1060
1061 gather(&IEI, Res, *VS);
1062 return true;
1063}
1064
1065bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
1066 Value *Op = EVI.getOperand(0);
1067 Type *OpTy = Op->getType();
1068 ValueVector Res;
1069 if (!isStructOfMatchingFixedVectors(OpTy))
1070 return false;
1071 if (CallInst *CI = dyn_cast<CallInst>(Op)) {
1072 Function *F = CI->getCalledFunction();
1073 if (!F)
1074 return false;
1075 Intrinsic::ID ID = F->getIntrinsicID();
1077 return false;
1078 // Note: Fall through means Operand is a`CallInst` and it is defined in
1079 // `isTriviallyScalarizable`.
1080 } else
1081 return false;
1082 Type *VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
1083 std::optional<VectorSplit> VS = getVectorSplit(VecType);
1084 if (!VS)
1085 return false;
1086 IRBuilder<> Builder(&EVI);
1087 Scatterer Op0 = scatter(&EVI, Op, *VS);
1088 assert(!EVI.getIndices().empty() && "Make sure an index exists");
1089 // Note for our use case we only care about the top level index.
1090 unsigned Index = EVI.getIndices()[0];
1091 for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
1092 Value *ResElem = Builder.CreateExtractValue(
1093 Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));
1094 Res.push_back(ResElem);
1095 }
1096
1097 gather(&EVI, Res, *VS);
1098 return true;
1099}
1100
1101bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
1102 std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
1103 if (!VS)
1104 return false;
1105
1106 IRBuilder<> Builder(&EEI);
1107 Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
1108 Value *ExtIdx = EEI.getOperand(1);
1109
1110 if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
1111 unsigned Idx = CI->getZExtValue();
1112 unsigned Fragment = Idx / VS->NumPacked;
1113 Value *Res = Op0[Fragment];
1114 bool IsPacked = VS->NumPacked > 1;
1115 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1116 !VS->RemainderTy->isVectorTy())
1117 IsPacked = false;
1118 if (IsPacked)
1119 Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
1120 replaceUses(&EEI, Res);
1121 return true;
1122 }
1123
1124 // Never split a variable extractelement that isn't fully scalarized.
1125 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1126 return false;
1127
1128 Value *Res = PoisonValue::get(VS->VecTy->getElementType());
1129 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1130 Value *ShouldExtract =
1131 Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
1132 ExtIdx->getName() + ".is." + Twine(I));
1133 Value *Elt = Op0[I];
1134 Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
1135 EEI.getName() + ".upto" + Twine(I));
1136 }
1137 replaceUses(&EEI, Res);
1138 return true;
1139}
1140
1141bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1142 std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
1143 std::optional<VectorSplit> VSOp =
1144 getVectorSplit(SVI.getOperand(0)->getType());
1145 if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
1146 return false;
1147
1148 Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
1149 Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
1150 ValueVector Res;
1151 Res.resize(VS->NumFragments);
1152
1153 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1154 int Selector = SVI.getMaskValue(I);
1155 if (Selector < 0)
1156 Res[I] = PoisonValue::get(VS->VecTy->getElementType());
1157 else if (unsigned(Selector) < Op0.size())
1158 Res[I] = Op0[Selector];
1159 else
1160 Res[I] = Op1[Selector - Op0.size()];
1161 }
1162 gather(&SVI, Res, *VS);
1163 return true;
1164}
1165
1166bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
1167 std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
1168 if (!VS)
1169 return false;
1170
1171 IRBuilder<> Builder(&PHI);
1172 ValueVector Res;
1173 Res.resize(VS->NumFragments);
1174
1175 unsigned NumOps = PHI.getNumOperands();
1176 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1177 Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
1178 PHI.getName() + ".i" + Twine(I));
1179 }
1180
1181 for (unsigned I = 0; I < NumOps; ++I) {
1182 Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
1183 BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
1184 for (unsigned J = 0; J < VS->NumFragments; ++J)
1185 cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
1186 }
1187 gather(&PHI, Res, *VS);
1188 return true;
1189}
1190
1191bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
1192 if (!ScalarizeLoadStore)
1193 return false;
1194 if (!LI.isSimple())
1195 return false;
1196
1197 std::optional<VectorLayout> Layout = getVectorLayout(
1198 LI.getType(), LI.getAlign(), LI.getDataLayout());
1199 if (!Layout)
1200 return false;
1201
1202 IRBuilder<> Builder(&LI);
1203 Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
1204 ValueVector Res;
1205 Res.resize(Layout->VS.NumFragments);
1206
1207 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1208 Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
1209 Align(Layout->getFragmentAlign(I)),
1210 LI.getName() + ".i" + Twine(I));
1211 }
1212 gather(&LI, Res, Layout->VS);
1213 return true;
1214}
1215
1216bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
1217 if (!ScalarizeLoadStore)
1218 return false;
1219 if (!SI.isSimple())
1220 return false;
1221
1222 Value *FullValue = SI.getValueOperand();
1223 std::optional<VectorLayout> Layout = getVectorLayout(
1224 FullValue->getType(), SI.getAlign(), SI.getDataLayout());
1225 if (!Layout)
1226 return false;
1227
1228 IRBuilder<> Builder(&SI);
1229 Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
1230 Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
1231
1232 ValueVector Stores;
1233 Stores.resize(Layout->VS.NumFragments);
1234 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1235 Value *Val = VVal[I];
1236 Value *Ptr = VPtr[I];
1237 Stores[I] =
1238 Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
1239 }
1240 transferMetadataAndIRFlags(&SI, Stores);
1241 return true;
1242}
1243
// Calls are delegated to splitCall, which performs the per-fragment
// scalarization of the callee.
bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}
1247
// Freeze distributes over fragments: apply CreateFreeze to each piece via
// the generic unary splitter.
bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
  return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
    return Builder.CreateFreeze(Op, Name);
  });
}
1253
1254// Delete the instructions that we scalarized. If a full vector result
1255// is still needed, recreate it using InsertElements.
1256bool ScalarizerVisitor::finish() {
1257 // The presence of data in Gathered or Scattered indicates changes
1258 // made to the Function.
1259 if (Gathered.empty() && Scattered.empty() && !Scalarized)
1260 return false;
1261 for (const auto &GMI : Gathered) {
1262 Instruction *Op = GMI.first;
1263 ValueVector &CV = *GMI.second;
1264 if (!Op->use_empty()) {
1265 // The value is still needed, so recreate it using a series of
1266 // insertelements and/or shufflevectors.
1267 Value *Res;
1268 if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
1269 BasicBlock *BB = Op->getParent();
1270 IRBuilder<> Builder(Op);
1271 if (isa<PHINode>(Op))
1272 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1273
1274 VectorSplit VS = *getVectorSplit(Ty);
1275 assert(VS.NumFragments == CV.size());
1276
1277 Res = concatenate(Builder, CV, VS, Op->getName());
1278
1279 Res->takeName(Op);
1280 } else if (auto *Ty = dyn_cast<StructType>(Op->getType())) {
1281 BasicBlock *BB = Op->getParent();
1282 IRBuilder<> Builder(Op);
1283 if (isa<PHINode>(Op))
1284 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1285
1286 // Iterate over each element in the struct
1287 unsigned NumOfStructElements = Ty->getNumElements();
1288 SmallVector<ValueVector, 4> ElemCV(NumOfStructElements);
1289 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1290 for (auto *CVelem : CV) {
1291 Value *Elem = Builder.CreateExtractValue(
1292 CVelem, I, Op->getName() + ".elem" + Twine(I));
1293 ElemCV[I].push_back(Elem);
1294 }
1295 }
1296 Res = PoisonValue::get(Ty);
1297 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1298 Type *ElemTy = Ty->getElementType(I);
1299 assert(isa<FixedVectorType>(ElemTy) &&
1300 "Only Structs of all FixedVectorType supported");
1301 VectorSplit VS = *getVectorSplit(ElemTy);
1302 assert(VS.NumFragments == CV.size());
1303
1304 Value *ConcatenatedVector =
1305 concatenate(Builder, ElemCV[I], VS, Op->getName());
1306 Res = Builder.CreateInsertValue(Res, ConcatenatedVector, I,
1307 Op->getName() + ".insert");
1308 }
1309 } else {
1310 assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
1311 Res = CV[0];
1312 if (Op == Res)
1313 continue;
1314 }
1315 Op->replaceAllUsesWith(Res);
1316 }
1317 PotentiallyDeadInstrs.emplace_back(Op);
1318 }
1319 Gathered.clear();
1320 Scattered.clear();
1321 Scalarized = false;
1322
1324
1325 return true;
1326}
1327
1331 ScalarizerVisitor Impl(DT, TTI, Options);
1332 bool Changed = Impl.visit(F);
1335 return Changed ? PA : PreservedAnalyses::all();
1336}
aarch64 promote const
AMDGPU promote alloca to vector or false DEBUG_TYPE to vector
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Scalarize vector operations
Definition: Scalarizer.cpp:359
scalarizer
Definition: Scalarizer.cpp:358
This pass converts vector operations into scalar operations (or, optionally, operations on smaller ve...
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
This class represents a no-op cast from one type to another.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1349
unsigned arg_size() const
Definition: InstrTypes.h:1292
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:613
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:608
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:615
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
This instruction extracts a single (scalar) element from a VectorType value.
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:563
unsigned getNumElements() const
Definition: DerivedTypes.h:606
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Type * getSourceElementType() const
Definition: Instructions.h:990
unsigned getNumIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2393
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2503
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2554
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2547
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2566
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1776
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2525
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1689
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2383
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitFreezeInst(FreezeInst &I)
Definition: InstVisitor.h:200
RetTy visitFCmpInst(FCmpInst &I)
Definition: InstVisitor.h:167
RetTy visitExtractElementInst(ExtractElementInst &I)
Definition: InstVisitor.h:191
RetTy visitShuffleVectorInst(ShuffleVectorInst &I)
Definition: InstVisitor.h:193
RetTy visitBitCastInst(BitCastInst &I)
Definition: InstVisitor.h:187
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitPHINode(PHINode &I)
Definition: InstVisitor.h:175
RetTy visitExtractValueInst(ExtractValueInst &I)
Definition: InstVisitor.h:194
RetTy visitUnaryOperator(UnaryOperator &I)
Definition: InstVisitor.h:263
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:170
RetTy visitInsertElementInst(InsertElementInst &I)
Definition: InstVisitor.h:192
RetTy visitBinaryOperator(BinaryOperator &I)
Definition: InstVisitor.h:264
RetTy visitICmpInst(ICmpInst &I)
Definition: InstVisitor.h:166
RetTy visitCallInst(CallInst &I)
Definition: InstVisitor.h:223
RetTy visitCastInst(CastInst &I)
Definition: InstVisitor.h:262
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
RetTy visitGetElementPtrInst(GetElementPtrInst &I)
Definition: InstVisitor.h:174
void visitInstruction(Instruction &I)
Definition: InstVisitor.h:283
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
An instruction for reading from memory.
Definition: Instructions.h:176
Value * getPointerOperand()
Definition: Instructions.h:255
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class represents the LLVM 'select' instruction.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
void resize(size_type N)
Definition: SmallVector.h:638
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
unsigned getNumContainedTypes() const
Return the number of types in the derived type.
Definition: Type.h:390
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:384
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)
Identify if the intrinsic is trivially scalarizable.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
@ Done
Definition: Threading.h:61
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:698
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:250
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
TargetTransformInfo TTI
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
FunctionPass * createScalarizerPass(const ScalarizerPassOptions &Options=ScalarizerPassOptions())
Create a legacy pass manager instance of the Scalarizer pass.
Definition: Scalarizer.cpp:444
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition: Local.cpp:561
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39