LLVM 22.0.0git
Scalarizer.cpp
Go to the documentation of this file.
1//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts vector operations into scalar operations (or, optionally,
10// operations on smaller vector widths), in order to expose optimization
11// opportunities on the individual scalar operations.
12// It is mainly intended for targets that do not have vector units, but it
13// may also be useful for revectorizing code to different vector widths.
14//
15//===----------------------------------------------------------------------===//
16
20#include "llvm/ADT/Twine.h"
23#include "llvm/IR/Argument.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstVisitor.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
35#include "llvm/IR/Intrinsics.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Module.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/Value.h"
43#include <cassert>
44#include <cstdint>
45#include <iterator>
46#include <map>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "scalarizer"
52
53namespace {
54
55BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
56 BasicBlock *BB = Itr->getParent();
57 if (isa<PHINode>(Itr))
58 Itr = BB->getFirstInsertionPt();
59 if (Itr != BB->end())
60 Itr = skipDebugIntrinsics(Itr);
61 return Itr;
62}
63
// Used to store the scattered form of a vector.
using ValueVector = SmallVector<Value *, 8>;

// Used to map a vector Value and associated type to its scattered form.
// The associated type is only non-null for pointer values that are "scattered"
// when used as pointer operands to load or store.
//
// We use std::map because we want iterators to persist across insertion and
// because the values are relatively large.
using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;

// Lists Instructions that have been replaced with scalar implementations,
// along with a pointer to their scattered forms.
// NOTE(review): the GatherList alias referenced by the code below (presumably
// "using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;")
// appears to be missing from this extraction -- confirm against the original.
79struct VectorSplit {
80 // The type of the vector.
81 FixedVectorType *VecTy = nullptr;
82
83 // The number of elements packed in a fragment (other than the remainder).
84 unsigned NumPacked = 0;
85
86 // The number of fragments (scalars or smaller vectors) into which the vector
87 // shall be split.
88 unsigned NumFragments = 0;
89
90 // The type of each complete fragment.
91 Type *SplitTy = nullptr;
92
93 // The type of the remainder (last) fragment; null if all fragments are
94 // complete.
95 Type *RemainderTy = nullptr;
96
97 Type *getFragmentType(unsigned I) const {
98 return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
99 }
100};
101
// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
public:
  Scatterer() = default;

  // Scatter V into Size components. If new instructions are needed,
  // insert them before BBI in BB. If Cache is nonnull, use it to cache
  // the results.
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            const VectorSplit &VS, ValueVector *cachePtr = nullptr);

  // Return component I, creating a new Value for it if necessary.
  Value *operator[](unsigned I);

  // Return the number of components.
  unsigned size() const { return VS.NumFragments; }

private:
  // Block holding the insertion point for any new instructions.
  BasicBlock *BB;
  // NOTE(review): the constructor initializes a BBI member
  // (BasicBlock::iterator, the insertion point); its declaration appears to
  // be missing from this extraction -- confirm against the original file.
  // The value being scattered.
  Value *V;
  // How V is split into fragments.
  VectorSplit VS;
  // True if V is a pointer (fragments are GEPs rather than element extracts).
  bool IsPointer;
  // External cache of fragments shared across Scatterer instances, or null.
  ValueVector *CachePtr;
  // Local fragment storage used when no external cache is supplied.
  ValueVector Tmp;
};
129
130// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
131// called Name that compares X and Y in the same way as FCI.
132struct FCmpSplitter {
133 FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
134
135 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
136 const Twine &Name) const {
137 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
138 }
139
140 FCmpInst &FCI;
141};
142
143// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
144// called Name that compares X and Y in the same way as ICI.
145struct ICmpSplitter {
146 ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
147
148 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
149 const Twine &Name) const {
150 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
151 }
152
153 ICmpInst &ICI;
154};
155
156// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
157// a unary operator like UO called Name with operand X.
158struct UnarySplitter {
159 UnarySplitter(UnaryOperator &uo) : UO(uo) {}
160
161 Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
162 return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
163 }
164
165 UnaryOperator &UO;
166};
167
168// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
169// a binary operator like BO called Name with operands X and Y.
170struct BinarySplitter {
171 BinarySplitter(BinaryOperator &bo) : BO(bo) {}
172
173 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
174 const Twine &Name) const {
175 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
176 }
177
178 BinaryOperator &BO;
179};
180
181// Information about a load or store that we're scalarizing.
182struct VectorLayout {
183 VectorLayout() = default;
184
185 // Return the alignment of fragment Frag.
186 Align getFragmentAlign(unsigned Frag) {
187 return commonAlignment(VecAlign, Frag * SplitSize);
188 }
189
190 // The split of the underlying vector type.
191 VectorSplit VS;
192
193 // The alignment of the vector.
194 Align VecAlign;
195
196 // The size of each (non-remainder) fragment in bytes.
197 uint64_t SplitSize = 0;
198};
199
200static bool isStructOfMatchingFixedVectors(Type *Ty) {
201 if (!isa<StructType>(Ty))
202 return false;
203 unsigned StructSize = Ty->getNumContainedTypes();
204 if (StructSize < 1)
205 return false;
206 FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(0));
207 if (!VecTy)
208 return false;
209 unsigned VecSize = VecTy->getNumElements();
210 for (unsigned I = 1; I < StructSize; I++) {
211 VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(I));
212 if (!VecTy || VecSize != VecTy->getNumElements())
213 return false;
214 }
215 return true;
216}
217
/// Concatenate the given fragments to a single vector value of the type
/// described in @p VS.
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();
  SmallVector<int> ExtendMask;
  SmallVector<int> InsertMask;

  if (VS.NumPacked > 1) {
    // Prepare the shufflevector masks once and re-use them for all
    // fragments.
    // ExtendMask widens a fragment to the full result width, padding the
    // tail lanes with poison (-1).
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    // InsertMask starts out as the identity permutation; the loop below
    // temporarily redirects the current fragment's lanes to the second
    // shuffle operand and then restores them.
    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  Value *Res = PoisonValue::get(VS.VecTy);
  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    unsigned NumPacked = VS.NumPacked;
    // The final fragment may carry fewer elements than the others.
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
      else
        NumPacked = 1;
    }

    if (NumPacked == 1) {
      // Scalar fragment: a plain insertelement suffices.
      Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                        Name + ".upto" + Twine(I));
    } else {
      // Vector fragment: widen it to the result width, then blend it into
      // the accumulated result with a two-operand shuffle.
      Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
      if (I == 0) {
        Res = Fragment;
      } else {
        // Point this fragment's lanes at the second shuffle operand...
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;
        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
                                          Name + ".upto" + Twine(I));
        // ...and restore the identity mapping for the next iteration.
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
      }
    }
  }

  return Res;
}
270
// The worker that visits every instruction of a function and rewrites vector
// operations into per-fragment scalar (or narrower vector) operations.
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
  // NOTE(review): the final constructor parameter (presumably
  // "ScalarizerPassOptions Options)", original line 274) appears to be
  // missing from this extraction -- confirm against the original file.
  ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
      : DT(DT), TTI(TTI),
        ScalarizeVariableInsertExtract(Options.ScalarizeVariableInsertExtract),
        ScalarizeLoadStore(Options.ScalarizeLoadStore),
        ScalarizeMinBits(Options.ScalarizeMinBits) {}

  bool visit(Function &F);

  // InstVisitor methods. They return true if the instruction was scalarized,
  // false if nothing changed.
  bool visitInstruction(Instruction &I) { return false; }
  bool visitSelectInst(SelectInst &SI);
  bool visitICmpInst(ICmpInst &ICI);
  bool visitFCmpInst(FCmpInst &FCI);
  bool visitUnaryOperator(UnaryOperator &UO);
  bool visitBinaryOperator(BinaryOperator &BO);
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
  bool visitCastInst(CastInst &CI);
  bool visitBitCastInst(BitCastInst &BCI);
  bool visitInsertElementInst(InsertElementInst &IEI);
  bool visitExtractElementInst(ExtractElementInst &EEI);
  bool visitExtractValueInst(ExtractValueInst &EVI);
  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
  bool visitPHINode(PHINode &PHI);
  bool visitLoadInst(LoadInst &LI);
  bool visitStoreInst(StoreInst &SI);
  bool visitCallInst(CallInst &ICI);
  bool visitFreezeInst(FreezeInst &FI);

private:
  // Helpers shared by the visit* methods; see their definitions below.
  Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
  void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
  void replaceUses(Instruction *Op, Value *CV);
  bool canTransferMetadata(unsigned Kind);
  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
  std::optional<VectorSplit> getVectorSplit(Type *Ty);
  std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
                                              const DataLayout &DL);
  bool finish();

  template<typename T> bool splitUnary(Instruction &, const T &);
  template<typename T> bool splitBinary(Instruction &, const T &);

  bool splitCall(CallInst &CI);

  // Cached scattered forms and the list of instructions awaiting gathering.
  ScatterMap Scattered;
  GatherList Gathered;
  // True once any rewrite has been performed (result of the pass).
  bool Scalarized;

  // Replaced instructions whose removal is deferred to finish().
  SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;

  DominatorTree *DT;
  // NOTE(review): the TTI member declaration (presumably
  // "const TargetTransformInfo *TTI;", original line 326) appears to be
  // missing from this extraction -- confirm against the original file.

  // Pass options captured at construction time.
  const bool ScalarizeVariableInsertExtract;
  const bool ScalarizeLoadStore;
  const unsigned ScalarizeMinBits;
};
332
// Legacy pass-manager wrapper that drives ScalarizerVisitor.
class ScalarizerLegacyPass : public FunctionPass {
public:
  static char ID;
  // NOTE(review): the options member declaration (presumably
  // "ScalarizerPassOptions Options;", original line 336) appears to be
  // missing from this extraction -- confirm against the original file.
  ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
  ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
  bool runOnFunction(Function &F) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
342
343} // end anonymous namespace
344
// Out-of-line constructor taking explicit pass options.
// NOTE(review): the member-initializer list / body (original line 346)
// appears to be missing from this extraction -- confirm against the original.
ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
// Declare the analyses this pass consumes (dominator tree, TTI) and that it
// keeps the dominator tree valid.
void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
}
353
char ScalarizerLegacyPass::ID = 0;
// Register the legacy pass.
// NOTE(review): the INITIALIZE_PASS_DEPENDENCY line(s) (original line 357)
// appear to be missing from this extraction -- confirm against the original.
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
                      "Scalarize vector operations", false, false)
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
                    "Scalarize vector operations", false, false)
360
361Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
362 const VectorSplit &VS, ValueVector *cachePtr)
363 : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
364 IsPointer = V->getType()->isPointerTy();
365 if (!CachePtr) {
366 Tmp.resize(VS.NumFragments, nullptr);
367 } else {
368 assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
369 IsPointer) &&
370 "Inconsistent vector sizes");
371 if (VS.NumFragments > CachePtr->size())
372 CachePtr->resize(VS.NumFragments, nullptr);
373 }
374}
375
// Return fragment Frag, creating a new Value for it if necessary.
Value *Scatterer::operator[](unsigned Frag) {
  ValueVector &CV = CachePtr ? *CachePtr : Tmp;
  // Try to reuse a previous value.
  if (CV[Frag])
    return CV[Frag];
  IRBuilder<> Builder(BB, BBI);
  if (IsPointer) {
    // Pointers are scattered as the base pointer plus constant GEPs to each
    // subsequent fragment.
    if (Frag == 0)
      CV[Frag] = V;
    else
      CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
                                            V->getName() + ".i" + Twine(Frag));
    return CV[Frag];
  }

  Type *FragmentTy = VS.getFragmentType(Frag);

  if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
    // Vector fragment: extract the sub-vector with a shuffle whose mask
    // selects this fragment's contiguous lanes.
    SmallVector<int> Mask;
    for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
      Mask.push_back(Frag * VS.NumPacked + J);
    CV[Frag] =
        Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
                                    V->getName() + ".i" + Twine(Frag));
  } else {
    // Search through a chain of InsertElementInsts looking for element Frag.
    // Record other elements in the cache. The new V is still suitable
    // for all uncached indices.
    while (true) {
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
      if (!Insert)
        break;
      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
      if (!Idx)
        break;
      unsigned J = Idx->getZExtValue();
      V = Insert->getOperand(0);
      if (Frag * VS.NumPacked == J) {
        // Found the insert that defines exactly this element.
        CV[Frag] = Insert->getOperand(1);
        return CV[Frag];
      }

      if (VS.NumPacked == 1 && !CV[J]) {
        // Only cache the first entry we find for each index we're not actively
        // searching for. This prevents us from going too far up the chain and
        // caching incorrect entries.
        CV[J] = Insert->getOperand(1);
      }
    }
    // Fall back to an explicit extractelement from whatever V the chain walk
    // ended on.
    CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
                                            V->getName() + ".i" + Twine(Frag));
  }

  return CV[Frag];
}
432
433bool ScalarizerLegacyPass::runOnFunction(Function &F) {
434 if (skipFunction(F))
435 return false;
436
437 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
438 const TargetTransformInfo *TTI =
439 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
440 ScalarizerVisitor Impl(DT, TTI, Options);
441 return Impl.visit(F);
442}
443
445 return new ScalarizerLegacyPass(Options);
446}
447
448bool ScalarizerVisitor::visit(Function &F) {
449 assert(Gathered.empty() && Scattered.empty());
450
451 Scalarized = false;
452
453 // To ensure we replace gathered components correctly we need to do an ordered
454 // traversal of the basic blocks in the function.
455 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
456 for (BasicBlock *BB : RPOT) {
457 for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
458 Instruction *I = &*II;
459 bool Done = InstVisitor::visit(I);
460 ++II;
461 if (Done && I->getType()->isVoidTy()) {
462 I->eraseFromParent();
463 Scalarized = true;
464 }
465 }
466 }
467 return finish();
468}
469
470// Return a scattered form of V that can be accessed by Point. V must be a
471// vector or a pointer to a vector.
472Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
473 const VectorSplit &VS) {
474 if (Argument *VArg = dyn_cast<Argument>(V)) {
475 // Put the scattered form of arguments in the entry block,
476 // so that it can be used everywhere.
477 Function *F = VArg->getParent();
478 BasicBlock *BB = &F->getEntryBlock();
479 return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
480 }
481 if (Instruction *VOp = dyn_cast<Instruction>(V)) {
482 // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
483 // nodes in predecessors. If those predecessors are unreachable from entry,
484 // then the IR in those blocks could have unexpected properties resulting in
485 // infinite loops in Scatterer::operator[]. By simply treating values
486 // originating from instructions in unreachable blocks as undef we do not
487 // need to analyse them further.
488 if (!DT->isReachableFromEntry(VOp->getParent()))
489 return Scatterer(Point->getParent(), Point->getIterator(),
490 PoisonValue::get(V->getType()), VS);
491 // Put the scattered form of an instruction directly after the
492 // instruction, skipping over PHI nodes and debug intrinsics.
493 BasicBlock *BB = VOp->getParent();
494 return Scatterer(
495 BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
496 &Scattered[{V, VS.SplitTy}]);
497 }
498 // In the fallback case, just put the scattered before Point and
499 // keep the result local to Point.
500 return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
501}
502
// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  transferMetadataAndIRFlags(Op, CV);

  // If we already have a scattered form of Op (created from ExtractElements
  // of Op itself), replace them with the new form.
  ValueVector &SV = Scattered[{Op, VS.SplitTy}];
  if (!SV.empty()) {
    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      Value *V = SV[I];
      if (V == nullptr || SV[I] == CV[I])
        continue;

      // NOTE(review): the declaration of 'Old' (original line 520, presumably
      // "Instruction *Old = cast<Instruction>(V);") appears to be missing
      // from this extraction -- confirm against the original file.
      if (isa<Instruction>(CV[I]))
        CV[I]->takeName(Old);
      Old->replaceAllUsesWith(CV[I]);
      PotentiallyDeadInstrs.emplace_back(Old);
    }
  }
  // Record the new scattered form and queue Op for gathering in finish().
  SV = CV;
  Gathered.push_back(GatherList::value_type(Op, &SV));
}
530
531// Replace Op with CV and collect Op has a potentially dead instruction.
532void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
533 if (CV != Op) {
534 Op->replaceAllUsesWith(CV);
535 PotentiallyDeadInstrs.emplace_back(Op);
536 Scalarized = true;
537 }
538}
539
540// Return true if it is safe to transfer the given metadata tag from
541// vector to scalar instructions.
542bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
543 return (Tag == LLVMContext::MD_tbaa
544 || Tag == LLVMContext::MD_fpmath
545 || Tag == LLVMContext::MD_tbaa_struct
546 || Tag == LLVMContext::MD_invariant_load
547 || Tag == LLVMContext::MD_alias_scope
548 || Tag == LLVMContext::MD_noalias
549 || Tag == LLVMContext::MD_mem_parallel_loop_access
550 || Tag == LLVMContext::MD_access_group);
551}
552
// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
                                                   const ValueVector &CV) {
  // NOTE(review): the declaration of 'MDs' (original line 557, presumably
  // "SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;") appears to be
  // missing from this extraction -- confirm against the original file.
  Op->getAllMetadataOtherThanDebugLoc(MDs);
  for (Value *V : CV) {
    // Constant fragments carry no metadata; only instructions are annotated.
    if (Instruction *New = dyn_cast<Instruction>(V)) {
      for (const auto &MD : MDs)
        if (canTransferMetadata(MD.first))
          New->setMetadata(MD.first, MD.second);
      New->copyIRFlags(Op);
      // Preserve the original debug location unless one was already set.
      if (Op->getDebugLoc() && !New->getDebugLoc())
        New->setDebugLoc(Op->getDebugLoc());
    }
  }
}
570
// Determine how Ty is split, if at all. Returns std::nullopt when Ty is not a
// fixed vector or when no useful split exists.
std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
  VectorSplit Split;
  // NOTE(review): the assignment to Split.VecTy (original line 574, presumably
  // "Split.VecTy = dyn_cast<FixedVectorType>(Ty);") appears to be missing
  // from this extraction -- confirm against the original file.
  if (!Split.VecTy)
    return {};

  unsigned NumElems = Split.VecTy->getNumElements();
  Type *ElemTy = Split.VecTy->getElementType();

  if (NumElems == 1 || ElemTy->isPointerTy() ||
      2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
    // Full scalarization: one fragment per element. Pointer elements are
    // always fully scalarized, as are elements too wide to pack in pairs
    // under ScalarizeMinBits.
    Split.NumPacked = 1;
    Split.NumFragments = NumElems;
    Split.SplitTy = ElemTy;
  } else {
    // Partial scalarization: pack as many elements as fit in ScalarizeMinBits
    // into each fragment.
    Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
    if (Split.NumPacked >= NumElems)
      return {};

    Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
    Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);

    // Leftover elements form a narrower trailing fragment (a smaller vector,
    // or a lone scalar when only one element remains).
    unsigned RemainderElems = NumElems % Split.NumPacked;
    if (RemainderElems > 1)
      Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
    else if (RemainderElems == 1)
      Split.RemainderTy = ElemTy;
  }

  return Split;
}
603
604// Try to fill in Layout from Ty, returning true on success. Alignment is
605// the alignment of the vector, or std::nullopt if the ABI default should be
606// used.
607std::optional<VectorLayout>
608ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
609 const DataLayout &DL) {
610 std::optional<VectorSplit> VS = getVectorSplit(Ty);
611 if (!VS)
612 return {};
613
614 VectorLayout Layout;
615 Layout.VS = *VS;
616 // Check that we're dealing with full-byte fragments.
617 if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
618 (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
619 return {};
620 Layout.VecAlign = Alignment;
621 Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
622 return Layout;
623}
624
625// Scalarize one-operand instruction I, using Split(Builder, X, Name)
626// to create an instruction like I with operand X and name Name.
627template<typename Splitter>
628bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
629 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
630 if (!VS)
631 return false;
632
633 std::optional<VectorSplit> OpVS;
634 if (I.getOperand(0)->getType() == I.getType()) {
635 OpVS = VS;
636 } else {
637 OpVS = getVectorSplit(I.getOperand(0)->getType());
638 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
639 return false;
640 }
641
642 IRBuilder<> Builder(&I);
643 Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
644 assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
645 ValueVector Res;
646 Res.resize(VS->NumFragments);
647 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
648 Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
649 gather(&I, Res, *VS);
650 return true;
651}
652
653// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
654// to create an instruction like I with operands X and Y and name Name.
655template<typename Splitter>
656bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
657 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
658 if (!VS)
659 return false;
660
661 std::optional<VectorSplit> OpVS;
662 if (I.getOperand(0)->getType() == I.getType()) {
663 OpVS = VS;
664 } else {
665 OpVS = getVectorSplit(I.getOperand(0)->getType());
666 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
667 return false;
668 }
669
670 IRBuilder<> Builder(&I);
671 Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
672 Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
673 assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
674 assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
675 ValueVector Res;
676 Res.resize(VS->NumFragments);
677 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
678 Value *Op0 = VOp0[Frag];
679 Value *Op1 = VOp1[Frag];
680 Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
681 }
682 gather(&I, Res, *VS);
683 return true;
684}
685
/// If a call to a vector typed intrinsic function, split into a scalar call per
/// element if possible for the intrinsic.
bool ScalarizerVisitor::splitCall(CallInst &CI) {
  Type *CallType = CI.getType();
  bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType);
  // For a struct-of-vectors return (e.g. frexp), the split is derived from
  // the first member's vector type; otherwise from the return type itself.
  std::optional<VectorSplit> VS;
  if (AreAllVectorsOfMatchingSize)
    VS = getVectorSplit(CallType->getContainedType(0));
  else
    VS = getVectorSplit(CallType);
  if (!VS)
    return false;

  // NOTE(review): the declaration of 'F' (original line ~699, presumably
  // "Function *F = CI.getCalledFunction();") appears to be missing from this
  // extraction -- confirm against the original file.
  if (!F)
    return false;

  Intrinsic::ID ID = F->getIntrinsicID();

  // NOTE(review): the condition guarding this early return (original line
  // ~705; presumably a check that ID is a scalarizable intrinsic) appears to
  // be missing from this extraction.
    return false;

  // unsigned NumElems = VT->getNumElements();
  unsigned NumArgs = CI.arg_size();

  // Per-argument bookkeeping: scalar operands are reused verbatim, vector
  // operands are scattered, and OverloadIdx records which overload-type slot
  // an operand occupies so the remainder iteration can retype it.
  ValueVector ScalarOperands(NumArgs);
  SmallVector<Scatterer, 8> Scattered(NumArgs);
  SmallVector<int> OverloadIdx(NumArgs, -1);

  // NOTE(review): the declaration of 'Tys' (original line ~715) and the check
  // for return-type overloading (original line ~717) appear to be missing
  // from this extraction.
  // Add return type if intrinsic is overloaded on it.
    Tys.push_back(VS->SplitTy);

  if (AreAllVectorsOfMatchingSize) {
    for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
      std::optional<VectorSplit> CurrVS =
          getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I)));
      // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a
      // VectorSplit is not returned and we will bailout of handling this call.
      // The secondary bailout case is if NumPacked does not match. This can
      // happen if ScalarizeMinBits is not set to the default. This means with
      // certain ScalarizeMinBits intrinsics like frexp will only scalarize when
      // the struct elements have the same bitness.
      if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
        return false;
      // NOTE(review): a line (original line ~732) appears to be missing from
      // this extraction before this push_back.
      Tys.push_back(CurrVS->SplitTy);
    }
  }
  // Assumes that any vector type has the same number of elements as the return
  // vector type, which is true for all current intrinsics.
  for (unsigned I = 0; I != NumArgs; ++I) {
    Value *OpI = CI.getOperand(I);
    // NOTE(review): the tail of this condition (original line ~741, presumably
    // "dyn_cast<FixedVectorType>(OpI->getType())) {") appears to be missing
    // from this extraction.
    if ([[maybe_unused]] auto *OpVecTy =
      assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
      std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // The natural split of the operand doesn't match the result. This could
        // happen if the vector elements are different and the ScalarizeMinBits
        // option is used.
        //
        // We could in principle handle this case as well, at the cost of
        // complicating the scattering machinery to support multiple scattering
        // granularities for a single value.
        return false;
      }

      Scattered[I] = scatter(&CI, OpI, *OpVS);
      // NOTE(review): a condition checking operand-type overloading (original
      // line ~756) appears to be missing from this extraction.
        OverloadIdx[I] = Tys.size();
        Tys.push_back(OpVS->SplitTy);
      }
    } else {
      ScalarOperands[I] = OpI;
      // NOTE(review): an overload-check condition (original line ~762)
      // appears to be missing from this extraction.
        Tys.push_back(OpI->getType());
    }
  }

  ValueVector Res(VS->NumFragments);
  ValueVector ScalarCallOps(NumArgs);

  Function *NewIntrin =
      Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
  IRBuilder<> Builder(&CI);

  // Perform actual scalarization, taking care to preserve any scalar operands.
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    // The last fragment may need a differently-typed declaration when a
    // remainder type exists.
    bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
    ScalarCallOps.clear();

    if (IsRemainder)
      Tys[0] = VS->RemainderTy;

    for (unsigned J = 0; J != NumArgs; ++J) {
      // NOTE(review): the condition distinguishing scalar operands (original
      // line ~783, presumably "if (ScalarOperands[J]) {") appears to be
      // missing from this extraction.
        ScalarCallOps.push_back(ScalarOperands[J]);
      } else {
        ScalarCallOps.push_back(Scattered[J][I]);
        if (IsRemainder && OverloadIdx[J] >= 0)
          Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
      }
    }

    if (IsRemainder)
      NewIntrin = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);

    Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
                                CI.getName() + ".i" + Twine(I));
  }

  gather(&CI, Res, *VS);
  return true;
}
802
803bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
804 std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
805 if (!VS)
806 return false;
807
808 std::optional<VectorSplit> CondVS;
809 if (isa<FixedVectorType>(SI.getCondition()->getType())) {
810 CondVS = getVectorSplit(SI.getCondition()->getType());
811 if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
812 // This happens when ScalarizeMinBits is used.
813 return false;
814 }
815 }
816
817 IRBuilder<> Builder(&SI);
818 Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
819 Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
820 assert(VOp1.size() == VS->NumFragments && "Mismatched select");
821 assert(VOp2.size() == VS->NumFragments && "Mismatched select");
822 ValueVector Res;
823 Res.resize(VS->NumFragments);
824
825 if (CondVS) {
826 Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
827 assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
828 for (unsigned I = 0; I < VS->NumFragments; ++I) {
829 Value *Op0 = VOp0[I];
830 Value *Op1 = VOp1[I];
831 Value *Op2 = VOp2[I];
832 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
833 SI.getName() + ".i" + Twine(I));
834 }
835 } else {
836 Value *Op0 = SI.getOperand(0);
837 for (unsigned I = 0; I < VS->NumFragments; ++I) {
838 Value *Op1 = VOp1[I];
839 Value *Op2 = VOp2[I];
840 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
841 SI.getName() + ".i" + Twine(I));
842 }
843 }
844 gather(&SI, Res, *VS);
845 return true;
846}
847
848bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
849 return splitBinary(ICI, ICmpSplitter(ICI));
850}
851
852bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
853 return splitBinary(FCI, FCmpSplitter(FCI));
854}
855
856bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
857 return splitUnary(UO, UnarySplitter(UO));
858}
859
860bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
861 return splitBinary(BO, BinarySplitter(BO));
862}
863
// Scalarize a vector GEP: emit one scalar GEP per fragment, reusing any
// operand that is scalar across all fragments.
bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&GEPI);
  unsigned NumIndices = GEPI.getNumIndices();

  // The base pointer and indices might be scalar even if it's a vector GEP.
  SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
  SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};

  for (unsigned I = 0; I < 1 + NumIndices; ++I) {
    // NOTE(review): the tail of this condition (original line 878, presumably
    // "dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {") appears
    // to be missing from this extraction -- confirm against the original.
    if (auto *VecTy =
      std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // This can happen when ScalarizeMinBits is used.
        return false;
      }
      ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
    } else {
      ScalarOps[I] = GEPI.getOperand(I);
    }
  }

  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    // Build the operand list for fragment I: scalar operands are shared,
    // vector operands supply their I-th fragment.
    SmallVector<Value *, 8> SplitOps;
    SplitOps.resize(1 + NumIndices);
    for (unsigned J = 0; J < 1 + NumIndices; ++J) {
      if (ScalarOps[J])
        SplitOps[J] = ScalarOps[J];
      else
        SplitOps[J] = ScatterOps[J][I];
    }
    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
                               ArrayRef(SplitOps).drop_front(),
                               GEPI.getName() + ".i" + Twine(I));
    // Carry the inbounds flag over to each per-fragment GEP.
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
        NewGEPI->setIsInBounds();
  }
  gather(&GEPI, Res, *VS);
  return true;
}
911
912bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
913 std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
914 if (!DestVS)
915 return false;
916
917 std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
918 if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
919 return false;
920
921 IRBuilder<> Builder(&CI);
922 Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
923 assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
924 ValueVector Res;
925 Res.resize(DestVS->NumFragments);
926 for (unsigned I = 0; I < DestVS->NumFragments; ++I)
927 Res[I] =
928 Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
929 CI.getName() + ".i" + Twine(I));
930 gather(&CI, Res, *DestVS);
931 return true;
932}
933
934bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
935 std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
936 std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
937 if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
938 return false;
939
940 const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
941
942 // Vectors of pointers are always fully scalarized.
943 assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
944
945 IRBuilder<> Builder(&BCI);
946 Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
947 ValueVector Res;
948 Res.resize(DstVS->NumFragments);
949
950 unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
951 unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
952
953 if (isPointerTy || DstSplitBits == SrcSplitBits) {
954 assert(DstVS->NumFragments == SrcVS->NumFragments);
955 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
956 Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
957 BCI.getName() + ".i" + Twine(I));
958 }
959 } else if (SrcSplitBits % DstSplitBits == 0) {
960 // Convert each source fragment to the same-sized destination vector and
961 // then scatter the result to the destination.
962 VectorSplit MidVS;
963 MidVS.NumPacked = DstVS->NumPacked;
964 MidVS.NumFragments = SrcSplitBits / DstSplitBits;
965 MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
966 MidVS.NumPacked * MidVS.NumFragments);
967 MidVS.SplitTy = DstVS->SplitTy;
968
969 unsigned ResI = 0;
970 for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
971 Value *V = Op0[I];
972
973 // Look through any existing bitcasts before converting to <N x t2>.
974 // In the best case, the resulting conversion might be a no-op.
976 while ((VI = dyn_cast<Instruction>(V)) &&
977 VI->getOpcode() == Instruction::BitCast)
978 V = VI->getOperand(0);
979
980 V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");
981
982 Scatterer Mid = scatter(&BCI, V, MidVS);
983 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
984 Res[ResI++] = Mid[J];
985 }
986 } else if (DstSplitBits % SrcSplitBits == 0) {
987 // Gather enough source fragments to make up a destination fragment and
988 // then convert to the destination type.
989 VectorSplit MidVS;
990 MidVS.NumFragments = DstSplitBits / SrcSplitBits;
991 MidVS.NumPacked = SrcVS->NumPacked;
992 MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
993 MidVS.NumPacked * MidVS.NumFragments);
994 MidVS.SplitTy = SrcVS->SplitTy;
995
996 unsigned SrcI = 0;
997 SmallVector<Value *, 8> ConcatOps;
998 ConcatOps.resize(MidVS.NumFragments);
999 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
1000 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
1001 ConcatOps[J] = Op0[SrcI++];
1002 Value *V = concatenate(Builder, ConcatOps, MidVS,
1003 BCI.getName() + ".i" + Twine(I));
1004 Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
1005 BCI.getName() + ".i" + Twine(I));
1006 }
1007 } else {
1008 return false;
1009 }
1010
1011 gather(&BCI, Res, *DstVS);
1012 return true;
1013}
1014
1015bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
1016 std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
1017 if (!VS)
1018 return false;
1019
1020 IRBuilder<> Builder(&IEI);
1021 Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
1022 Value *NewElt = IEI.getOperand(1);
1023 Value *InsIdx = IEI.getOperand(2);
1024
1025 ValueVector Res;
1026 Res.resize(VS->NumFragments);
1027
1028 if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
1029 unsigned Idx = CI->getZExtValue();
1030 unsigned Fragment = Idx / VS->NumPacked;
1031 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1032 if (I == Fragment) {
1033 bool IsPacked = VS->NumPacked > 1;
1034 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1035 !VS->RemainderTy->isVectorTy())
1036 IsPacked = false;
1037 if (IsPacked) {
1038 Res[I] =
1039 Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
1040 } else {
1041 Res[I] = NewElt;
1042 }
1043 } else {
1044 Res[I] = Op0[I];
1045 }
1046 }
1047 } else {
1048 // Never split a variable insertelement that isn't fully scalarized.
1049 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1050 return false;
1051
1052 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1053 Value *ShouldReplace =
1054 Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
1055 InsIdx->getName() + ".is." + Twine(I));
1056 Value *OldElt = Op0[I];
1057 Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
1058 IEI.getName() + ".i" + Twine(I));
1059 }
1060 }
1061
1062 gather(&IEI, Res, *VS);
1063 return true;
1064}
1065
1066bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
1067 Value *Op = EVI.getOperand(0);
1068 Type *OpTy = Op->getType();
1069 ValueVector Res;
1070 if (!isStructOfMatchingFixedVectors(OpTy))
1071 return false;
1072 if (CallInst *CI = dyn_cast<CallInst>(Op)) {
1073 Function *F = CI->getCalledFunction();
1074 if (!F)
1075 return false;
1076 Intrinsic::ID ID = F->getIntrinsicID();
1078 return false;
1079 // Note: Fall through means Operand is a`CallInst` and it is defined in
1080 // `isTriviallyScalarizable`.
1081 } else
1082 return false;
1083 Type *VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
1084 std::optional<VectorSplit> VS = getVectorSplit(VecType);
1085 if (!VS)
1086 return false;
1087 for (unsigned I = 1; I < OpTy->getNumContainedTypes(); I++) {
1088 std::optional<VectorSplit> CurrVS =
1089 getVectorSplit(cast<FixedVectorType>(OpTy->getContainedType(I)));
1090 // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a
1091 // VectorSplit is not returned and we will bailout of handling this call.
1092 // The secondary bailout case is if NumPacked does not match. This can
1093 // happen if ScalarizeMinBits is not set to the default. This means with
1094 // certain ScalarizeMinBits intrinsics like frexp will only scalarize when
1095 // the struct elements have the same bitness.
1096 if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
1097 return false;
1098 }
1099 IRBuilder<> Builder(&EVI);
1100 Scatterer Op0 = scatter(&EVI, Op, *VS);
1101 assert(!EVI.getIndices().empty() && "Make sure an index exists");
1102 // Note for our use case we only care about the top level index.
1103 unsigned Index = EVI.getIndices()[0];
1104 for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
1105 Value *ResElem = Builder.CreateExtractValue(
1106 Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));
1107 Res.push_back(ResElem);
1108 }
1109
1110 Type *ActualVecType = cast<FixedVectorType>(OpTy->getContainedType(Index));
1111 std::optional<VectorSplit> AVS = getVectorSplit(ActualVecType);
1112 gather(&EVI, Res, *AVS);
1113 return true;
1114}
1115
1116bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
1117 std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
1118 if (!VS)
1119 return false;
1120
1121 IRBuilder<> Builder(&EEI);
1122 Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
1123 Value *ExtIdx = EEI.getOperand(1);
1124
1125 if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
1126 unsigned Idx = CI->getZExtValue();
1127 unsigned Fragment = Idx / VS->NumPacked;
1128 Value *Res = Op0[Fragment];
1129 bool IsPacked = VS->NumPacked > 1;
1130 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1131 !VS->RemainderTy->isVectorTy())
1132 IsPacked = false;
1133 if (IsPacked)
1134 Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
1135 replaceUses(&EEI, Res);
1136 return true;
1137 }
1138
1139 // Never split a variable extractelement that isn't fully scalarized.
1140 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1141 return false;
1142
1143 Value *Res = PoisonValue::get(VS->VecTy->getElementType());
1144 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1145 Value *ShouldExtract =
1146 Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
1147 ExtIdx->getName() + ".is." + Twine(I));
1148 Value *Elt = Op0[I];
1149 Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
1150 EEI.getName() + ".upto" + Twine(I));
1151 }
1152 replaceUses(&EEI, Res);
1153 return true;
1154}
1155
1156bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1157 std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
1158 std::optional<VectorSplit> VSOp =
1159 getVectorSplit(SVI.getOperand(0)->getType());
1160 if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
1161 return false;
1162
1163 Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
1164 Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
1165 ValueVector Res;
1166 Res.resize(VS->NumFragments);
1167
1168 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1169 int Selector = SVI.getMaskValue(I);
1170 if (Selector < 0)
1171 Res[I] = PoisonValue::get(VS->VecTy->getElementType());
1172 else if (unsigned(Selector) < Op0.size())
1173 Res[I] = Op0[Selector];
1174 else
1175 Res[I] = Op1[Selector - Op0.size()];
1176 }
1177 gather(&SVI, Res, *VS);
1178 return true;
1179}
1180
1181bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
1182 std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
1183 if (!VS)
1184 return false;
1185
1186 IRBuilder<> Builder(&PHI);
1187 ValueVector Res;
1188 Res.resize(VS->NumFragments);
1189
1190 unsigned NumOps = PHI.getNumOperands();
1191 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1192 Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
1193 PHI.getName() + ".i" + Twine(I));
1194 }
1195
1196 for (unsigned I = 0; I < NumOps; ++I) {
1197 Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
1198 BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
1199 for (unsigned J = 0; J < VS->NumFragments; ++J)
1200 cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
1201 }
1202 gather(&PHI, Res, *VS);
1203 return true;
1204}
1205
1206bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
1207 if (!ScalarizeLoadStore)
1208 return false;
1209 if (!LI.isSimple())
1210 return false;
1211
1212 std::optional<VectorLayout> Layout = getVectorLayout(
1213 LI.getType(), LI.getAlign(), LI.getDataLayout());
1214 if (!Layout)
1215 return false;
1216
1217 IRBuilder<> Builder(&LI);
1218 Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
1219 ValueVector Res;
1220 Res.resize(Layout->VS.NumFragments);
1221
1222 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1223 Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
1224 Align(Layout->getFragmentAlign(I)),
1225 LI.getName() + ".i" + Twine(I));
1226 }
1227 gather(&LI, Res, Layout->VS);
1228 return true;
1229}
1230
1231bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
1232 if (!ScalarizeLoadStore)
1233 return false;
1234 if (!SI.isSimple())
1235 return false;
1236
1237 Value *FullValue = SI.getValueOperand();
1238 std::optional<VectorLayout> Layout = getVectorLayout(
1239 FullValue->getType(), SI.getAlign(), SI.getDataLayout());
1240 if (!Layout)
1241 return false;
1242
1243 IRBuilder<> Builder(&SI);
1244 Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
1245 Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
1246
1247 ValueVector Stores;
1248 Stores.resize(Layout->VS.NumFragments);
1249 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1250 Value *Val = VVal[I];
1251 Value *Ptr = VPtr[I];
1252 Stores[I] =
1253 Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
1254 }
1255 transferMetadataAndIRFlags(&SI, Stores);
1256 return true;
1257}
1258
// Calls are handled by the shared call-splitting logic (splitCall), which
// decides whether the callee can be scalarized and performs the split.
bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}
1262
1263bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
1264 return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
1265 return Builder.CreateFreeze(Op, Name);
1266 });
1267}
1268
1269// Delete the instructions that we scalarized. If a full vector result
1270// is still needed, recreate it using InsertElements.
1271bool ScalarizerVisitor::finish() {
1272 // The presence of data in Gathered or Scattered indicates changes
1273 // made to the Function.
1274 if (Gathered.empty() && Scattered.empty() && !Scalarized)
1275 return false;
1276 for (const auto &GMI : Gathered) {
1277 Instruction *Op = GMI.first;
1278 ValueVector &CV = *GMI.second;
1279 if (!Op->use_empty()) {
1280 // The value is still needed, so recreate it using a series of
1281 // insertelements and/or shufflevectors.
1282 Value *Res;
1283 if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
1284 BasicBlock *BB = Op->getParent();
1285 IRBuilder<> Builder(Op);
1286 if (isa<PHINode>(Op))
1287 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1288
1289 VectorSplit VS = *getVectorSplit(Ty);
1290 assert(VS.NumFragments == CV.size());
1291
1292 Res = concatenate(Builder, CV, VS, Op->getName());
1293
1294 Res->takeName(Op);
1295 } else if (auto *Ty = dyn_cast<StructType>(Op->getType())) {
1296 BasicBlock *BB = Op->getParent();
1297 IRBuilder<> Builder(Op);
1298 if (isa<PHINode>(Op))
1299 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1300
1301 // Iterate over each element in the struct
1302 unsigned NumOfStructElements = Ty->getNumElements();
1303 SmallVector<ValueVector, 4> ElemCV(NumOfStructElements);
1304 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1305 for (auto *CVelem : CV) {
1306 Value *Elem = Builder.CreateExtractValue(
1307 CVelem, I, Op->getName() + ".elem" + Twine(I));
1308 ElemCV[I].push_back(Elem);
1309 }
1310 }
1311 Res = PoisonValue::get(Ty);
1312 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1313 Type *ElemTy = Ty->getElementType(I);
1314 assert(isa<FixedVectorType>(ElemTy) &&
1315 "Only Structs of all FixedVectorType supported");
1316 VectorSplit VS = *getVectorSplit(ElemTy);
1317 assert(VS.NumFragments == CV.size());
1318
1319 Value *ConcatenatedVector =
1320 concatenate(Builder, ElemCV[I], VS, Op->getName());
1321 Res = Builder.CreateInsertValue(Res, ConcatenatedVector, I,
1322 Op->getName() + ".insert");
1323 }
1324 } else {
1325 assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
1326 Res = CV[0];
1327 if (Op == Res)
1328 continue;
1329 }
1330 Op->replaceAllUsesWith(Res);
1331 }
1332 PotentiallyDeadInstrs.emplace_back(Op);
1333 }
1334 Gathered.clear();
1335 Scattered.clear();
1336 Scalarized = false;
1337
1339
1340 return true;
1341}
1342
1346 ScalarizerVisitor Impl(DT, TTI, Options);
1347 bool Changed = Impl.visit(F);
1350 return Changed ? PA : PreservedAnalyses::all();
1351}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define T
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This pass converts vector operations into scalar operations (or, optionally, operations on smaller ve...
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
This class represents a no-op cast from one type to another.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Type * getSrcTy() const
Return the source type, as a convenience.
Definition InstrTypes.h:617
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:612
Type * getDestTy() const
Return the destination type, as a convenience.
Definition InstrTypes.h:619
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This instruction extracts a single (scalar) element from a VectorType value.
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Type * getSourceElementType() const
unsigned getNumIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2625
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2618
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2637
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
Base class for instruction visitors.
Definition InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition InstVisitor.h:87
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
An instruction for reading from memory.
Value * getPointerOperand()
bool isSimple() const
Align getAlign() const
Return the alignment of the access that is being performed.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class represents the LLVM 'select' instruction.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
unsigned getNumContainedTypes() const
Return the number of types in the derived type.
Definition Type.h:387
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition Type.h:381
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:130
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)
Identify if the intrinsic is trivially scalarizable.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1657
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
@ Done
Definition Threading.h:60
LLVM_ABI BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
bool isPointerTy(const Type *T)
Definition SPIRVUtils.h:288
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
LLVM_ABI FunctionPass * createScalarizerPass(const ScalarizerPassOptions &Options=ScalarizerPassOptions())
Create a legacy pass manager instance of the Scalarizer pass.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition Local.cpp:548
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39