LLVM 22.0.0git
Scalarizer.cpp
Go to the documentation of this file.
1//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts vector operations into scalar operations (or, optionally,
10// operations on smaller vector widths), in order to expose optimization
11// opportunities on the individual scalar operations.
12// It is mainly intended for targets that do not have vector units, but it
13// may also be useful for revectorizing code to different vector widths.
14//
15//===----------------------------------------------------------------------===//
16
20#include "llvm/ADT/Twine.h"
23#include "llvm/IR/Argument.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstVisitor.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
35#include "llvm/IR/Intrinsics.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Module.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/Value.h"
43#include <cassert>
44#include <cstdint>
45#include <iterator>
46#include <map>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "scalarizer"
52
// NOTE(review): the signature line (original line 53) is not visible in this
// chunk — presumably `static BasicBlock::iterator
// skipPastPhiNodesAndDbg(BasicBlock::iterator Itr)`; confirm against the full
// file. Advances Itr past leading PHI nodes and debug intrinsics so that new
// instructions can be inserted at a legal position.
54 BasicBlock *BB = Itr->getParent();
// PHIs must stay grouped at the block head; jump to the first legal
// insertion point instead.
55 if (isa<PHINode>(Itr))
56 Itr = BB->getFirstInsertionPt();
// Also step over any debug intrinsics at the insertion point.
57 if (Itr != BB->end())
58 Itr = skipDebugIntrinsics(Itr);
59 return Itr;
60}
61
62// Used to store the scattered form of a vector.
// NOTE(review): the alias on original line 63 (presumably
// `using ValueVector = SmallVector<Value *, 8>;`) is not visible in this
// chunk — confirm against the full file.
64
65// Used to map a vector Value and associated type to its scattered form.
66// The associated type is only non-null for pointer values that are "scattered"
67// when used as pointer operands to load or store.
68//
69// We use std::map because we want iterators to persist across insertion and
70// because the values are relatively large.
71using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
72
73// Lists Instructions that have been replaced with scalar implementations,
74// along with a pointer to their scattered forms.
// NOTE(review): the GatherList alias (original line 75) is not visible in
// this chunk — confirm against the full file.
76
77namespace {
78
79struct VectorSplit {
80 // The type of the vector.
81 FixedVectorType *VecTy = nullptr;
82
83 // The number of elements packed in a fragment (other than the remainder).
84 unsigned NumPacked = 0;
85
86 // The number of fragments (scalars or smaller vectors) into which the vector
87 // shall be split.
88 unsigned NumFragments = 0;
89
90 // The type of each complete fragment.
91 Type *SplitTy = nullptr;
92
93 // The type of the remainder (last) fragment; null if all fragments are
94 // complete.
95 Type *RemainderTy = nullptr;
96
97 Type *getFragmentType(unsigned I) const {
98 return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
99 }
100};
101
102// Provides a very limited vector-like interface for lazily accessing one
103// component of a scattered vector or vector pointer.
104class Scatterer {
105public:
106 Scatterer() = default;
107
108 // Scatter V into Size components. If new instructions are needed,
109 // insert them before BBI in BB. If Cache is nonnull, use it to cache
110 // the results.
111 Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
112 const VectorSplit &VS, ValueVector *cachePtr = nullptr);
113
114 // Return component I, creating a new Value for it if necessary.
115 Value *operator[](unsigned I);
116
117 // Return the number of components.
118 unsigned size() const { return VS.NumFragments; }
119
120private:
// Block and insertion point used when new extract/GEP instructions are made.
121 BasicBlock *BB;
// NOTE(review): the member on original line 122 (presumably
// `BasicBlock::iterator BBI;`, used by the constructor and operator[]) is
// not visible in this chunk — confirm against the full file.
// The vector (or pointer-to-vector) being scattered.
123 Value *V;
// How V's type is divided into fragments.
124 VectorSplit VS;
// True when V is a pointer operand rather than a vector value.
125 bool IsPointer;
// External memoization storage, or null to use the local Tmp below.
126 ValueVector *CachePtr;
127 ValueVector Tmp;
128};
129
130// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
131// called Name that compares X and Y in the same way as FCI.
132struct FCmpSplitter {
133 FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
134
135 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
136 const Twine &Name) const {
137 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
138 }
139
140 FCmpInst &FCI;
141};
142
143// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
144// called Name that compares X and Y in the same way as ICI.
145struct ICmpSplitter {
146 ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
147
148 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
149 const Twine &Name) const {
150 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
151 }
152
153 ICmpInst &ICI;
154};
155
156// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
157// a unary operator like UO called Name with operand X.
158struct UnarySplitter {
159 UnarySplitter(UnaryOperator &uo) : UO(uo) {}
160
161 Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
162 return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
163 }
164
165 UnaryOperator &UO;
166};
167
168// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
169// a binary operator like BO called Name with operands X and Y.
170struct BinarySplitter {
171 BinarySplitter(BinaryOperator &bo) : BO(bo) {}
172
173 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
174 const Twine &Name) const {
175 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
176 }
177
178 BinaryOperator &BO;
179};
180
181// Information about a load or store that we're scalarizing.
182struct VectorLayout {
183 VectorLayout() = default;
184
185 // Return the alignment of fragment Frag.
186 Align getFragmentAlign(unsigned Frag) {
187 return commonAlignment(VecAlign, Frag * SplitSize);
188 }
189
190 // The split of the underlying vector type.
191 VectorSplit VS;
192
193 // The alignment of the vector.
194 Align VecAlign;
195
196 // The size of each (non-remainder) fragment in bytes.
197 uint64_t SplitSize = 0;
198};
199} // namespace
200
// NOTE(review): the signature line (original line 201) is not visible in this
// chunk — presumably `static bool isStructOfMatchingFixedVectors(Type *Ty) {`
// (the name it is called by in splitCall); confirm against the full file.
// Returns true iff Ty is a struct whose members are all fixed vectors with
// the same element count.
202 if (!isa<StructType>(Ty))
203 return false;
204 unsigned StructSize = Ty->getNumContainedTypes();
205 if (StructSize < 1)
206 return false;
// The first member fixes the required element count for the rest.
207 FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(0));
208 if (!VecTy)
209 return false;
210 unsigned VecSize = VecTy->getNumElements();
// Every remaining member must also be a fixed vector of VecSize elements.
211 for (unsigned I = 1; I < StructSize; I++) {
212 VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(I));
213 if (!VecTy || VecSize != VecTy->getNumElements())
214 return false;
215 }
216 return true;
217}
218
219/// Concatenate the given fragments to a single vector value of the type
220/// described in @p VS.
221static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
222 const VectorSplit &VS, Twine Name) {
223 unsigned NumElements = VS.VecTy->getNumElements();
224 SmallVector<int> ExtendMask;
225 SmallVector<int> InsertMask;
226
227 if (VS.NumPacked > 1) {
228 // Prepare the shufflevector masks once and re-use them for all
229 // fragments.
// ExtendMask widens a fragment to the full vector width, padding with -1
// (undef lanes) beyond the fragment's own elements.
230 ExtendMask.resize(NumElements, -1);
231 for (unsigned I = 0; I < VS.NumPacked; ++I)
232 ExtendMask[I] = I;
233
// InsertMask starts as the identity; lanes are temporarily redirected to
// the second shuffle operand when a fragment is merged in, then restored.
234 InsertMask.resize(NumElements);
235 for (unsigned I = 0; I < NumElements; ++I)
236 InsertMask[I] = I;
237 }
238
239 Value *Res = PoisonValue::get(VS.VecTy);
240 for (unsigned I = 0; I < VS.NumFragments; ++I) {
241 Value *Fragment = Fragments[I];
242
// The final fragment may be narrower (remainder of the division).
243 unsigned NumPacked = VS.NumPacked;
244 if (I == VS.NumFragments - 1 && VS.RemainderTy) {
245 if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
246 NumPacked = RemVecTy->getNumElements();
247 else
248 NumPacked = 1;
249 }
250
// Scalar fragments are inserted one element at a time; vector fragments
// are merged with a pair of shuffles.
251 if (NumPacked == 1) {
252 Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
253 Name + ".upto" + Twine(I));
254 } else {
255 if (NumPacked < VS.NumPacked) {
256 // The remainder fragment is narrower: rebuild ExtendMask to its size.
257 ExtendMask.truncate(NumPacked);
258 ExtendMask.resize(NumElements, -1);
259 }
260
261 Fragment = Builder.CreateShuffleVector(
262 Fragment, PoisonValue::get(Fragment->getType()), ExtendMask);
263 if (I == 0) {
264 Res = Fragment;
265 } else {
// Point the fragment's lanes at the second operand, shuffle, then restore
// the identity mapping for the next iteration.
266 for (unsigned J = 0; J < NumPacked; ++J)
267 InsertMask[I * VS.NumPacked + J] = NumElements + J;
268 Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
269 Name + ".upto" + Twine(I));
270 for (unsigned J = 0; J < NumPacked; ++J)
271 InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
272 }
273 }
274 }
275
276 return Res;
277}
278
279namespace {
// The worker that performs the scalarization: visits every instruction in a
// function and, where profitable/possible, replaces vector operations with
// per-fragment scalar (or narrower-vector) equivalents.
280class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
281public:
282 ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
283 ScalarizerPassOptions Options)
284 : DT(DT), TTI(TTI),
285 ScalarizeVariableInsertExtract(Options.ScalarizeVariableInsertExtract),
286 ScalarizeLoadStore(Options.ScalarizeLoadStore),
287 ScalarizeMinBits(Options.ScalarizeMinBits) {}
288
// Entry point: scalarize F; returns true if anything changed.
289 bool visit(Function &F);
290
291 // InstVisitor methods. They return true if the instruction was scalarized,
292 // false if nothing changed.
293 bool visitInstruction(Instruction &I) { return false; }
294 bool visitSelectInst(SelectInst &SI);
295 bool visitICmpInst(ICmpInst &ICI);
296 bool visitFCmpInst(FCmpInst &FCI);
297 bool visitUnaryOperator(UnaryOperator &UO);
298 bool visitBinaryOperator(BinaryOperator &BO);
299 bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
300 bool visitCastInst(CastInst &CI);
301 bool visitBitCastInst(BitCastInst &BCI);
302 bool visitInsertElementInst(InsertElementInst &IEI);
303 bool visitExtractElementInst(ExtractElementInst &EEI);
304 bool visitExtractValueInst(ExtractValueInst &EVI);
305 bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
306 bool visitPHINode(PHINode &PHI);
307 bool visitLoadInst(LoadInst &LI);
308 bool visitStoreInst(StoreInst &SI);
309 bool visitCallInst(CallInst &ICI);
310 bool visitFreezeInst(FreezeInst &FI);
311
312private:
313 Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
314 void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
315 void replaceUses(Instruction *Op, Value *CV);
316 bool canTransferMetadata(unsigned Kind);
317 void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
318 std::optional<VectorSplit> getVectorSplit(Type *Ty);
319 std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
320 const DataLayout &DL);
321 bool finish();
322
323 template<typename T> bool splitUnary(Instruction &, const T &);
324 template<typename T> bool splitBinary(Instruction &, const T &);
325
326 bool splitCall(CallInst &CI);
327
// Cached scattered forms, keyed by (value, fragment type).
328 ScatterMap Scattered;
// Instructions replaced by scalar forms, pending final gathering in finish().
329 GatherList Gathered;
// Whether any change has been made to the function so far.
330 bool Scalarized;
331
// Replaced instructions kept alive until finish() can safely delete them.
332 SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
333
334 DominatorTree *DT;
335 const TargetTransformInfo *TTI;
336
// Pass options captured at construction time.
337 const bool ScalarizeVariableInsertExtract;
338 const bool ScalarizeLoadStore;
339 const unsigned ScalarizeMinBits;
340};
341
// Legacy pass-manager wrapper around ScalarizerVisitor.
342class ScalarizerLegacyPass : public FunctionPass {
343public:
344 static char ID;
345 ScalarizerPassOptions Options;
346 ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
347 ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
348 bool runOnFunction(Function &F) override;
349 void getAnalysisUsage(AnalysisUsage &AU) const override;
350};
351
352} // end anonymous namespace
353
// Out-of-line constructor taking explicit pass options.
// NOTE(review): the initializer-list/body line (original line 355, presumably
// `: FunctionPass(ID), Options(Options) {}`) is not visible in this chunk —
// confirm against the full file.
354ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
356
357void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
358 AU.addRequired<DominatorTreeWrapperPass>();
359 AU.addRequired<TargetTransformInfoWrapperPass>();
360 AU.addPreserved<DominatorTreeWrapperPass>();
361}
362
// Legacy pass registration boilerplate.
363char ScalarizerLegacyPass::ID = 0;
364INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
365 "Scalarize vector operations", false, false)
// NOTE(review): original line 366 is not visible in this chunk — presumably an
// INITIALIZE_PASS_DEPENDENCY(...) for the required analyses; confirm against
// the full file.
367INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
368 "Scalarize vector operations", false, false)
369
370Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
371 const VectorSplit &VS, ValueVector *cachePtr)
372 : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
373 IsPointer = V->getType()->isPointerTy();
374 if (!CachePtr) {
375 Tmp.resize(VS.NumFragments, nullptr);
376 } else {
377 assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
378 IsPointer) &&
379 "Inconsistent vector sizes");
380 if (VS.NumFragments > CachePtr->size())
381 CachePtr->resize(VS.NumFragments, nullptr);
382 }
383}
384
385// Return fragment Frag, creating a new Value for it if necessary.
386Value *Scatterer::operator[](unsigned Frag) {
387 ValueVector &CV = CachePtr ? *CachePtr : Tmp;
388 // Try to reuse a previous value.
389 if (CV[Frag])
390 return CV[Frag];
391 IRBuilder<> Builder(BB, BBI);
// For pointer operands, fragment 0 is the pointer itself and later
// fragments are constant GEPs stepping by one SplitTy element.
392 if (IsPointer) {
393 if (Frag == 0)
394 CV[Frag] = V;
395 else
396 CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
397 V->getName() + ".i" + Twine(Frag));
398 return CV[Frag];
399 }
400
401 Type *FragmentTy = VS.getFragmentType(Frag);
402
// Vector fragments are produced with a shuffle selecting the fragment's
// lane range out of V.
403 if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
404 SmallVector<int> Mask;
405 for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
406 Mask.push_back(Frag * VS.NumPacked + J);
407 CV[Frag] =
408 Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
409 V->getName() + ".i" + Twine(Frag));
410 } else {
411 // Search through a chain of InsertElementInsts looking for element Frag.
412 // Record other elements in the cache. The new V is still suitable
413 // for all uncached indices.
414 while (true) {
415 InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
416 if (!Insert)
417 break;
// Only constant insert indices can be matched against Frag.
418 ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
419 if (!Idx)
420 break;
421 unsigned J = Idx->getZExtValue();
422 V = Insert->getOperand(0);
// Found the inserted scalar for this fragment — no extract needed.
423 if (Frag * VS.NumPacked == J) {
424 CV[Frag] = Insert->getOperand(1);
425 return CV[Frag];
426 }
427
428 if (VS.NumPacked == 1 && !CV[J]) {
429 // Only cache the first entry we find for each index we're not actively
430 // searching for. This prevents us from going too far up the chain and
431 // caching incorrect entries.
432 CV[J] = Insert->getOperand(1);
433 }
434 }
// Fallback: extract the element directly from (possibly rewritten) V.
435 CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
436 V->getName() + ".i" + Twine(Frag));
437 }
438
439 return CV[Frag];
440}
441
442bool ScalarizerLegacyPass::runOnFunction(Function &F) {
443 if (skipFunction(F))
444 return false;
445
446 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
447 const TargetTransformInfo *TTI =
448 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
449 ScalarizerVisitor Impl(DT, TTI, Options);
450 return Impl.visit(F);
451}
452
// Factory for the legacy Scalarizer pass.
// NOTE(review): the signature line (original line 453, presumably
// `FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions
// &Options) {`) is not visible in this chunk — confirm against the full file.
454 return new ScalarizerLegacyPass(Options);
455}
456
457bool ScalarizerVisitor::visit(Function &F) {
458 assert(Gathered.empty() && Scattered.empty());
459
460 Scalarized = false;
461
462 // To ensure we replace gathered components correctly we need to do an ordered
463 // traversal of the basic blocks in the function.
464 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
465 for (BasicBlock *BB : RPOT) {
466 for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
467 Instruction *I = &*II;
468 bool Done = InstVisitor::visit(I);
469 ++II;
470 if (Done && I->getType()->isVoidTy()) {
471 I->eraseFromParent();
472 Scalarized = true;
473 }
474 }
475 }
476 return finish();
477}
478
479// Return a scattered form of V that can be accessed by Point. V must be a
480// vector or a pointer to a vector.
481Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
482 const VectorSplit &VS) {
483 if (Argument *VArg = dyn_cast<Argument>(V)) {
484 // Put the scattered form of arguments in the entry block,
485 // so that it can be used everywhere.
486 Function *F = VArg->getParent();
487 BasicBlock *BB = &F->getEntryBlock();
488 return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
489 }
490 if (Instruction *VOp = dyn_cast<Instruction>(V)) {
491 // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
492 // nodes in predecessors. If those predecessors are unreachable from entry,
493 // then the IR in those blocks could have unexpected properties resulting in
494 // infinite loops in Scatterer::operator[]. By simply treating values
495 // originating from instructions in unreachable blocks as undef we do not
496 // need to analyse them further.
497 if (!DT->isReachableFromEntry(VOp->getParent()))
498 return Scatterer(Point->getParent(), Point->getIterator(),
499 PoisonValue::get(V->getType()), VS);
500 // Put the scattered form of an instruction directly after the
501 // instruction, skipping over PHI nodes and debug intrinsics.
502 BasicBlock *BB = VOp->getParent();
503 return Scatterer(
504 BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
505 &Scattered[{V, VS.SplitTy}]);
506 }
507 // In the fallback case, just put the scattered before Point and
508 // keep the result local to Point.
// (Constants and globals land here; no cache entry is created for them.)
509 return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
510}
511
512// Replace Op with the gathered form of the components in CV. Defer the
513// deletion of Op and creation of the gathered form to the end of the pass,
514// so that we can avoid creating the gathered form if all uses of Op are
515// replaced with uses of CV.
516void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
517 const VectorSplit &VS) {
518 transferMetadataAndIRFlags(Op, CV);
519
520 // If we already have a scattered form of Op (created from ExtractElements
521 // of Op itself), replace them with the new form.
522 ValueVector &SV = Scattered[{Op, VS.SplitTy}];
523 if (!SV.empty()) {
524 for (unsigned I = 0, E = SV.size(); I != E; ++I) {
525 Value *V = SV[I];
526 if (V == nullptr || SV[I] == CV[I])
527 continue;
528
// NOTE(review): original line 529, which declares `Old` (presumably
// `Instruction *Old = cast<Instruction>(V);`), is not visible in this
// chunk — confirm against the full file.
530 if (isa<Instruction>(CV[I]))
531 CV[I]->takeName(Old);
532 Old->replaceAllUsesWith(CV[I]);
533 PotentiallyDeadInstrs.emplace_back(Old);
534 }
535 }
// Record the new scattered form; finish() gathers it back if still used.
536 SV = CV;
537 Gathered.push_back(GatherList::value_type(Op, &SV));
538}
539
540// Replace Op with CV and collect Op has a potentially dead instruction.
541void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
542 if (CV != Op) {
543 Op->replaceAllUsesWith(CV);
544 PotentiallyDeadInstrs.emplace_back(Op);
545 Scalarized = true;
546 }
547}
548
549// Return true if it is safe to transfer the given metadata tag from
550// vector to scalar instructions.
551bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
552 return (Tag == LLVMContext::MD_tbaa
553 || Tag == LLVMContext::MD_fpmath
554 || Tag == LLVMContext::MD_tbaa_struct
555 || Tag == LLVMContext::MD_invariant_load
556 || Tag == LLVMContext::MD_alias_scope
557 || Tag == LLVMContext::MD_noalias
558 || Tag == LLVMContext::MD_mem_parallel_loop_access
559 || Tag == LLVMContext::MD_access_group);
560}
561
562// Transfer metadata from Op to the instructions in CV if it is known
563// to be safe to do so.
564void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
565 const ValueVector &CV) {
// NOTE(review): original line 566, which declares `MDs` (presumably a
// `SmallVector<std::pair<unsigned, MDNode *>, ...> MDs;`), is not visible in
// this chunk — confirm against the full file.
567 Op->getAllMetadataOtherThanDebugLoc(MDs);
568 for (Value *V : CV) {
// Only newly-created instructions receive metadata/flags; reused scalars
// (e.g. original insertelement operands) are left untouched.
569 if (Instruction *New = dyn_cast<Instruction>(V)) {
570 for (const auto &MD : MDs)
571 if (canTransferMetadata(MD.first))
572 New->setMetadata(MD.first, MD.second);
573 New->copyIRFlags(Op);
// Preserve the original debug location unless the fragment already has one.
574 if (Op->getDebugLoc() && !New->getDebugLoc())
575 New->setDebugLoc(Op->getDebugLoc());
576 }
577 }
578}
579
580// Determine how Ty is split, if at all.
// Returns std::nullopt when Ty is not a fixed vector worth splitting.
581std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
582 VectorSplit Split;
// NOTE(review): original line 583, which initializes Split.VecTy (presumably
// `Split.VecTy = dyn_cast<FixedVectorType>(Ty);`), is not visible in this
// chunk — confirm against the full file.
584 if (!Split.VecTy)
585 return {};
586
587 unsigned NumElems = Split.VecTy->getNumElements();
588 Type *ElemTy = Split.VecTy->getElementType();
589
// Fully scalarize when packing two elements would already exceed
// ScalarizeMinBits, or for pointer elements / single-element vectors.
590 if (NumElems == 1 || ElemTy->isPointerTy() ||
591 2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
592 Split.NumPacked = 1;
593 Split.NumFragments = NumElems;
594 Split.SplitTy = ElemTy;
595 } else {
// Otherwise pack as many elements as fit in ScalarizeMinBits per fragment.
596 Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
// If a single fragment would hold the whole vector, there is nothing to do.
597 if (Split.NumPacked >= NumElems)
598 return {};
599
600 Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
601 Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);
602
// The leftover elements form a narrower final fragment (vector or scalar).
603 unsigned RemainderElems = NumElems % Split.NumPacked;
604 if (RemainderElems > 1)
605 Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
606 else if (RemainderElems == 1)
607 Split.RemainderTy = ElemTy;
608 }
609
610 return Split;
611}
612
613// Try to fill in Layout from Ty, returning true on success. Alignment is
614// the alignment of the vector, or std::nullopt if the ABI default should be
615// used.
616std::optional<VectorLayout>
617ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
618 const DataLayout &DL) {
619 std::optional<VectorSplit> VS = getVectorSplit(Ty);
620 if (!VS)
621 return {};
622
623 VectorLayout Layout;
624 Layout.VS = *VS;
625 // Check that we're dealing with full-byte fragments.
626 if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
627 (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
628 return {};
629 Layout.VecAlign = Alignment;
630 Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
631 return Layout;
632}
633
634// Scalarize one-operand instruction I, using Split(Builder, X, Name)
635// to create an instruction like I with operand X and name Name.
636template<typename Splitter>
637bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
638 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
639 if (!VS)
640 return false;
641
642 std::optional<VectorSplit> OpVS;
643 if (I.getOperand(0)->getType() == I.getType()) {
644 OpVS = VS;
645 } else {
646 OpVS = getVectorSplit(I.getOperand(0)->getType());
647 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
648 return false;
649 }
650
651 IRBuilder<> Builder(&I);
652 Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
653 assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
654 ValueVector Res;
655 Res.resize(VS->NumFragments);
656 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
657 Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
658 gather(&I, Res, *VS);
659 return true;
660}
661
662// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
663// to create an instruction like I with operands X and Y and name Name.
664template<typename Splitter>
665bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
666 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
667 if (!VS)
668 return false;
669
670 std::optional<VectorSplit> OpVS;
671 if (I.getOperand(0)->getType() == I.getType()) {
672 OpVS = VS;
673 } else {
674 OpVS = getVectorSplit(I.getOperand(0)->getType());
675 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
676 return false;
677 }
678
679 IRBuilder<> Builder(&I);
680 Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
681 Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
682 assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
683 assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
684 ValueVector Res;
685 Res.resize(VS->NumFragments);
686 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
687 Value *Op0 = VOp0[Frag];
688 Value *Op1 = VOp1[Frag];
689 Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
690 }
691 gather(&I, Res, *VS);
692 return true;
693}
694
695/// If a call to a vector typed intrinsic function, split into a scalar call per
696/// element if possible for the intrinsic.
697bool ScalarizerVisitor::splitCall(CallInst &CI) {
698 Type *CallType = CI.getType();
// Struct-of-matching-vectors returns (e.g. frexp) are split using the first
// member's vector type; everything else must itself be a splittable vector.
699 bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType);
700 std::optional<VectorSplit> VS;
701 if (AreAllVectorsOfMatchingSize)
702 VS = getVectorSplit(CallType->getContainedType(0));
703 else
704 VS = getVectorSplit(CallType);
705 if (!VS)
706 return false;
707
// NOTE(review): original line 708, which declares `F` (presumably
// `Function *F = CI.getCalledFunction();`), is not visible in this chunk.
709 if (!F)
710 return false;
711
712 Intrinsic::ID ID = F->getIntrinsicID();
713
// NOTE(review): original line 714 (the condition guarding this early return,
// presumably a scalarizability check on ID) is not visible in this chunk.
715 return false;
716
717 // unsigned NumElems = VT->getNumElements();
718 unsigned NumArgs = CI.arg_size();
719
720 ValueVector ScalarOperands(NumArgs);
721 SmallVector<Scatterer, 8> Scattered(NumArgs);
// For overloaded vector args, remembers where their type lives in Tys.
722 SmallVector<int> OverloadIdx(NumArgs, -1);
723
// NOTE(review): original line 724 (declaring `Tys`) and line 726 (the
// condition guarding the push_back below) are not visible in this chunk.
725 // Add return type if intrinsic is overloaded on it.
727 Tys.push_back(VS->SplitTy);
728
729 if (AreAllVectorsOfMatchingSize) {
// All remaining struct members must split with the same packing factor.
730 for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
731 std::optional<VectorSplit> CurrVS =
732 getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I)));
733 // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a
734 // VectorSplit is not returned and we will bailout of handling this call.
735 // The secondary bailout case is if NumPacked does not match. This can
736 // happen if ScalarizeMinBits is not set to the default. This means with
737 // certain ScalarizeMinBits intrinsics like frexp will only scalarize when
738 // the struct elements have the same bitness.
739 if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
740 return false;
// NOTE(review): original line 741 (a condition guarding the push_back,
// presumably an overload check) is not visible in this chunk.
742 Tys.push_back(CurrVS->SplitTy);
743 }
744 }
745 // Assumes that any vector type has the same number of elements as the return
746 // vector type, which is true for all current intrinsics.
747 for (unsigned I = 0; I != NumArgs; ++I) {
748 Value *OpI = CI.getOperand(I);
749 if ([[maybe_unused]] auto *OpVecTy =
// NOTE(review): original line 750 (the dyn_cast completing this condition) is
// not visible in this chunk.
751 assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
752 std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
753 if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
754 // The natural split of the operand doesn't match the result. This could
755 // happen if the vector elements are different and the ScalarizeMinBits
756 // option is used.
757 //
758 // We could in principle handle this case as well, at the cost of
759 // complicating the scattering machinery to support multiple scattering
760 // granularities for a single value.
761 return false;
762 }
763
764 Scattered[I] = scatter(&CI, OpI, *OpVS);
// NOTE(review): original line 765 (a condition checking whether this operand
// participates in the intrinsic's overload) is not visible in this chunk.
766 OverloadIdx[I] = Tys.size();
767 Tys.push_back(OpVS->SplitTy);
768 }
769 } else {
// Scalar operands are passed through unchanged to every fragment's call.
770 ScalarOperands[I] = OpI;
// NOTE(review): original line 771 (a condition guarding the push_back) is
// not visible in this chunk.
772 Tys.push_back(OpI->getType());
773 }
774 }
775
776 ValueVector Res(VS->NumFragments);
777 ValueVector ScalarCallOps(NumArgs);
778
779 Function *NewIntrin =
780 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
781 IRBuilder<> Builder(&CI);
782
783 // Perform actual scalarization, taking care to preserve any scalar operands.
784 for (unsigned I = 0; I < VS->NumFragments; ++I) {
785 bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
786 ScalarCallOps.clear();
787
// The remainder fragment uses a narrower overload of the intrinsic.
788 if (IsRemainder)
789 Tys[0] = VS->RemainderTy;
790
791 for (unsigned J = 0; J != NumArgs; ++J) {
// NOTE(review): original line 792 (the condition distinguishing scalar from
// scattered operands, presumably `if (ScalarOperands[J]) {`) is not visible
// in this chunk.
793 ScalarCallOps.push_back(ScalarOperands[J]);
794 } else {
795 ScalarCallOps.push_back(Scattered[J][I]);
796 if (IsRemainder && OverloadIdx[J] >= 0)
797 Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
798 }
799 }
800
801 if (IsRemainder)
802 NewIntrin = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
803
804 Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
805 CI.getName() + ".i" + Twine(I));
806 }
807
808 gather(&CI, Res, *VS);
809 return true;
810}
811
812bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
813 std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
814 if (!VS)
815 return false;
816
817 std::optional<VectorSplit> CondVS;
818 if (isa<FixedVectorType>(SI.getCondition()->getType())) {
819 CondVS = getVectorSplit(SI.getCondition()->getType());
820 if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
821 // This happens when ScalarizeMinBits is used.
822 return false;
823 }
824 }
825
826 IRBuilder<> Builder(&SI);
827 Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
828 Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
829 assert(VOp1.size() == VS->NumFragments && "Mismatched select");
830 assert(VOp2.size() == VS->NumFragments && "Mismatched select");
831 ValueVector Res;
832 Res.resize(VS->NumFragments);
833
834 if (CondVS) {
835 Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
836 assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
837 for (unsigned I = 0; I < VS->NumFragments; ++I) {
838 Value *Op0 = VOp0[I];
839 Value *Op1 = VOp1[I];
840 Value *Op2 = VOp2[I];
841 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
842 SI.getName() + ".i" + Twine(I));
843 }
844 } else {
845 Value *Op0 = SI.getOperand(0);
846 for (unsigned I = 0; I < VS->NumFragments; ++I) {
847 Value *Op1 = VOp1[I];
848 Value *Op2 = VOp2[I];
849 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
850 SI.getName() + ".i" + Twine(I));
851 }
852 }
853 gather(&SI, Res, *VS);
854 return true;
855}
856
857bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
858 return splitBinary(ICI, ICmpSplitter(ICI));
859}
860
861bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
862 return splitBinary(FCI, FCmpSplitter(FCI));
863}
864
865bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
866 return splitUnary(UO, UnarySplitter(UO));
867}
868
869bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
870 return splitBinary(BO, BinarySplitter(BO));
871}
872
// Scalarize a vector GEP: each fragment gets its own GEP built from the
// scattered vector operands, with scalar operands shared across fragments.
873bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
874 std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
875 if (!VS)
876 return false;
877
878 IRBuilder<> Builder(&GEPI);
879 unsigned NumIndices = GEPI.getNumIndices();
880
881 // The base pointer and indices might be scalar even if it's a vector GEP.
882 SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
883 SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};
884
885 for (unsigned I = 0; I < 1 + NumIndices; ++I) {
886 if (auto *VecTy =
// NOTE(review): original line 887 (the dyn_cast completing this condition,
// presumably `dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {`)
// is not visible in this chunk — confirm against the full file.
888 std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
889 if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
890 // This can happen when ScalarizeMinBits is used.
891 return false;
892 }
893 ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
894 } else {
895 ScalarOps[I] = GEPI.getOperand(I);
896 }
897 }
898
899 ValueVector Res;
900 Res.resize(VS->NumFragments);
901 for (unsigned I = 0; I < VS->NumFragments; ++I) {
// Assemble this fragment's operand list: scalars pass through, scattered
// operands contribute their I-th fragment.
902 SmallVector<Value *, 8> SplitOps;
903 SplitOps.resize(1 + NumIndices);
904 for (unsigned J = 0; J < 1 + NumIndices; ++J) {
905 if (ScalarOps[J])
906 SplitOps[J] = ScalarOps[J];
907 else
908 SplitOps[J] = ScatterOps[J][I];
909 }
910 Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
911 ArrayRef(SplitOps).drop_front(),
912 GEPI.getName() + ".i" + Twine(I));
// Preserve the inbounds flag on each per-fragment GEP.
913 if (GEPI.isInBounds())
914 if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
915 NewGEPI->setIsInBounds();
916 }
917 gather(&GEPI, Res, *VS);
918 return true;
919}
920
921bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
922 std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
923 if (!DestVS)
924 return false;
925
926 std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
927 if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
928 return false;
929
930 IRBuilder<> Builder(&CI);
931 Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
932 assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
933 ValueVector Res;
934 Res.resize(DestVS->NumFragments);
935 for (unsigned I = 0; I < DestVS->NumFragments; ++I)
936 Res[I] =
937 Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
938 CI.getName() + ".i" + Twine(I));
939 gather(&CI, Res, *DestVS);
940 return true;
941}
942
943bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
944 std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
945 std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
946 if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
947 return false;
948
949 const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
950
951 // Vectors of pointers are always fully scalarized.
952 assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
953
954 IRBuilder<> Builder(&BCI);
955 Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
956 ValueVector Res;
957 Res.resize(DstVS->NumFragments);
958
959 unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
960 unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
961
962 if (isPointerTy || DstSplitBits == SrcSplitBits) {
963 assert(DstVS->NumFragments == SrcVS->NumFragments);
964 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
965 Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
966 BCI.getName() + ".i" + Twine(I));
967 }
968 } else if (SrcSplitBits % DstSplitBits == 0) {
969 // Convert each source fragment to the same-sized destination vector and
970 // then scatter the result to the destination.
971 VectorSplit MidVS;
972 MidVS.NumPacked = DstVS->NumPacked;
973 MidVS.NumFragments = SrcSplitBits / DstSplitBits;
974 MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
975 MidVS.NumPacked * MidVS.NumFragments);
976 MidVS.SplitTy = DstVS->SplitTy;
977
978 unsigned ResI = 0;
979 for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
980 Value *V = Op0[I];
981
982 // Look through any existing bitcasts before converting to <N x t2>.
983 // In the best case, the resulting conversion might be a no-op.
985 while ((VI = dyn_cast<Instruction>(V)) &&
986 VI->getOpcode() == Instruction::BitCast)
987 V = VI->getOperand(0);
988
989 V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");
990
991 Scatterer Mid = scatter(&BCI, V, MidVS);
992 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
993 Res[ResI++] = Mid[J];
994 }
995 } else if (DstSplitBits % SrcSplitBits == 0) {
996 // Gather enough source fragments to make up a destination fragment and
997 // then convert to the destination type.
998 VectorSplit MidVS;
999 MidVS.NumFragments = DstSplitBits / SrcSplitBits;
1000 MidVS.NumPacked = SrcVS->NumPacked;
1001 MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
1002 MidVS.NumPacked * MidVS.NumFragments);
1003 MidVS.SplitTy = SrcVS->SplitTy;
1004
1005 unsigned SrcI = 0;
1006 SmallVector<Value *, 8> ConcatOps;
1007 ConcatOps.resize(MidVS.NumFragments);
1008 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
1009 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
1010 ConcatOps[J] = Op0[SrcI++];
1011 Value *V = concatenate(Builder, ConcatOps, MidVS,
1012 BCI.getName() + ".i" + Twine(I));
1013 Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
1014 BCI.getName() + ".i" + Twine(I));
1015 }
1016 } else {
1017 return false;
1018 }
1019
1020 gather(&BCI, Res, *DstVS);
1021 return true;
1022}
1023
1024bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
1025 std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
1026 if (!VS)
1027 return false;
1028
1029 IRBuilder<> Builder(&IEI);
1030 Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
1031 Value *NewElt = IEI.getOperand(1);
1032 Value *InsIdx = IEI.getOperand(2);
1033
1034 ValueVector Res;
1035 Res.resize(VS->NumFragments);
1036
1037 if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
1038 unsigned Idx = CI->getZExtValue();
1039 unsigned Fragment = Idx / VS->NumPacked;
1040 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1041 if (I == Fragment) {
1042 bool IsPacked = VS->NumPacked > 1;
1043 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1044 !VS->RemainderTy->isVectorTy())
1045 IsPacked = false;
1046 if (IsPacked) {
1047 Res[I] =
1048 Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
1049 } else {
1050 Res[I] = NewElt;
1051 }
1052 } else {
1053 Res[I] = Op0[I];
1054 }
1055 }
1056 } else {
1057 // Never split a variable insertelement that isn't fully scalarized.
1058 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1059 return false;
1060
1061 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1062 Value *ShouldReplace =
1063 Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
1064 InsIdx->getName() + ".is." + Twine(I));
1065 Value *OldElt = Op0[I];
1066 Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
1067 IEI.getName() + ".i" + Twine(I));
1068 }
1069 }
1070
1071 gather(&IEI, Res, *VS);
1072 return true;
1073}
1074
1075bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
1076 Value *Op = EVI.getOperand(0);
1077 Type *OpTy = Op->getType();
1078 ValueVector Res;
1080 return false;
1081 if (CallInst *CI = dyn_cast<CallInst>(Op)) {
1082 Function *F = CI->getCalledFunction();
1083 if (!F)
1084 return false;
1085 Intrinsic::ID ID = F->getIntrinsicID();
1087 return false;
1088 // Note: Fall through means Operand is a`CallInst` and it is defined in
1089 // `isTriviallyScalarizable`.
1090 } else
1091 return false;
1092 Type *VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
1093 std::optional<VectorSplit> VS = getVectorSplit(VecType);
1094 if (!VS)
1095 return false;
1096 for (unsigned I = 1; I < OpTy->getNumContainedTypes(); I++) {
1097 std::optional<VectorSplit> CurrVS =
1098 getVectorSplit(cast<FixedVectorType>(OpTy->getContainedType(I)));
1099 // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a
1100 // VectorSplit is not returned and we will bailout of handling this call.
1101 // The secondary bailout case is if NumPacked does not match. This can
1102 // happen if ScalarizeMinBits is not set to the default. This means with
1103 // certain ScalarizeMinBits intrinsics like frexp will only scalarize when
1104 // the struct elements have the same bitness.
1105 if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
1106 return false;
1107 }
1108 IRBuilder<> Builder(&EVI);
1109 Scatterer Op0 = scatter(&EVI, Op, *VS);
1110 assert(!EVI.getIndices().empty() && "Make sure an index exists");
1111 // Note for our use case we only care about the top level index.
1112 unsigned Index = EVI.getIndices()[0];
1113 for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
1114 Value *ResElem = Builder.CreateExtractValue(
1115 Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));
1116 Res.push_back(ResElem);
1117 }
1118
1119 Type *ActualVecType = cast<FixedVectorType>(OpTy->getContainedType(Index));
1120 std::optional<VectorSplit> AVS = getVectorSplit(ActualVecType);
1121 gather(&EVI, Res, *AVS);
1122 return true;
1123}
1124
1125bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
1126 std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
1127 if (!VS)
1128 return false;
1129
1130 IRBuilder<> Builder(&EEI);
1131 Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
1132 Value *ExtIdx = EEI.getOperand(1);
1133
1134 if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
1135 unsigned Idx = CI->getZExtValue();
1136 unsigned Fragment = Idx / VS->NumPacked;
1137 Value *Res = Op0[Fragment];
1138 bool IsPacked = VS->NumPacked > 1;
1139 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1140 !VS->RemainderTy->isVectorTy())
1141 IsPacked = false;
1142 if (IsPacked)
1143 Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
1144 replaceUses(&EEI, Res);
1145 return true;
1146 }
1147
1148 // Never split a variable extractelement that isn't fully scalarized.
1149 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1150 return false;
1151
1152 Value *Res = PoisonValue::get(VS->VecTy->getElementType());
1153 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1154 Value *ShouldExtract =
1155 Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
1156 ExtIdx->getName() + ".is." + Twine(I));
1157 Value *Elt = Op0[I];
1158 Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
1159 EEI.getName() + ".upto" + Twine(I));
1160 }
1161 replaceUses(&EEI, Res);
1162 return true;
1163}
1164
1165bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1166 std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
1167 std::optional<VectorSplit> VSOp =
1168 getVectorSplit(SVI.getOperand(0)->getType());
1169 if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
1170 return false;
1171
1172 Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
1173 Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
1174 ValueVector Res;
1175 Res.resize(VS->NumFragments);
1176
1177 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1178 int Selector = SVI.getMaskValue(I);
1179 if (Selector < 0)
1180 Res[I] = PoisonValue::get(VS->VecTy->getElementType());
1181 else if (unsigned(Selector) < Op0.size())
1182 Res[I] = Op0[Selector];
1183 else
1184 Res[I] = Op1[Selector - Op0.size()];
1185 }
1186 gather(&SVI, Res, *VS);
1187 return true;
1188}
1189
1190bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
1191 std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
1192 if (!VS)
1193 return false;
1194
1195 IRBuilder<> Builder(&PHI);
1196 ValueVector Res;
1197 Res.resize(VS->NumFragments);
1198
1199 unsigned NumOps = PHI.getNumOperands();
1200 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1201 Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
1202 PHI.getName() + ".i" + Twine(I));
1203 }
1204
1205 for (unsigned I = 0; I < NumOps; ++I) {
1206 Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
1207 BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
1208 for (unsigned J = 0; J < VS->NumFragments; ++J)
1209 cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
1210 }
1211 gather(&PHI, Res, *VS);
1212 return true;
1213}
1214
1215bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
1216 if (!ScalarizeLoadStore)
1217 return false;
1218 if (!LI.isSimple())
1219 return false;
1220
1221 std::optional<VectorLayout> Layout = getVectorLayout(
1222 LI.getType(), LI.getAlign(), LI.getDataLayout());
1223 if (!Layout)
1224 return false;
1225
1226 IRBuilder<> Builder(&LI);
1227 Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
1228 ValueVector Res;
1229 Res.resize(Layout->VS.NumFragments);
1230
1231 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1232 Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
1233 Align(Layout->getFragmentAlign(I)),
1234 LI.getName() + ".i" + Twine(I));
1235 }
1236 gather(&LI, Res, Layout->VS);
1237 return true;
1238}
1239
1240bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
1241 if (!ScalarizeLoadStore)
1242 return false;
1243 if (!SI.isSimple())
1244 return false;
1245
1246 Value *FullValue = SI.getValueOperand();
1247 std::optional<VectorLayout> Layout = getVectorLayout(
1248 FullValue->getType(), SI.getAlign(), SI.getDataLayout());
1249 if (!Layout)
1250 return false;
1251
1252 IRBuilder<> Builder(&SI);
1253 Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
1254 Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
1255
1256 ValueVector Stores;
1257 Stores.resize(Layout->VS.NumFragments);
1258 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1259 Value *Val = VVal[I];
1260 Value *Ptr = VPtr[I];
1261 Stores[I] =
1262 Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
1263 }
1264 transferMetadataAndIRFlags(&SI, Stores);
1265 return true;
1266}
1267
// Try to scalarize a vector call; all the real work is delegated to
// splitCall(). Returns true if the call was split.
bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}
1271
1272bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
1273 return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
1274 return Builder.CreateFreeze(Op, Name);
1275 });
1276}
1277
1278// Delete the instructions that we scalarized. If a full vector result
1279// is still needed, recreate it using InsertElements.
1280bool ScalarizerVisitor::finish() {
1281 // The presence of data in Gathered or Scattered indicates changes
1282 // made to the Function.
1283 if (Gathered.empty() && Scattered.empty() && !Scalarized)
1284 return false;
1285 for (const auto &GMI : Gathered) {
1286 Instruction *Op = GMI.first;
1287 ValueVector &CV = *GMI.second;
1288 if (!Op->use_empty()) {
1289 // The value is still needed, so recreate it using a series of
1290 // insertelements and/or shufflevectors.
1291 Value *Res;
1292 if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
1293 BasicBlock *BB = Op->getParent();
1294 IRBuilder<> Builder(Op);
1295 if (isa<PHINode>(Op))
1296 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1297
1298 VectorSplit VS = *getVectorSplit(Ty);
1299 assert(VS.NumFragments == CV.size());
1300
1301 Res = concatenate(Builder, CV, VS, Op->getName());
1302
1303 Res->takeName(Op);
1304 } else if (auto *Ty = dyn_cast<StructType>(Op->getType())) {
1305 BasicBlock *BB = Op->getParent();
1306 IRBuilder<> Builder(Op);
1307 if (isa<PHINode>(Op))
1308 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1309
1310 // Iterate over each element in the struct
1311 unsigned NumOfStructElements = Ty->getNumElements();
1312 SmallVector<ValueVector, 4> ElemCV(NumOfStructElements);
1313 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1314 for (auto *CVelem : CV) {
1315 Value *Elem = Builder.CreateExtractValue(
1316 CVelem, I, Op->getName() + ".elem" + Twine(I));
1317 ElemCV[I].push_back(Elem);
1318 }
1319 }
1320 Res = PoisonValue::get(Ty);
1321 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1322 Type *ElemTy = Ty->getElementType(I);
1323 assert(isa<FixedVectorType>(ElemTy) &&
1324 "Only Structs of all FixedVectorType supported");
1325 VectorSplit VS = *getVectorSplit(ElemTy);
1326 assert(VS.NumFragments == CV.size());
1327
1328 Value *ConcatenatedVector =
1329 concatenate(Builder, ElemCV[I], VS, Op->getName());
1330 Res = Builder.CreateInsertValue(Res, ConcatenatedVector, I,
1331 Op->getName() + ".insert");
1332 }
1333 } else {
1334 assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
1335 Res = CV[0];
1336 if (Op == Res)
1337 continue;
1338 }
1339 Op->replaceAllUsesWith(Res);
1340 }
1341 PotentiallyDeadInstrs.emplace_back(Op);
1342 }
1343 Gathered.clear();
1344 Scattered.clear();
1345 Scalarized = false;
1346
1348
1349 return true;
1350}
1351
  // NOTE(review): extraction artifact — the enclosing function signature
  // (ScalarizerPass::run) and the lines that materialize DT, TTI and PA are
  // missing from this view; confirm against the full source before editing.
  ScalarizerVisitor Impl(DT, TTI, Options);
  bool Changed = Impl.visit(F);
  // Report the preserved-analyses set only if the IR actually changed.
  return Changed ? PA : PreservedAnalyses::all();
}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
SmallVector< std::pair< Instruction *, ValueVector * >, 16 > GatherList
static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr)
static bool isStructOfMatchingFixedVectors(Type *Ty)
std::map< std::pair< Value *, Type * >, ValueVector > ScatterMap
SmallVector< Value *, 8 > ValueVector
static Value * concatenate(IRBuilder<> &Builder, ArrayRef< Value * > Fragments, const VectorSplit &VS, Twine Name)
Concatenate the given fragments to a single vector value of the type described in VS.
This pass converts vector operations into scalar operations (or, optionally, operations on smaller ve...
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
unsigned arg_size() const
Type * getSrcTy() const
Return the source type, as a convenience.
Definition InstrTypes.h:615
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
Type * getDestTy() const
Return the destination type, as a convenience.
Definition InstrTypes.h:617
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
ArrayRef< unsigned > getIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
LLVM_ABI bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Type * getSourceElementType() const
unsigned getNumIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2633
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2626
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
VectorType * getType() const
Overload to return most specific vector type.
Base class for instruction visitors.
Definition InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition InstVisitor.h:87
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Value * getPointerOperand()
bool isSimple() const
Align getAlign() const
Return the alignment of the access that is being performed.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
void truncate(size_type N)
Like resize, but requires that N is less than size().
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
unsigned getNumContainedTypes() const
Return the number of types in the derived type.
Definition Type.h:387
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition Type.h:381
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)
Identify if the intrinsic is trivially scalarizable.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
LLVM_ABI BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
bool isPointerTy(const Type *T)
Definition SPIRVUtils.h:359
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
LLVM_ABI FunctionPass * createScalarizerPass(const ScalarizerPassOptions &Options=ScalarizerPassOptions())
Create a legacy pass manager instance of the Scalarizer pass.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition Local.cpp:548
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39