LLVM  9.0.0svn
Scalarizer.cpp
Go to the documentation of this file.
1 //===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass converts vector operations into scalar operations, in order
10 // to expose optimization opportunities on the individual scalar operations.
11 // It is mainly intended for targets that do not have vector units, but it
12 // may also be useful for revectorizing code to different vector widths.
13 //
14 //===----------------------------------------------------------------------===//
15 
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Twine.h"
20 #include "llvm/IR/Argument.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/InstVisitor.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/Options.h"
40 #include "llvm/Transforms/Scalar.h"
42 #include <cassert>
43 #include <cstdint>
44 #include <iterator>
45 #include <map>
46 #include <utility>
47 
48 using namespace llvm;
49 
50 #define DEBUG_TYPE "scalarizer"
51 
52 // This is disabled by default because having separate loads and stores
53 // makes it more likely that the -combiner-alias-analysis limits will be
54 // reached.
55 static cl::opt<bool>
56  ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden,
57  cl::desc("Allow the scalarizer pass to scalarize loads and store"));
58 
59 namespace {
60 
61 // Used to store the scattered form of a vector.
62 using ValueVector = SmallVector<Value *, 8>;
63 
64 // Used to map a vector Value to its scattered form. We use std::map
65 // because we want iterators to persist across insertion and because the
66 // values are relatively large.
67 using ScatterMap = std::map<Value *, ValueVector>;
68 
69 // Lists Instructions that have been replaced with scalar implementations,
70 // along with a pointer to their scattered forms.
72 
73 // Provides a very limited vector-like interface for lazily accessing one
74 // component of a scattered vector or vector pointer.
75 class Scatterer {
76 public:
77  Scatterer() = default;
78 
79  // Scatter V into Size components. If new instructions are needed,
80  // insert them before BBI in BB. If Cache is nonnull, use it to cache
81  // the results.
82  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
83  ValueVector *cachePtr = nullptr);
84 
85  // Return component I, creating a new Value for it if necessary.
86  Value *operator[](unsigned I);
87 
88  // Return the number of components.
89  unsigned size() const { return Size; }
90 
91 private:
92  BasicBlock *BB;
94  Value *V;
95  ValueVector *CachePtr;
96  PointerType *PtrTy;
97  ValueVector Tmp;
98  unsigned Size;
99 };
100 
101 // FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
102 // called Name that compares X and Y in the same way as FCI.
103 struct FCmpSplitter {
104  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
105 
106  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
107  const Twine &Name) const {
108  return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
109  }
110 
111  FCmpInst &FCI;
112 };
113 
114 // ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
115 // called Name that compares X and Y in the same way as ICI.
116 struct ICmpSplitter {
117  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
118 
119  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
120  const Twine &Name) const {
121  return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
122  }
123 
124  ICmpInst &ICI;
125 };
126 
127 // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
128 // a binary operator like BO called Name with operands X and Y.
129 struct BinarySplitter {
130  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
131 
132  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
133  const Twine &Name) const {
134  return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
135  }
136 
137  BinaryOperator &BO;
138 };
139 
140 // Information about a load or store that we're scalarizing.
141 struct VectorLayout {
142  VectorLayout() = default;
143 
144  // Return the alignment of element I.
145  uint64_t getElemAlign(unsigned I) {
146  return MinAlign(VecAlign, I * ElemSize);
147  }
148 
149  // The type of the vector.
150  VectorType *VecTy = nullptr;
151 
152  // The type of each element.
153  Type *ElemTy = nullptr;
154 
155  // The alignment of the vector.
156  uint64_t VecAlign = 0;
157 
158  // The size of each element.
159  uint64_t ElemSize = 0;
160 };
161 
162 class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
163 public:
164  ScalarizerVisitor(unsigned ParallelLoopAccessMDKind)
165  : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
166  }
167 
168  bool visit(Function &F);
169 
170  // InstVisitor methods. They return true if the instruction was scalarized,
171  // false if nothing changed.
172  bool visitInstruction(Instruction &I) { return false; }
173  bool visitSelectInst(SelectInst &SI);
174  bool visitICmpInst(ICmpInst &ICI);
175  bool visitFCmpInst(FCmpInst &FCI);
176  bool visitBinaryOperator(BinaryOperator &BO);
177  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
178  bool visitCastInst(CastInst &CI);
179  bool visitBitCastInst(BitCastInst &BCI);
180  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
181  bool visitPHINode(PHINode &PHI);
182  bool visitLoadInst(LoadInst &LI);
183  bool visitStoreInst(StoreInst &SI);
184  bool visitCallInst(CallInst &ICI);
185 
186 private:
187  Scatterer scatter(Instruction *Point, Value *V);
188  void gather(Instruction *Op, const ValueVector &CV);
189  bool canTransferMetadata(unsigned Kind);
190  void transferMetadata(Instruction *Op, const ValueVector &CV);
191  bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout,
192  const DataLayout &DL);
193  bool finish();
194 
195  template<typename T> bool splitBinary(Instruction &, const T &);
196 
197  bool splitCall(CallInst &CI);
198 
199  ScatterMap Scattered;
200  GatherList Gathered;
201 
202  unsigned ParallelLoopAccessMDKind;
203 };
204 
205 class ScalarizerLegacyPass : public FunctionPass {
206 public:
207  static char ID;
208 
209  ScalarizerLegacyPass() : FunctionPass(ID) {
211  }
212 
213  bool runOnFunction(Function &F) override;
214 };
215 
216 } // end anonymous namespace
217 
218 char ScalarizerLegacyPass::ID = 0;
219 INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
220  "Scalarize vector operations", false, false)
221 INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
222  "Scalarize vector operations", false, false)
223 
224 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
225  ValueVector *cachePtr)
226  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
227  Type *Ty = V->getType();
228  PtrTy = dyn_cast<PointerType>(Ty);
229  if (PtrTy)
230  Ty = PtrTy->getElementType();
231  Size = Ty->getVectorNumElements();
232  if (!CachePtr)
233  Tmp.resize(Size, nullptr);
234  else if (CachePtr->empty())
235  CachePtr->resize(Size, nullptr);
236  else
237  assert(Size == CachePtr->size() && "Inconsistent vector sizes");
238 }
239 
240 // Return component I, creating a new Value for it if necessary.
241 Value *Scatterer::operator[](unsigned I) {
242  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
243  // Try to reuse a previous value.
244  if (CV[I])
245  return CV[I];
246  IRBuilder<> Builder(BB, BBI);
247  if (PtrTy) {
248  Type *ElTy = PtrTy->getElementType()->getVectorElementType();
249  if (!CV[0]) {
250  Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
251  CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
252  }
253  if (I != 0)
254  CV[I] = Builder.CreateConstGEP1_32(ElTy, CV[0], I,
255  V->getName() + ".i" + Twine(I));
256  } else {
257  // Search through a chain of InsertElementInsts looking for element I.
258  // Record other elements in the cache. The new V is still suitable
259  // for all uncached indices.
260  while (true) {
262  if (!Insert)
263  break;
264  ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
265  if (!Idx)
266  break;
267  unsigned J = Idx->getZExtValue();
268  V = Insert->getOperand(0);
269  if (I == J) {
270  CV[J] = Insert->getOperand(1);
271  return CV[J];
272  } else if (!CV[J]) {
273  // Only cache the first entry we find for each index we're not actively
274  // searching for. This prevents us from going too far up the chain and
275  // caching incorrect entries.
276  CV[J] = Insert->getOperand(1);
277  }
278  }
279  CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
280  V->getName() + ".i" + Twine(I));
281  }
282  return CV[I];
283 }
284 
286  if (skipFunction(F))
287  return false;
288 
289  Module &M = *F.getParent();
290  unsigned ParallelLoopAccessMDKind =
291  M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
292  ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
293  return Impl.visit(F);
294 }
295 
297  return new ScalarizerLegacyPass();
298 }
299 
300 bool ScalarizerVisitor::visit(Function &F) {
301  assert(Gathered.empty() && Scattered.empty());
302 
303  // To ensure we replace gathered components correctly we need to do an ordered
304  // traversal of the basic blocks in the function.
306  for (BasicBlock *BB : RPOT) {
307  for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
308  Instruction *I = &*II;
309  bool Done = InstVisitor::visit(I);
310  ++II;
311  if (Done && I->getType()->isVoidTy())
312  I->eraseFromParent();
313  }
314  }
315  return finish();
316 }
317 
318 // Return a scattered form of V that can be accessed by Point. V must be a
319 // vector or a pointer to a vector.
320 Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
321  if (Argument *VArg = dyn_cast<Argument>(V)) {
322  // Put the scattered form of arguments in the entry block,
323  // so that it can be used everywhere.
324  Function *F = VArg->getParent();
325  BasicBlock *BB = &F->getEntryBlock();
326  return Scatterer(BB, BB->begin(), V, &Scattered[V]);
327  }
328  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
329  // Put the scattered form of an instruction directly after the
330  // instruction.
331  BasicBlock *BB = VOp->getParent();
332  return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
333  V, &Scattered[V]);
334  }
335  // In the fallback case, just put the scattered before Point and
336  // keep the result local to Point.
337  return Scatterer(Point->getParent(), Point->getIterator(), V);
338 }
339 
340 // Replace Op with the gathered form of the components in CV. Defer the
341 // deletion of Op and creation of the gathered form to the end of the pass,
342 // so that we can avoid creating the gathered form if all uses of Op are
343 // replaced with uses of CV.
344 void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
345  // Since we're not deleting Op yet, stub out its operands, so that it
346  // doesn't make anything live unnecessarily.
347  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
348  Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
349 
350  transferMetadata(Op, CV);
351 
352  // If we already have a scattered form of Op (created from ExtractElements
353  // of Op itself), replace them with the new form.
354  ValueVector &SV = Scattered[Op];
355  if (!SV.empty()) {
356  for (unsigned I = 0, E = SV.size(); I != E; ++I) {
357  Value *V = SV[I];
358  if (V == nullptr)
359  continue;
360 
361  Instruction *Old = cast<Instruction>(V);
362  CV[I]->takeName(Old);
363  Old->replaceAllUsesWith(CV[I]);
364  Old->eraseFromParent();
365  }
366  }
367  SV = CV;
368  Gathered.push_back(GatherList::value_type(Op, &SV));
369 }
370 
371 // Return true if it is safe to transfer the given metadata tag from
372 // vector to scalar instructions.
373 bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
374  return (Tag == LLVMContext::MD_tbaa
375  || Tag == LLVMContext::MD_fpmath
379  || Tag == LLVMContext::MD_noalias
380  || Tag == ParallelLoopAccessMDKind
381  || Tag == LLVMContext::MD_access_group);
382 }
383 
384 // Transfer metadata from Op to the instructions in CV if it is known
385 // to be safe to do so.
386 void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV) {
389  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
390  if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
391  for (const auto &MD : MDs)
392  if (canTransferMetadata(MD.first))
393  New->setMetadata(MD.first, MD.second);
394  if (Op->getDebugLoc() && !New->getDebugLoc())
395  New->setDebugLoc(Op->getDebugLoc());
396  }
397  }
398 }
399 
400 // Try to fill in Layout from Ty, returning true on success. Alignment is
401 // the alignment of the vector, or 0 if the ABI default should be used.
402 bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
403  VectorLayout &Layout, const DataLayout &DL) {
404  // Make sure we're dealing with a vector.
405  Layout.VecTy = dyn_cast<VectorType>(Ty);
406  if (!Layout.VecTy)
407  return false;
408 
409  // Check that we're dealing with full-byte elements.
410  Layout.ElemTy = Layout.VecTy->getElementType();
411  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
412  DL.getTypeStoreSizeInBits(Layout.ElemTy))
413  return false;
414 
415  if (Alignment)
416  Layout.VecAlign = Alignment;
417  else
418  Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
419  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
420  return true;
421 }
422 
423 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
424 // to create an instruction like I with operands X and Y and name Name.
425 template<typename Splitter>
426 bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
428  if (!VT)
429  return false;
430 
431  unsigned NumElems = VT->getNumElements();
432  IRBuilder<> Builder(&I);
433  Scatterer Op0 = scatter(&I, I.getOperand(0));
434  Scatterer Op1 = scatter(&I, I.getOperand(1));
435  assert(Op0.size() == NumElems && "Mismatched binary operation");
436  assert(Op1.size() == NumElems && "Mismatched binary operation");
437  ValueVector Res;
438  Res.resize(NumElems);
439  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
440  Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
441  I.getName() + ".i" + Twine(Elem));
442  gather(&I, Res);
443  return true;
444 }
445 
447  return isTriviallyVectorizable(ID);
448 }
449 
450 // All of the current scalarizable intrinsics only have one mangled type.
453  VectorType *Ty) {
454  return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
455 }
456 
457 /// If a call to a vector typed intrinsic function, split into a scalar call per
458 /// element if possible for the intrinsic.
459 bool ScalarizerVisitor::splitCall(CallInst &CI) {
460  VectorType *VT = dyn_cast<VectorType>(CI.getType());
461  if (!VT)
462  return false;
463 
464  Function *F = CI.getCalledFunction();
465  if (!F)
466  return false;
467 
470  return false;
471 
472  unsigned NumElems = VT->getNumElements();
473  unsigned NumArgs = CI.getNumArgOperands();
474 
475  ValueVector ScalarOperands(NumArgs);
476  SmallVector<Scatterer, 8> Scattered(NumArgs);
477 
478  Scattered.resize(NumArgs);
479 
480  // Assumes that any vector type has the same number of elements as the return
481  // vector type, which is true for all current intrinsics.
482  for (unsigned I = 0; I != NumArgs; ++I) {
483  Value *OpI = CI.getOperand(I);
484  if (OpI->getType()->isVectorTy()) {
485  Scattered[I] = scatter(&CI, OpI);
486  assert(Scattered[I].size() == NumElems && "mismatched call operands");
487  } else {
488  ScalarOperands[I] = OpI;
489  }
490  }
491 
492  ValueVector Res(NumElems);
493  ValueVector ScalarCallOps(NumArgs);
494 
495  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
496  IRBuilder<> Builder(&CI);
497 
498  // Perform actual scalarization, taking care to preserve any scalar operands.
499  for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
500  ScalarCallOps.clear();
501 
502  for (unsigned J = 0; J != NumArgs; ++J) {
503  if (hasVectorInstrinsicScalarOpd(ID, J))
504  ScalarCallOps.push_back(ScalarOperands[J]);
505  else
506  ScalarCallOps.push_back(Scattered[J][Elem]);
507  }
508 
509  Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
510  CI.getName() + ".i" + Twine(Elem));
511  }
512 
513  gather(&CI, Res);
514  return true;
515 }
516 
517 bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
518  VectorType *VT = dyn_cast<VectorType>(SI.getType());
519  if (!VT)
520  return false;
521 
522  unsigned NumElems = VT->getNumElements();
523  IRBuilder<> Builder(&SI);
524  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
525  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
526  assert(Op1.size() == NumElems && "Mismatched select");
527  assert(Op2.size() == NumElems && "Mismatched select");
528  ValueVector Res;
529  Res.resize(NumElems);
530 
531  if (SI.getOperand(0)->getType()->isVectorTy()) {
532  Scatterer Op0 = scatter(&SI, SI.getOperand(0));
533  assert(Op0.size() == NumElems && "Mismatched select");
534  for (unsigned I = 0; I < NumElems; ++I)
535  Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
536  SI.getName() + ".i" + Twine(I));
537  } else {
538  Value *Op0 = SI.getOperand(0);
539  for (unsigned I = 0; I < NumElems; ++I)
540  Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
541  SI.getName() + ".i" + Twine(I));
542  }
543  gather(&SI, Res);
544  return true;
545 }
546 
547 bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
548  return splitBinary(ICI, ICmpSplitter(ICI));
549 }
550 
551 bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
552  return splitBinary(FCI, FCmpSplitter(FCI));
553 }
554 
555 bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
556  return splitBinary(BO, BinarySplitter(BO));
557 }
558 
559 bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
560  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
561  if (!VT)
562  return false;
563 
564  IRBuilder<> Builder(&GEPI);
565  unsigned NumElems = VT->getNumElements();
566  unsigned NumIndices = GEPI.getNumIndices();
567 
568  // The base pointer might be scalar even if it's a vector GEP. In those cases,
569  // splat the pointer into a vector value, and scatter that vector.
570  Value *Op0 = GEPI.getOperand(0);
571  if (!Op0->getType()->isVectorTy())
572  Op0 = Builder.CreateVectorSplat(NumElems, Op0);
573  Scatterer Base = scatter(&GEPI, Op0);
574 
576  Ops.resize(NumIndices);
577  for (unsigned I = 0; I < NumIndices; ++I) {
578  Value *Op = GEPI.getOperand(I + 1);
579 
580  // The indices might be scalars even if it's a vector GEP. In those cases,
581  // splat the scalar into a vector value, and scatter that vector.
582  if (!Op->getType()->isVectorTy())
583  Op = Builder.CreateVectorSplat(NumElems, Op);
584 
585  Ops[I] = scatter(&GEPI, Op);
586  }
587 
588  ValueVector Res;
589  Res.resize(NumElems);
590  for (unsigned I = 0; I < NumElems; ++I) {
591  SmallVector<Value *, 8> Indices;
592  Indices.resize(NumIndices);
593  for (unsigned J = 0; J < NumIndices; ++J)
594  Indices[J] = Ops[J][I];
595  Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
596  GEPI.getName() + ".i" + Twine(I));
597  if (GEPI.isInBounds())
598  if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
599  NewGEPI->setIsInBounds();
600  }
601  gather(&GEPI, Res);
602  return true;
603 }
604 
605 bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
607  if (!VT)
608  return false;
609 
610  unsigned NumElems = VT->getNumElements();
611  IRBuilder<> Builder(&CI);
612  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
613  assert(Op0.size() == NumElems && "Mismatched cast");
614  ValueVector Res;
615  Res.resize(NumElems);
616  for (unsigned I = 0; I < NumElems; ++I)
617  Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
618  CI.getName() + ".i" + Twine(I));
619  gather(&CI, Res);
620  return true;
621 }
622 
623 bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
624  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
625  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
626  if (!DstVT || !SrcVT)
627  return false;
628 
629  unsigned DstNumElems = DstVT->getNumElements();
630  unsigned SrcNumElems = SrcVT->getNumElements();
631  IRBuilder<> Builder(&BCI);
632  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
633  ValueVector Res;
634  Res.resize(DstNumElems);
635 
636  if (DstNumElems == SrcNumElems) {
637  for (unsigned I = 0; I < DstNumElems; ++I)
638  Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
639  BCI.getName() + ".i" + Twine(I));
640  } else if (DstNumElems > SrcNumElems) {
641  // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
642  // individual elements to the destination.
643  unsigned FanOut = DstNumElems / SrcNumElems;
644  Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
645  unsigned ResI = 0;
646  for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
647  Value *V = Op0[Op0I];
648  Instruction *VI;
649  // Look through any existing bitcasts before converting to <N x t2>.
650  // In the best case, the resulting conversion might be a no-op.
651  while ((VI = dyn_cast<Instruction>(V)) &&
652  VI->getOpcode() == Instruction::BitCast)
653  V = VI->getOperand(0);
654  V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
655  Scatterer Mid = scatter(&BCI, V);
656  for (unsigned MidI = 0; MidI < FanOut; ++MidI)
657  Res[ResI++] = Mid[MidI];
658  }
659  } else {
660  // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
661  unsigned FanIn = SrcNumElems / DstNumElems;
662  Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
663  unsigned Op0I = 0;
664  for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
665  Value *V = UndefValue::get(MidTy);
666  for (unsigned MidI = 0; MidI < FanIn; ++MidI)
667  V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
668  BCI.getName() + ".i" + Twine(ResI)
669  + ".upto" + Twine(MidI));
670  Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
671  BCI.getName() + ".i" + Twine(ResI));
672  }
673  }
674  gather(&BCI, Res);
675  return true;
676 }
677 
678 bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
679  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
680  if (!VT)
681  return false;
682 
683  unsigned NumElems = VT->getNumElements();
684  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
685  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
686  ValueVector Res;
687  Res.resize(NumElems);
688 
689  for (unsigned I = 0; I < NumElems; ++I) {
690  int Selector = SVI.getMaskValue(I);
691  if (Selector < 0)
692  Res[I] = UndefValue::get(VT->getElementType());
693  else if (unsigned(Selector) < Op0.size())
694  Res[I] = Op0[Selector];
695  else
696  Res[I] = Op1[Selector - Op0.size()];
697  }
698  gather(&SVI, Res);
699  return true;
700 }
701 
702 bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
703  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
704  if (!VT)
705  return false;
706 
707  unsigned NumElems = VT->getNumElements();
708  IRBuilder<> Builder(&PHI);
709  ValueVector Res;
710  Res.resize(NumElems);
711 
712  unsigned NumOps = PHI.getNumOperands();
713  for (unsigned I = 0; I < NumElems; ++I)
714  Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
715  PHI.getName() + ".i" + Twine(I));
716 
717  for (unsigned I = 0; I < NumOps; ++I) {
718  Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
719  BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
720  for (unsigned J = 0; J < NumElems; ++J)
721  cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
722  }
723  gather(&PHI, Res);
724  return true;
725 }
726 
727 bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
728  if (!ScalarizeLoadStore)
729  return false;
730  if (!LI.isSimple())
731  return false;
732 
733  VectorLayout Layout;
734  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
735  LI.getModule()->getDataLayout()))
736  return false;
737 
738  unsigned NumElems = Layout.VecTy->getNumElements();
739  IRBuilder<> Builder(&LI);
740  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
741  ValueVector Res;
742  Res.resize(NumElems);
743 
744  for (unsigned I = 0; I < NumElems; ++I)
745  Res[I] = Builder.CreateAlignedLoad(Layout.VecTy->getElementType(), Ptr[I],
746  Layout.getElemAlign(I),
747  LI.getName() + ".i" + Twine(I));
748  gather(&LI, Res);
749  return true;
750 }
751 
752 bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
753  if (!ScalarizeLoadStore)
754  return false;
755  if (!SI.isSimple())
756  return false;
757 
758  VectorLayout Layout;
759  Value *FullValue = SI.getValueOperand();
760  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
761  SI.getModule()->getDataLayout()))
762  return false;
763 
764  unsigned NumElems = Layout.VecTy->getNumElements();
765  IRBuilder<> Builder(&SI);
766  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
767  Scatterer Val = scatter(&SI, FullValue);
768 
769  ValueVector Stores;
770  Stores.resize(NumElems);
771  for (unsigned I = 0; I < NumElems; ++I) {
772  unsigned Align = Layout.getElemAlign(I);
773  Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
774  }
775  transferMetadata(&SI, Stores);
776  return true;
777 }
778 
779 bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
780  return splitCall(CI);
781 }
782 
783 // Delete the instructions that we scalarized. If a full vector result
784 // is still needed, recreate it using InsertElements.
785 bool ScalarizerVisitor::finish() {
786  // The presence of data in Gathered or Scattered indicates changes
787  // made to the Function.
788  if (Gathered.empty() && Scattered.empty())
789  return false;
790  for (const auto &GMI : Gathered) {
791  Instruction *Op = GMI.first;
792  ValueVector &CV = *GMI.second;
793  if (!Op->use_empty()) {
794  // The value is still needed, so recreate it using a series of
795  // InsertElements.
796  Type *Ty = Op->getType();
797  Value *Res = UndefValue::get(Ty);
798  BasicBlock *BB = Op->getParent();
799  unsigned Count = Ty->getVectorNumElements();
800  IRBuilder<> Builder(Op);
801  if (isa<PHINode>(Op))
802  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
803  for (unsigned I = 0; I < Count; ++I)
804  Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
805  Op->getName() + ".upto" + Twine(I));
806  Res->takeName(Op);
807  Op->replaceAllUsesWith(Res);
808  }
809  Op->eraseFromParent();
810  }
811  Gathered.clear();
812  Scattered.clear();
813  return true;
814 }
815 
817  Module &M = *F.getParent();
818  unsigned ParallelLoopAccessMDKind =
819  M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
820  ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
821  bool Changed = Impl.visit(F);
822  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
823 }
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode *>> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:257
Type * getVectorElementType() const
Definition: Type.h:370
Value * getValueOperand()
Definition: Instructions.h:409
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:452
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1984
bool isSimple() const
Definition: Instructions.h:276
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1551
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:611
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1333
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
Base class for instruction visitors.
Definition: InstVisitor.h:80
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name)
Provided to resolve &#39;CreateAlignedLoad(Ptr, Align, "...")&#39; correctly, instead of converting the strin...
Definition: IRBuilder.h:1428
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:629
This instruction constructs a fixed permutation of two input vectors.
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile=false)
Definition: IRBuilder.h:1465
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
unsigned getMDKindID(StringRef Name) const
getMDKindID - Return a unique non-zero ID for the specified metadata kind.
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer", "Scalarize vector operations", false, false) INITIALIZE_PASS_END(ScalarizerLegacyPass
void initializeScalarizerLegacyPassPass(PassRegistry &)
This class represents the LLVM &#39;select&#39; instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
uint64_t getNumElements() const
Definition: DerivedTypes.h:390
Type * getSourceElementType() const
Definition: Instructions.h:970
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:89
unsigned getNumIndices() const
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1767
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:606
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This instruction compares its operands according to the predicate given to the constructor.
This class represents a no-op cast from one type to another.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.h:2232
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1018
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:126
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: Scalarizer.cpp:816
Value * getOperand(unsigned i) const
Definition: User.h:169
Class to represent pointers.
Definition: DerivedTypes.h:498
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:303
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1992
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:140
const BasicBlock & getEntryBlock() const
Definition: Function.h:639
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:609
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
This instruction inserts a single (scalar) element into a VectorType value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
static Function * getScalarIntrinsicDeclaration(Module *M, Intrinsic::ID ID, VectorType *Ty)
Definition: Scalarizer.cpp:451
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
scalarizer
Definition: Scalarizer.cpp:221
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:2057
static bool isTriviallyScalariable(Intrinsic::ID ID)
Definition: Scalarizer.cpp:446
This instruction compares its operands according to the predicate given to the constructor.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Scalarize vector operations
Definition: Scalarizer.cpp:221
Value * getPointerOperand()
Definition: Instructions.h:284
self_iterator getIterator()
Definition: ilist_node.h:81
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2077
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1424
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static int getMaskValue(const Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2004
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1493
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
Module.h This file contains the declarations for the Module class.
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2090
This file declares helper objects for defining debug options that can be configured via the command l...
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:749
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:306
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:613
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:90
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:493
This pass converts vector operations into scalar operations, in order to expose optimization opportun...
Class to represent vector types.
Definition: DerivedTypes.h:424
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
void push_back(pointer val)
Definition: ilist.h:311
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:593
static cl::opt< bool > ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize loads and store"))
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1153
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:324
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:240
void clear()
Definition: ilist.h:307
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1201
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
uint32_t Size
Definition: Profile.cpp:46
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:365
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1804
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:605
Type * getElementType() const
Definition: DerivedTypes.h:391
FunctionPass * createScalarizerPass()
Create a legacy pass manager instance of the Scalarizer pass.
Definition: Scalarizer.cpp:296
A container for analyses that lazily runs them and caches their results.
bool isSimple() const
Definition: Instructions.h:401
VectorType * getType() const
Overload to return most specific vector type.
Value * getPointerOperand()
Definition: Instructions.h:412
bool use_empty() const
Definition: Value.h:322
const BasicBlock * getParent() const
Definition: Instruction.h:66
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:42
void resize(size_type N)
Definition: SmallVector.h:344