LLVM  6.0.0svn
Scalarizer.cpp
Go to the documentation of this file.
1 //===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass converts vector operations into scalar operations, in order
11 // to expose optimization opportunities on the individual scalar operations.
12 // It is mainly intended for targets that do not have vector units, but it
13 // may also be useful for revectorizing code to different vector widths.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Twine.h"
20 #include "llvm/IR/Argument.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/InstVisitor.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/Options.h"
40 #include "llvm/Transforms/Scalar.h"
41 #include <cassert>
42 #include <cstdint>
43 #include <iterator>
44 #include <map>
45 #include <utility>
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "scalarizer"
50 
51 namespace {
52 
53 // Used to store the scattered form of a vector.
54 using ValueVector = SmallVector<Value *, 8>;
55 
56 // Used to map a vector Value to its scattered form. We use std::map
57 // because we want iterators to persist across insertion and because the
58 // values are relatively large.
59 using ScatterMap = std::map<Value *, ValueVector>;
60 
61 // Lists Instructions that have been replaced with scalar implementations,
62 // along with a pointer to their scattered forms.
64 
65 // Provides a very limited vector-like interface for lazily accessing one
66 // component of a scattered vector or vector pointer.
67 class Scatterer {
68 public:
69  Scatterer() = default;
70 
71  // Scatter V into Size components. If new instructions are needed,
72  // insert them before BBI in BB. If Cache is nonnull, use it to cache
73  // the results.
74  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
75  ValueVector *cachePtr = nullptr);
76 
77  // Return component I, creating a new Value for it if necessary.
78  Value *operator[](unsigned I);
79 
80  // Return the number of components.
81  unsigned size() const { return Size; }
82 
83 private:
84  BasicBlock *BB;
86  Value *V;
87  ValueVector *CachePtr;
88  PointerType *PtrTy;
89  ValueVector Tmp;
90  unsigned Size;
91 };
92 
93 // FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
94 // called Name that compares X and Y in the same way as FCI.
95 struct FCmpSplitter {
96  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
97 
98  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
99  const Twine &Name) const {
100  return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
101  }
102 
103  FCmpInst &FCI;
104 };
105 
106 // ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
107 // called Name that compares X and Y in the same way as ICI.
108 struct ICmpSplitter {
109  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
110 
111  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
112  const Twine &Name) const {
113  return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
114  }
115 
116  ICmpInst &ICI;
117 };
118 
119 // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
120 // a binary operator like BO called Name with operands X and Y.
121 struct BinarySplitter {
122  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
123 
124  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
125  const Twine &Name) const {
126  return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
127  }
128 
129  BinaryOperator &BO;
130 };
131 
132 // Information about a load or store that we're scalarizing.
133 struct VectorLayout {
134  VectorLayout() = default;
135 
136  // Return the alignment of element I.
137  uint64_t getElemAlign(unsigned I) {
138  return MinAlign(VecAlign, I * ElemSize);
139  }
140 
141  // The type of the vector.
142  VectorType *VecTy = nullptr;
143 
144  // The type of each element.
145  Type *ElemTy = nullptr;
146 
147  // The alignment of the vector.
148  uint64_t VecAlign = 0;
149 
150  // The size of each element.
151  uint64_t ElemSize = 0;
152 };
153 
154 class Scalarizer : public FunctionPass,
155  public InstVisitor<Scalarizer, bool> {
156 public:
157  static char ID;
158 
159  Scalarizer() : FunctionPass(ID) {
161  }
162 
163  bool doInitialization(Module &M) override;
164  bool runOnFunction(Function &F) override;
165 
166  // InstVisitor methods. They return true if the instruction was scalarized,
167  // false if nothing changed.
168  bool visitInstruction(Instruction &I) { return false; }
169  bool visitSelectInst(SelectInst &SI);
170  bool visitICmpInst(ICmpInst &ICI);
171  bool visitFCmpInst(FCmpInst &FCI);
172  bool visitBinaryOperator(BinaryOperator &BO);
173  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
174  bool visitCastInst(CastInst &CI);
175  bool visitBitCastInst(BitCastInst &BCI);
176  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
177  bool visitPHINode(PHINode &PHI);
178  bool visitLoadInst(LoadInst &LI);
179  bool visitStoreInst(StoreInst &SI);
180  bool visitCallInst(CallInst &ICI);
181 
182  static void registerOptions() {
183  // This is disabled by default because having separate loads and stores
184  // makes it more likely that the -combiner-alias-analysis limits will be
185  // reached.
187  &Scalarizer::ScalarizeLoadStore>(
188  "scalarize-load-store",
189  "Allow the scalarizer pass to scalarize loads and store", false);
190  }
191 
192 private:
193  Scatterer scatter(Instruction *Point, Value *V);
194  void gather(Instruction *Op, const ValueVector &CV);
195  bool canTransferMetadata(unsigned Kind);
196  void transferMetadata(Instruction *Op, const ValueVector &CV);
197  bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout,
198  const DataLayout &DL);
199  bool finish();
200 
201  template<typename T> bool splitBinary(Instruction &, const T &);
202 
203  bool splitCall(CallInst &CI);
204 
205  ScatterMap Scattered;
206  GatherList Gathered;
207  unsigned ParallelLoopAccessMDKind;
208  bool ScalarizeLoadStore;
209 };
210 
211 } // end anonymous namespace
212 
213 char Scalarizer::ID = 0;
214 
215 INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
216  "Scalarize vector operations", false, false)
217 
218 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
219  ValueVector *cachePtr)
220  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
221  Type *Ty = V->getType();
222  PtrTy = dyn_cast<PointerType>(Ty);
223  if (PtrTy)
224  Ty = PtrTy->getElementType();
225  Size = Ty->getVectorNumElements();
226  if (!CachePtr)
227  Tmp.resize(Size, nullptr);
228  else if (CachePtr->empty())
229  CachePtr->resize(Size, nullptr);
230  else
231  assert(Size == CachePtr->size() && "Inconsistent vector sizes");
232 }
233 
234 // Return component I, creating a new Value for it if necessary.
235 Value *Scatterer::operator[](unsigned I) {
236  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
237  // Try to reuse a previous value.
238  if (CV[I])
239  return CV[I];
240  IRBuilder<> Builder(BB, BBI);
241  if (PtrTy) {
242  if (!CV[0]) {
243  Type *Ty =
244  PointerType::get(PtrTy->getElementType()->getVectorElementType(),
245  PtrTy->getAddressSpace());
246  CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
247  }
248  if (I != 0)
249  CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
250  V->getName() + ".i" + Twine(I));
251  } else {
252  // Search through a chain of InsertElementInsts looking for element I.
253  // Record other elements in the cache. The new V is still suitable
254  // for all uncached indices.
255  while (true) {
257  if (!Insert)
258  break;
259  ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
260  if (!Idx)
261  break;
262  unsigned J = Idx->getZExtValue();
263  V = Insert->getOperand(0);
264  if (I == J) {
265  CV[J] = Insert->getOperand(1);
266  return CV[J];
267  } else if (!CV[J]) {
268  // Only cache the first entry we find for each index we're not actively
269  // searching for. This prevents us from going too far up the chain and
270  // caching incorrect entries.
271  CV[J] = Insert->getOperand(1);
272  }
273  }
274  CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
275  V->getName() + ".i" + Twine(I));
276  }
277  return CV[I];
278 }
279 
280 bool Scalarizer::doInitialization(Module &M) {
281  ParallelLoopAccessMDKind =
282  M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
283  ScalarizeLoadStore =
284  M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>();
285  return false;
286 }
287 
289  if (skipFunction(F))
290  return false;
291  assert(Gathered.empty() && Scattered.empty());
292  for (BasicBlock &BB : F) {
293  for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
294  Instruction *I = &*II;
295  bool Done = visit(I);
296  ++II;
297  if (Done && I->getType()->isVoidTy())
298  I->eraseFromParent();
299  }
300  }
301  return finish();
302 }
303 
304 // Return a scattered form of V that can be accessed by Point. V must be a
305 // vector or a pointer to a vector.
306 Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
307  if (Argument *VArg = dyn_cast<Argument>(V)) {
308  // Put the scattered form of arguments in the entry block,
309  // so that it can be used everywhere.
310  Function *F = VArg->getParent();
311  BasicBlock *BB = &F->getEntryBlock();
312  return Scatterer(BB, BB->begin(), V, &Scattered[V]);
313  }
314  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
315  // Put the scattered form of an instruction directly after the
316  // instruction.
317  BasicBlock *BB = VOp->getParent();
318  return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
319  V, &Scattered[V]);
320  }
321  // In the fallback case, just put the scattered before Point and
322  // keep the result local to Point.
323  return Scatterer(Point->getParent(), Point->getIterator(), V);
324 }
325 
326 // Replace Op with the gathered form of the components in CV. Defer the
327 // deletion of Op and creation of the gathered form to the end of the pass,
328 // so that we can avoid creating the gathered form if all uses of Op are
329 // replaced with uses of CV.
330 void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
331  // Since we're not deleting Op yet, stub out its operands, so that it
332  // doesn't make anything live unnecessarily.
333  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
334  Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
335 
336  transferMetadata(Op, CV);
337 
338  // If we already have a scattered form of Op (created from ExtractElements
339  // of Op itself), replace them with the new form.
340  ValueVector &SV = Scattered[Op];
341  if (!SV.empty()) {
342  for (unsigned I = 0, E = SV.size(); I != E; ++I) {
343  Value *V = SV[I];
344  if (V == nullptr)
345  continue;
346 
347  Instruction *Old = cast<Instruction>(V);
348  CV[I]->takeName(Old);
349  Old->replaceAllUsesWith(CV[I]);
350  Old->eraseFromParent();
351  }
352  }
353  SV = CV;
354  Gathered.push_back(GatherList::value_type(Op, &SV));
355 }
356 
357 // Return true if it is safe to transfer the given metadata tag from
358 // vector to scalar instructions.
359 bool Scalarizer::canTransferMetadata(unsigned Tag) {
360  return (Tag == LLVMContext::MD_tbaa
361  || Tag == LLVMContext::MD_fpmath
365  || Tag == LLVMContext::MD_noalias
366  || Tag == ParallelLoopAccessMDKind);
367 }
368 
369 // Transfer metadata from Op to the instructions in CV if it is known
370 // to be safe to do so.
371 void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
374  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
375  if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
376  for (const auto &MD : MDs)
377  if (canTransferMetadata(MD.first))
378  New->setMetadata(MD.first, MD.second);
379  if (Op->getDebugLoc() && !New->getDebugLoc())
380  New->setDebugLoc(Op->getDebugLoc());
381  }
382  }
383 }
384 
385 // Try to fill in Layout from Ty, returning true on success. Alignment is
386 // the alignment of the vector, or 0 if the ABI default should be used.
387 bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
388  VectorLayout &Layout, const DataLayout &DL) {
389  // Make sure we're dealing with a vector.
390  Layout.VecTy = dyn_cast<VectorType>(Ty);
391  if (!Layout.VecTy)
392  return false;
393 
394  // Check that we're dealing with full-byte elements.
395  Layout.ElemTy = Layout.VecTy->getElementType();
396  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
397  DL.getTypeStoreSizeInBits(Layout.ElemTy))
398  return false;
399 
400  if (Alignment)
401  Layout.VecAlign = Alignment;
402  else
403  Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
404  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
405  return true;
406 }
407 
408 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
409 // to create an instruction like I with operands X and Y and name Name.
410 template<typename Splitter>
411 bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
413  if (!VT)
414  return false;
415 
416  unsigned NumElems = VT->getNumElements();
417  IRBuilder<> Builder(&I);
418  Scatterer Op0 = scatter(&I, I.getOperand(0));
419  Scatterer Op1 = scatter(&I, I.getOperand(1));
420  assert(Op0.size() == NumElems && "Mismatched binary operation");
421  assert(Op1.size() == NumElems && "Mismatched binary operation");
422  ValueVector Res;
423  Res.resize(NumElems);
424  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
425  Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
426  I.getName() + ".i" + Twine(Elem));
427  gather(&I, Res);
428  return true;
429 }
430 
432  return isTriviallyVectorizable(ID);
433 }
434 
435 // All of the current scalarizable intrinsics only have one mangled type.
438  VectorType *Ty) {
439  return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
440 }
441 
442 /// If a call to a vector typed intrinsic function, split into a scalar call per
443 /// element if possible for the intrinsic.
444 bool Scalarizer::splitCall(CallInst &CI) {
445  VectorType *VT = dyn_cast<VectorType>(CI.getType());
446  if (!VT)
447  return false;
448 
449  Function *F = CI.getCalledFunction();
450  if (!F)
451  return false;
452 
455  return false;
456 
457  unsigned NumElems = VT->getNumElements();
458  unsigned NumArgs = CI.getNumArgOperands();
459 
460  ValueVector ScalarOperands(NumArgs);
461  SmallVector<Scatterer, 8> Scattered(NumArgs);
462 
463  Scattered.resize(NumArgs);
464 
465  // Assumes that any vector type has the same number of elements as the return
466  // vector type, which is true for all current intrinsics.
467  for (unsigned I = 0; I != NumArgs; ++I) {
468  Value *OpI = CI.getOperand(I);
469  if (OpI->getType()->isVectorTy()) {
470  Scattered[I] = scatter(&CI, OpI);
471  assert(Scattered[I].size() == NumElems && "mismatched call operands");
472  } else {
473  ScalarOperands[I] = OpI;
474  }
475  }
476 
477  ValueVector Res(NumElems);
478  ValueVector ScalarCallOps(NumArgs);
479 
480  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
481  IRBuilder<> Builder(&CI);
482 
483  // Perform actual scalarization, taking care to preserve any scalar operands.
484  for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
485  ScalarCallOps.clear();
486 
487  for (unsigned J = 0; J != NumArgs; ++J) {
488  if (hasVectorInstrinsicScalarOpd(ID, J))
489  ScalarCallOps.push_back(ScalarOperands[J]);
490  else
491  ScalarCallOps.push_back(Scattered[J][Elem]);
492  }
493 
494  Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
495  CI.getName() + ".i" + Twine(Elem));
496  }
497 
498  gather(&CI, Res);
499  return true;
500 }
501 
502 bool Scalarizer::visitSelectInst(SelectInst &SI) {
503  VectorType *VT = dyn_cast<VectorType>(SI.getType());
504  if (!VT)
505  return false;
506 
507  unsigned NumElems = VT->getNumElements();
508  IRBuilder<> Builder(&SI);
509  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
510  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
511  assert(Op1.size() == NumElems && "Mismatched select");
512  assert(Op2.size() == NumElems && "Mismatched select");
513  ValueVector Res;
514  Res.resize(NumElems);
515 
516  if (SI.getOperand(0)->getType()->isVectorTy()) {
517  Scatterer Op0 = scatter(&SI, SI.getOperand(0));
518  assert(Op0.size() == NumElems && "Mismatched select");
519  for (unsigned I = 0; I < NumElems; ++I)
520  Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
521  SI.getName() + ".i" + Twine(I));
522  } else {
523  Value *Op0 = SI.getOperand(0);
524  for (unsigned I = 0; I < NumElems; ++I)
525  Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
526  SI.getName() + ".i" + Twine(I));
527  }
528  gather(&SI, Res);
529  return true;
530 }
531 
532 bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
533  return splitBinary(ICI, ICmpSplitter(ICI));
534 }
535 
536 bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
537  return splitBinary(FCI, FCmpSplitter(FCI));
538 }
539 
540 bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
541  return splitBinary(BO, BinarySplitter(BO));
542 }
543 
544 bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
545  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
546  if (!VT)
547  return false;
548 
549  IRBuilder<> Builder(&GEPI);
550  unsigned NumElems = VT->getNumElements();
551  unsigned NumIndices = GEPI.getNumIndices();
552 
553  // The base pointer might be scalar even if it's a vector GEP. In those cases,
554  // splat the pointer into a vector value, and scatter that vector.
555  Value *Op0 = GEPI.getOperand(0);
556  if (!Op0->getType()->isVectorTy())
557  Op0 = Builder.CreateVectorSplat(NumElems, Op0);
558  Scatterer Base = scatter(&GEPI, Op0);
559 
561  Ops.resize(NumIndices);
562  for (unsigned I = 0; I < NumIndices; ++I) {
563  Value *Op = GEPI.getOperand(I + 1);
564 
565  // The indices might be scalars even if it's a vector GEP. In those cases,
566  // splat the scalar into a vector value, and scatter that vector.
567  if (!Op->getType()->isVectorTy())
568  Op = Builder.CreateVectorSplat(NumElems, Op);
569 
570  Ops[I] = scatter(&GEPI, Op);
571  }
572 
573  ValueVector Res;
574  Res.resize(NumElems);
575  for (unsigned I = 0; I < NumElems; ++I) {
576  SmallVector<Value *, 8> Indices;
577  Indices.resize(NumIndices);
578  for (unsigned J = 0; J < NumIndices; ++J)
579  Indices[J] = Ops[J][I];
580  Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
581  GEPI.getName() + ".i" + Twine(I));
582  if (GEPI.isInBounds())
583  if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
584  NewGEPI->setIsInBounds();
585  }
586  gather(&GEPI, Res);
587  return true;
588 }
589 
590 bool Scalarizer::visitCastInst(CastInst &CI) {
592  if (!VT)
593  return false;
594 
595  unsigned NumElems = VT->getNumElements();
596  IRBuilder<> Builder(&CI);
597  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
598  assert(Op0.size() == NumElems && "Mismatched cast");
599  ValueVector Res;
600  Res.resize(NumElems);
601  for (unsigned I = 0; I < NumElems; ++I)
602  Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
603  CI.getName() + ".i" + Twine(I));
604  gather(&CI, Res);
605  return true;
606 }
607 
608 bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
609  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
610  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
611  if (!DstVT || !SrcVT)
612  return false;
613 
614  unsigned DstNumElems = DstVT->getNumElements();
615  unsigned SrcNumElems = SrcVT->getNumElements();
616  IRBuilder<> Builder(&BCI);
617  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
618  ValueVector Res;
619  Res.resize(DstNumElems);
620 
621  if (DstNumElems == SrcNumElems) {
622  for (unsigned I = 0; I < DstNumElems; ++I)
623  Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
624  BCI.getName() + ".i" + Twine(I));
625  } else if (DstNumElems > SrcNumElems) {
626  // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
627  // individual elements to the destination.
628  unsigned FanOut = DstNumElems / SrcNumElems;
629  Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
630  unsigned ResI = 0;
631  for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
632  Value *V = Op0[Op0I];
633  Instruction *VI;
634  // Look through any existing bitcasts before converting to <N x t2>.
635  // In the best case, the resulting conversion might be a no-op.
636  while ((VI = dyn_cast<Instruction>(V)) &&
637  VI->getOpcode() == Instruction::BitCast)
638  V = VI->getOperand(0);
639  V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
640  Scatterer Mid = scatter(&BCI, V);
641  for (unsigned MidI = 0; MidI < FanOut; ++MidI)
642  Res[ResI++] = Mid[MidI];
643  }
644  } else {
645  // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
646  unsigned FanIn = SrcNumElems / DstNumElems;
647  Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
648  unsigned Op0I = 0;
649  for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
650  Value *V = UndefValue::get(MidTy);
651  for (unsigned MidI = 0; MidI < FanIn; ++MidI)
652  V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
653  BCI.getName() + ".i" + Twine(ResI)
654  + ".upto" + Twine(MidI));
655  Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
656  BCI.getName() + ".i" + Twine(ResI));
657  }
658  }
659  gather(&BCI, Res);
660  return true;
661 }
662 
663 bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
664  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
665  if (!VT)
666  return false;
667 
668  unsigned NumElems = VT->getNumElements();
669  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
670  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
671  ValueVector Res;
672  Res.resize(NumElems);
673 
674  for (unsigned I = 0; I < NumElems; ++I) {
675  int Selector = SVI.getMaskValue(I);
676  if (Selector < 0)
677  Res[I] = UndefValue::get(VT->getElementType());
678  else if (unsigned(Selector) < Op0.size())
679  Res[I] = Op0[Selector];
680  else
681  Res[I] = Op1[Selector - Op0.size()];
682  }
683  gather(&SVI, Res);
684  return true;
685 }
686 
687 bool Scalarizer::visitPHINode(PHINode &PHI) {
688  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
689  if (!VT)
690  return false;
691 
692  unsigned NumElems = VT->getNumElements();
693  IRBuilder<> Builder(&PHI);
694  ValueVector Res;
695  Res.resize(NumElems);
696 
697  unsigned NumOps = PHI.getNumOperands();
698  for (unsigned I = 0; I < NumElems; ++I)
699  Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
700  PHI.getName() + ".i" + Twine(I));
701 
702  for (unsigned I = 0; I < NumOps; ++I) {
703  Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
704  BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
705  for (unsigned J = 0; J < NumElems; ++J)
706  cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
707  }
708  gather(&PHI, Res);
709  return true;
710 }
711 
712 bool Scalarizer::visitLoadInst(LoadInst &LI) {
713  if (!ScalarizeLoadStore)
714  return false;
715  if (!LI.isSimple())
716  return false;
717 
718  VectorLayout Layout;
719  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
720  LI.getModule()->getDataLayout()))
721  return false;
722 
723  unsigned NumElems = Layout.VecTy->getNumElements();
724  IRBuilder<> Builder(&LI);
725  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
726  ValueVector Res;
727  Res.resize(NumElems);
728 
729  for (unsigned I = 0; I < NumElems; ++I)
730  Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
731  LI.getName() + ".i" + Twine(I));
732  gather(&LI, Res);
733  return true;
734 }
735 
736 bool Scalarizer::visitStoreInst(StoreInst &SI) {
737  if (!ScalarizeLoadStore)
738  return false;
739  if (!SI.isSimple())
740  return false;
741 
742  VectorLayout Layout;
743  Value *FullValue = SI.getValueOperand();
744  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
745  SI.getModule()->getDataLayout()))
746  return false;
747 
748  unsigned NumElems = Layout.VecTy->getNumElements();
749  IRBuilder<> Builder(&SI);
750  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
751  Scatterer Val = scatter(&SI, FullValue);
752 
753  ValueVector Stores;
754  Stores.resize(NumElems);
755  for (unsigned I = 0; I < NumElems; ++I) {
756  unsigned Align = Layout.getElemAlign(I);
757  Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
758  }
759  transferMetadata(&SI, Stores);
760  return true;
761 }
762 
763 bool Scalarizer::visitCallInst(CallInst &CI) {
764  return splitCall(CI);
765 }
766 
767 // Delete the instructions that we scalarized. If a full vector result
768 // is still needed, recreate it using InsertElements.
769 bool Scalarizer::finish() {
770  // The presence of data in Gathered or Scattered indicates changes
771  // made to the Function.
772  if (Gathered.empty() && Scattered.empty())
773  return false;
774  for (const auto &GMI : Gathered) {
775  Instruction *Op = GMI.first;
776  ValueVector &CV = *GMI.second;
777  if (!Op->use_empty()) {
778  // The value is still needed, so recreate it using a series of
779  // InsertElements.
780  Type *Ty = Op->getType();
781  Value *Res = UndefValue::get(Ty);
782  BasicBlock *BB = Op->getParent();
783  unsigned Count = Ty->getVectorNumElements();
784  IRBuilder<> Builder(Op);
785  if (isa<PHINode>(Op))
786  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
787  for (unsigned I = 0; I < Count; ++I)
788  Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
789  Op->getName() + ".upto" + Twine(I));
790  Res->takeName(Op);
791  Op->replaceAllUsesWith(Res);
792  }
793  Op->eraseFromParent();
794  }
795  Gathered.clear();
796  Scattered.clear();
797  return true;
798 }
799 
801  return new Scalarizer();
802 }
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode *>> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:218
Value * getValueOperand()
Definition: Instructions.h:395
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:394
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1638
bool isSimple() const
Definition: Instructions.h:262
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1278
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:818
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1112
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
Base class for instruction visitors.
Definition: InstVisitor.h:81
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:617
This instruction constructs a fixed permutation of two input vectors.
F(f)
An instruction for reading from memory.
Definition: Instructions.h:164
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile=false)
Definition: IRBuilder.h:1203
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
unsigned getMDKindID(StringRef Name) const
getMDKindID - Return a unique non-zero ID for the specified metadata kind.
This class represents the LLVM &#39;select&#39; instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getNumArgOperands() const
Return the number of call arguments.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:237
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
Type * getSourceElementType() const
Definition: Instructions.h:934
unsigned getNumIndices() const
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:813
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This instruction compares its operands according to the predicate given to the constructor.
This class represents a no-op cast from one type to another.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.h:1841
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
An instruction for storing to memory.
Definition: Instructions.h:306
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer", "Scalarize vector operations", false, false) Scatterer
Definition: Scalarizer.cpp:215
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:292
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:979
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:128
Value * getOperand(unsigned i) const
Definition: User.h:154
Class to represent pointers.
Definition: DerivedTypes.h:467
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:301
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1645
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:141
const BasicBlock & getEntryBlock() const
Definition: Function.h:572
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:602
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:837
static bool runOnFunction(Function &F, bool PostInlining)
This instruction inserts a single (scalar) element into a VectorType value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:200
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static Function * getScalarIntrinsicDeclaration(Module *M, Intrinsic::ID ID, VectorType *Ty)
Definition: Scalarizer.cpp:436
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:1693
static bool isTriviallyScalariable(Intrinsic::ID ID)
Definition: Scalarizer.cpp:431
This instruction compares its operands according to the predicate given to the constructor.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Value * getPointerOperand()
Definition: Instructions.h:270
self_iterator getIterator()
Definition: ilist_node.h:82
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1713
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1319
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
ValT getOption() const
Query for a debug option&#39;s value.
Definition: LLVMContext.h:314
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:1658
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1227
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
Module.h This file contains the declarations for the Module class.
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1726
This file declares helper objects for defining debug options that can be configured via the command l...
void initializeScalarizerPass(PassRegistry &)
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:682
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:308
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:820
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:72
static void registerOption(StringRef ArgStr, StringRef Desc, const ValT &InitValue)
Registers an option with the OptionRegistry singleton.
Definition: Options.h:96
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:175
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
void push_back(pointer val)
Definition: ilist.h:326
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:530
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
static int getMaskValue(Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:285
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:226
void clear()
Definition: ilist.h:322
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:351
const unsigned Kind
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1480
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LoadInst * CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name)
Definition: IRBuilder.h:1186
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:556
LLVM Value Representation.
Definition: Value.h:73
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:386
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
Type * getElementType() const
Definition: DerivedTypes.h:360
FunctionPass * createScalarizerPass()
Definition: Scalarizer.cpp:800
bool isSimple() const
Definition: Instructions.h:387
static void Split(std::vector< std::string > &V, StringRef S)
Splits a string of comma separated items in to a vector of strings.
VectorType * getType() const
Overload to return most specific vector type.
Value * getPointerOperand()
Definition: Instructions.h:398
bool use_empty() const
Definition: Value.h:328
const BasicBlock * getParent() const
Definition: Instruction.h:67
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:35
void resize(size_type N)
Definition: SmallVector.h:355