LLVM  6.0.0svn
Scalarizer.cpp
Go to the documentation of this file.
1 //===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass converts vector operations into scalar operations, in order
11 // to expose optimization opportunities on the individual scalar operations.
12 // It is mainly intended for targets that do not have vector units, but it
13 // may also be useful for revectorizing code to different vector widths.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/IR/IRBuilder.h"
20 #include "llvm/IR/InstVisitor.h"
21 #include "llvm/Pass.h"
22 #include "llvm/Transforms/Scalar.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "scalarizer"
28 
29 namespace {
30 // Used to store the scattered form of a vector.
31 typedef SmallVector<Value *, 8> ValueVector;
32 
33 // Used to map a vector Value to its scattered form. We use std::map
34 // because we want iterators to persist across insertion and because the
35 // values are relatively large.
36 typedef std::map<Value *, ValueVector> ScatterMap;
37 
38 // Lists Instructions that have been replaced with scalar implementations,
39 // along with a pointer to their scattered forms.
41 
42 // Provides a very limited vector-like interface for lazily accessing one
43 // component of a scattered vector or vector pointer.
44 class Scatterer {
45 public:
46  Scatterer() {}
47 
48  // Scatter V into Size components. If new instructions are needed,
49  // insert them before BBI in BB. If Cache is nonnull, use it to cache
50  // the results.
51  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
52  ValueVector *cachePtr = nullptr);
53 
54  // Return component I, creating a new Value for it if necessary.
55  Value *operator[](unsigned I);
56 
57  // Return the number of components.
58  unsigned size() const { return Size; }
59 
60 private:
61  BasicBlock *BB;
63  Value *V;
64  ValueVector *CachePtr;
65  PointerType *PtrTy;
66  ValueVector Tmp;
67  unsigned Size;
68 };
69 
70 // FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
71 // called Name that compares X and Y in the same way as FCI.
72 struct FCmpSplitter {
73  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
74  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
75  const Twine &Name) const {
76  return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
77  }
78  FCmpInst &FCI;
79 };
80 
81 // ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
82 // called Name that compares X and Y in the same way as ICI.
83 struct ICmpSplitter {
84  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
85  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
86  const Twine &Name) const {
87  return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
88  }
89  ICmpInst &ICI;
90 };
91 
92 // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
93 // a binary operator like BO called Name with operands X and Y.
94 struct BinarySplitter {
95  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
96  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
97  const Twine &Name) const {
98  return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
99  }
100  BinaryOperator &BO;
101 };
102 
103 // Information about a load or store that we're scalarizing.
104 struct VectorLayout {
105  VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}
106 
107  // Return the alignment of element I.
108  uint64_t getElemAlign(unsigned I) {
109  return MinAlign(VecAlign, I * ElemSize);
110  }
111 
112  // The type of the vector.
113  VectorType *VecTy;
114 
115  // The type of each element.
116  Type *ElemTy;
117 
118  // The alignment of the vector.
119  uint64_t VecAlign;
120 
121  // The size of each element.
122  uint64_t ElemSize;
123 };
124 
125 class Scalarizer : public FunctionPass,
126  public InstVisitor<Scalarizer, bool> {
127 public:
128  static char ID;
129 
130  Scalarizer() :
131  FunctionPass(ID) {
133  }
134 
135  bool doInitialization(Module &M) override;
136  bool runOnFunction(Function &F) override;
137 
138  // InstVisitor methods. They return true if the instruction was scalarized,
139  // false if nothing changed.
140  bool visitInstruction(Instruction &) { return false; }
141  bool visitSelectInst(SelectInst &SI);
142  bool visitICmpInst(ICmpInst &);
143  bool visitFCmpInst(FCmpInst &);
144  bool visitBinaryOperator(BinaryOperator &);
145  bool visitGetElementPtrInst(GetElementPtrInst &);
146  bool visitCastInst(CastInst &);
147  bool visitBitCastInst(BitCastInst &);
148  bool visitShuffleVectorInst(ShuffleVectorInst &);
149  bool visitPHINode(PHINode &);
150  bool visitLoadInst(LoadInst &);
151  bool visitStoreInst(StoreInst &);
152  bool visitCallInst(CallInst &I);
153 
154  static void registerOptions() {
155  // This is disabled by default because having separate loads and stores
156  // makes it more likely that the -combiner-alias-analysis limits will be
157  // reached.
159  &Scalarizer::ScalarizeLoadStore>(
160  "scalarize-load-store",
161  "Allow the scalarizer pass to scalarize loads and store", false);
162  }
163 
164 private:
165  Scatterer scatter(Instruction *, Value *);
166  void gather(Instruction *, const ValueVector &);
167  bool canTransferMetadata(unsigned Kind);
168  void transferMetadata(Instruction *, const ValueVector &);
169  bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &);
170  bool finish();
171 
172  template<typename T> bool splitBinary(Instruction &, const T &);
173 
174  bool splitCall(CallInst &CI);
175 
176  ScatterMap Scattered;
177  GatherList Gathered;
178  unsigned ParallelLoopAccessMDKind;
179  bool ScalarizeLoadStore;
180 };
181 
182 char Scalarizer::ID = 0;
183 } // end anonymous namespace
184 
185 INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
186  "Scalarize vector operations", false, false)
187 
188 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
189  ValueVector *cachePtr)
190  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
191  Type *Ty = V->getType();
192  PtrTy = dyn_cast<PointerType>(Ty);
193  if (PtrTy)
194  Ty = PtrTy->getElementType();
195  Size = Ty->getVectorNumElements();
196  if (!CachePtr)
197  Tmp.resize(Size, nullptr);
198  else if (CachePtr->empty())
199  CachePtr->resize(Size, nullptr);
200  else
201  assert(Size == CachePtr->size() && "Inconsistent vector sizes");
202 }
203 
204 // Return component I, creating a new Value for it if necessary.
205 Value *Scatterer::operator[](unsigned I) {
206  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
207  // Try to reuse a previous value.
208  if (CV[I])
209  return CV[I];
210  IRBuilder<> Builder(BB, BBI);
211  if (PtrTy) {
212  if (!CV[0]) {
213  Type *Ty =
214  PointerType::get(PtrTy->getElementType()->getVectorElementType(),
215  PtrTy->getAddressSpace());
216  CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
217  }
218  if (I != 0)
219  CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
220  V->getName() + ".i" + Twine(I));
221  } else {
222  // Search through a chain of InsertElementInsts looking for element I.
223  // Record other elements in the cache. The new V is still suitable
224  // for all uncached indices.
225  for (;;) {
227  if (!Insert)
228  break;
229  ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
230  if (!Idx)
231  break;
232  unsigned J = Idx->getZExtValue();
233  V = Insert->getOperand(0);
234  if (I == J) {
235  CV[J] = Insert->getOperand(1);
236  return CV[J];
237  } else if (!CV[J]) {
238  // Only cache the first entry we find for each index we're not actively
239  // searching for. This prevents us from going too far up the chain and
240  // caching incorrect entries.
241  CV[J] = Insert->getOperand(1);
242  }
243  }
244  CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
245  V->getName() + ".i" + Twine(I));
246  }
247  return CV[I];
248 }
249 
250 bool Scalarizer::doInitialization(Module &M) {
251  ParallelLoopAccessMDKind =
252  M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
253  ScalarizeLoadStore =
254  M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>();
255  return false;
256 }
257 
258 bool Scalarizer::runOnFunction(Function &F) {
259  if (skipFunction(F))
260  return false;
261  assert(Gathered.empty() && Scattered.empty());
262  for (BasicBlock &BB : F) {
263  for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
264  Instruction *I = &*II;
265  bool Done = visit(I);
266  ++II;
267  if (Done && I->getType()->isVoidTy())
268  I->eraseFromParent();
269  }
270  }
271  return finish();
272 }
273 
274 // Return a scattered form of V that can be accessed by Point. V must be a
275 // vector or a pointer to a vector.
276 Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
277  if (Argument *VArg = dyn_cast<Argument>(V)) {
278  // Put the scattered form of arguments in the entry block,
279  // so that it can be used everywhere.
280  Function *F = VArg->getParent();
281  BasicBlock *BB = &F->getEntryBlock();
282  return Scatterer(BB, BB->begin(), V, &Scattered[V]);
283  }
284  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
285  // Put the scattered form of an instruction directly after the
286  // instruction.
287  BasicBlock *BB = VOp->getParent();
288  return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
289  V, &Scattered[V]);
290  }
291  // In the fallback case, just put the scattered before Point and
292  // keep the result local to Point.
293  return Scatterer(Point->getParent(), Point->getIterator(), V);
294 }
295 
296 // Replace Op with the gathered form of the components in CV. Defer the
297 // deletion of Op and creation of the gathered form to the end of the pass,
298 // so that we can avoid creating the gathered form if all uses of Op are
299 // replaced with uses of CV.
300 void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
301  // Since we're not deleting Op yet, stub out its operands, so that it
302  // doesn't make anything live unnecessarily.
303  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
304  Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
305 
306  transferMetadata(Op, CV);
307 
308  // If we already have a scattered form of Op (created from ExtractElements
309  // of Op itself), replace them with the new form.
310  ValueVector &SV = Scattered[Op];
311  if (!SV.empty()) {
312  for (unsigned I = 0, E = SV.size(); I != E; ++I) {
313  Value *V = SV[I];
314  if (V == nullptr)
315  continue;
316 
317  Instruction *Old = cast<Instruction>(V);
318  CV[I]->takeName(Old);
319  Old->replaceAllUsesWith(CV[I]);
320  Old->eraseFromParent();
321  }
322  }
323  SV = CV;
324  Gathered.push_back(GatherList::value_type(Op, &SV));
325 }
326 
327 // Return true if it is safe to transfer the given metadata tag from
328 // vector to scalar instructions.
329 bool Scalarizer::canTransferMetadata(unsigned Tag) {
330  return (Tag == LLVMContext::MD_tbaa
331  || Tag == LLVMContext::MD_fpmath
335  || Tag == LLVMContext::MD_noalias
336  || Tag == ParallelLoopAccessMDKind);
337 }
338 
339 // Transfer metadata from Op to the instructions in CV if it is known
340 // to be safe to do so.
341 void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
344  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
345  if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
346  for (const auto &MD : MDs)
347  if (canTransferMetadata(MD.first))
348  New->setMetadata(MD.first, MD.second);
349  if (Op->getDebugLoc() && !New->getDebugLoc())
350  New->setDebugLoc(Op->getDebugLoc());
351  }
352  }
353 }
354 
355 // Try to fill in Layout from Ty, returning true on success. Alignment is
356 // the alignment of the vector, or 0 if the ABI default should be used.
357 bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
358  VectorLayout &Layout, const DataLayout &DL) {
359  // Make sure we're dealing with a vector.
360  Layout.VecTy = dyn_cast<VectorType>(Ty);
361  if (!Layout.VecTy)
362  return false;
363 
364  // Check that we're dealing with full-byte elements.
365  Layout.ElemTy = Layout.VecTy->getElementType();
366  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
367  DL.getTypeStoreSizeInBits(Layout.ElemTy))
368  return false;
369 
370  if (Alignment)
371  Layout.VecAlign = Alignment;
372  else
373  Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
374  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
375  return true;
376 }
377 
378 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
379 // to create an instruction like I with operands X and Y and name Name.
380 template<typename Splitter>
381 bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
383  if (!VT)
384  return false;
385 
386  unsigned NumElems = VT->getNumElements();
387  IRBuilder<> Builder(&I);
388  Scatterer Op0 = scatter(&I, I.getOperand(0));
389  Scatterer Op1 = scatter(&I, I.getOperand(1));
390  assert(Op0.size() == NumElems && "Mismatched binary operation");
391  assert(Op1.size() == NumElems && "Mismatched binary operation");
392  ValueVector Res;
393  Res.resize(NumElems);
394  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
395  Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
396  I.getName() + ".i" + Twine(Elem));
397  gather(&I, Res);
398  return true;
399 }
400 
402  return isTriviallyVectorizable(ID);
403 }
404 
405 // All of the current scalarizable intrinsics only have one mangled type.
408  VectorType *Ty) {
409  return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
410 }
411 
412 /// If a call to a vector typed intrinsic function, split into a scalar call per
413 /// element if possible for the intrinsic.
414 bool Scalarizer::splitCall(CallInst &CI) {
415  VectorType *VT = dyn_cast<VectorType>(CI.getType());
416  if (!VT)
417  return false;
418 
419  Function *F = CI.getCalledFunction();
420  if (!F)
421  return false;
422 
425  return false;
426 
427  unsigned NumElems = VT->getNumElements();
428  unsigned NumArgs = CI.getNumArgOperands();
429 
430  ValueVector ScalarOperands(NumArgs);
431  SmallVector<Scatterer, 8> Scattered(NumArgs);
432 
433  Scattered.resize(NumArgs);
434 
435  // Assumes that any vector type has the same number of elements as the return
436  // vector type, which is true for all current intrinsics.
437  for (unsigned I = 0; I != NumArgs; ++I) {
438  Value *OpI = CI.getOperand(I);
439  if (OpI->getType()->isVectorTy()) {
440  Scattered[I] = scatter(&CI, OpI);
441  assert(Scattered[I].size() == NumElems && "mismatched call operands");
442  } else {
443  ScalarOperands[I] = OpI;
444  }
445  }
446 
447  ValueVector Res(NumElems);
448  ValueVector ScalarCallOps(NumArgs);
449 
450  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
451  IRBuilder<> Builder(&CI);
452 
453  // Perform actual scalarization, taking care to preserve any scalar operands.
454  for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
455  ScalarCallOps.clear();
456 
457  for (unsigned J = 0; J != NumArgs; ++J) {
458  if (hasVectorInstrinsicScalarOpd(ID, J))
459  ScalarCallOps.push_back(ScalarOperands[J]);
460  else
461  ScalarCallOps.push_back(Scattered[J][Elem]);
462  }
463 
464  Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
465  CI.getName() + ".i" + Twine(Elem));
466  }
467 
468  gather(&CI, Res);
469  return true;
470 }
471 
472 bool Scalarizer::visitSelectInst(SelectInst &SI) {
473  VectorType *VT = dyn_cast<VectorType>(SI.getType());
474  if (!VT)
475  return false;
476 
477  unsigned NumElems = VT->getNumElements();
478  IRBuilder<> Builder(&SI);
479  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
480  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
481  assert(Op1.size() == NumElems && "Mismatched select");
482  assert(Op2.size() == NumElems && "Mismatched select");
483  ValueVector Res;
484  Res.resize(NumElems);
485 
486  if (SI.getOperand(0)->getType()->isVectorTy()) {
487  Scatterer Op0 = scatter(&SI, SI.getOperand(0));
488  assert(Op0.size() == NumElems && "Mismatched select");
489  for (unsigned I = 0; I < NumElems; ++I)
490  Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
491  SI.getName() + ".i" + Twine(I));
492  } else {
493  Value *Op0 = SI.getOperand(0);
494  for (unsigned I = 0; I < NumElems; ++I)
495  Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
496  SI.getName() + ".i" + Twine(I));
497  }
498  gather(&SI, Res);
499  return true;
500 }
501 
502 bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
503  return splitBinary(ICI, ICmpSplitter(ICI));
504 }
505 
506 bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
507  return splitBinary(FCI, FCmpSplitter(FCI));
508 }
509 
510 bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
511  return splitBinary(BO, BinarySplitter(BO));
512 }
513 
514 bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
515  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
516  if (!VT)
517  return false;
518 
519  IRBuilder<> Builder(&GEPI);
520  unsigned NumElems = VT->getNumElements();
521  unsigned NumIndices = GEPI.getNumIndices();
522 
523  // The base pointer might be scalar even if it's a vector GEP. In those cases,
524  // splat the pointer into a vector value, and scatter that vector.
525  Value *Op0 = GEPI.getOperand(0);
526  if (!Op0->getType()->isVectorTy())
527  Op0 = Builder.CreateVectorSplat(NumElems, Op0);
528  Scatterer Base = scatter(&GEPI, Op0);
529 
531  Ops.resize(NumIndices);
532  for (unsigned I = 0; I < NumIndices; ++I) {
533  Value *Op = GEPI.getOperand(I + 1);
534 
535  // The indices might be scalars even if it's a vector GEP. In those cases,
536  // splat the scalar into a vector value, and scatter that vector.
537  if (!Op->getType()->isVectorTy())
538  Op = Builder.CreateVectorSplat(NumElems, Op);
539 
540  Ops[I] = scatter(&GEPI, Op);
541  }
542 
543  ValueVector Res;
544  Res.resize(NumElems);
545  for (unsigned I = 0; I < NumElems; ++I) {
546  SmallVector<Value *, 8> Indices;
547  Indices.resize(NumIndices);
548  for (unsigned J = 0; J < NumIndices; ++J)
549  Indices[J] = Ops[J][I];
550  Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
551  GEPI.getName() + ".i" + Twine(I));
552  if (GEPI.isInBounds())
553  if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
554  NewGEPI->setIsInBounds();
555  }
556  gather(&GEPI, Res);
557  return true;
558 }
559 
560 bool Scalarizer::visitCastInst(CastInst &CI) {
562  if (!VT)
563  return false;
564 
565  unsigned NumElems = VT->getNumElements();
566  IRBuilder<> Builder(&CI);
567  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
568  assert(Op0.size() == NumElems && "Mismatched cast");
569  ValueVector Res;
570  Res.resize(NumElems);
571  for (unsigned I = 0; I < NumElems; ++I)
572  Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
573  CI.getName() + ".i" + Twine(I));
574  gather(&CI, Res);
575  return true;
576 }
577 
578 bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
579  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
580  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
581  if (!DstVT || !SrcVT)
582  return false;
583 
584  unsigned DstNumElems = DstVT->getNumElements();
585  unsigned SrcNumElems = SrcVT->getNumElements();
586  IRBuilder<> Builder(&BCI);
587  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
588  ValueVector Res;
589  Res.resize(DstNumElems);
590 
591  if (DstNumElems == SrcNumElems) {
592  for (unsigned I = 0; I < DstNumElems; ++I)
593  Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
594  BCI.getName() + ".i" + Twine(I));
595  } else if (DstNumElems > SrcNumElems) {
596  // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
597  // individual elements to the destination.
598  unsigned FanOut = DstNumElems / SrcNumElems;
599  Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
600  unsigned ResI = 0;
601  for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
602  Value *V = Op0[Op0I];
603  Instruction *VI;
604  // Look through any existing bitcasts before converting to <N x t2>.
605  // In the best case, the resulting conversion might be a no-op.
606  while ((VI = dyn_cast<Instruction>(V)) &&
607  VI->getOpcode() == Instruction::BitCast)
608  V = VI->getOperand(0);
609  V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
610  Scatterer Mid = scatter(&BCI, V);
611  for (unsigned MidI = 0; MidI < FanOut; ++MidI)
612  Res[ResI++] = Mid[MidI];
613  }
614  } else {
615  // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
616  unsigned FanIn = SrcNumElems / DstNumElems;
617  Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
618  unsigned Op0I = 0;
619  for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
620  Value *V = UndefValue::get(MidTy);
621  for (unsigned MidI = 0; MidI < FanIn; ++MidI)
622  V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
623  BCI.getName() + ".i" + Twine(ResI)
624  + ".upto" + Twine(MidI));
625  Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
626  BCI.getName() + ".i" + Twine(ResI));
627  }
628  }
629  gather(&BCI, Res);
630  return true;
631 }
632 
633 bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
634  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
635  if (!VT)
636  return false;
637 
638  unsigned NumElems = VT->getNumElements();
639  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
640  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
641  ValueVector Res;
642  Res.resize(NumElems);
643 
644  for (unsigned I = 0; I < NumElems; ++I) {
645  int Selector = SVI.getMaskValue(I);
646  if (Selector < 0)
647  Res[I] = UndefValue::get(VT->getElementType());
648  else if (unsigned(Selector) < Op0.size())
649  Res[I] = Op0[Selector];
650  else
651  Res[I] = Op1[Selector - Op0.size()];
652  }
653  gather(&SVI, Res);
654  return true;
655 }
656 
657 bool Scalarizer::visitPHINode(PHINode &PHI) {
658  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
659  if (!VT)
660  return false;
661 
662  unsigned NumElems = VT->getNumElements();
663  IRBuilder<> Builder(&PHI);
664  ValueVector Res;
665  Res.resize(NumElems);
666 
667  unsigned NumOps = PHI.getNumOperands();
668  for (unsigned I = 0; I < NumElems; ++I)
669  Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
670  PHI.getName() + ".i" + Twine(I));
671 
672  for (unsigned I = 0; I < NumOps; ++I) {
673  Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
674  BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
675  for (unsigned J = 0; J < NumElems; ++J)
676  cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
677  }
678  gather(&PHI, Res);
679  return true;
680 }
681 
682 bool Scalarizer::visitLoadInst(LoadInst &LI) {
683  if (!ScalarizeLoadStore)
684  return false;
685  if (!LI.isSimple())
686  return false;
687 
688  VectorLayout Layout;
689  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
690  LI.getModule()->getDataLayout()))
691  return false;
692 
693  unsigned NumElems = Layout.VecTy->getNumElements();
694  IRBuilder<> Builder(&LI);
695  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
696  ValueVector Res;
697  Res.resize(NumElems);
698 
699  for (unsigned I = 0; I < NumElems; ++I)
700  Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
701  LI.getName() + ".i" + Twine(I));
702  gather(&LI, Res);
703  return true;
704 }
705 
706 bool Scalarizer::visitStoreInst(StoreInst &SI) {
707  if (!ScalarizeLoadStore)
708  return false;
709  if (!SI.isSimple())
710  return false;
711 
712  VectorLayout Layout;
713  Value *FullValue = SI.getValueOperand();
714  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
715  SI.getModule()->getDataLayout()))
716  return false;
717 
718  unsigned NumElems = Layout.VecTy->getNumElements();
719  IRBuilder<> Builder(&SI);
720  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
721  Scatterer Val = scatter(&SI, FullValue);
722 
723  ValueVector Stores;
724  Stores.resize(NumElems);
725  for (unsigned I = 0; I < NumElems; ++I) {
726  unsigned Align = Layout.getElemAlign(I);
727  Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
728  }
729  transferMetadata(&SI, Stores);
730  return true;
731 }
732 
733 bool Scalarizer::visitCallInst(CallInst &CI) {
734  return splitCall(CI);
735 }
736 
737 // Delete the instructions that we scalarized. If a full vector result
738 // is still needed, recreate it using InsertElements.
739 bool Scalarizer::finish() {
740  // The presence of data in Gathered or Scattered indicates changes
741  // made to the Function.
742  if (Gathered.empty() && Scattered.empty())
743  return false;
744  for (const auto &GMI : Gathered) {
745  Instruction *Op = GMI.first;
746  ValueVector &CV = *GMI.second;
747  if (!Op->use_empty()) {
748  // The value is still needed, so recreate it using a series of
749  // InsertElements.
750  Type *Ty = Op->getType();
751  Value *Res = UndefValue::get(Ty);
752  BasicBlock *BB = Op->getParent();
753  unsigned Count = Ty->getVectorNumElements();
754  IRBuilder<> Builder(Op);
755  if (isa<PHINode>(Op))
756  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
757  for (unsigned I = 0; I < Count; ++I)
758  Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
759  Op->getName() + ".upto" + Twine(I));
760  Res->takeName(Op);
761  Op->replaceAllUsesWith(Res);
762  }
763  Op->eraseFromParent();
764  }
765  Gathered.clear();
766  Scattered.clear();
767  return true;
768 }
769 
771  return new Scalarizer();
772 }
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode *>> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:217
Value * getValueOperand()
Definition: Instructions.h:395
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:69
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:396
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1634
bool isSimple() const
Definition: Instructions.h:262
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1274
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:818
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1108
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
Base class for instruction visitors.
Definition: InstVisitor.h:81
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:617
This instruction constructs a fixed permutation of two input vectors.
F(f)
An instruction for reading from memory.
Definition: Instructions.h:164
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile=false)
Definition: IRBuilder.h:1199
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
unsigned getMDKindID(StringRef Name) const
getMDKindID - Return a unique non-zero ID for the specified metadata kind.
This class represents the LLVM &#39;select&#39; instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getNumArgOperands() const
Return the number of call arguments.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:237
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:664
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
Type * getSourceElementType() const
Definition: Instructions.h:934
unsigned getNumIndices() const
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1444
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:813
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This instruction compares its operands according to the predicate given to the constructor.
This class represents a no-op cast from one type to another.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.h:1835
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:306
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:428
INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer", "Scalarize vector operations", false, false) Scatterer
Definition: Scalarizer.cpp:185
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:290
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:980
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:128
Value * getOperand(unsigned i) const
Definition: User.h:154
Class to represent pointers.
Definition: DerivedTypes.h:467
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:301
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1641
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:141
const BasicBlock & getEntryBlock() const
Definition: Function.h:572
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:602
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:837
This instruction inserts a single (scalar) element into a VectorType value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:200
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static Function * getScalarIntrinsicDeclaration(Module *M, Intrinsic::ID ID, VectorType *Ty)
Definition: Scalarizer.cpp:406
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:1689
static bool isTriviallyScalariable(Intrinsic::ID ID)
Definition: Scalarizer.cpp:401
This instruction compares its operands according to the predicate given to the constructor.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Value * getPointerOperand()
Definition: Instructions.h:270
self_iterator getIterator()
Definition: ilist_node.h:82
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1709
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1320
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
ValT getOption() const
Query for a debug option&#39;s value.
Definition: LLVMContext.h:313
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:1654
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1223
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1722
void initializeScalarizerPass(PassRegistry &)
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:682
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:308
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:820
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:72
static void registerOption(StringRef ArgStr, StringRef Desc, const ValT &InitValue)
Registers an option with the OptionRegistry singleton.
Definition: Options.h:96
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:175
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:57
void push_back(pointer val)
Definition: ilist.h:326
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:532
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
static int getMaskValue(Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:284
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:226
void clear()
Definition: ilist.h:322
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:218
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:351
const unsigned Kind
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1476
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LoadInst * CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name)
Definition: IRBuilder.h:1182
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:545
LLVM Value Representation.
Definition: Value.h:73
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:388
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
Type * getElementType() const
Definition: DerivedTypes.h:360
FunctionPass * createScalarizerPass()
Definition: Scalarizer.cpp:770
bool isSimple() const
Definition: Instructions.h:387
static void Split(std::vector< std::string > &V, StringRef S)
Splits a string of comma separated items in to a vector of strings.
VectorType * getType() const
Overload to return most specific vector type.
Value * getPointerOperand()
Definition: Instructions.h:398
bool use_empty() const
Definition: Value.h:322
const BasicBlock * getParent() const
Definition: Instruction.h:66
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:35
void resize(size_type N)
Definition: SmallVector.h:355