LLVM  10.0.0svn
Scalarizer.cpp
Go to the documentation of this file.
1 //===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass converts vector operations into scalar operations, in order
10 // to expose optimization opportunities on the individual scalar operations.
11 // It is mainly intended for targets that do not have vector units, but it
12 // may also be useful for revectorizing code to different vector widths.
13 //
14 //===----------------------------------------------------------------------===//
15 
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Twine.h"
20 #include "llvm/IR/Argument.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/InstVisitor.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/Options.h"
40 #include "llvm/Transforms/Scalar.h"
42 #include <cassert>
43 #include <cstdint>
44 #include <iterator>
45 #include <map>
46 #include <utility>
47 
48 using namespace llvm;
49 
50 #define DEBUG_TYPE "scalarizer"
51 
52 // This is disabled by default because having separate loads and stores
53 // makes it more likely that the -combiner-alias-analysis limits will be
54 // reached.
55 static cl::opt<bool>
56  ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden,
57  cl::desc("Allow the scalarizer pass to scalarize loads and store"));
58 
59 namespace {
60 
61 // Used to store the scattered form of a vector.
62 using ValueVector = SmallVector<Value *, 8>;
63 
64 // Used to map a vector Value to its scattered form. We use std::map
65 // because we want iterators to persist across insertion and because the
66 // values are relatively large.
67 using ScatterMap = std::map<Value *, ValueVector>;
68 
69 // Lists Instructions that have been replaced with scalar implementations,
70 // along with a pointer to their scattered forms.
72 
73 // Provides a very limited vector-like interface for lazily accessing one
74 // component of a scattered vector or vector pointer.
75 class Scatterer {
76 public:
77  Scatterer() = default;
78 
79  // Scatter V into Size components. If new instructions are needed,
80  // insert them before BBI in BB. If Cache is nonnull, use it to cache
81  // the results.
82  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
83  ValueVector *cachePtr = nullptr);
84 
85  // Return component I, creating a new Value for it if necessary.
86  Value *operator[](unsigned I);
87 
88  // Return the number of components.
89  unsigned size() const { return Size; }
90 
91 private:
92  BasicBlock *BB;
94  Value *V;
95  ValueVector *CachePtr;
96  PointerType *PtrTy;
97  ValueVector Tmp;
98  unsigned Size;
99 };
100 
101 // FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
102 // called Name that compares X and Y in the same way as FCI.
103 struct FCmpSplitter {
104  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
105 
106  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
107  const Twine &Name) const {
108  return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
109  }
110 
111  FCmpInst &FCI;
112 };
113 
114 // ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
115 // called Name that compares X and Y in the same way as ICI.
116 struct ICmpSplitter {
117  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
118 
119  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
120  const Twine &Name) const {
121  return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
122  }
123 
124  ICmpInst &ICI;
125 };
126 
127 // UnarySpliiter(UO)(Builder, X, Name) uses Builder to create
128 // a unary operator like UO called Name with operand X.
129 struct UnarySplitter {
130  UnarySplitter(UnaryOperator &uo) : UO(uo) {}
131 
132  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
133  return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
134  }
135 
136  UnaryOperator &UO;
137 };
138 
139 // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
140 // a binary operator like BO called Name with operands X and Y.
141 struct BinarySplitter {
142  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
143 
144  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
145  const Twine &Name) const {
146  return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
147  }
148 
149  BinaryOperator &BO;
150 };
151 
152 // Information about a load or store that we're scalarizing.
153 struct VectorLayout {
154  VectorLayout() = default;
155 
156  // Return the alignment of element I.
157  uint64_t getElemAlign(unsigned I) {
158  return MinAlign(VecAlign, I * ElemSize);
159  }
160 
161  // The type of the vector.
162  VectorType *VecTy = nullptr;
163 
164  // The type of each element.
165  Type *ElemTy = nullptr;
166 
167  // The alignment of the vector.
168  uint64_t VecAlign = 0;
169 
170  // The size of each element.
171  uint64_t ElemSize = 0;
172 };
173 
174 class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
175 public:
176  ScalarizerVisitor(unsigned ParallelLoopAccessMDKind)
177  : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
178  }
179 
180  bool visit(Function &F);
181 
182  // InstVisitor methods. They return true if the instruction was scalarized,
183  // false if nothing changed.
184  bool visitInstruction(Instruction &I) { return false; }
185  bool visitSelectInst(SelectInst &SI);
186  bool visitICmpInst(ICmpInst &ICI);
187  bool visitFCmpInst(FCmpInst &FCI);
188  bool visitUnaryOperator(UnaryOperator &UO);
189  bool visitBinaryOperator(BinaryOperator &BO);
190  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
191  bool visitCastInst(CastInst &CI);
192  bool visitBitCastInst(BitCastInst &BCI);
193  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
194  bool visitPHINode(PHINode &PHI);
195  bool visitLoadInst(LoadInst &LI);
196  bool visitStoreInst(StoreInst &SI);
197  bool visitCallInst(CallInst &ICI);
198 
199 private:
200  Scatterer scatter(Instruction *Point, Value *V);
201  void gather(Instruction *Op, const ValueVector &CV);
202  bool canTransferMetadata(unsigned Kind);
203  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
204  bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout,
205  const DataLayout &DL);
206  bool finish();
207 
208  template<typename T> bool splitUnary(Instruction &, const T &);
209  template<typename T> bool splitBinary(Instruction &, const T &);
210 
211  bool splitCall(CallInst &CI);
212 
213  ScatterMap Scattered;
214  GatherList Gathered;
215 
216  unsigned ParallelLoopAccessMDKind;
217 };
218 
219 class ScalarizerLegacyPass : public FunctionPass {
220 public:
221  static char ID;
222 
223  ScalarizerLegacyPass() : FunctionPass(ID) {
225  }
226 
227  bool runOnFunction(Function &F) override;
228 };
229 
230 } // end anonymous namespace
231 
232 char ScalarizerLegacyPass::ID = 0;
233 INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
234  "Scalarize vector operations", false, false)
235 INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
236  "Scalarize vector operations", false, false)
237 
238 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
239  ValueVector *cachePtr)
240  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
241  Type *Ty = V->getType();
242  PtrTy = dyn_cast<PointerType>(Ty);
243  if (PtrTy)
244  Ty = PtrTy->getElementType();
245  Size = Ty->getVectorNumElements();
246  if (!CachePtr)
247  Tmp.resize(Size, nullptr);
248  else if (CachePtr->empty())
249  CachePtr->resize(Size, nullptr);
250  else
251  assert(Size == CachePtr->size() && "Inconsistent vector sizes");
252 }
253 
254 // Return component I, creating a new Value for it if necessary.
255 Value *Scatterer::operator[](unsigned I) {
256  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
257  // Try to reuse a previous value.
258  if (CV[I])
259  return CV[I];
260  IRBuilder<> Builder(BB, BBI);
261  if (PtrTy) {
262  Type *ElTy = PtrTy->getElementType()->getVectorElementType();
263  if (!CV[0]) {
264  Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
265  CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
266  }
267  if (I != 0)
268  CV[I] = Builder.CreateConstGEP1_32(ElTy, CV[0], I,
269  V->getName() + ".i" + Twine(I));
270  } else {
271  // Search through a chain of InsertElementInsts looking for element I.
272  // Record other elements in the cache. The new V is still suitable
273  // for all uncached indices.
274  while (true) {
276  if (!Insert)
277  break;
278  ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
279  if (!Idx)
280  break;
281  unsigned J = Idx->getZExtValue();
282  V = Insert->getOperand(0);
283  if (I == J) {
284  CV[J] = Insert->getOperand(1);
285  return CV[J];
286  } else if (!CV[J]) {
287  // Only cache the first entry we find for each index we're not actively
288  // searching for. This prevents us from going too far up the chain and
289  // caching incorrect entries.
290  CV[J] = Insert->getOperand(1);
291  }
292  }
293  CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
294  V->getName() + ".i" + Twine(I));
295  }
296  return CV[I];
297 }
298 
300  if (skipFunction(F))
301  return false;
302 
303  Module &M = *F.getParent();
304  unsigned ParallelLoopAccessMDKind =
305  M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
306  ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
307  return Impl.visit(F);
308 }
309 
311  return new ScalarizerLegacyPass();
312 }
313 
314 bool ScalarizerVisitor::visit(Function &F) {
315  assert(Gathered.empty() && Scattered.empty());
316 
317  // To ensure we replace gathered components correctly we need to do an ordered
318  // traversal of the basic blocks in the function.
320  for (BasicBlock *BB : RPOT) {
321  for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
322  Instruction *I = &*II;
323  bool Done = InstVisitor::visit(I);
324  ++II;
325  if (Done && I->getType()->isVoidTy())
326  I->eraseFromParent();
327  }
328  }
329  return finish();
330 }
331 
332 // Return a scattered form of V that can be accessed by Point. V must be a
333 // vector or a pointer to a vector.
334 Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
335  if (Argument *VArg = dyn_cast<Argument>(V)) {
336  // Put the scattered form of arguments in the entry block,
337  // so that it can be used everywhere.
338  Function *F = VArg->getParent();
339  BasicBlock *BB = &F->getEntryBlock();
340  return Scatterer(BB, BB->begin(), V, &Scattered[V]);
341  }
342  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
343  // Put the scattered form of an instruction directly after the
344  // instruction.
345  BasicBlock *BB = VOp->getParent();
346  return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
347  V, &Scattered[V]);
348  }
349  // In the fallback case, just put the scattered before Point and
350  // keep the result local to Point.
351  return Scatterer(Point->getParent(), Point->getIterator(), V);
352 }
353 
354 // Replace Op with the gathered form of the components in CV. Defer the
355 // deletion of Op and creation of the gathered form to the end of the pass,
356 // so that we can avoid creating the gathered form if all uses of Op are
357 // replaced with uses of CV.
358 void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
359  // Since we're not deleting Op yet, stub out its operands, so that it
360  // doesn't make anything live unnecessarily.
361  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
362  Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
363 
364  transferMetadataAndIRFlags(Op, CV);
365 
366  // If we already have a scattered form of Op (created from ExtractElements
367  // of Op itself), replace them with the new form.
368  ValueVector &SV = Scattered[Op];
369  if (!SV.empty()) {
370  for (unsigned I = 0, E = SV.size(); I != E; ++I) {
371  Value *V = SV[I];
372  if (V == nullptr)
373  continue;
374 
375  Instruction *Old = cast<Instruction>(V);
376  CV[I]->takeName(Old);
377  Old->replaceAllUsesWith(CV[I]);
378  Old->eraseFromParent();
379  }
380  }
381  SV = CV;
382  Gathered.push_back(GatherList::value_type(Op, &SV));
383 }
384 
385 // Return true if it is safe to transfer the given metadata tag from
386 // vector to scalar instructions.
387 bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
388  return (Tag == LLVMContext::MD_tbaa
389  || Tag == LLVMContext::MD_fpmath
390  || Tag == LLVMContext::MD_tbaa_struct
391  || Tag == LLVMContext::MD_invariant_load
392  || Tag == LLVMContext::MD_alias_scope
393  || Tag == LLVMContext::MD_noalias
394  || Tag == ParallelLoopAccessMDKind
395  || Tag == LLVMContext::MD_access_group);
396 }
397 
398 // Transfer metadata from Op to the instructions in CV if it is known
399 // to be safe to do so.
400 void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
401  const ValueVector &CV) {
404  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
405  if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
406  for (const auto &MD : MDs)
407  if (canTransferMetadata(MD.first))
408  New->setMetadata(MD.first, MD.second);
409  New->copyIRFlags(Op);
410  if (Op->getDebugLoc() && !New->getDebugLoc())
411  New->setDebugLoc(Op->getDebugLoc());
412  }
413  }
414 }
415 
416 // Try to fill in Layout from Ty, returning true on success. Alignment is
417 // the alignment of the vector, or 0 if the ABI default should be used.
418 bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
419  VectorLayout &Layout, const DataLayout &DL) {
420  // Make sure we're dealing with a vector.
421  Layout.VecTy = dyn_cast<VectorType>(Ty);
422  if (!Layout.VecTy)
423  return false;
424 
425  // Check that we're dealing with full-byte elements.
426  Layout.ElemTy = Layout.VecTy->getElementType();
427  if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy))
428  return false;
429 
430  if (Alignment)
431  Layout.VecAlign = Alignment;
432  else
433  Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
434  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
435  return true;
436 }
437 
438 // Scalarize one-operand instruction I, using Split(Builder, X, Name)
439 // to create an instruction like I with operand X and name Name.
440 template<typename Splitter>
441 bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
443  if (!VT)
444  return false;
445 
446  unsigned NumElems = VT->getNumElements();
447  IRBuilder<> Builder(&I);
448  Scatterer Op = scatter(&I, I.getOperand(0));
449  assert(Op.size() == NumElems && "Mismatched unary operation");
450  ValueVector Res;
451  Res.resize(NumElems);
452  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
453  Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem));
454  gather(&I, Res);
455  return true;
456 }
457 
458 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
459 // to create an instruction like I with operands X and Y and name Name.
460 template<typename Splitter>
461 bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
463  if (!VT)
464  return false;
465 
466  unsigned NumElems = VT->getNumElements();
467  IRBuilder<> Builder(&I);
468  Scatterer Op0 = scatter(&I, I.getOperand(0));
469  Scatterer Op1 = scatter(&I, I.getOperand(1));
470  assert(Op0.size() == NumElems && "Mismatched binary operation");
471  assert(Op1.size() == NumElems && "Mismatched binary operation");
472  ValueVector Res;
473  Res.resize(NumElems);
474  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
475  Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
476  I.getName() + ".i" + Twine(Elem));
477  gather(&I, Res);
478  return true;
479 }
480 
482  return isTriviallyVectorizable(ID);
483 }
484 
485 // All of the current scalarizable intrinsics only have one mangled type.
488  VectorType *Ty) {
489  return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
490 }
491 
492 /// If a call to a vector typed intrinsic function, split into a scalar call per
493 /// element if possible for the intrinsic.
494 bool ScalarizerVisitor::splitCall(CallInst &CI) {
495  VectorType *VT = dyn_cast<VectorType>(CI.getType());
496  if (!VT)
497  return false;
498 
499  Function *F = CI.getCalledFunction();
500  if (!F)
501  return false;
502 
505  return false;
506 
507  unsigned NumElems = VT->getNumElements();
508  unsigned NumArgs = CI.getNumArgOperands();
509 
510  ValueVector ScalarOperands(NumArgs);
511  SmallVector<Scatterer, 8> Scattered(NumArgs);
512 
513  Scattered.resize(NumArgs);
514 
515  // Assumes that any vector type has the same number of elements as the return
516  // vector type, which is true for all current intrinsics.
517  for (unsigned I = 0; I != NumArgs; ++I) {
518  Value *OpI = CI.getOperand(I);
519  if (OpI->getType()->isVectorTy()) {
520  Scattered[I] = scatter(&CI, OpI);
521  assert(Scattered[I].size() == NumElems && "mismatched call operands");
522  } else {
523  ScalarOperands[I] = OpI;
524  }
525  }
526 
527  ValueVector Res(NumElems);
528  ValueVector ScalarCallOps(NumArgs);
529 
530  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
531  IRBuilder<> Builder(&CI);
532 
533  // Perform actual scalarization, taking care to preserve any scalar operands.
534  for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
535  ScalarCallOps.clear();
536 
537  for (unsigned J = 0; J != NumArgs; ++J) {
538  if (hasVectorInstrinsicScalarOpd(ID, J))
539  ScalarCallOps.push_back(ScalarOperands[J]);
540  else
541  ScalarCallOps.push_back(Scattered[J][Elem]);
542  }
543 
544  Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
545  CI.getName() + ".i" + Twine(Elem));
546  }
547 
548  gather(&CI, Res);
549  return true;
550 }
551 
552 bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
553  VectorType *VT = dyn_cast<VectorType>(SI.getType());
554  if (!VT)
555  return false;
556 
557  unsigned NumElems = VT->getNumElements();
558  IRBuilder<> Builder(&SI);
559  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
560  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
561  assert(Op1.size() == NumElems && "Mismatched select");
562  assert(Op2.size() == NumElems && "Mismatched select");
563  ValueVector Res;
564  Res.resize(NumElems);
565 
566  if (SI.getOperand(0)->getType()->isVectorTy()) {
567  Scatterer Op0 = scatter(&SI, SI.getOperand(0));
568  assert(Op0.size() == NumElems && "Mismatched select");
569  for (unsigned I = 0; I < NumElems; ++I)
570  Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
571  SI.getName() + ".i" + Twine(I));
572  } else {
573  Value *Op0 = SI.getOperand(0);
574  for (unsigned I = 0; I < NumElems; ++I)
575  Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
576  SI.getName() + ".i" + Twine(I));
577  }
578  gather(&SI, Res);
579  return true;
580 }
581 
582 bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
583  return splitBinary(ICI, ICmpSplitter(ICI));
584 }
585 
586 bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
587  return splitBinary(FCI, FCmpSplitter(FCI));
588 }
589 
590 bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
591  return splitUnary(UO, UnarySplitter(UO));
592 }
593 
594 bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
595  return splitBinary(BO, BinarySplitter(BO));
596 }
597 
598 bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
599  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
600  if (!VT)
601  return false;
602 
603  IRBuilder<> Builder(&GEPI);
604  unsigned NumElems = VT->getNumElements();
605  unsigned NumIndices = GEPI.getNumIndices();
606 
607  // The base pointer might be scalar even if it's a vector GEP. In those cases,
608  // splat the pointer into a vector value, and scatter that vector.
609  Value *Op0 = GEPI.getOperand(0);
610  if (!Op0->getType()->isVectorTy())
611  Op0 = Builder.CreateVectorSplat(NumElems, Op0);
612  Scatterer Base = scatter(&GEPI, Op0);
613 
615  Ops.resize(NumIndices);
616  for (unsigned I = 0; I < NumIndices; ++I) {
617  Value *Op = GEPI.getOperand(I + 1);
618 
619  // The indices might be scalars even if it's a vector GEP. In those cases,
620  // splat the scalar into a vector value, and scatter that vector.
621  if (!Op->getType()->isVectorTy())
622  Op = Builder.CreateVectorSplat(NumElems, Op);
623 
624  Ops[I] = scatter(&GEPI, Op);
625  }
626 
627  ValueVector Res;
628  Res.resize(NumElems);
629  for (unsigned I = 0; I < NumElems; ++I) {
630  SmallVector<Value *, 8> Indices;
631  Indices.resize(NumIndices);
632  for (unsigned J = 0; J < NumIndices; ++J)
633  Indices[J] = Ops[J][I];
634  Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
635  GEPI.getName() + ".i" + Twine(I));
636  if (GEPI.isInBounds())
637  if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
638  NewGEPI->setIsInBounds();
639  }
640  gather(&GEPI, Res);
641  return true;
642 }
643 
644 bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
646  if (!VT)
647  return false;
648 
649  unsigned NumElems = VT->getNumElements();
650  IRBuilder<> Builder(&CI);
651  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
652  assert(Op0.size() == NumElems && "Mismatched cast");
653  ValueVector Res;
654  Res.resize(NumElems);
655  for (unsigned I = 0; I < NumElems; ++I)
656  Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
657  CI.getName() + ".i" + Twine(I));
658  gather(&CI, Res);
659  return true;
660 }
661 
662 bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
663  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
664  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
665  if (!DstVT || !SrcVT)
666  return false;
667 
668  unsigned DstNumElems = DstVT->getNumElements();
669  unsigned SrcNumElems = SrcVT->getNumElements();
670  IRBuilder<> Builder(&BCI);
671  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
672  ValueVector Res;
673  Res.resize(DstNumElems);
674 
675  if (DstNumElems == SrcNumElems) {
676  for (unsigned I = 0; I < DstNumElems; ++I)
677  Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
678  BCI.getName() + ".i" + Twine(I));
679  } else if (DstNumElems > SrcNumElems) {
680  // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
681  // individual elements to the destination.
682  unsigned FanOut = DstNumElems / SrcNumElems;
683  Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
684  unsigned ResI = 0;
685  for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
686  Value *V = Op0[Op0I];
687  Instruction *VI;
688  // Look through any existing bitcasts before converting to <N x t2>.
689  // In the best case, the resulting conversion might be a no-op.
690  while ((VI = dyn_cast<Instruction>(V)) &&
691  VI->getOpcode() == Instruction::BitCast)
692  V = VI->getOperand(0);
693  V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
694  Scatterer Mid = scatter(&BCI, V);
695  for (unsigned MidI = 0; MidI < FanOut; ++MidI)
696  Res[ResI++] = Mid[MidI];
697  }
698  } else {
699  // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
700  unsigned FanIn = SrcNumElems / DstNumElems;
701  Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
702  unsigned Op0I = 0;
703  for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
704  Value *V = UndefValue::get(MidTy);
705  for (unsigned MidI = 0; MidI < FanIn; ++MidI)
706  V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
707  BCI.getName() + ".i" + Twine(ResI)
708  + ".upto" + Twine(MidI));
709  Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
710  BCI.getName() + ".i" + Twine(ResI));
711  }
712  }
713  gather(&BCI, Res);
714  return true;
715 }
716 
717 bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
718  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
719  if (!VT)
720  return false;
721 
722  unsigned NumElems = VT->getNumElements();
723  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
724  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
725  ValueVector Res;
726  Res.resize(NumElems);
727 
728  for (unsigned I = 0; I < NumElems; ++I) {
729  int Selector = SVI.getMaskValue(I);
730  if (Selector < 0)
731  Res[I] = UndefValue::get(VT->getElementType());
732  else if (unsigned(Selector) < Op0.size())
733  Res[I] = Op0[Selector];
734  else
735  Res[I] = Op1[Selector - Op0.size()];
736  }
737  gather(&SVI, Res);
738  return true;
739 }
740 
741 bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
742  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
743  if (!VT)
744  return false;
745 
746  unsigned NumElems = VT->getNumElements();
747  IRBuilder<> Builder(&PHI);
748  ValueVector Res;
749  Res.resize(NumElems);
750 
751  unsigned NumOps = PHI.getNumOperands();
752  for (unsigned I = 0; I < NumElems; ++I)
753  Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
754  PHI.getName() + ".i" + Twine(I));
755 
756  for (unsigned I = 0; I < NumOps; ++I) {
757  Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
758  BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
759  for (unsigned J = 0; J < NumElems; ++J)
760  cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
761  }
762  gather(&PHI, Res);
763  return true;
764 }
765 
766 bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
767  if (!ScalarizeLoadStore)
768  return false;
769  if (!LI.isSimple())
770  return false;
771 
772  VectorLayout Layout;
773  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
774  LI.getModule()->getDataLayout()))
775  return false;
776 
777  unsigned NumElems = Layout.VecTy->getNumElements();
778  IRBuilder<> Builder(&LI);
779  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
780  ValueVector Res;
781  Res.resize(NumElems);
782 
783  for (unsigned I = 0; I < NumElems; ++I)
784  Res[I] = Builder.CreateAlignedLoad(Layout.VecTy->getElementType(), Ptr[I],
785  Layout.getElemAlign(I),
786  LI.getName() + ".i" + Twine(I));
787  gather(&LI, Res);
788  return true;
789 }
790 
791 bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
792  if (!ScalarizeLoadStore)
793  return false;
794  if (!SI.isSimple())
795  return false;
796 
797  VectorLayout Layout;
798  Value *FullValue = SI.getValueOperand();
799  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
800  SI.getModule()->getDataLayout()))
801  return false;
802 
803  unsigned NumElems = Layout.VecTy->getNumElements();
804  IRBuilder<> Builder(&SI);
805  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
806  Scatterer Val = scatter(&SI, FullValue);
807 
808  ValueVector Stores;
809  Stores.resize(NumElems);
810  for (unsigned I = 0; I < NumElems; ++I) {
811  unsigned Align = Layout.getElemAlign(I);
812  Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
813  }
814  transferMetadataAndIRFlags(&SI, Stores);
815  return true;
816 }
817 
818 bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
819  return splitCall(CI);
820 }
821 
822 // Delete the instructions that we scalarized. If a full vector result
823 // is still needed, recreate it using InsertElements.
824 bool ScalarizerVisitor::finish() {
825  // The presence of data in Gathered or Scattered indicates changes
826  // made to the Function.
827  if (Gathered.empty() && Scattered.empty())
828  return false;
829  for (const auto &GMI : Gathered) {
830  Instruction *Op = GMI.first;
831  ValueVector &CV = *GMI.second;
832  if (!Op->use_empty()) {
833  // The value is still needed, so recreate it using a series of
834  // InsertElements.
835  Type *Ty = Op->getType();
836  Value *Res = UndefValue::get(Ty);
837  BasicBlock *BB = Op->getParent();
838  unsigned Count = Ty->getVectorNumElements();
839  IRBuilder<> Builder(Op);
840  if (isa<PHINode>(Op))
841  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
842  for (unsigned I = 0; I < Count; ++I)
843  Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
844  Op->getName() + ".upto" + Twine(I));
845  Res->takeName(Op);
846  Op->replaceAllUsesWith(Res);
847  }
848  Op->eraseFromParent();
849  }
850  Gathered.clear();
851  Scattered.clear();
852  return true;
853 }
854 
856  Module &M = *F.getParent();
857  unsigned ParallelLoopAccessMDKind =
858  M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
859  ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
860  bool Changed = Impl.visit(F);
861  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
862 }
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode *>> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:267
Type * getVectorElementType() const
Definition: Type.h:371
Value * getValueOperand()
Definition: Instructions.h:409
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2212
bool isSimple() const
Definition: Instructions.h:276
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1735
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:697
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1458
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
Base class for instruction visitors.
Definition: InstVisitor.h:80
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name)
Provided to resolve &#39;CreateAlignedLoad(Ptr, Align, "...")&#39; correctly, instead of converting the strin...
Definition: IRBuilder.h:1612
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:632
This instruction constructs a fixed permutation of two input vectors.
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
bool typeSizeEqualsStoreSize(Type *Ty) const
Returns true if no extra padding bits are needed when storing the specified type. ...
Definition: DataLayout.h:461
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile=false)
Definition: IRBuilder.h:1649
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
unsigned getMDKindID(StringRef Name) const
getMDKindID - Return a unique non-zero ID for the specified metadata kind.
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer", "Scalarize vector operations", false, false) INITIALIZE_PASS_END(ScalarizerLegacyPass
void initializeScalarizerLegacyPassPass(PassRegistry &)
This class represents the LLVM &#39;select&#39; instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:439
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
uint64_t getNumElements() const
For scalable vectors, this will return the minimum number of elements in the vector.
Definition: DerivedTypes.h:393
Type * getSourceElementType() const
Definition: Instructions.h:972
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:89
unsigned getNumIndices() const
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1964
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:692
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This instruction compares its operands according to the predicate given to the constructor.
This class represents a no-op cast from one type to another.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.h:2462
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1079
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:132
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: Scalarizer.cpp:855
Value * getOperand(unsigned i) const
Definition: User.h:169
Class to represent pointers.
Definition: DerivedTypes.h:570
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:303
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2220
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:140
const BasicBlock & getEntryBlock() const
Definition: Function.h:664
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:614
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:875
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
This instruction inserts a single (scalar) element into a VectorType value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:223
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
static Function * getScalarIntrinsicDeclaration(Module *M, Intrinsic::ID ID, VectorType *Ty)
Definition: Scalarizer.cpp:486
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
scalarizer
Definition: Scalarizer.cpp:235
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:2285
static bool isTriviallyScalariable(Intrinsic::ID ID)
Definition: Scalarizer.cpp:481
This instruction compares its operands according to the predicate given to the constructor.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Scalarize vector operations
Definition: Scalarizer.cpp:235
Value * getPointerOperand()
Definition: Instructions.h:284
self_iterator getIterator()
Definition: ilist_node.h:81
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2307
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static int getMaskValue(const Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2232
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1677
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
Module.h This file contains the declarations for the Module class.
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2320
This file declares helper objects for defining debug options that can be configured via the command l...
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:760
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:343
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:699
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:92
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:561
This pass converts vector operations into scalar operations, in order to expose optimization opportun...
Class to represent vector types.
Definition: DerivedTypes.h:427
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
void push_back(pointer val)
Definition: ilist.h:311
static cl::opt< bool > ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize loads and store"))
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1239
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:331
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:240
void clear()
Definition: ilist.h:307
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:609
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1287
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
uint32_t Size
Definition: Profile.cpp:46
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:365
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2001
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1530
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:73
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:445
Type * getElementType() const
Definition: DerivedTypes.h:394
FunctionPass * createScalarizerPass()
Create a legacy pass manager instance of the Scalarizer pass.
Definition: Scalarizer.cpp:310
A container for analyses that lazily runs them and caches their results.
bool isSimple() const
Definition: Instructions.h:401
VectorType * getType() const
Overload to return most specific vector type.
Value * getPointerOperand()
Definition: Instructions.h:412
bool use_empty() const
Definition: Value.h:342
const BasicBlock * getParent() const
Definition: Instruction.h:66
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:43
void resize(size_type N)
Definition: SmallVector.h:344