LLVM  9.0.0svn
Scalarizer.cpp
Go to the documentation of this file.
1 //===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass converts vector operations into scalar operations, in order
10 // to expose optimization opportunities on the individual scalar operations.
11 // It is mainly intended for targets that do not have vector units, but it
12 // may also be useful for revectorizing code to different vector widths.
13 //
14 //===----------------------------------------------------------------------===//
15 
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Twine.h"
20 #include "llvm/IR/Argument.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/InstVisitor.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/Options.h"
40 #include "llvm/Transforms/Scalar.h"
42 #include <cassert>
43 #include <cstdint>
44 #include <iterator>
45 #include <map>
46 #include <utility>
47 
48 using namespace llvm;
49 
50 #define DEBUG_TYPE "scalarizer"
51 
52 // This is disabled by default because having separate loads and stores
53 // makes it more likely that the -combiner-alias-analysis limits will be
54 // reached.
55 static cl::opt<bool>
56  ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden,
57  cl::desc("Allow the scalarizer pass to scalarize loads and store"));
58 
59 namespace {
60 
61 // Used to store the scattered form of a vector.
62 using ValueVector = SmallVector<Value *, 8>;
63 
64 // Used to map a vector Value to its scattered form. We use std::map
65 // because we want iterators to persist across insertion and because the
66 // values are relatively large.
67 using ScatterMap = std::map<Value *, ValueVector>;
68 
69 // Lists Instructions that have been replaced with scalar implementations,
70 // along with a pointer to their scattered forms.
72 
73 // Provides a very limited vector-like interface for lazily accessing one
74 // component of a scattered vector or vector pointer.
75 class Scatterer {
76 public:
77  Scatterer() = default;
78 
79  // Scatter V into Size components. If new instructions are needed,
80  // insert them before BBI in BB. If Cache is nonnull, use it to cache
81  // the results.
82  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
83  ValueVector *cachePtr = nullptr);
84 
85  // Return component I, creating a new Value for it if necessary.
86  Value *operator[](unsigned I);
87 
88  // Return the number of components.
89  unsigned size() const { return Size; }
90 
91 private:
92  BasicBlock *BB;
94  Value *V;
95  ValueVector *CachePtr;
96  PointerType *PtrTy;
97  ValueVector Tmp;
98  unsigned Size;
99 };
100 
101 // FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
102 // called Name that compares X and Y in the same way as FCI.
103 struct FCmpSplitter {
104  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
105 
106  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
107  const Twine &Name) const {
108  return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
109  }
110 
111  FCmpInst &FCI;
112 };
113 
114 // ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
115 // called Name that compares X and Y in the same way as ICI.
116 struct ICmpSplitter {
117  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
118 
119  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
120  const Twine &Name) const {
121  return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
122  }
123 
124  ICmpInst &ICI;
125 };
126 
127 // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
128 // a binary operator like BO called Name with operands X and Y.
129 struct BinarySplitter {
130  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
131 
132  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
133  const Twine &Name) const {
134  return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
135  }
136 
137  BinaryOperator &BO;
138 };
139 
140 // Information about a load or store that we're scalarizing.
141 struct VectorLayout {
142  VectorLayout() = default;
143 
144  // Return the alignment of element I.
145  uint64_t getElemAlign(unsigned I) {
146  return MinAlign(VecAlign, I * ElemSize);
147  }
148 
149  // The type of the vector.
150  VectorType *VecTy = nullptr;
151 
152  // The type of each element.
153  Type *ElemTy = nullptr;
154 
155  // The alignment of the vector.
156  uint64_t VecAlign = 0;
157 
158  // The size of each element.
159  uint64_t ElemSize = 0;
160 };
161 
162 class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
163 public:
164  ScalarizerVisitor(unsigned ParallelLoopAccessMDKind)
165  : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
166  }
167 
168  bool visit(Function &F);
169 
170  // InstVisitor methods. They return true if the instruction was scalarized,
171  // false if nothing changed.
172  bool visitInstruction(Instruction &I) { return false; }
173  bool visitSelectInst(SelectInst &SI);
174  bool visitICmpInst(ICmpInst &ICI);
175  bool visitFCmpInst(FCmpInst &FCI);
176  bool visitBinaryOperator(BinaryOperator &BO);
177  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
178  bool visitCastInst(CastInst &CI);
179  bool visitBitCastInst(BitCastInst &BCI);
180  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
181  bool visitPHINode(PHINode &PHI);
182  bool visitLoadInst(LoadInst &LI);
183  bool visitStoreInst(StoreInst &SI);
184  bool visitCallInst(CallInst &ICI);
185 
186 private:
187  Scatterer scatter(Instruction *Point, Value *V);
188  void gather(Instruction *Op, const ValueVector &CV);
189  bool canTransferMetadata(unsigned Kind);
190  void transferMetadata(Instruction *Op, const ValueVector &CV);
191  bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout,
192  const DataLayout &DL);
193  bool finish();
194 
195  template<typename T> bool splitBinary(Instruction &, const T &);
196 
197  bool splitCall(CallInst &CI);
198 
199  ScatterMap Scattered;
200  GatherList Gathered;
201 
202  unsigned ParallelLoopAccessMDKind;
203 };
204 
205 class ScalarizerLegacyPass : public FunctionPass {
206 public:
207  static char ID;
208 
209  ScalarizerLegacyPass() : FunctionPass(ID) {
211  }
212 
213  bool runOnFunction(Function &F) override;
214 };
215 
216 } // end anonymous namespace
217 
218 char ScalarizerLegacyPass::ID = 0;
219 INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
220  "Scalarize vector operations", false, false)
221 INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
222  "Scalarize vector operations", false, false)
223 
224 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
225  ValueVector *cachePtr)
226  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
227  Type *Ty = V->getType();
228  PtrTy = dyn_cast<PointerType>(Ty);
229  if (PtrTy)
230  Ty = PtrTy->getElementType();
231  Size = Ty->getVectorNumElements();
232  if (!CachePtr)
233  Tmp.resize(Size, nullptr);
234  else if (CachePtr->empty())
235  CachePtr->resize(Size, nullptr);
236  else
237  assert(Size == CachePtr->size() && "Inconsistent vector sizes");
238 }
239 
240 // Return component I, creating a new Value for it if necessary.
241 Value *Scatterer::operator[](unsigned I) {
242  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
243  // Try to reuse a previous value.
244  if (CV[I])
245  return CV[I];
246  IRBuilder<> Builder(BB, BBI);
247  if (PtrTy) {
248  if (!CV[0]) {
249  Type *Ty =
250  PointerType::get(PtrTy->getElementType()->getVectorElementType(),
251  PtrTy->getAddressSpace());
252  CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
253  }
254  if (I != 0)
255  CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
256  V->getName() + ".i" + Twine(I));
257  } else {
258  // Search through a chain of InsertElementInsts looking for element I.
259  // Record other elements in the cache. The new V is still suitable
260  // for all uncached indices.
261  while (true) {
263  if (!Insert)
264  break;
265  ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
266  if (!Idx)
267  break;
268  unsigned J = Idx->getZExtValue();
269  V = Insert->getOperand(0);
270  if (I == J) {
271  CV[J] = Insert->getOperand(1);
272  return CV[J];
273  } else if (!CV[J]) {
274  // Only cache the first entry we find for each index we're not actively
275  // searching for. This prevents us from going too far up the chain and
276  // caching incorrect entries.
277  CV[J] = Insert->getOperand(1);
278  }
279  }
280  CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
281  V->getName() + ".i" + Twine(I));
282  }
283  return CV[I];
284 }
285 
287  if (skipFunction(F))
288  return false;
289 
290  Module &M = *F.getParent();
291  unsigned ParallelLoopAccessMDKind =
292  M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
293  ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
294  return Impl.visit(F);
295 }
296 
298  return new ScalarizerLegacyPass();
299 }
300 
301 bool ScalarizerVisitor::visit(Function &F) {
302  assert(Gathered.empty() && Scattered.empty());
303 
304  // To ensure we replace gathered components correctly we need to do an ordered
305  // traversal of the basic blocks in the function.
307  for (BasicBlock *BB : RPOT) {
308  for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
309  Instruction *I = &*II;
310  bool Done = InstVisitor::visit(I);
311  ++II;
312  if (Done && I->getType()->isVoidTy())
313  I->eraseFromParent();
314  }
315  }
316  return finish();
317 }
318 
319 // Return a scattered form of V that can be accessed by Point. V must be a
320 // vector or a pointer to a vector.
321 Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
322  if (Argument *VArg = dyn_cast<Argument>(V)) {
323  // Put the scattered form of arguments in the entry block,
324  // so that it can be used everywhere.
325  Function *F = VArg->getParent();
326  BasicBlock *BB = &F->getEntryBlock();
327  return Scatterer(BB, BB->begin(), V, &Scattered[V]);
328  }
329  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
330  // Put the scattered form of an instruction directly after the
331  // instruction.
332  BasicBlock *BB = VOp->getParent();
333  return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
334  V, &Scattered[V]);
335  }
336  // In the fallback case, just put the scattered before Point and
337  // keep the result local to Point.
338  return Scatterer(Point->getParent(), Point->getIterator(), V);
339 }
340 
341 // Replace Op with the gathered form of the components in CV. Defer the
342 // deletion of Op and creation of the gathered form to the end of the pass,
343 // so that we can avoid creating the gathered form if all uses of Op are
344 // replaced with uses of CV.
345 void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
346  // Since we're not deleting Op yet, stub out its operands, so that it
347  // doesn't make anything live unnecessarily.
348  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
349  Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
350 
351  transferMetadata(Op, CV);
352 
353  // If we already have a scattered form of Op (created from ExtractElements
354  // of Op itself), replace them with the new form.
355  ValueVector &SV = Scattered[Op];
356  if (!SV.empty()) {
357  for (unsigned I = 0, E = SV.size(); I != E; ++I) {
358  Value *V = SV[I];
359  if (V == nullptr)
360  continue;
361 
362  Instruction *Old = cast<Instruction>(V);
363  CV[I]->takeName(Old);
364  Old->replaceAllUsesWith(CV[I]);
365  Old->eraseFromParent();
366  }
367  }
368  SV = CV;
369  Gathered.push_back(GatherList::value_type(Op, &SV));
370 }
371 
372 // Return true if it is safe to transfer the given metadata tag from
373 // vector to scalar instructions.
374 bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
375  return (Tag == LLVMContext::MD_tbaa
376  || Tag == LLVMContext::MD_fpmath
380  || Tag == LLVMContext::MD_noalias
381  || Tag == ParallelLoopAccessMDKind
382  || Tag == LLVMContext::MD_access_group);
383 }
384 
385 // Transfer metadata from Op to the instructions in CV if it is known
386 // to be safe to do so.
387 void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV) {
390  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
391  if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
392  for (const auto &MD : MDs)
393  if (canTransferMetadata(MD.first))
394  New->setMetadata(MD.first, MD.second);
395  if (Op->getDebugLoc() && !New->getDebugLoc())
396  New->setDebugLoc(Op->getDebugLoc());
397  }
398  }
399 }
400 
401 // Try to fill in Layout from Ty, returning true on success. Alignment is
402 // the alignment of the vector, or 0 if the ABI default should be used.
403 bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
404  VectorLayout &Layout, const DataLayout &DL) {
405  // Make sure we're dealing with a vector.
406  Layout.VecTy = dyn_cast<VectorType>(Ty);
407  if (!Layout.VecTy)
408  return false;
409 
410  // Check that we're dealing with full-byte elements.
411  Layout.ElemTy = Layout.VecTy->getElementType();
412  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
413  DL.getTypeStoreSizeInBits(Layout.ElemTy))
414  return false;
415 
416  if (Alignment)
417  Layout.VecAlign = Alignment;
418  else
419  Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
420  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
421  return true;
422 }
423 
424 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
425 // to create an instruction like I with operands X and Y and name Name.
426 template<typename Splitter>
427 bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
429  if (!VT)
430  return false;
431 
432  unsigned NumElems = VT->getNumElements();
433  IRBuilder<> Builder(&I);
434  Scatterer Op0 = scatter(&I, I.getOperand(0));
435  Scatterer Op1 = scatter(&I, I.getOperand(1));
436  assert(Op0.size() == NumElems && "Mismatched binary operation");
437  assert(Op1.size() == NumElems && "Mismatched binary operation");
438  ValueVector Res;
439  Res.resize(NumElems);
440  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
441  Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
442  I.getName() + ".i" + Twine(Elem));
443  gather(&I, Res);
444  return true;
445 }
446 
448  return isTriviallyVectorizable(ID);
449 }
450 
451 // All of the current scalarizable intrinsics only have one mangled type.
454  VectorType *Ty) {
455  return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
456 }
457 
458 /// If a call to a vector typed intrinsic function, split into a scalar call per
459 /// element if possible for the intrinsic.
460 bool ScalarizerVisitor::splitCall(CallInst &CI) {
461  VectorType *VT = dyn_cast<VectorType>(CI.getType());
462  if (!VT)
463  return false;
464 
465  Function *F = CI.getCalledFunction();
466  if (!F)
467  return false;
468 
471  return false;
472 
473  unsigned NumElems = VT->getNumElements();
474  unsigned NumArgs = CI.getNumArgOperands();
475 
476  ValueVector ScalarOperands(NumArgs);
477  SmallVector<Scatterer, 8> Scattered(NumArgs);
478 
479  Scattered.resize(NumArgs);
480 
481  // Assumes that any vector type has the same number of elements as the return
482  // vector type, which is true for all current intrinsics.
483  for (unsigned I = 0; I != NumArgs; ++I) {
484  Value *OpI = CI.getOperand(I);
485  if (OpI->getType()->isVectorTy()) {
486  Scattered[I] = scatter(&CI, OpI);
487  assert(Scattered[I].size() == NumElems && "mismatched call operands");
488  } else {
489  ScalarOperands[I] = OpI;
490  }
491  }
492 
493  ValueVector Res(NumElems);
494  ValueVector ScalarCallOps(NumArgs);
495 
496  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
497  IRBuilder<> Builder(&CI);
498 
499  // Perform actual scalarization, taking care to preserve any scalar operands.
500  for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
501  ScalarCallOps.clear();
502 
503  for (unsigned J = 0; J != NumArgs; ++J) {
504  if (hasVectorInstrinsicScalarOpd(ID, J))
505  ScalarCallOps.push_back(ScalarOperands[J]);
506  else
507  ScalarCallOps.push_back(Scattered[J][Elem]);
508  }
509 
510  Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
511  CI.getName() + ".i" + Twine(Elem));
512  }
513 
514  gather(&CI, Res);
515  return true;
516 }
517 
518 bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
519  VectorType *VT = dyn_cast<VectorType>(SI.getType());
520  if (!VT)
521  return false;
522 
523  unsigned NumElems = VT->getNumElements();
524  IRBuilder<> Builder(&SI);
525  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
526  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
527  assert(Op1.size() == NumElems && "Mismatched select");
528  assert(Op2.size() == NumElems && "Mismatched select");
529  ValueVector Res;
530  Res.resize(NumElems);
531 
532  if (SI.getOperand(0)->getType()->isVectorTy()) {
533  Scatterer Op0 = scatter(&SI, SI.getOperand(0));
534  assert(Op0.size() == NumElems && "Mismatched select");
535  for (unsigned I = 0; I < NumElems; ++I)
536  Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
537  SI.getName() + ".i" + Twine(I));
538  } else {
539  Value *Op0 = SI.getOperand(0);
540  for (unsigned I = 0; I < NumElems; ++I)
541  Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
542  SI.getName() + ".i" + Twine(I));
543  }
544  gather(&SI, Res);
545  return true;
546 }
547 
548 bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
549  return splitBinary(ICI, ICmpSplitter(ICI));
550 }
551 
552 bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
553  return splitBinary(FCI, FCmpSplitter(FCI));
554 }
555 
556 bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
557  return splitBinary(BO, BinarySplitter(BO));
558 }
559 
560 bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
561  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
562  if (!VT)
563  return false;
564 
565  IRBuilder<> Builder(&GEPI);
566  unsigned NumElems = VT->getNumElements();
567  unsigned NumIndices = GEPI.getNumIndices();
568 
569  // The base pointer might be scalar even if it's a vector GEP. In those cases,
570  // splat the pointer into a vector value, and scatter that vector.
571  Value *Op0 = GEPI.getOperand(0);
572  if (!Op0->getType()->isVectorTy())
573  Op0 = Builder.CreateVectorSplat(NumElems, Op0);
574  Scatterer Base = scatter(&GEPI, Op0);
575 
577  Ops.resize(NumIndices);
578  for (unsigned I = 0; I < NumIndices; ++I) {
579  Value *Op = GEPI.getOperand(I + 1);
580 
581  // The indices might be scalars even if it's a vector GEP. In those cases,
582  // splat the scalar into a vector value, and scatter that vector.
583  if (!Op->getType()->isVectorTy())
584  Op = Builder.CreateVectorSplat(NumElems, Op);
585 
586  Ops[I] = scatter(&GEPI, Op);
587  }
588 
589  ValueVector Res;
590  Res.resize(NumElems);
591  for (unsigned I = 0; I < NumElems; ++I) {
592  SmallVector<Value *, 8> Indices;
593  Indices.resize(NumIndices);
594  for (unsigned J = 0; J < NumIndices; ++J)
595  Indices[J] = Ops[J][I];
596  Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
597  GEPI.getName() + ".i" + Twine(I));
598  if (GEPI.isInBounds())
599  if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
600  NewGEPI->setIsInBounds();
601  }
602  gather(&GEPI, Res);
603  return true;
604 }
605 
606 bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
608  if (!VT)
609  return false;
610 
611  unsigned NumElems = VT->getNumElements();
612  IRBuilder<> Builder(&CI);
613  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
614  assert(Op0.size() == NumElems && "Mismatched cast");
615  ValueVector Res;
616  Res.resize(NumElems);
617  for (unsigned I = 0; I < NumElems; ++I)
618  Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
619  CI.getName() + ".i" + Twine(I));
620  gather(&CI, Res);
621  return true;
622 }
623 
624 bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
625  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
626  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
627  if (!DstVT || !SrcVT)
628  return false;
629 
630  unsigned DstNumElems = DstVT->getNumElements();
631  unsigned SrcNumElems = SrcVT->getNumElements();
632  IRBuilder<> Builder(&BCI);
633  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
634  ValueVector Res;
635  Res.resize(DstNumElems);
636 
637  if (DstNumElems == SrcNumElems) {
638  for (unsigned I = 0; I < DstNumElems; ++I)
639  Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
640  BCI.getName() + ".i" + Twine(I));
641  } else if (DstNumElems > SrcNumElems) {
642  // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
643  // individual elements to the destination.
644  unsigned FanOut = DstNumElems / SrcNumElems;
645  Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
646  unsigned ResI = 0;
647  for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
648  Value *V = Op0[Op0I];
649  Instruction *VI;
650  // Look through any existing bitcasts before converting to <N x t2>.
651  // In the best case, the resulting conversion might be a no-op.
652  while ((VI = dyn_cast<Instruction>(V)) &&
653  VI->getOpcode() == Instruction::BitCast)
654  V = VI->getOperand(0);
655  V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
656  Scatterer Mid = scatter(&BCI, V);
657  for (unsigned MidI = 0; MidI < FanOut; ++MidI)
658  Res[ResI++] = Mid[MidI];
659  }
660  } else {
661  // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
662  unsigned FanIn = SrcNumElems / DstNumElems;
663  Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
664  unsigned Op0I = 0;
665  for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
666  Value *V = UndefValue::get(MidTy);
667  for (unsigned MidI = 0; MidI < FanIn; ++MidI)
668  V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
669  BCI.getName() + ".i" + Twine(ResI)
670  + ".upto" + Twine(MidI));
671  Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
672  BCI.getName() + ".i" + Twine(ResI));
673  }
674  }
675  gather(&BCI, Res);
676  return true;
677 }
678 
679 bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
680  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
681  if (!VT)
682  return false;
683 
684  unsigned NumElems = VT->getNumElements();
685  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
686  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
687  ValueVector Res;
688  Res.resize(NumElems);
689 
690  for (unsigned I = 0; I < NumElems; ++I) {
691  int Selector = SVI.getMaskValue(I);
692  if (Selector < 0)
693  Res[I] = UndefValue::get(VT->getElementType());
694  else if (unsigned(Selector) < Op0.size())
695  Res[I] = Op0[Selector];
696  else
697  Res[I] = Op1[Selector - Op0.size()];
698  }
699  gather(&SVI, Res);
700  return true;
701 }
702 
703 bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
704  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
705  if (!VT)
706  return false;
707 
708  unsigned NumElems = VT->getNumElements();
709  IRBuilder<> Builder(&PHI);
710  ValueVector Res;
711  Res.resize(NumElems);
712 
713  unsigned NumOps = PHI.getNumOperands();
714  for (unsigned I = 0; I < NumElems; ++I)
715  Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
716  PHI.getName() + ".i" + Twine(I));
717 
718  for (unsigned I = 0; I < NumOps; ++I) {
719  Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
720  BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
721  for (unsigned J = 0; J < NumElems; ++J)
722  cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
723  }
724  gather(&PHI, Res);
725  return true;
726 }
727 
728 bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
729  if (!ScalarizeLoadStore)
730  return false;
731  if (!LI.isSimple())
732  return false;
733 
734  VectorLayout Layout;
735  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
736  LI.getModule()->getDataLayout()))
737  return false;
738 
739  unsigned NumElems = Layout.VecTy->getNumElements();
740  IRBuilder<> Builder(&LI);
741  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
742  ValueVector Res;
743  Res.resize(NumElems);
744 
745  for (unsigned I = 0; I < NumElems; ++I)
746  Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
747  LI.getName() + ".i" + Twine(I));
748  gather(&LI, Res);
749  return true;
750 }
751 
752 bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
753  if (!ScalarizeLoadStore)
754  return false;
755  if (!SI.isSimple())
756  return false;
757 
758  VectorLayout Layout;
759  Value *FullValue = SI.getValueOperand();
760  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
761  SI.getModule()->getDataLayout()))
762  return false;
763 
764  unsigned NumElems = Layout.VecTy->getNumElements();
765  IRBuilder<> Builder(&SI);
766  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
767  Scatterer Val = scatter(&SI, FullValue);
768 
769  ValueVector Stores;
770  Stores.resize(NumElems);
771  for (unsigned I = 0; I < NumElems; ++I) {
772  unsigned Align = Layout.getElemAlign(I);
773  Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
774  }
775  transferMetadata(&SI, Stores);
776  return true;
777 }
778 
779 bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
780  return splitCall(CI);
781 }
782 
783 // Delete the instructions that we scalarized. If a full vector result
784 // is still needed, recreate it using InsertElements.
785 bool ScalarizerVisitor::finish() {
786  // The presence of data in Gathered or Scattered indicates changes
787  // made to the Function.
788  if (Gathered.empty() && Scattered.empty())
789  return false;
790  for (const auto &GMI : Gathered) {
791  Instruction *Op = GMI.first;
792  ValueVector &CV = *GMI.second;
793  if (!Op->use_empty()) {
794  // The value is still needed, so recreate it using a series of
795  // InsertElements.
796  Type *Ty = Op->getType();
797  Value *Res = UndefValue::get(Ty);
798  BasicBlock *BB = Op->getParent();
799  unsigned Count = Ty->getVectorNumElements();
800  IRBuilder<> Builder(Op);
801  if (isa<PHINode>(Op))
802  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
803  for (unsigned I = 0; I < Count; ++I)
804  Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
805  Op->getName() + ".upto" + Twine(I));
806  Res->takeName(Op);
807  Op->replaceAllUsesWith(Res);
808  }
809  Op->eraseFromParent();
810  }
811  Gathered.clear();
812  Scattered.clear();
813  return true;
814 }
815 
817  Module &M = *F.getParent();
818  unsigned ParallelLoopAccessMDKind =
819  M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
820  ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
821  bool Changed = Impl.visit(F);
822  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
823 }
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode *>> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:243
Value * getValueOperand()
Definition: Instructions.h:409
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:426
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1948
bool isSimple() const
Definition: Instructions.h:276
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1515
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:610
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1297
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
Base class for instruction visitors.
Definition: InstVisitor.h:80
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name)
Provided to resolve &#39;CreateAlignedLoad(Ptr, Align, "...")&#39; correctly, instead of converting the strin...
Definition: IRBuilder.h:1392
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:629
This instruction constructs a fixed permutation of two input vectors.
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile=false)
Definition: IRBuilder.h:1429
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
unsigned getMDKindID(StringRef Name) const
getMDKindID - Return a unique non-zero ID for the specified metadata kind.
amdgpu Simplify well known AMD library false Value Value const Twine & Name
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer", "Scalarize vector operations", false, false) INITIALIZE_PASS_END(ScalarizerLegacyPass
void initializeScalarizerLegacyPassPass(PassRegistry &)
This class represents the LLVM &#39;select&#39; instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:370
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:352
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:243
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
uint64_t getNumElements() const
Definition: DerivedTypes.h:358
Type * getSourceElementType() const
Definition: Instructions.h:970
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:89
unsigned getNumIndices() const
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1731
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:605
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This instruction compares its operands according to the predicate given to the constructor.
This class represents a no-op cast from one type to another.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.h:2195
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:428
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:290
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1019
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:126
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: Scalarizer.cpp:816
Value * getOperand(unsigned i) const
Definition: User.h:169
Class to represent pointers.
Definition: DerivedTypes.h:466
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:303
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1956
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:140
const BasicBlock & getEntryBlock() const
Definition: Function.h:639
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:609
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
This instruction inserts a single (scalar) element into a VectorType value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
static Function * getScalarIntrinsicDeclaration(Module *M, Intrinsic::ID ID, VectorType *Ty)
Definition: Scalarizer.cpp:452
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
scalarizer
Definition: Scalarizer.cpp:221
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:2020
static bool isTriviallyScalariable(Intrinsic::ID ID)
Definition: Scalarizer.cpp:447
This instruction compares its operands according to the predicate given to the constructor.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Scalarize vector operations
Definition: Scalarizer.cpp:221
Value * getPointerOperand()
Definition: Instructions.h:284
self_iterator getIterator()
Definition: ilist_node.h:81
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2040
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1414
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static int getMaskValue(const Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:1968
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1457
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
Module.h This file contains the declarations for the Module class.
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2053
This file declares helper objects for defining debug options that can be configured via the command l...
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:729
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:306
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:612
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:88
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:461
This pass converts vector operations into scalar operations, in order to expose optimization opportun...
Class to represent vector types.
Definition: DerivedTypes.h:392
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
void push_back(pointer val)
Definition: ilist.h:312
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:567
static cl::opt< bool > ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize loads and store"))
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1132
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:310
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:240
void clear()
Definition: ilist.h:308
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:213
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1180
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
uint32_t Size
Definition: Profile.cpp:46
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:365
const unsigned Kind
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1768
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:418
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:605
Type * getElementType() const
Definition: DerivedTypes.h:359
FunctionPass * createScalarizerPass()
Create a legacy pass manager instance of the Scalarizer pass.
Definition: Scalarizer.cpp:297
A container for analyses that lazily runs them and caches their results.
bool isSimple() const
Definition: Instructions.h:401
static void Split(std::vector< std::string > &V, StringRef S)
Splits a string of comma separated items in to a vector of strings.
VectorType * getType() const
Overload to return most specific vector type.
Value * getPointerOperand()
Definition: Instructions.h:412
bool use_empty() const
Definition: Value.h:322
const BasicBlock * getParent() const
Definition: Instruction.h:66
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:42
void resize(size_type N)
Definition: SmallVector.h:343