LLVM 14.0.0git
HexagonVectorCombine.cpp
1 //===-- HexagonVectorCombine.cpp ------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // HexagonVectorCombine is a utility class implementing a variety of functions
9 // that assist in vector-based optimizations.
10 //
11 // AlignVectors: replace unaligned vector loads and stores with aligned ones.
12 //===----------------------------------------------------------------------===//
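// Illustrative sketch of the kind of rewrite performed (the pass operates on
// groups of related accesses; the exact IR depends on the subtarget, offsets,
// and masks):
//   %v = load <32 x i32>, <32 x i32>* %p, align 4         ; unaligned
// becomes, roughly,
//   %q  = %p aligned down to the vector size
//   %lo = load <32 x i32>, <32 x i32>* %q, align 128
//   %hi = load <32 x i32>, <32 x i32>* (%q + 128), align 128
//   %v  = valign(%hi, %lo, %p)     ; byte shift by the residual offset of %p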
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Analysis/AliasAnalysis.h"
21 #include "llvm/Analysis/AssumptionCache.h"
22 #include "llvm/Analysis/InstructionSimplify.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/Analysis/ValueTracking.h"
25 #include "llvm/Analysis/VectorUtils.h"
26 #include "llvm/CodeGen/TargetPassConfig.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/IntrinsicInst.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/IntrinsicsHexagon.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Target/TargetMachine.h"
39 
40 #include "HexagonSubtarget.h"
41 #include "HexagonTargetMachine.h"
42 
43 #include <algorithm>
44 #include <deque>
45 #include <map>
46 #include <set>
47 #include <utility>
48 #include <vector>
49 
50 #define DEBUG_TYPE "hexagon-vc"
51 
52 using namespace llvm;
53 
54 namespace {
55 class HexagonVectorCombine {
56 public:
57  HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
58  DominatorTree &DT_, TargetLibraryInfo &TLI_,
59  const TargetMachine &TM_)
60  : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
61  TLI(TLI_),
62  HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}
63 
64  bool run();
65 
66  // Common integer type.
67  IntegerType *getIntTy() const;
68  // Byte type: either scalar (when ElemCount = 0), or vector with given
69  // element count.
70  Type *getByteTy(int ElemCount = 0) const;
71  // Boolean type: either scalar (when ElemCount = 0), or vector with given
72  // element count.
73  Type *getBoolTy(int ElemCount = 0) const;
74  // Create a ConstantInt of type returned by getIntTy with the value Val.
75  ConstantInt *getConstInt(int Val) const;
76  // Get the integer value of V, if it exists.
77  Optional<APInt> getIntValue(const Value *Val) const;
78  // Is V a constant 0, or a vector of 0s?
79  bool isZero(const Value *Val) const;
80  // Is V an undef value?
81  bool isUndef(const Value *Val) const;
82 
83  int getSizeOf(const Value *Val) const;
84  int getSizeOf(const Type *Ty) const;
85  int getAllocSizeOf(const Type *Ty) const;
86  int getTypeAlignment(Type *Ty) const;
87 
88  VectorType *getByteVectorTy(int ScLen) const;
89  Constant *getNullValue(Type *Ty) const;
90  Constant *getFullValue(Type *Ty) const;
91 
92  Value *insertb(IRBuilder<> &Builder, Value *Dest, Value *Src, int Start,
93  int Length, int Where) const;
94  Value *vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
95  Value *vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
96  Value *concat(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) const;
97  Value *vresize(IRBuilder<> &Builder, Value *Val, int NewSize,
98  Value *Pad) const;
99  Value *rescale(IRBuilder<> &Builder, Value *Mask, Type *FromTy,
100  Type *ToTy) const;
101  Value *vlsb(IRBuilder<> &Builder, Value *Val) const;
102  Value *vbytes(IRBuilder<> &Builder, Value *Val) const;
103 
104  Value *createHvxIntrinsic(IRBuilder<> &Builder, Intrinsic::ID IntID,
105  Type *RetTy, ArrayRef<Value *> Args) const;
106 
107  Optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
108 
109  template <typename T = std::vector<Instruction *>>
110  bool isSafeToMoveBeforeInBB(const Instruction &In,
111  BasicBlock::const_iterator To,
112  const T &Ignore = {}) const;
113 
114  Function &F;
115  const DataLayout &DL;
116  AliasAnalysis &AA;
117  AssumptionCache &AC;
118  DominatorTree &DT;
119  TargetLibraryInfo &TLI;
120  const HexagonSubtarget &HST;
121 
122 private:
123 #ifndef NDEBUG
124  // These two functions are only used for assertions at the moment.
125  bool isByteVecTy(Type *Ty) const;
126  bool isSectorTy(Type *Ty) const;
127 #endif
128  Value *getElementRange(IRBuilder<> &Builder, Value *Lo, Value *Hi, int Start,
129  int Length) const;
130 };
131 
132 class AlignVectors {
133 public:
134  AlignVectors(HexagonVectorCombine &HVC_) : HVC(HVC_) {}
135 
136  bool run();
137 
138 private:
139  using InstList = std::vector<Instruction *>;
140 
141  struct Segment {
142  void *Data;
143  int Start;
144  int Size;
145  };
146 
147  struct AddrInfo {
148  AddrInfo(const AddrInfo &) = default;
149  AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T,
150  Align H)
151  : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
152  NeedAlign(HVC.getTypeAlignment(ValTy)) {}
153 
154  // XXX: add Size member?
155  Instruction *Inst;
156  Value *Addr;
157  Type *ValTy;
158  Align HaveAlign;
159  Align NeedAlign;
160  int Offset = 0; // Offset (in bytes) from the first member of the
161  // containing AddrList.
162  };
163  using AddrList = std::vector<AddrInfo>;
164 
165  struct InstrLess {
166  bool operator()(const Instruction *A, const Instruction *B) const {
167  return A->comesBefore(B);
168  }
169  };
170  using DepList = std::set<Instruction *, InstrLess>;
171 
172  struct MoveGroup {
173  MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
174  : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}
175  Instruction *Base; // Base instruction of the parent address group.
176  InstList Main; // Main group of instructions.
177  InstList Deps; // List of dependencies.
178  bool IsHvx; // Is this a group of HVX instructions?
179  bool IsLoad; // Is this a load group?
180  };
181  using MoveList = std::vector<MoveGroup>;
182 
183  struct ByteSpan {
184  struct Segment {
185  // Segment of a Value: 'Len' bytes starting at byte 'Begin'.
186  Segment(Value *Val, int Begin, int Len)
187  : Val(Val), Start(Begin), Size(Len) {}
188  Segment(const Segment &Seg) = default;
189  Value *Val; // Value representable as a sequence of bytes.
190  int Start; // First byte of the value that belongs to the segment.
191  int Size; // Number of bytes in the segment.
192  };
193 
194  struct Block {
195  Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
196  Block(Value *Val, int Off, int Len, int Pos)
197  : Seg(Val, Off, Len), Pos(Pos) {}
198  Block(const Block &Blk) = default;
199  Segment Seg; // Value segment.
200  int Pos; // Position (offset) of the segment in the Block.
201  };
202 
203  int extent() const;
204  ByteSpan section(int Start, int Length) const;
205  ByteSpan &shift(int Offset);
206  SmallVector<Value *, 8> values() const;
207 
208  int size() const { return Blocks.size(); }
209  Block &operator[](int i) { return Blocks[i]; }
210 
211  std::vector<Block> Blocks;
212 
213  using iterator = decltype(Blocks)::iterator;
214  iterator begin() { return Blocks.begin(); }
215  iterator end() { return Blocks.end(); }
216  using const_iterator = decltype(Blocks)::const_iterator;
217  const_iterator begin() const { return Blocks.begin(); }
218  const_iterator end() const { return Blocks.end(); }
219  };
220 
221  Align getAlignFromValue(const Value *V) const;
223  Optional<AddrInfo> getAddrInfo(Instruction &In) const;
224  bool isHvx(const AddrInfo &AI) const;
225 
226  Value *getPayload(Value *Val) const;
227  Value *getMask(Value *Val) const;
228  Value *getPassThrough(Value *Val) const;
229 
230  Value *createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
231  int Adjust) const;
232  Value *createAlignedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
233  int Alignment) const;
234  Value *createAlignedLoad(IRBuilder<> &Builder, Type *ValTy, Value *Ptr,
235  int Alignment, Value *Mask, Value *PassThru) const;
236  Value *createAlignedStore(IRBuilder<> &Builder, Value *Val, Value *Ptr,
237  int Alignment, Value *Mask) const;
238 
239  bool createAddressGroups();
240  MoveList createLoadGroups(const AddrList &Group) const;
241  MoveList createStoreGroups(const AddrList &Group) const;
242  bool move(const MoveGroup &Move) const;
243  bool realignGroup(const MoveGroup &Move) const;
244 
245  friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
246  friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
247  friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS);
248 
249  std::map<Instruction *, AddrList> AddrGroups;
250  HexagonVectorCombine &HVC;
251 };
252 
253 LLVM_ATTRIBUTE_UNUSED
254 raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
255  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
256  OS << "Addr: " << *AI.Addr << '\n';
257  OS << "Type: " << *AI.ValTy << '\n';
258  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
259  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
260  OS << "Offset: " << AI.Offset;
261  return OS;
262 }
263 
264 LLVM_ATTRIBUTE_UNUSED
265 raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
266  OS << "Main\n";
267  for (Instruction *I : MG.Main)
268  OS << " " << *I << '\n';
269  OS << "Deps\n";
270  for (Instruction *I : MG.Deps)
271  OS << " " << *I << '\n';
272  return OS;
273 }
274 
275 LLVM_ATTRIBUTE_UNUSED
276 raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
277  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
278  for (const AlignVectors::ByteSpan::Block &B : BS) {
279  OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
280  << *B.Seg.Val << '\n';
281  }
282  OS << ']';
283  return OS;
284 }
285 
286 } // namespace
287 
288 namespace {
289 
290 template <typename T> T *getIfUnordered(T *MaybeT) {
291  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
292 }
293 template <typename T> T *isCandidate(Instruction *In) {
294  return dyn_cast<T>(In);
295 }
296 template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
297  return getIfUnordered(dyn_cast<LoadInst>(In));
298 }
299 template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
300  return getIfUnordered(dyn_cast<StoreInst>(In));
301 }
302 
303 #if !defined(_MSC_VER) || _MSC_VER >= 1926
304 // VS2017 and some versions of VS2019 have trouble compiling this:
305 // error C2976: 'std::map': too few template arguments
306 // VS 2019 16.x is known to work, except for 16.4/16.5 (MSC_VER 1924/1925)
307 template <typename Pred, typename... Ts>
308 void erase_if(std::map<Ts...> &map, Pred p)
309 #else
310 template <typename Pred, typename T, typename U>
311 void erase_if(std::map<T, U> &map, Pred p)
312 #endif
313 {
314  for (auto i = map.begin(), e = map.end(); i != e;) {
315  if (p(*i))
316  i = map.erase(i);
317  else
318  i = std::next(i);
319  }
320 }
321 
322 // Forward other erase_ifs to the LLVM implementations.
323 template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
324  llvm::erase_if(std::forward<T>(container), p);
325 }
326 
327 } // namespace
328 
329 // --- Begin AlignVectors
330 
331 auto AlignVectors::ByteSpan::extent() const -> int {
332  if (size() == 0)
333  return 0;
334  int Min = Blocks[0].Pos;
335  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
336  for (int i = 1, e = size(); i != e; ++i) {
337  Min = std::min(Min, Blocks[i].Pos);
338  Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
339  }
340  return Max - Min;
341 }
342 
343 auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
344  ByteSpan Section;
345  for (const ByteSpan::Block &B : Blocks) {
346  int L = std::max(B.Pos, Start); // Left end.
347  int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1.
348  if (L < R) {
349  // How much to chop off the beginning of the segment:
350  int Off = L > B.Pos ? L - B.Pos : 0;
351  Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
352  }
353  }
354  return Section;
355 }
356 
357 auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
358  for (Block &B : Blocks)
359  B.Pos += Offset;
360  return *this;
361 }
362 
363 auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
364  SmallVector<Value *, 8> Values(Blocks.size());
365  for (int i = 0, e = Blocks.size(); i != e; ++i)
366  Values[i] = Blocks[i].Seg.Val;
367  return Values;
368 }
369 
370 auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
371  const auto *C = dyn_cast<ConstantInt>(V);
372  assert(C && "Alignment must be a compile-time constant integer");
373  return C->getAlignValue();
374 }
375 
376 auto AlignVectors::getAddrInfo(Instruction &In) const -> Optional<AddrInfo> {
377  if (auto *L = isCandidate<LoadInst>(&In))
378  return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
379  L->getAlign());
380  if (auto *S = isCandidate<StoreInst>(&In))
381  return AddrInfo(HVC, S, S->getPointerOperand(),
382  S->getValueOperand()->getType(), S->getAlign());
383  if (auto *II = isCandidate<IntrinsicInst>(&In)) {
384  Intrinsic::ID ID = II->getIntrinsicID();
385  switch (ID) {
386  case Intrinsic::masked_load:
387  return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
388  getAlignFromValue(II->getArgOperand(1)));
389  case Intrinsic::masked_store:
390  return AddrInfo(HVC, II, II->getArgOperand(1),
391  II->getArgOperand(0)->getType(),
392  getAlignFromValue(II->getArgOperand(2)));
393  }
394  }
395  return Optional<AddrInfo>();
396 }
397 
398 auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
399  return HVC.HST.isTypeForHVX(AI.ValTy);
400 }
401 
402 auto AlignVectors::getPayload(Value *Val) const -> Value * {
403  if (auto *In = dyn_cast<Instruction>(Val)) {
404  Intrinsic::ID ID = 0;
405  if (auto *II = dyn_cast<IntrinsicInst>(In))
406  ID = II->getIntrinsicID();
407  if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
408  return In->getOperand(0);
409  }
410  return Val;
411 }
412 
413 auto AlignVectors::getMask(Value *Val) const -> Value * {
414  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
415  switch (II->getIntrinsicID()) {
416  case Intrinsic::masked_load:
417  return II->getArgOperand(2);
418  case Intrinsic::masked_store:
419  return II->getArgOperand(3);
420  }
421  }
422 
423  Type *ValTy = getPayload(Val)->getType();
424  if (auto *VecTy = dyn_cast<VectorType>(ValTy)) {
425  int ElemCount = VecTy->getElementCount().getFixedValue();
426  return HVC.getFullValue(HVC.getBoolTy(ElemCount));
427  }
428  return HVC.getFullValue(HVC.getBoolTy());
429 }
430 
431 auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
432  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
433  if (II->getIntrinsicID() == Intrinsic::masked_load)
434  return II->getArgOperand(3);
435  }
436  return UndefValue::get(getPayload(Val)->getType());
437 }
438 
439 auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
440  Type *ValTy, int Adjust) const
441  -> Value * {
442  // The adjustment is in bytes, but if it's a multiple of the type size,
443  // we don't need to do pointer casts.
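 // E.g. (illustrative): with a typed i32* pointer and Adjust = 8, this emits
 // "getelementptr i32, i32* %Ptr, i32 2" and a cast to ValTy*, rather than
 // first casting through i8*.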
444  auto *PtrTy = cast<PointerType>(Ptr->getType());
445  if (!PtrTy->isOpaque()) {
446  Type *ElemTy = PtrTy->getElementType();
447  int ElemSize = HVC.getAllocSizeOf(ElemTy);
448  if (Adjust % ElemSize == 0 && Adjust != 0) {
449  Value *Tmp0 =
450  Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));
451  return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
452  }
453  }
454 
455  PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext());
456  Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy);
457  Value *Tmp1 = Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), Tmp0,
458  HVC.getConstInt(Adjust));
459  return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo());
460 }
461 
462 auto AlignVectors::createAlignedPointer(IRBuilder<> &Builder, Value *Ptr,
463  Type *ValTy, int Alignment) const
464  -> Value * {
465  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy());
466  Value *Mask = HVC.getConstInt(-Alignment);
467  Value *And = Builder.CreateAnd(AsInt, Mask);
468  return Builder.CreateIntToPtr(And, ValTy->getPointerTo());
469 }
470 
471 auto AlignVectors::createAlignedLoad(IRBuilder<> &Builder, Type *ValTy,
472  Value *Ptr, int Alignment, Value *Mask,
473  Value *PassThru) const -> Value * {
474  assert(!HVC.isUndef(Mask)); // Should this be allowed?
475  if (HVC.isZero(Mask))
476  return PassThru;
477  if (Mask == ConstantInt::getTrue(Mask->getType()))
478  return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment));
479  return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru);
480 }
481 
482 auto AlignVectors::createAlignedStore(IRBuilder<> &Builder, Value *Val,
483  Value *Ptr, int Alignment,
484  Value *Mask) const -> Value * {
485  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
486  return UndefValue::get(Val->getType());
487  if (Mask == ConstantInt::getTrue(Mask->getType()))
488  return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
489  return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
490 }
491 
492 auto AlignVectors::createAddressGroups() -> bool {
493  // An address group created here may contain instructions spanning
494  // multiple basic blocks.
495  AddrList WorkStack;
496 
497  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
498  for (AddrInfo &W : WorkStack) {
499  if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
500  return std::make_pair(W.Inst, *D);
501  }
502  return std::make_pair(nullptr, 0);
503  };
504 
505  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
506  BasicBlock &Block = *DomN->getBlock();
507  for (Instruction &I : Block) {
508  auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
509  if (!AI)
510  continue;
511  auto F = findBaseAndOffset(*AI);
512  Instruction *GroupInst;
513  if (Instruction *BI = F.first) {
514  AI->Offset = F.second;
515  GroupInst = BI;
516  } else {
517  WorkStack.push_back(*AI);
518  GroupInst = AI->Inst;
519  }
520  AddrGroups[GroupInst].push_back(*AI);
521  }
522 
523  for (DomTreeNode *C : DomN->children())
524  Visit(C, Visit);
525 
526  while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
527  WorkStack.pop_back();
528  };
529 
530  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
531  assert(WorkStack.empty());
532 
533  // AddrGroups are formed.
534 
535  // Remove groups of size 1.
536  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
537  // Remove groups that don't use HVX types.
538  erase_if(AddrGroups, [&](auto &G) {
539  return !llvm::any_of(
540  G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
541  });
542 
543  return !AddrGroups.empty();
544 }
545 
546 auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
547  // Form load groups.
548  // To avoid complications with moving code across basic blocks, only form
549  // groups that are contained within a single basic block.
550 
551  auto getUpwardDeps = [](Instruction *In, Instruction *Base) {
552  BasicBlock *Parent = Base->getParent();
553  assert(In->getParent() == Parent &&
554  "Base and In should be in the same block");
555  assert(Base->comesBefore(In) && "Base should come before In");
556 
557  DepList Deps;
558  std::deque<Instruction *> WorkQ = {In};
559  while (!WorkQ.empty()) {
560  Instruction *D = WorkQ.front();
561  WorkQ.pop_front();
562  Deps.insert(D);
563  for (Value *Op : D->operands()) {
564  if (auto *I = dyn_cast<Instruction>(Op)) {
565  if (I->getParent() == Parent && Base->comesBefore(I))
566  WorkQ.push_back(I);
567  }
568  }
569  }
570  return Deps;
571  };
572 
573  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
574  assert(!Move.Main.empty() && "Move group should have non-empty Main");
575  // Don't mix HVX and non-HVX instructions.
576  if (Move.IsHvx != isHvx(Info))
577  return false;
578  // Leading instruction in the load group.
579  Instruction *Base = Move.Main.front();
580  if (Base->getParent() != Info.Inst->getParent())
581  return false;
582 
583  auto isSafeToMoveToBase = [&](const Instruction *I) {
584  return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());
585  };
586  DepList Deps = getUpwardDeps(Info.Inst, Base);
587  if (!llvm::all_of(Deps, isSafeToMoveToBase))
588  return false;
589 
590  // The dependencies will be moved together with the load, so make sure
591  // that none of them could be moved independently in another group.
592  Deps.erase(Info.Inst);
593  auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };
594  if (llvm::any_of(Deps, inAddrMap))
595  return false;
596  Move.Main.push_back(Info.Inst);
597  llvm::append_range(Move.Deps, Deps);
598  return true;
599  };
600 
601  MoveList LoadGroups;
602 
603  for (const AddrInfo &Info : Group) {
604  if (!Info.Inst->mayReadFromMemory())
605  continue;
606  if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
607  LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
608  }
609 
610  // Erase singleton groups.
611  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
612  return LoadGroups;
613 }
614 
615 auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
616  // Form store groups.
617  // To avoid complications with moving code across basic blocks, only form
618  // groups that are contained within a single basic block.
619 
620  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
621  assert(!Move.Main.empty() && "Move group should have non-empty Main");
622  // For stores with return values we'd have to collect downward dependencies.
623  // There are no such stores that we handle at the moment, so omit that.
624  assert(Info.Inst->getType()->isVoidTy() &&
625  "Not handling stores with return values");
626  // Don't mix HVX and non-HVX instructions.
627  if (Move.IsHvx != isHvx(Info))
628  return false;
629  // For stores we need to be careful whether it's safe to move them.
630  // Stores that are otherwise safe to move together may not appear safe
631  // to move over one another (i.e. isSafeToMoveBefore may return false).
632  Instruction *Base = Move.Main.front();
633  if (Base->getParent() != Info.Inst->getParent())
634  return false;
635  if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
636  return false;
637  Move.Main.push_back(Info.Inst);
638  return true;
639  };
640 
641  MoveList StoreGroups;
642 
643  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
644  const AddrInfo &Info = *I;
645  if (!Info.Inst->mayWriteToMemory())
646  continue;
647  if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
648  StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
649  }
650 
651  // Erase singleton groups.
652  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
653  return StoreGroups;
654 }
655 
656 auto AlignVectors::move(const MoveGroup &Move) const -> bool {
657  assert(!Move.Main.empty() && "Move group should have non-empty Main");
658  Instruction *Where = Move.Main.front();
659 
660  if (Move.IsLoad) {
661  // Move all deps to before Where, keeping order.
662  for (Instruction *D : Move.Deps)
663  D->moveBefore(Where);
664  // Move all main instructions to after Where, keeping order.
665  ArrayRef<Instruction *> Main(Move.Main);
666  for (Instruction *M : Main.drop_front(1)) {
667  M->moveAfter(Where);
668  Where = M;
669  }
670  } else {
671  // NOTE: Deps are empty for "store" groups. If they need to be
672  // non-empty, decide on the order.
673  assert(Move.Deps.empty());
674  // Move all main instructions to before Where, inverting order.
675  ArrayRef<Instruction *> Main(Move.Main);
676  for (Instruction *M : Main.drop_front(1)) {
677  M->moveBefore(Where);
678  Where = M;
679  }
680  }
681 
682  return Move.Main.size() + Move.Deps.size() > 1;
683 }
684 
685 auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
686  // TODO: Needs support for masked loads/stores of "scalar" vectors.
687  if (!Move.IsHvx)
688  return false;
689 
690  // Return the element with the maximum alignment from Range,
691  // where GetValue obtains the value to compare from an element.
692  auto getMaxOf = [](auto Range, auto GetValue) {
693  return *std::max_element(
694  Range.begin(), Range.end(),
695  [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); });
696  };
697 
698  const AddrList &BaseInfos = AddrGroups.at(Move.Base);
699 
700  // Conceptually, there is a vector of N bytes covering the addresses
701  // starting from the minimum offset (i.e. Base.Addr+Start). This vector
702  // represents a contiguous memory region that spans all accessed memory
703  // locations.
704  // The correspondence between loaded or stored values will be expressed
705  // in terms of this vector. For example, the 0th element of the vector
706  // from the Base address info will start at byte Start from the beginning
707  // of this conceptual vector.
708  //
709  // This vector will be loaded/stored starting at the nearest down-aligned
710  // address and the amount of the down-alignment will be AlignVal:
711  // valign(load_vector(align_down(Base+Start)), AlignVal)
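 // Illustrative example (hypothetical numbers): if Base+Start is 0x1004 and
 // the needed alignment is 0x80, the sectors are loaded starting at
 // align_down(0x1004, 0x80) = 0x1000 and AlignVal is 4, i.e. byte 0 of the
 // conceptual vector is taken from address 0x1004.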
712 
713  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
714  AddrList MoveInfos;
715  llvm::copy_if(
716  BaseInfos, std::back_inserter(MoveInfos),
717  [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });
718 
719  // Maximum alignment present in the whole address group.
720  const AddrInfo &WithMaxAlign =
721  getMaxOf(BaseInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
722  Align MaxGiven = WithMaxAlign.HaveAlign;
723 
724  // Address info with the minimum offset in the move address group.
725  const AddrInfo &WithMinOffset =
726  getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
727 
728  const AddrInfo &WithMaxNeeded =
729  getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
730  Align MinNeeded = WithMaxNeeded.NeedAlign;
731 
732  // Set the builder at the top instruction in the move group.
733  Instruction *TopIn = Move.IsLoad ? Move.Main.front() : Move.Main.back();
734  IRBuilder<> Builder(TopIn);
735  Value *AlignAddr = nullptr; // Actual aligned address.
736  Value *AlignVal = nullptr; // Right-shift amount (for valign).
737 
738  if (MinNeeded <= MaxGiven) {
739  int Start = WithMinOffset.Offset;
740  int OffAtMax = WithMaxAlign.Offset;
741  // Shift the offset of the maximally aligned instruction (OffAtMax)
742  // back by just enough multiples of the required alignment to cover the
743  // distance from Start to OffAtMax.
744  // Calculate the address adjustment amount based on the address with the
745  // maximum alignment. This is to allow a simple gep instruction instead
746  // of potential bitcasts to i8*.
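 // E.g. (hypothetical values): with Start = 0, OffAtMax = 100 and
 // MinNeeded = 64, Adjust = -alignTo(100, 64) = -128, so the access address
 // is 128 bytes below the maximally aligned member, and
 // Diff = 0 - (100 - 128) = 28, which satisfies 0 <= Diff < MinNeeded.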
747  int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
748  AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
749  WithMaxAlign.ValTy, Adjust);
750  int Diff = Start - (OffAtMax + Adjust);
751  AlignVal = HVC.getConstInt(Diff);
752  assert(Diff >= 0);
753  assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
754  } else {
755  // WithMinOffset is the lowest address in the group,
756  // WithMinOffset.Addr = Base+Start.
757  // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
758  // mask off unnecessary bits, so it's ok to just use the original pointer as
759  // the alignment amount.
760  // Do an explicit down-alignment of the address to avoid creating an
761  // aligned instruction with an address that is not really aligned.
762  AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
763  WithMinOffset.ValTy, MinNeeded.value());
764  AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());
765  }
766 
767  ByteSpan VSpan;
768  for (const AddrInfo &AI : MoveInfos) {
769  VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
770  AI.Offset - WithMinOffset.Offset);
771  }
772 
773  // The aligned loads/stores will use blocks that are either scalars,
774  // or HVX vectors. Let "sector" be the unified term for such a block.
775  // blend(scalar, vector) -> sector...
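 // E.g. (illustrative): an HVX group on a 128-byte subtarget uses 128-byte
 // sectors, while a scalar group uses 4- or 8-byte sectors depending on the
 // needed alignment.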
776  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
777  : std::max<int>(MinNeeded.value(), 4);
778  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
779  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
780 
781  Type *SecTy = HVC.getByteTy(ScLen);
782  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
783  bool DoAlign = !HVC.isZero(AlignVal);
784 
785  if (Move.IsLoad) {
786  ByteSpan ASpan;
787  auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
788  auto *Undef = UndefValue::get(SecTy);
789 
790  for (int i = 0; i != NumSectors + DoAlign; ++i) {
791  Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
792  // FIXME: generate a predicated load?
793  Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
794  // If vector shifting is potentially needed, accumulate metadata
795  // from source sections of twice the load width.
796  int Start = (i - DoAlign) * ScLen;
797  int Width = (1 + DoAlign) * ScLen;
798  propagateMetadata(cast<Instruction>(Load),
799  VSpan.section(Start, Width).values());
800  ASpan.Blocks.emplace_back(Load, ScLen, i * ScLen);
801  }
802 
803  if (DoAlign) {
804  for (int j = 0; j != NumSectors; ++j) {
805  ASpan[j].Seg.Val = HVC.vralignb(Builder, ASpan[j].Seg.Val,
806  ASpan[j + 1].Seg.Val, AlignVal);
807  }
808  }
809 
810  for (ByteSpan::Block &B : VSpan) {
811  ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
812  Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
813  for (ByteSpan::Block &S : ASection) {
814  Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val));
815  Accum =
816  HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);
817  }
818  // Instead of casting everything to bytes for the vselect, cast to the
819  // original value type. This will avoid complications with casting masks.
820  // For example, in cases when the original mask applied to i32, it could
821  // be converted to a mask applicable to i8 via pred_typecast intrinsic,
822  // but if the mask is not exactly of HVX length, extra handling would be
823  // needed to make it work.
824  Type *ValTy = getPayload(B.Seg.Val)->getType();
825  Value *Cast = Builder.CreateBitCast(Accum, ValTy);
826  Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
827  getPassThrough(B.Seg.Val));
828  B.Seg.Val->replaceAllUsesWith(Sel);
829  }
830  } else {
831  // Stores.
832  ByteSpan ASpanV, ASpanM;
833 
834  // Return a vector value corresponding to the input value Val:
835  // either <1 x Val> for scalar Val, or Val itself for vector Val.
836  auto MakeVec = [](IRBuilder<> &Builder, Value *Val) -> Value * {
837  Type *Ty = Val->getType();
838  if (Ty->isVectorTy())
839  return Val;
840  auto *VecTy = VectorType::get(Ty, 1, /*Scalable*/ false);
841  return Builder.CreateBitCast(Val, VecTy);
842  };
843 
844  // Create an extra "undef" sector at the beginning and at the end.
845  // They will be used as the left/right filler in the vlalign step.
846  for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) {
847  // For stores, the size of each section is an aligned vector length.
848  // Adjust the store offsets relative to the section start offset.
849  ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
850  Value *AccumV = UndefValue::get(SecTy);
851  Value *AccumM = HVC.getNullValue(SecTy);
852  for (ByteSpan::Block &S : VSection) {
853  Value *Pay = getPayload(S.Seg.Val);
854  Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
855  Pay->getType(), HVC.getByteTy());
856  AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask),
857  S.Seg.Start, S.Seg.Size, S.Pos);
858  AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
859  S.Seg.Start, S.Seg.Size, S.Pos);
860  }
861  ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
862  ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
863  }
864 
865  // vlalign
866  if (DoAlign) {
867  for (int j = 1; j != NumSectors + 2; ++j) {
868  ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanV[j - 1].Seg.Val,
869  ASpanV[j].Seg.Val, AlignVal);
870  ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanM[j - 1].Seg.Val,
871  ASpanM[j].Seg.Val, AlignVal);
872  }
873  }
874 
875  for (int i = 0; i != NumSectors + DoAlign; ++i) {
876  Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
877  Value *Val = ASpanV[i].Seg.Val;
878  Value *Mask = ASpanM[i].Seg.Val; // bytes
879  if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) {
880  Value *Store = createAlignedStore(Builder, Val, Ptr, ScLen,
881  HVC.vlsb(Builder, Mask));
882  // If vector shifting is potentially needed, accumulate metadata
883  // from source sections of twice the store width.
884  int Start = (i - DoAlign) * ScLen;
885  int Width = (1 + DoAlign) * ScLen;
886  propagateMetadata(cast<Instruction>(Store),
887  VSpan.section(Start, Width).values());
888  }
889  }
890  }
891 
892  for (auto *Inst : Move.Main)
893  Inst->eraseFromParent();
894 
895  return true;
896 }
897 
898 auto AlignVectors::run() -> bool {
899  if (!createAddressGroups())
900  return false;
901 
902  bool Changed = false;
903  MoveList LoadGroups, StoreGroups;
904 
905  for (auto &G : AddrGroups) {
906  llvm::append_range(LoadGroups, createLoadGroups(G.second));
907  llvm::append_range(StoreGroups, createStoreGroups(G.second));
908  }
909 
910  for (auto &M : LoadGroups)
911  Changed |= move(M);
912  for (auto &M : StoreGroups)
913  Changed |= move(M);
914 
915  for (auto &M : LoadGroups)
916  Changed |= realignGroup(M);
917  for (auto &M : StoreGroups)
918  Changed |= realignGroup(M);
919 
920  return Changed;
921 }
922 
923 // --- End AlignVectors
924 
925 auto HexagonVectorCombine::run() -> bool {
926  if (!HST.useHVXOps())
927  return false;
928 
929  bool Changed = AlignVectors(*this).run();
930  return Changed;
931 }
932 
933 auto HexagonVectorCombine::getIntTy() const -> IntegerType * {
934  return Type::getInt32Ty(F.getContext());
935 }
936 
937 auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
938  assert(ElemCount >= 0);
939  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
940  if (ElemCount == 0)
941  return ByteTy;
942  return VectorType::get(ByteTy, ElemCount, /*Scalable*/ false);
943 }
944 
945 auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
946  assert(ElemCount >= 0);
947  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
948  if (ElemCount == 0)
949  return BoolTy;
950  return VectorType::get(BoolTy, ElemCount, /*Scalable*/ false);
951 }
952 
953 auto HexagonVectorCombine::getConstInt(int Val) const -> ConstantInt * {
954  return ConstantInt::getSigned(getIntTy(), Val);
955 }
956 
957 auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
958  if (auto *C = dyn_cast<Constant>(Val))
959  return C->isZeroValue();
960  return false;
961 }
962 
963 auto HexagonVectorCombine::getIntValue(const Value *Val) const
964  -> Optional<APInt> {
965  if (auto *CI = dyn_cast<ConstantInt>(Val))
966  return CI->getValue();
967  return None;
968 }
969 
970 auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
971  return isa<UndefValue>(Val);
972 }
973 
974 auto HexagonVectorCombine::getSizeOf(const Value *Val) const -> int {
975  return getSizeOf(Val->getType());
976 }
977 
978 auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int {
979  return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue();
980 }
981 
982 auto HexagonVectorCombine::getAllocSizeOf(const Type *Ty) const -> int {
983  return DL.getTypeAllocSize(const_cast<Type *>(Ty)).getFixedValue();
984 }
985 
986 auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
987  // The actual type may be shorter than the HVX vector, so determine
988  // the alignment based on subtarget info.
989  if (HST.isTypeForHVX(Ty))
990  return HST.getVectorLength();
991  return DL.getABITypeAlign(Ty).value();
992 }
993 
994 auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
995  assert(Ty->isIntOrIntVectorTy());
996  auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
997  if (auto *VecTy = dyn_cast<VectorType>(Ty))
998  return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
999  return Zero;
1000 }
1001 
1002 auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
1003  assert(Ty->isIntOrIntVectorTy());
1004  auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
1005  if (auto *VecTy = dyn_cast<VectorType>(Ty))
1006  return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
1007  return Minus1;
1008 }
1009 
1010 // Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
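// E.g. (illustrative): with Dst = <8 x i8>, Src = <4 x i8>, Start = 1,
// Length = 2, Where = 5, the result is
//   { Dst[0..4], Src[1], Src[2], Dst[7] }.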
1011 auto HexagonVectorCombine::insertb(IRBuilder<> &Builder, Value *Dst, Value *Src,
1012  int Start, int Length, int Where) const
1013  -> Value * {
1014  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
1015  int SrcLen = getSizeOf(Src);
1016  int DstLen = getSizeOf(Dst);
1017  assert(0 <= Start && Start + Length <= SrcLen);
1018  assert(0 <= Where && Where + Length <= DstLen);
1019 
1020  int P2Len = PowerOf2Ceil(SrcLen | DstLen);
1021  auto *Undef = UndefValue::get(getByteTy());
1022  Value *P2Src = vresize(Builder, Src, P2Len, Undef);
1023  Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);
1024 
1025  SmallVector<int, 256> SMask(P2Len);
1026  for (int i = 0; i != P2Len; ++i) {
1027  // If i is in [Where, Where+Length), pick Src[Start+(i-Where)].
1028  // Otherwise, pick Dst[i];
1029  SMask[i] =
1030  (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
1031  }
1032 
1033  Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
1034  return vresize(Builder, P2Insert, DstLen, Undef);
1035 }
1036 
1037 auto HexagonVectorCombine::vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
1038  Value *Amt) const -> Value * {
1039  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
1040  assert(isSectorTy(Hi->getType()));
1041  if (isZero(Amt))
1042  return Hi;
1043  int VecLen = getSizeOf(Hi);
1044  if (auto IntAmt = getIntValue(Amt))
1045  return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
1046  VecLen);
1047 
1048  if (HST.isTypeForHVX(Hi->getType())) {
1049  int HwLen = HST.getVectorLength();
1050  assert(VecLen == HwLen && "Expecting an exact HVX type");
1051  Intrinsic::ID V6_vlalignb = HwLen == 64
1052  ? Intrinsic::hexagon_V6_vlalignb
1053  : Intrinsic::hexagon_V6_vlalignb_128B;
1054  return createHvxIntrinsic(Builder, V6_vlalignb, Hi->getType(),
1055  {Hi, Lo, Amt});
1056  }
1057 
1058  if (VecLen == 4) {
1059  Value *Pair = concat(Builder, {Lo, Hi});
1060  Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32);
1061  Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
1062  return Builder.CreateBitCast(Trunc, Hi->getType());
1063  }
1064  if (VecLen == 8) {
1065  Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt);
1066  return vralignb(Builder, Lo, Hi, Sub);
1067  }
1068  llvm_unreachable("Unexpected vector length");
1069 }
1070 
1071 auto HexagonVectorCombine::vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
1072  Value *Amt) const -> Value * {
1073  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
1074  assert(isSectorTy(Lo->getType()));
1075  if (isZero(Amt))
1076  return Lo;
1077  int VecLen = getSizeOf(Lo);
1078  if (auto IntAmt = getIntValue(Amt))
1079  return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
1080 
1081  if (HST.isTypeForHVX(Lo->getType())) {
1082  int HwLen = HST.getVectorLength();
1083  assert(VecLen == HwLen && "Expecting an exact HVX type");
1084  Intrinsic::ID V6_valignb = HwLen == 64 ? Intrinsic::hexagon_V6_valignb
1085  : Intrinsic::hexagon_V6_valignb_128B;
1086  return createHvxIntrinsic(Builder, V6_valignb, Lo->getType(),
1087  {Hi, Lo, Amt});
1088  }
1089 
1090  if (VecLen == 4) {
1091  Value *Pair = concat(Builder, {Lo, Hi});
1092  Value *Shift = Builder.CreateLShr(Pair, Amt);
1093  Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
1094  return Builder.CreateBitCast(Trunc, Lo->getType());
1095  }
1096  if (VecLen == 8) {
1097  Type *Int64Ty = Type::getInt64Ty(F.getContext());
1098  Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty);
1099  Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty);
1100  Function *FI = Intrinsic::getDeclaration(F.getParent(),
1101  Intrinsic::hexagon_S2_valignrb);
1102  Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt});
1103  return Builder.CreateBitCast(Call, Lo->getType());
1104  }
1105  llvm_unreachable("Unexpected vector length");
1106 }
1107 
1108 // Concatenates a sequence of vectors of the same type.
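// E.g. (illustrative): concat({A, B, C}) pads to {A, B, C, undef}, joins
// pairs into {A:B, C:undef}, joins those into A:B:C:undef, and finally
// shuffles the result down to the original elements of A, B and C.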
1109 auto HexagonVectorCombine::concat(IRBuilder<> &Builder,
1110  ArrayRef<Value *> Vecs) const -> Value * {
1111  assert(!Vecs.empty());
1112  SmallVector<int, 256> SMask;
1113  std::vector<Value *> Work[2];
1114  int ThisW = 0, OtherW = 1;
1115 
1116  Work[ThisW].assign(Vecs.begin(), Vecs.end());
1117  while (Work[ThisW].size() > 1) {
1118  auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
1119  int ElemCount = Ty->getElementCount().getFixedValue();
1120  SMask.resize(ElemCount * 2);
1121  std::iota(SMask.begin(), SMask.end(), 0);
1122 
1123  Work[OtherW].clear();
1124  if (Work[ThisW].size() % 2 != 0)
1125  Work[ThisW].push_back(UndefValue::get(Ty));
1126  for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
1127  Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i],
1128  Work[ThisW][i + 1], SMask);
1129  Work[OtherW].push_back(Joined);
1130  }
1131  std::swap(ThisW, OtherW);
1132  }
1133 
1134  // Since there may have been some undefs appended to make shuffle operands
1135  // have the same type, perform the last shuffle to only pick the original
1136  // elements.
1137  SMask.resize(Vecs.size() * getSizeOf(Vecs.front()->getType()));
1138  std::iota(SMask.begin(), SMask.end(), 0);
1139  Value *Total = Work[OtherW].front();
1140  return Builder.CreateShuffleVector(Total, SMask);
1141 }
1142 
1143 auto HexagonVectorCombine::vresize(IRBuilder<> &Builder, Value *Val,
1144  int NewSize, Value *Pad) const -> Value * {
1145  assert(isa<VectorType>(Val->getType()));
1146  auto *ValTy = cast<VectorType>(Val->getType());
1147  assert(ValTy->getElementType() == Pad->getType());
1148 
1149  int CurSize = ValTy->getElementCount().getFixedValue();
1150  if (CurSize == NewSize)
1151  return Val;
1152  // Truncate?
1153  if (CurSize > NewSize)
1154  return getElementRange(Builder, Val, /*Unused*/ Val, 0, NewSize);
1155  // Extend.
1156  SmallVector<int, 128> SMask(NewSize);
1157  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
1158  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
1159  Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
1160  return Builder.CreateShuffleVector(Val, PadVec, SMask);
1161 }
1162 
1163 auto HexagonVectorCombine::rescale(IRBuilder<> &Builder, Value *Mask,
1164  Type *FromTy, Type *ToTy) const -> Value * {
1165  // Mask is a vector <N x i1>, where each element corresponds to an
1166  // element of FromTy. Remap it so that each element will correspond
1167  // to an element of ToTy.
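 // E.g. (illustrative): a <4 x i1> mask for <4 x i32> rescaled to bytes
 // becomes a <16 x i1> mask, with each original bit repeated four times.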
1168  assert(isa<VectorType>(Mask->getType()));
1169 
1170  Type *FromSTy = FromTy->getScalarType();
1171  Type *ToSTy = ToTy->getScalarType();
1172  if (FromSTy == ToSTy)
1173  return Mask;
1174 
1175  int FromSize = getSizeOf(FromSTy);
1176  int ToSize = getSizeOf(ToSTy);
1177  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
1178 
1179  auto *MaskTy = cast<VectorType>(Mask->getType());
1180  int FromCount = MaskTy->getElementCount().getFixedValue();
1181  int ToCount = (FromCount * FromSize) / ToSize;
1182  assert((FromCount * FromSize) % ToSize == 0);
1183 
1184  // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
1185  // -> trunc to <M x i1>.
1186  Value *Ext = Builder.CreateSExt(
1187  Mask, VectorType::get(FromSTy, FromCount, /*Scalable*/ false));
1188  Value *Cast = Builder.CreateBitCast(
1189  Ext, VectorType::get(ToSTy, ToCount, /*Scalable*/ false));
1190  return Builder.CreateTrunc(
1191  Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false));
1192 }
1193 
1194 // Bitcast to bytes, and return least significant bits.
1195 auto HexagonVectorCombine::vlsb(IRBuilder<> &Builder, Value *Val) const
1196  -> Value * {
1197  Type *ScalarTy = Val->getType()->getScalarType();
1198  if (ScalarTy == getBoolTy())
1199  return Val;
1200 
1201  Value *Bytes = vbytes(Builder, Val);
1202  if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
1203  return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));
1204  // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
1205  // <1 x i1>.
1206  return Builder.CreateTrunc(Bytes, getBoolTy());
1207 }
1208 
1209 // Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
1210 auto HexagonVectorCombine::vbytes(IRBuilder<> &Builder, Value *Val) const
1211  -> Value * {
1212  Type *ScalarTy = Val->getType()->getScalarType();
1213  if (ScalarTy == getByteTy())
1214  return Val;
1215 
1216  if (ScalarTy != getBoolTy())
1217  return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));
1218  // For bool, return a sext from i1 to i8.
1219  if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
1220  return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy));
1221  return Builder.CreateSExt(Val, getByteTy());
1222 }
1223 
1224 auto HexagonVectorCombine::createHvxIntrinsic(IRBuilder<> &Builder,
1225  Intrinsic::ID IntID, Type *RetTy,
1226  ArrayRef<Value *> Args) const
1227  -> Value * {
1228  int HwLen = HST.getVectorLength();
1229  Type *BoolTy = Type::getInt1Ty(F.getContext());
1230  Type *Int32Ty = Type::getInt32Ty(F.getContext());
1231  // HVX vector -> v16i32/v32i32
1232  // HVX vector predicate -> v512i1/v1024i1
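 // E.g. (illustrative): on a 128-byte HVX subtarget a <32 x i32> vector is
 // already in intrinsic form, and a <128 x i1> predicate maps to <1024 x i1>.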
1233  auto getTypeForIntrin = [&](Type *Ty) -> Type * {
1234  if (HST.isTypeForHVX(Ty, /*IncludeBool*/ true)) {
1235  Type *ElemTy = cast<VectorType>(Ty)->getElementType();
1236  if (ElemTy == Int32Ty)
1237  return Ty;
1238  if (ElemTy == BoolTy)
1239  return VectorType::get(BoolTy, 8 * HwLen, /*Scalable*/ false);
1240  return VectorType::get(Int32Ty, HwLen / 4, /*Scalable*/ false);
1241  }
1242  // Non-HVX type. It should be a scalar.
1243  assert(Ty == Int32Ty || Ty->isIntegerTy(64));
1244  return Ty;
1245  };
1246 
1247  auto getCast = [&](IRBuilder<> &Builder, Value *Val,
1248  Type *DestTy) -> Value * {
1249  Type *SrcTy = Val->getType();
1250  if (SrcTy == DestTy)
1251  return Val;
1252  if (HST.isTypeForHVX(SrcTy, /*IncludeBool*/ true)) {
1253  if (cast<VectorType>(SrcTy)->getElementType() == BoolTy) {
1254  // This should take care of casts the other way too, for example
1255  // v1024i1 -> v32i1.
1256  Intrinsic::ID TC = HwLen == 64
1257  ? Intrinsic::hexagon_V6_pred_typecast
1258  : Intrinsic::hexagon_V6_pred_typecast_128B;
1259  Function *FI = Intrinsic::getDeclaration(F.getParent(), TC,
1260  {DestTy, Val->getType()});
1261  return Builder.CreateCall(FI, {Val});
1262  }
1263  // Non-predicate HVX vector.
1264  return Builder.CreateBitCast(Val, DestTy);
1265  }
1266  // Non-HVX type. It should be a scalar, and it should already have
1267  // a valid type.
1268  llvm_unreachable("Unexpected type");
1269  };
1270 
1271  SmallVector<Value *, 4> IntOps;
1272  for (Value *A : Args)
1273  IntOps.push_back(getCast(Builder, A, getTypeForIntrin(A->getType())));
1274  Function *FI = Intrinsic::getDeclaration(F.getParent(), IntID);
1275  Value *Call = Builder.CreateCall(FI, IntOps);
1276 
1277  Type *CallTy = Call->getType();
1278  if (CallTy == RetTy)
1279  return Call;
1280  // Scalar types should have RetTy matching the call return type.
1281  assert(HST.isTypeForHVX(CallTy, /*IncludeBool*/ true));
1282  if (cast<VectorType>(CallTy)->getElementType() == BoolTy)
1283  return getCast(Builder, Call, RetTy);
1284  return Builder.CreateBitCast(Call, RetTy);
1285 }
1286 
1287 auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
1288  Value *Ptr1) const
1289  -> Optional<int> {
1290  struct Builder : IRBuilder<> {
1291  Builder(BasicBlock *B) : IRBuilder<>(B) {}
1292  ~Builder() {
1293  for (Instruction *I : llvm::reverse(ToErase))
1294  I->eraseFromParent();
1295  }
1296  SmallVector<Instruction *, 8> ToErase;
1297  };
1298 
1299 #define CallBuilder(B, F) \
1300  [&](auto &B_) { \
1301  Value *V = B_.F; \
1302  if (auto *I = dyn_cast<Instruction>(V)) \
1303  B_.ToErase.push_back(I); \
1304  return V; \
1305  }(B)
1306 
1307  auto Simplify = [&](Value *V) {
1308  if (auto *I = dyn_cast<Instruction>(V)) {
1309  SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
1310  if (Value *S = SimplifyInstruction(I, Q))
1311  return S;
1312  }
1313  return V;
1314  };
1315 
1316  auto StripBitCast = [](Value *V) {
1317  while (auto *C = dyn_cast<BitCastInst>(V))
1318  V = C->getOperand(0);
1319  return V;
1320  };
1321 
1322  Ptr0 = StripBitCast(Ptr0);
1323  Ptr1 = StripBitCast(Ptr1);
1324  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
1325  return None;
1326 
1327  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
1328  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
1329  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
1330  return None;
1331 
1332  Builder B(Gep0->getParent());
1333  int Scale = getAllocSizeOf(Gep0->getSourceElementType());
1334 
1335  // FIXME: for now only check GEPs with a single index.
1336  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
1337  return None;
1338 
1339  Value *Idx0 = Gep0->getOperand(1);
1340  Value *Idx1 = Gep1->getOperand(1);
1341 
1342  // First, try to simplify the subtraction directly.
1343  if (auto *Diff = dyn_cast<ConstantInt>(
1344  Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
1345  return Diff->getSExtValue() * Scale;
1346 
1347  KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT);
1348  KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT);
1349  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
1350  if (Unknown.isAllOnes())
1351  return None;
1352 
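 // Since Unknown and ~Unknown partition the bits, Idx0 - Idx1 equals the sum
 // of the differences of the unknown-bit parts and of the known-bit parts;
 // compute each part separately and require both to fold to constants.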
1353  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
1354  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
1355  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
1356  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
1357  int Diff0 = 0;
1358  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
1359  Diff0 = C->getSExtValue();
1360  } else {
1361  return None;
1362  }
1363 
1364  Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
1365  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
1366  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
1367  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
1368  int Diff1 = 0;
1369  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
1370  Diff1 = C->getSExtValue();
1371  } else {
1372  return None;
1373  }
1374 
1375  return (Diff0 + Diff1) * Scale;
1376 
1377 #undef CallBuilder
1378 }
1379 
1380 template <typename T>
1381 auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
1382  BasicBlock::const_iterator To,
1383  const T &Ignore) const
1384  -> bool {
1385  auto getLocOrNone = [this](const Instruction &I) -> Optional<MemoryLocation> {
1386  if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
1387  switch (II->getIntrinsicID()) {
1388  case Intrinsic::masked_load:
1389  return MemoryLocation::getForArgument(II, 0, TLI);
1390  case Intrinsic::masked_store:
1391  return MemoryLocation::getForArgument(II, 1, TLI);
1392  }
1393  }
1394  return MemoryLocation::getOrNone(&I);
1395  };
1396 
1397  // The source and the destination must be in the same basic block.
1398  const BasicBlock &Block = *In.getParent();
1399  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
1400  // No PHIs.
1401  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
1402  return false;
1403 
1404  if (!mayBeMemoryDependent(In))
1405  return true;
1406  bool MayWrite = In.mayWriteToMemory();
1407  auto MaybeLoc = getLocOrNone(In);
1408 
1409  auto From = In.getIterator();
1410  if (From == To)
1411  return true;
1412  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
1413  auto Range =
1414  MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
1415  for (auto It = Range.first; It != Range.second; ++It) {
1416  const Instruction &I = *It;
1417  if (llvm::is_contained(Ignore, &I))
1418  continue;
1419  // assume intrinsic can be ignored
1420  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1421  if (II->getIntrinsicID() == Intrinsic::assume)
1422  continue;
1423  }
1424  // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
1425  if (I.mayThrow())
1426  return false;
1427  if (auto *CB = dyn_cast<CallBase>(&I)) {
1428  if (!CB->hasFnAttr(Attribute::WillReturn))
1429  return false;
1430  if (!CB->hasFnAttr(Attribute::NoSync))
1431  return false;
1432  }
1433  if (I.mayReadOrWriteMemory()) {
1434  auto MaybeLocI = getLocOrNone(I);
1435  if (MayWrite || I.mayWriteToMemory()) {
1436  if (!MaybeLoc || !MaybeLocI)
1437  return false;
1438  if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
1439  return false;
1440  }
1441  }
1442  }
1443  return true;
1444 }
1445 
1446 #ifndef NDEBUG
1447 auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
1448  if (auto *VecTy = dyn_cast<VectorType>(Ty))
1449  return VecTy->getElementType() == getByteTy();
1450  return false;
1451 }
1452 
1453 auto HexagonVectorCombine::isSectorTy(Type *Ty) const -> bool {
1454  if (!isByteVecTy(Ty))
1455  return false;
1456  int Size = getSizeOf(Ty);
1457  if (HST.isTypeForHVX(Ty))
1458  return Size == static_cast<int>(HST.getVectorLength());
1459  return Size == 4 || Size == 8;
1460 }
1461 #endif
1462 
1463 auto HexagonVectorCombine::getElementRange(IRBuilder<> &Builder, Value *Lo,
1464  Value *Hi, int Start,
1465  int Length) const -> Value * {
1466  assert(0 <= Start && Start < Length);
1467  SmallVector<int, 128> SMask(Length);
1468  std::iota(SMask.begin(), SMask.end(), Start);
1469  return Builder.CreateShuffleVector(Lo, Hi, SMask);
1470 }
1471 
1472 // Pass management.
1473 
1474 namespace llvm {
1475 void initializeHexagonVectorCombineLegacyPass(PassRegistry &);
1476 FunctionPass *createHexagonVectorCombineLegacyPass();
1477 } // namespace llvm
1478 
1479 namespace {
1480 class HexagonVectorCombineLegacy : public FunctionPass {
1481 public:
1482  static char ID;
1483 
1484  HexagonVectorCombineLegacy() : FunctionPass(ID) {}
1485 
1486  StringRef getPassName() const override { return "Hexagon Vector Combine"; }
1487 
1488  void getAnalysisUsage(AnalysisUsage &AU) const override {
1489  AU.setPreservesCFG();
1490  AU.addRequired<AAResultsWrapperPass>();
1491  AU.addRequired<AssumptionCacheTracker>();
1492  AU.addRequired<DominatorTreeWrapperPass>();
1493  AU.addRequired<TargetLibraryInfoWrapperPass>();
1494  AU.addRequired<TargetPassConfig>();
1495  FunctionPass::getAnalysisUsage(AU);
1496  }
1497 
1498  bool runOnFunction(Function &F) override {
1499  if (skipFunction(F))
1500  return false;
1501  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1502  AssumptionCache &AC =
1503  getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1504  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1505  TargetLibraryInfo &TLI =
1506  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1507  auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
1508  HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
1509  return HVC.run();
1510  }
1511 };
1512 } // namespace
1513 
1514 char HexagonVectorCombineLegacy::ID = 0;
1515 
1516 INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
1517  "Hexagon Vector Combine", false, false)
1518 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1519 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1520 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1521 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1522 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1523 INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
1524  "Hexagon Vector Combine", false, false)
1525 
1526 FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
1527  return new HexagonVectorCombineLegacy();
1528 }
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:207
KnownBits.h
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:912
AliasAnalysis.h
llvm::ARMBuildAttrs::Section
@ Section
Legacy Tags.
Definition: ARMBuildAttributes.h:82
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1340
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1600
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
TargetMachine.h
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::AAResults
Definition: AliasAnalysis.h:508
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::DomTreeNodeBase::children
iterator_range< iterator > children()
Definition: GenericDomTree.h:83
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:226
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction
Definition: Instruction.h:45
llvm::SimplifyInstruction
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6311
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::ConstantVector::getSplat
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1437
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1796
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::None
const NoneType None
Definition: None.h:23
llvm::mayBeMemoryDependent
bool mayBeMemoryDependent(const Instruction &I)
Returns true if the result or effects of the given instructions I depend on or influence global memor...
Definition: ValueTracking.cpp:4715
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:1524
VectorUtils.h
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:697
llvm::MemoryLocation::getOrNone
static Optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:78
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
llvm::pdb::Unknown
@ Unknown
Definition: PDBTypes.h:395
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1665
ArrayRef.h
llvm::concat
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&... Ranges)
Concatenated range across two or more ranges.
Definition: STLExtras.h:1060
TargetPassConfig.h
llvm::computeKnownBits
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Definition: ValueTracking.cpp:224
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1658
getLocation
static MemoryLocation getLocation(Instruction *I, AAResults *AA)
Definition: SLPVectorizer.cpp:584
llvm::MemoryLocation::getForArgument
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
Definition: MemoryLocation.cpp:131
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1581
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1607
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
Simplify
assume Assume Simplify
Definition: AssumeBundleBuilder.cpp:603
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1797
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
DEBUG_TYPE
#define DEBUG_TYPE
Definition: HexagonVectorCombine.cpp:50
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::DomTreeNodeBase< BasicBlock >
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
CallBuilder
#define CallBuilder(B, F)
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
j
return j(j<< 16)
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:242
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:873
std
Definition: BitVector.h:838
llvm::KnownBits
Definition: KnownBits.h:23
llvm::copy_if
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1646
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:669
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:196
llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:939
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:776
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:414
llvm::pdb::DbgHeaderType::Max
@ Max
llvm::TargetMachine::getSubtargetImpl
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: TargetMachine.h:136
SmallVector.h
Dominators.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1336
InstructionSimplify.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
shift
http eax xorl edx cl sete al setne dl sall eax sall edx But that requires good bit subreg support this might be better It s an extra shift
Definition: README.txt:30
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:93
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
From
BlockVerifier::State From
Definition: BlockVerifier.cpp:55
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
raw_ostream.h
llvm::pdb::PDB_SymType::Block
@ Block
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:670
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:91
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38