//===-- HexagonVectorCombine.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// HexagonVectorCombine is a utility class implementing a variety of functions
// that assist in vector-based optimizations.
//
// AlignVectors: replace unaligned vector loads and stores with aligned ones.
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"

#include <algorithm>
#include <deque>
#include <map>
#include <set>
#include <utility>
#include <vector>

#define DEBUG_TYPE "hexagon-vc"

using namespace llvm;

namespace {
class HexagonVectorCombine {
public:
  HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
                       DominatorTree &DT_, TargetLibraryInfo &TLI_,
                       const TargetMachine &TM_)
      : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
        TLI(TLI_),
        HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}

  bool run();

  // Common integer type.
  IntegerType *getIntTy() const;
  // Byte type: either scalar (when ElemCount = 0), or vector with the
  // given element count.
  Type *getByteTy(int ElemCount = 0) const;
  // Boolean type: either scalar (when ElemCount = 0), or vector with the
  // given element count.
  Type *getBoolTy(int ElemCount = 0) const;
  // Create a ConstantInt of type returned by getIntTy with the value Val.
  ConstantInt *getConstInt(int Val) const;
  // Get the integer value of Val, if it exists.
  Optional<APInt> getIntValue(const Value *Val) const;
  // Is Val a constant 0, or a vector of 0s?
  bool isZero(const Value *Val) const;
  // Is Val an undef value?
  bool isUndef(const Value *Val) const;

  int getSizeOf(const Value *Val) const;
  int getSizeOf(const Type *Ty) const;
  int getAllocSizeOf(const Type *Ty) const;
  int getTypeAlignment(Type *Ty) const;

  VectorType *getByteVectorTy(int ScLen) const;
  Constant *getNullValue(Type *Ty) const;
  Constant *getFullValue(Type *Ty) const;

  Value *insertb(IRBuilder<> &Builder, Value *Dest, Value *Src, int Start,
                 int Length, int Where) const;
  Value *vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
  Value *vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
  Value *concat(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) const;
  Value *vresize(IRBuilder<> &Builder, Value *Val, int NewSize,
                 Value *Pad) const;
  Value *rescale(IRBuilder<> &Builder, Value *Mask, Type *FromTy,
                 Type *ToTy) const;
  Value *vlsb(IRBuilder<> &Builder, Value *Val) const;
  Value *vbytes(IRBuilder<> &Builder, Value *Val) const;

  Value *createHvxIntrinsic(IRBuilder<> &Builder, Intrinsic::ID IntID,
                            Type *RetTy, ArrayRef<Value *> Args) const;

  Optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;

  template <typename T = std::vector<Instruction *>>
  bool isSafeToMoveBeforeInBB(const Instruction &In,
                              BasicBlock::const_iterator To,
                              const T &Ignore = {}) const;

  Function &F;
  const DataLayout &DL;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  TargetLibraryInfo &TLI;
  const HexagonSubtarget &HST;

private:
#ifndef NDEBUG
  // These two functions are only used for assertions at the moment.
  bool isByteVecTy(Type *Ty) const;
  bool isSectorTy(Type *Ty) const;
#endif
  Value *getElementRange(IRBuilder<> &Builder, Value *Lo, Value *Hi, int Start,
                         int Length) const;
};

class AlignVectors {
public:
  AlignVectors(HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  bool run();

private:
  using InstList = std::vector<Instruction *>;

  struct Segment {
    void *Data;
    int Start;
    int Size;
  };

  struct AddrInfo {
    AddrInfo(const AddrInfo &) = default;
    AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T,
             Align H)
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}

    // XXX: add Size member?
    Instruction *Inst;
    Value *Addr;
    Type *ValTy;
    Align HaveAlign;
    Align NeedAlign;
    int Offset = 0; // Offset (in bytes) from the first member of the
                    // containing AddrList.
  };
  using AddrList = std::vector<AddrInfo>;

  struct InstrLess {
    bool operator()(const Instruction *A, const Instruction *B) const {
      return A->comesBefore(B);
    }
  };
  using DepList = std::set<Instruction *, InstrLess>;

  struct MoveGroup {
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}
    Instruction *Base; // Base instruction of the parent address group.
    InstList Main;     // Main group of instructions.
    InstList Deps;     // List of dependencies.
    bool IsHvx;        // Is this a group of HVX instructions?
    bool IsLoad;       // Is this a load group?
  };
  using MoveList = std::vector<MoveGroup>;

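  // A ByteSpan describes a collection of byte ranges ("blocks") placed at
  // byte positions within a larger conceptual byte vector. For example
  // (illustrative): two 4-byte values placed at positions 0 and 8 form a
  // span with size() == 2 and extent() == 12; bytes 4..7 are not covered
  // by any block, and section(8, 4) yields a span containing only the
  // second value.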
  struct ByteSpan {
    struct Segment {
      // Segment of a Value: 'Len' bytes starting at byte 'Begin'.
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Value *Val; // Value representable as a sequence of bytes.
      int Start;  // First byte of the value that belongs to the segment.
      int Size;   // Number of bytes in the segment.
    };

    struct Block {
      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
      Block(Value *Val, int Off, int Len, int Pos)
          : Seg(Val, Off, Len), Pos(Pos) {}
      Block(const Block &Blk) = default;
      Segment Seg; // Value segment.
      int Pos;     // Position (offset) of the segment in the Block.
    };

    int extent() const;
    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);
    SmallVector<Value *, 8> values() const;

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }

    std::vector<Block> Blocks;

    using iterator = decltype(Blocks)::iterator;
    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
    using const_iterator = decltype(Blocks)::const_iterator;
    const_iterator begin() const { return Blocks.begin(); }
    const_iterator end() const { return Blocks.end(); }
  };

  Align getAlignFromValue(const Value *V) const;
  Optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;

  Value *getPayload(Value *Val) const;
  Value *getMask(Value *Val) const;
  Value *getPassThrough(Value *Val) const;

  Value *createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
                               int Adjust) const;
  Value *createAlignedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
                              int Alignment) const;
  Value *createAlignedLoad(IRBuilder<> &Builder, Type *ValTy, Value *Ptr,
                           int Alignment, Value *Mask, Value *PassThru) const;
  Value *createAlignedStore(IRBuilder<> &Builder, Value *Val, Value *Ptr,
                            int Alignment, Value *Mask) const;

  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool move(const MoveGroup &Move) const;
  bool realignGroup(const MoveGroup &Move) const;

  friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
  friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
  friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS);

  std::map<Instruction *, AddrList> AddrGroups;
  HexagonVectorCombine &HVC;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;
  return OS;
}

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
  OS << "Main\n";
  for (Instruction *I : MG.Main)
    OS << " " << *I << '\n';
  OS << "Deps\n";
  for (Instruction *I : MG.Deps)
    OS << " " << *I << '\n';
  return OS;
}

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS) {
    OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
       << *B.Seg.Val << '\n';
  }
  OS << ']';
  return OS;
}

} // namespace

namespace {

template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}
template <typename T> T *isCandidate(Instruction *In) {
  return dyn_cast<T>(In);
}
template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<LoadInst>(In));
}
template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<StoreInst>(In));
}

#if !defined(_MSC_VER) || _MSC_VER >= 1926
// VS2017 and some versions of VS2019 have trouble compiling this:
// error C2976: 'std::map': too few template arguments
// VS 2019 16.x is known to work, except for 16.4/16.5 (MSC_VER 1924/1925)
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  for (auto i = map.begin(), e = map.end(); i != e;) {
    if (p(*i))
      i = map.erase(i);
    else
      i = std::next(i);
  }
}

// Forward other erase_ifs to the LLVM implementations.
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
  llvm::erase_if(std::forward<T>(container), p);
}

} // namespace

// --- Begin AlignVectors

auto AlignVectors::ByteSpan::extent() const -> int {
  if (size() == 0)
    return 0;
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
  }
  return Max - Min;
}

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  ByteSpan Section;
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);                        // Left end.
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);  // Right end+1.
    if (L < R) {
      // How much to chop off the beginning of the segment:
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
    }
  }
  return Section;
}

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
  for (Block &B : Blocks)
    B.Pos += Offset;
  return *this;
}

auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
  return Values;
}

auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
  const auto *C = dyn_cast<ConstantInt>(V);
  assert(C && "Alignment must be a compile-time constant integer");
  return C->getAlignValue();
}

auto AlignVectors::getAddrInfo(Instruction &In) const -> Optional<AddrInfo> {
  if (auto *L = isCandidate<LoadInst>(&In))
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
                    L->getAlign());
  if (auto *S = isCandidate<StoreInst>(&In))
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  if (auto *II = isCandidate<IntrinsicInst>(&In)) {
    Intrinsic::ID ID = II->getIntrinsicID();
    switch (ID) {
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      getAlignFromValue(II->getArgOperand(1)));
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      getAlignFromValue(II->getArgOperand(2)));
    }
  }
  return Optional<AddrInfo>();
}

auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  return HVC.HST.isTypeForHVX(AI.ValTy);
}

auto AlignVectors::getPayload(Value *Val) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(Val)) {
    Intrinsic::ID ID = 0;
    if (auto *II = dyn_cast<IntrinsicInst>(In))
      ID = II->getIntrinsicID();
    if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
      return In->getOperand(0);
  }
  return Val;
}

auto AlignVectors::getMask(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(2);
    case Intrinsic::masked_store:
      return II->getArgOperand(3);
    }
  }

  Type *ValTy = getPayload(Val)->getType();
  if (auto *VecTy = dyn_cast<VectorType>(ValTy)) {
    int ElemCount = VecTy->getElementCount().getFixedValue();
    return HVC.getFullValue(HVC.getBoolTy(ElemCount));
  }
  return HVC.getFullValue(HVC.getBoolTy());
}

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(3);
  }
  return UndefValue::get(getPayload(Val)->getType());
}

auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust) const
    -> Value * {
  // The adjustment is in bytes, but if it's a multiple of the type size,
  // we don't need to do pointer casts.
  auto *PtrTy = cast<PointerType>(Ptr->getType());
  if (!PtrTy->isOpaque()) {
    Type *ElemTy = PtrTy->getNonOpaquePointerElementType();
    int ElemSize = HVC.getAllocSizeOf(ElemTy);
    if (Adjust % ElemSize == 0 && Adjust != 0) {
      Value *Tmp0 =
          Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));
      return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
    }
  }

  PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext());
  Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy);
  Value *Tmp1 = Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), Tmp0,
                                  HVC.getConstInt(Adjust));
  return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo());
}

auto AlignVectors::createAlignedPointer(IRBuilder<> &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment) const
    -> Value * {
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy());
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(AsInt, Mask);
  return Builder.CreateIntToPtr(And, ValTy->getPointerTo());
}

auto AlignVectors::createAlignedLoad(IRBuilder<> &Builder, Type *ValTy,
                                     Value *Ptr, int Alignment, Value *Mask,
                                     Value *PassThru) const -> Value * {
  assert(!HVC.isUndef(Mask)); // Should this be allowed?
  if (HVC.isZero(Mask))
    return PassThru;
  if (Mask == ConstantInt::getTrue(Mask->getType()))
    return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment));
  return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru);
}

auto AlignVectors::createAlignedStore(IRBuilder<> &Builder, Value *Val,
                                      Value *Ptr, int Alignment,
                                      Value *Mask) const -> Value * {
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
    return UndefValue::get(Val->getType());
  if (Mask == ConstantInt::getTrue(Mask->getType()))
    return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
  return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
}

auto AlignVectors::createAddressGroups() -> bool {
  // An address group created here may contain instructions spanning
  // multiple basic blocks.
  AddrList WorkStack;

  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    }
    return std::make_pair(nullptr, 0);
  };

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    BasicBlock &Block = *DomN->getBlock();
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
      if (!AI)
        continue;
      auto F = findBaseAndOffset(*AI);
      Instruction *GroupInst;
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        GroupInst = BI;
      } else {
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      }
      AddrGroups[GroupInst].push_back(*AI);
    }

    for (DomTreeNode *C : DomN->children())
      Visit(C, Visit);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();
  };

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  // AddrGroups are formed.

  // Remove groups of size 1.
  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
  // Remove groups that don't use HVX types.
  erase_if(AddrGroups, [&](auto &G) {
    return llvm::none_of(
        G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  });

  return !AddrGroups.empty();
}

auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  // Form load groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.

  auto getUpwardDeps = [](Instruction *In, Instruction *Base) {
    BasicBlock *Parent = Base->getParent();
    assert(In->getParent() == Parent &&
           "Base and In should be in the same block");
    assert(Base->comesBefore(In) && "Base should come before In");

    DepList Deps;
    std::deque<Instruction *> WorkQ = {In};
    while (!WorkQ.empty()) {
      Instruction *D = WorkQ.front();
      WorkQ.pop_front();
      Deps.insert(D);
      for (Value *Op : D->operands()) {
        if (auto *I = dyn_cast<Instruction>(Op)) {
          if (I->getParent() == Parent && Base->comesBefore(I))
            WorkQ.push_back(I);
        }
      }
    }
    return Deps;
  };

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // Leading instruction in the load group.
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;

    auto isSafeToMoveToBase = [&](const Instruction *I) {
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());
    };
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    if (!llvm::all_of(Deps, isSafeToMoveToBase))
      return false;

    // The dependencies will be moved together with the load, so make sure
    // that none of them could be moved independently in another group.
    Deps.erase(Info.Inst);
    auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };
    if (llvm::any_of(Deps, inAddrMap))
      return false;
    Move.Main.push_back(Info.Inst);
    llvm::append_range(Move.Deps, Deps);
    return true;
  };

  MoveList LoadGroups;

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
      continue;
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
  }

  // Erase singleton groups.
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  return LoadGroups;
}

auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  // Form store groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // For stores with return values we'd have to collect downward
    // dependencies. There are no such stores that we handle at the moment,
    // so omit that.
    assert(Info.Inst->getType()->isVoidTy() &&
           "Not handling stores with return values");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // For stores we need to be careful whether it's safe to move them.
    // Stores that are otherwise safe to move together may not appear safe
    // to move over one another (i.e. isSafeToMoveBefore may return false).
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
      return false;
    Move.Main.push_back(Info.Inst);
    return true;
  };

  MoveList StoreGroups;

  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
      continue;
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
  }

  // Erase singleton groups.
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  return StoreGroups;
}

auto AlignVectors::move(const MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
  Instruction *Where = Move.Main.front();

  if (Move.IsLoad) {
    // Move all deps to before Where, keeping order.
    for (Instruction *D : Move.Deps)
      D->moveBefore(Where);
    // Move all main instructions to after Where, keeping order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveAfter(Where);
      Where = M;
    }
  } else {
    // NOTE: Deps are empty for "store" groups. If they need to be
    // non-empty, decide on the order.
    assert(Move.Deps.empty());
    // Move all main instructions to before Where, inverting order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveBefore(Where);
      Where = M;
    }
  }

  return Move.Main.size() + Move.Deps.size() > 1;
}

auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  // TODO: Needs support for masked loads/stores of "scalar" vectors.
  if (!Move.IsHvx)
    return false;

  // Return the element from Range that maximizes the value obtained
  // by GetValue.
  auto getMaxOf = [](auto Range, auto GetValue) {
    return *std::max_element(
        Range.begin(), Range.end(),
        [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); });
  };

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);

  // Conceptually, there is a vector of N bytes covering the addresses
  // starting from the minimum offset (i.e. Base.Addr+Start). This vector
  // represents a contiguous memory region that spans all accessed memory
  // locations.
  // The correspondence between loaded or stored values will be expressed
  // in terms of this vector. For example, the 0th element of the vector
  // from the Base address info will start at byte Start from the beginning
  // of this conceptual vector.
  //
  // This vector will be loaded/stored starting at the nearest down-aligned
  // address, and the amount of the down-alignment will be AlignVal:
  //   valign(load_vector(align_down(Base+Start)), AlignVal)
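  //
  // For example (illustrative): two 64-byte HVX loads from P and P+64,
  // where P is 1 byte past a 64-byte boundary, cover the conceptual
  // vector of bytes P..P+127. Three 64-byte sectors are loaded from the
  // down-aligned address align_down(P, 64), and adjacent sectors are
  // combined with valign (a right shift by AlignVal = 1 byte) to recover
  // the original 128 unaligned bytes.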

  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
  AddrList MoveInfos;
  llvm::copy_if(
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  // Maximum alignment present in the whole address group.
  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  // Element with the minimum offset in the move address group.
  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

  // Set the builder at the top instruction in the move group.
  Instruction *TopIn = Move.IsLoad ? Move.Main.front() : Move.Main.back();
  IRBuilder<> Builder(TopIn);
  Value *AlignAddr = nullptr; // Actual aligned address.
  Value *AlignVal = nullptr;  // Right-shift amount (for valign).

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    // Shift the offset of the maximally aligned instruction (OffAtMax)
    // back by just enough multiples of the required alignment to cover the
    // distance from Start to OffAtMax.
    // Calculate the address adjustment amount based on the address with the
    // maximum alignment. This is to allow a simple gep instruction instead
    // of potential bitcasts to i8*.
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(Diff >= 0);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
  } else {
    // WithMinOffset is the lowest address in the group,
    // WithMinOffset.Addr = Base+Start.
    // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
    // mask off unnecessary bits, so it's ok to use just the original
    // pointer as the alignment amount.
    // Do an explicit down-alignment of the address to avoid creating an
    // aligned instruction with an address that is not really aligned.
    AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
                                     WithMinOffset.ValTy, MinNeeded.value());
    AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());
  }

  ByteSpan VSpan;
  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);
  }

  // The aligned loads/stores will use blocks that are either scalars,
  // or HVX vectors. Let "sector" be the unified term for such a block.
  // blend(scalar, vector) -> sector...
  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
                         : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);

  if (Move.IsLoad) {
    ByteSpan ASpan;
    auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
    auto *Undef = UndefValue::get(SecTy);

    for (int i = 0; i != NumSectors + DoAlign; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      // FIXME: generate a predicated load?
      Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
      // If vector shifting is potentially needed, accumulate metadata
      // from source sections of twice the load width.
      int Start = (i - DoAlign) * ScLen;
      int Width = (1 + DoAlign) * ScLen;
      propagateMetadata(cast<Instruction>(Load),
                        VSpan.section(Start, Width).values());
      ASpan.Blocks.emplace_back(Load, ScLen, i * ScLen);
    }

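    // Illustrative sketch: when DoAlign is set, NumSectors + 1 sectors
    // were loaded above, and each output sector j is recovered from the
    // adjacent pair (j, j+1) by shifting right by AlignVal bytes:
    //   out[j] = vralignb(sector[j], sector[j+1], AlignVal)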
    if (DoAlign) {
      for (int j = 0; j != NumSectors; ++j) {
        ASpan[j].Seg.Val = HVC.vralignb(Builder, ASpan[j].Seg.Val,
                                        ASpan[j + 1].Seg.Val, AlignVal);
      }
    }

    for (ByteSpan::Block &B : VSpan) {
      ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
      Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
      for (ByteSpan::Block &S : ASection) {
        Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val));
        Accum =
            HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);
      }
      // Instead of casting everything to bytes for the vselect, cast to the
      // original value type. This will avoid complications with casting masks.
      // For example, in cases when the original mask applied to i32, it could
      // be converted to a mask applicable to i8 via pred_typecast intrinsic,
      // but if the mask is not exactly of HVX length, extra handling would be
      // needed to make it work.
      Type *ValTy = getPayload(B.Seg.Val)->getType();
      Value *Cast = Builder.CreateBitCast(Accum, ValTy);
      Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
                                        getPassThrough(B.Seg.Val));
      B.Seg.Val->replaceAllUsesWith(Sel);
    }
  } else {
    // Stores.
    ByteSpan ASpanV, ASpanM;

    // Return a vector value corresponding to the input value Val:
    // either <1 x Val> for scalar Val, or Val itself for vector Val.
    auto MakeVec = [](IRBuilder<> &Builder, Value *Val) -> Value * {
      Type *Ty = Val->getType();
      if (Ty->isVectorTy())
        return Val;
      auto *VecTy = VectorType::get(Ty, 1, /*Scalable*/ false);
      return Builder.CreateBitCast(Val, VecTy);
    };

    // Create an extra "undef" sector at the beginning and at the end.
    // They will be used as the left/right filler in the vlalign step.
    for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) {
      // For stores, the size of each section is an aligned vector length.
      // Adjust the store offsets relative to the section start offset.
      ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
      Value *AccumV = UndefValue::get(SecTy);
      Value *AccumM = HVC.getNullValue(SecTy);
      for (ByteSpan::Block &S : VSection) {
        Value *Pay = getPayload(S.Seg.Val);
        Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
                                  Pay->getType(), HVC.getByteTy());
        AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask),
                             S.Seg.Start, S.Seg.Size, S.Pos);
        AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
                             S.Seg.Start, S.Seg.Size, S.Pos);
      }
      ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
      ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
    }

    // vlalign
    if (DoAlign) {
      for (int j = 1; j != NumSectors + 2; ++j) {
        ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanV[j - 1].Seg.Val,
                                             ASpanV[j].Seg.Val, AlignVal);
        ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanM[j - 1].Seg.Val,
                                             ASpanM[j].Seg.Val, AlignVal);
      }
    }

    for (int i = 0; i != NumSectors + DoAlign; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      Value *Val = ASpanV[i].Seg.Val;
      Value *Mask = ASpanM[i].Seg.Val; // bytes
      if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) {
        Value *Store = createAlignedStore(Builder, Val, Ptr, ScLen,
                                          HVC.vlsb(Builder, Mask));
        // If vector shifting is potentially needed, accumulate metadata
        // from source sections of twice the store width.
        int Start = (i - DoAlign) * ScLen;
        int Width = (1 + DoAlign) * ScLen;
        propagateMetadata(cast<Instruction>(Store),
                          VSpan.section(Start, Width).values());
      }
    }
  }

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();

  return true;
}

auto AlignVectors::run() -> bool {
  if (!createAddressGroups())
    return false;

  bool Changed = false;
  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {
    llvm::append_range(LoadGroups, createLoadGroups(G.second));
    llvm::append_range(StoreGroups, createStoreGroups(G.second));
  }

  for (auto &M : LoadGroups)
    Changed |= move(M);
  for (auto &M : StoreGroups)
    Changed |= move(M);

  for (auto &M : LoadGroups)
    Changed |= realignGroup(M);
  for (auto &M : StoreGroups)
    Changed |= realignGroup(M);

  return Changed;
}

// --- End AlignVectors

auto HexagonVectorCombine::run() -> bool {
  if (!HST.useHVXOps())
    return false;

  bool Changed = AlignVectors(*this).run();
  return Changed;
}

auto HexagonVectorCombine::getIntTy() const -> IntegerType * {
  return Type::getInt32Ty(F.getContext());
}

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  if (ElemCount == 0)
    return ByteTy;
  return VectorType::get(ByteTy, ElemCount, /*Scalable*/ false);
}

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  if (ElemCount == 0)
    return BoolTy;
  return VectorType::get(BoolTy, ElemCount, /*Scalable*/ false);
}

auto HexagonVectorCombine::getConstInt(int Val) const -> ConstantInt * {
  return ConstantInt::getSigned(getIntTy(), Val);
}

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
  if (auto *C = dyn_cast<Constant>(Val))
    return C->isZeroValue();
  return false;
}

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> Optional<APInt> {
  if (auto *CI = dyn_cast<ConstantInt>(Val))
    return CI->getValue();
  return None;
}

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
  return isa<UndefValue>(Val);
}

auto HexagonVectorCombine::getSizeOf(const Value *Val) const -> int {
  return getSizeOf(Val->getType());
}

auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int {
  return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue();
}

auto HexagonVectorCombine::getAllocSizeOf(const Type *Ty) const -> int {
  return DL.getTypeAllocSize(const_cast<Type *>(Ty)).getFixedValue();
}

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  // The actual type may be shorter than the HVX vector, so determine
  // the alignment based on subtarget info.
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();
}

auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
  assert(Ty->isIntOrIntVectorTy());
  auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
  return Zero;
}

auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
  assert(Ty->isIntOrIntVectorTy());
  auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
  return Minus1;
}

// Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
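// For example (illustrative): with Dst = <d0..d7>, Src = <s0..s3>,
// Start = 1, Length = 2, Where = 5, the result is
// <d0 d1 d2 d3 d4 s1 s2 d7>.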
auto HexagonVectorCombine::insertb(IRBuilder<> &Builder, Value *Dst, Value *Src,
                                   int Start, int Length, int Where) const
    -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);
  assert(0 <= Start && Start + Length <= SrcLen);
  assert(0 <= Where && Where + Length <= DstLen);

  int P2Len = PowerOf2Ceil(SrcLen | DstLen);
  auto *Undef = UndefValue::get(getByteTy());
  Value *P2Src = vresize(Builder, Src, P2Len, Undef);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);

  SmallVector<int, 256> SMask(P2Len);
  for (int i = 0; i != P2Len; ++i) {
    // If i is in [Where, Where+Length), pick Src[Start+(i-Where)].
    // Otherwise, pick Dst[i].
    SMask[i] =
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
  }

  Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
  return vresize(Builder, P2Insert, DstLen, Undef);
}

auto HexagonVectorCombine::vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
                                    Value *Amt) const -> Value * {
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  assert(isSectorTy(Hi->getType()));
  if (isZero(Amt))
    return Hi;
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
                           VecLen);

  if (HST.isTypeForHVX(Hi->getType())) {
    int HwLen = HST.getVectorLength();
    assert(VecLen == HwLen && "Expecting an exact HVX type");
    Intrinsic::ID V6_vlalignb = HwLen == 64
                                    ? Intrinsic::hexagon_V6_vlalignb
                                    : Intrinsic::hexagon_V6_vlalignb_128B;
    return createHvxIntrinsic(Builder, V6_vlalignb, Hi->getType(),
                              {Hi, Lo, Amt});
  }

  if (VecLen == 4) {
    Value *Pair = concat(Builder, {Lo, Hi});
    Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32);
    Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
    return Builder.CreateBitCast(Trunc, Hi->getType());
  }
  if (VecLen == 8) {
    Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt);
    return vralignb(Builder, Lo, Hi, Sub);
  }
  llvm_unreachable("Unexpected vector length");
}

auto HexagonVectorCombine::vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
                                    Value *Amt) const -> Value * {
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  assert(isSectorTy(Lo->getType()));
  if (isZero(Amt))
    return Lo;
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

  if (HST.isTypeForHVX(Lo->getType())) {
    int HwLen = HST.getVectorLength();
    assert(VecLen == HwLen && "Expecting an exact HVX type");
    Intrinsic::ID V6_valignb = HwLen == 64 ? Intrinsic::hexagon_V6_valignb
                                           : Intrinsic::hexagon_V6_valignb_128B;
    return createHvxIntrinsic(Builder, V6_valignb, Lo->getType(),
                              {Hi, Lo, Amt});
  }

  if (VecLen == 4) {
    Value *Pair = concat(Builder, {Lo, Hi});
    Value *Shift = Builder.CreateLShr(Pair, Amt);
    Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
    return Builder.CreateBitCast(Trunc, Lo->getType());
  }
  if (VecLen == 8) {
    Type *Int64Ty = Type::getInt64Ty(F.getContext());
    Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty);
    Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty);
    Function *FI = Intrinsic::getDeclaration(F.getParent(),
                                             Intrinsic::hexagon_S2_valignrb);
    Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt});
    return Builder.CreateBitCast(Call, Lo->getType());
  }
  llvm_unreachable("Unexpected vector length");
}

// Concatenates a sequence of vectors of the same type.
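// For example (illustrative): concatenating three <4 x i8> values pads
// the list with one undef vector to make the count even, joins adjacent
// pairs into <8 x i8> values, joins those into a <16 x i8> value, and
// finally extracts the leading 3 * 4 = 12 elements into the <12 x i8>
// result.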
auto HexagonVectorCombine::concat(IRBuilder<> &Builder,
                                  ArrayRef<Value *> Vecs) const -> Value * {
  assert(!Vecs.empty());
  SmallVector<int, 256> SMask;
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;

  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
    int ElemCount = Ty->getElementCount().getFixedValue();
    SMask.resize(ElemCount * 2);
    std::iota(SMask.begin(), SMask.end(), 0);

    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
      Work[ThisW].push_back(UndefValue::get(Ty));
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
      Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i],
                                                  Work[ThisW][i + 1], SMask);
      Work[OtherW].push_back(Joined);
    }
    std::swap(ThisW, OtherW);
  }

  // Since there may have been some undefs appended to make shuffle operands
  // have the same type, perform the last shuffle to only pick the original
  // elements. After the final swap, ThisW indexes the surviving value.
  SMask.resize(Vecs.size() * getSizeOf(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);
  Value *Total = Work[ThisW].front();
  return Builder.CreateShuffleVector(Total, SMask);
}

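// Resize a vector to NewSize elements: truncate it if NewSize is smaller,
// otherwise extend it, filling the new elements with Pad. For example
// (illustrative), resizing <4 x i8> to 8 elements keeps elements 0..3 and
// fills elements 4..7 with the Pad value.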
auto HexagonVectorCombine::vresize(IRBuilder<> &Builder, Value *Val,
                                   int NewSize, Value *Pad) const -> Value * {
  assert(isa<VectorType>(Val->getType()));
  auto *ValTy = cast<VectorType>(Val->getType());
  assert(ValTy->getElementType() == Pad->getType());

  int CurSize = ValTy->getElementCount().getFixedValue();
  if (CurSize == NewSize)
    return Val;
  // Truncate?
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, /*Unused*/ Val, 0, NewSize);
  // Extend.
  SmallVector<int, 128> SMask(NewSize);
  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
  Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
  return Builder.CreateShuffleVector(Val, PadVec, SMask);
}

auto HexagonVectorCombine::rescale(IRBuilder<> &Builder, Value *Mask,
                                   Type *FromTy, Type *ToTy) const -> Value * {
  // Mask is a vector <N x i1>, where each element corresponds to an
  // element of FromTy. Remap it so that each element will correspond
  // to an element of ToTy.
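  // For example (illustrative), a <4 x i1> mask for <4 x i32> rescaled to
  // byte granularity becomes a <16 x i1> mask, with each input bit
  // replicated over the 4 bytes of its i32 element.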
  assert(isa<VectorType>(Mask->getType()));

  Type *FromSTy = FromTy->getScalarType();
  Type *ToSTy = ToTy->getScalarType();
  if (FromSTy == ToSTy)
    return Mask;

  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

  auto *MaskTy = cast<VectorType>(Mask->getType());
  int FromCount = MaskTy->getElementCount().getFixedValue();
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);

  auto *FromITy = IntegerType::get(F.getContext(), FromSize * 8);
  auto *ToITy = IntegerType::get(F.getContext(), ToSize * 8);

  // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
  // -> trunc to <M x i1>.
  Value *Ext = Builder.CreateSExt(
      Mask, VectorType::get(FromITy, FromCount, /*Scalable*/ false));
  Value *Cast = Builder.CreateBitCast(
      Ext, VectorType::get(ToITy, ToCount, /*Scalable*/ false));
  return Builder.CreateTrunc(
      Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false));
}

// Bitcast to bytes, and return least significant bits.
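// For example (illustrative): <4 x i8> <0x01, 0x00, 0xff, 0xfe> yields
// the <4 x i1> mask <1, 0, 1, 0>.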
auto HexagonVectorCombine::vlsb(IRBuilder<> &Builder, Value *Val) const
    -> Value * {
  Type *ScalarTy = Val->getType()->getScalarType();
  if (ScalarTy == getBoolTy())
    return Val;

  Value *Bytes = vbytes(Builder, Val);
  if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));
  // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
  // <1 x i1>.
  return Builder.CreateTrunc(Bytes, getBoolTy());
}

// Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
auto HexagonVectorCombine::vbytes(IRBuilder<> &Builder, Value *Val) const
    -> Value * {
  Type *ScalarTy = Val->getType()->getScalarType();
  if (ScalarTy == getByteTy())
    return Val;

  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));
  // For bool, return a sext from i1 to i8.
  if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy));
  return Builder.CreateSExt(Val, getByteTy());
}

auto HexagonVectorCombine::createHvxIntrinsic(IRBuilder<> &Builder,
                                              Intrinsic::ID IntID, Type *RetTy,
                                              ArrayRef<Value *> Args) const
    -> Value * {
  int HwLen = HST.getVectorLength();
  Type *BoolTy = Type::getInt1Ty(F.getContext());
  Type *Int32Ty = Type::getInt32Ty(F.getContext());
  // HVX vector -> v16i32/v32i32
  // HVX vector predicate -> v512i1/v1024i1
  auto getTypeForIntrin = [&](Type *Ty) -> Type * {
    if (HST.isTypeForHVX(Ty, /*IncludeBool*/ true)) {
      Type *ElemTy = cast<VectorType>(Ty)->getElementType();
      if (ElemTy == Int32Ty)
        return Ty;
      if (ElemTy == BoolTy)
        return VectorType::get(BoolTy, 8 * HwLen, /*Scalable*/ false);
      return VectorType::get(Int32Ty, HwLen / 4, /*Scalable*/ false);
    }
    // Non-HVX type. It should be a scalar.
    assert(Ty == Int32Ty || Ty->isIntegerTy(64));
    return Ty;
  };

  auto getCast = [&](IRBuilder<> &Builder, Value *Val,
                     Type *DestTy) -> Value * {
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
      return Val;
    if (HST.isTypeForHVX(SrcTy, /*IncludeBool*/ true)) {
      if (cast<VectorType>(SrcTy)->getElementType() == BoolTy) {
        // This should take care of casts the other way too, for example
        // v1024i1 -> v32i1.
        Intrinsic::ID TC = HwLen == 64
                               ? Intrinsic::hexagon_V6_pred_typecast
                               : Intrinsic::hexagon_V6_pred_typecast_128B;
        Function *FI = Intrinsic::getDeclaration(F.getParent(), TC,
                                                 {DestTy, Val->getType()});
        return Builder.CreateCall(FI, {Val});
      }
      // Non-predicate HVX vector.
      return Builder.CreateBitCast(Val, DestTy);
    }
    // Non-HVX type. It should be a scalar, and it should already have
    // a valid type.
    llvm_unreachable("Unexpected type");
  };

  SmallVector<Value *, 4> IntOps;
  for (Value *A : Args)
    IntOps.push_back(getCast(Builder, A, getTypeForIntrin(A->getType())));
  Function *FI = Intrinsic::getDeclaration(F.getParent(), IntID);
  Value *Call = Builder.CreateCall(FI, IntOps);

  Type *CallTy = Call->getType();
  if (CallTy == RetTy)
    return Call;
  // Scalar types should have RetTy matching the call return type.
  assert(HST.isTypeForHVX(CallTy, /*IncludeBool*/ true));
  if (cast<VectorType>(CallTy)->getElementType() == BoolTy)
    return getCast(Builder, Call, RetTy);
  return Builder.CreateBitCast(Call, RetTy);
}

auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> Optional<int> {
  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [&](Value *V) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
      if (Value *S = SimplifyInstruction(I, Q))
        return S;
    }
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return None;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return None;

  Builder B(Gep0->getParent());
  int Scale = getAllocSizeOf(Gep0->getSourceElementType());

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return None;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT);
  KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnes())
    return None;

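  // The bits in Unknown and ~Unknown partition each index, so
  // Idx == (Idx & Unknown) + (Idx & ~Unknown) holds exactly, and the
  // difference decomposes as
  //   Idx0 - Idx1 == ((Idx0 & Unknown) - (Idx1 & Unknown))
  //                + ((Idx0 & ~Unknown) - (Idx1 & ~Unknown)).
  // Try to fold each partial difference to a constant separately; if both
  // fold, their sum is the total index difference.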
  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();
  } else {
    return None;
  }

  Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();
  } else {
    return None;
  }

  return (Diff0 + Diff1) * Scale;

#undef CallBuilder
}

template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &Ignore) const
    -> bool {
  auto getLocOrNone = [this](const Instruction &I) -> Optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
  // No PHIs.
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
    return false;

  if (!mayHaveNonDefUseDependency(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(Ignore, &I))
      continue;
    // assume intrinsic can be ignored
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}

#ifndef NDEBUG
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}

auto HexagonVectorCombine::isSectorTy(Type *Ty) const -> bool {
  if (!isByteVecTy(Ty))
    return false;
  int Size = getSizeOf(Ty);
  if (HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HST.getVectorLength());
  return Size == 4 || Size == 8;
}
#endif

auto HexagonVectorCombine::getElementRange(IRBuilder<> &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && Start < Length);
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask);
}

// Pass management.

namespace llvm {
void initializeHexagonVectorCombineLegacyPass(PassRegistry &);
FunctionPass *createHexagonVectorCombineLegacyPass();
} // namespace llvm

namespace {
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
    return HVC.run();
  }
};
} // namespace

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}
Definition: Type.h:382
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
p
the resulting code requires compare and branches when and if * p
Definition: README.txt:396
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
KnownBits.h
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:909
AliasAnalysis.h
llvm::ARMBuildAttrs::Section
@ Section
Legacy Tags.
Definition: ARMBuildAttributes.h:82
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1366
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1605
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
TargetMachine.h
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:518
llvm::AAResults
Definition: AliasAnalysis.h:511
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::DomTreeNodeBase::children
iterator_range< iterator > children()
Definition: GenericDomTree.h:83
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
false
Definition: StackSlotColoring.cpp:141
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction
Definition: Instruction.h:42
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::SimplifyInstruction
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6465
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::ConstantVector::getSplat
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1432
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:619
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1769
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:155
llvm::None
const NoneType None
Definition: None.h:24
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:1527
VectorUtils.h
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:305
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:685
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:406
llvm::MemoryLocation::getOrNone
static Optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:76
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
llvm::pdb::Unknown
@ Unknown
Definition: PDBTypes.h:396
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1670
ArrayRef.h
llvm::concat
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&... Ranges)
Concatenated range across two or more ranges.
Definition: STLExtras.h:1075
TargetPassConfig.h
llvm::computeKnownBits
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Definition: ValueTracking.cpp:222
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1663
llvm::MemoryLocation::getForArgument
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
Definition: MemoryLocation.cpp:158
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1586
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1612
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
Simplify
assume Assume Simplify
Definition: AssumeBundleBuilder.cpp:604
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1811
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
DEBUG_TYPE
#define DEBUG_TYPE
Definition: HexagonVectorCombine.cpp:50
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::DomTreeNodeBase< BasicBlock >
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:176
CallBuilder
#define CallBuilder(B, F)
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
j
return j(j<< 16)
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:867
std
Definition: BitVector.h:851
llvm::KnownBits
Definition: KnownBits.h:23
llvm::copy_if
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1651
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:688
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:338
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:197
llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:933
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:774
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:429
AA
llvm::pdb::DbgHeaderType::Max
@ Max
llvm::TargetMachine::getSubtargetImpl
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: TargetMachine.h:133
SmallVector.h
Dominators.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1347
InstructionSimplify.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
shift
http eax xorl edx cl sete al setne dl sall eax sall edx But that requires good bit subreg support this might be better It s an extra shift
Definition: README.txt:30
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:97
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
getLocation
static MemoryLocation getLocation(Instruction *I)
Definition: SLPVectorizer.cpp:655
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
From
BlockVerifier::State From
Definition: BlockVerifier.cpp:55
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
raw_ostream.h
llvm::pdb::PDB_SymType::Block
@ Block
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:668
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:88
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37