1//===-- HexagonVectorCombine.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// HexagonVectorCombine is a utility class implementing a variety of functions
9// that assist in vector-based optimizations.
10//
11// AlignVectors: replace unaligned vector loads and stores with aligned ones.
12// HvxIdioms: recognize various opportunities to generate HVX intrinsic code.
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/IRBuilder.h"
33#include "llvm/IR/Intrinsics.h"
34#include "llvm/IR/IntrinsicsHexagon.h"
35#include "llvm/IR/Metadata.h"
38#include "llvm/Pass.h"
45
46#include "Hexagon.h"
47#include "HexagonSubtarget.h"
49
50#include <algorithm>
51#include <deque>
52#include <map>
53#include <optional>
54#include <set>
55#include <utility>
56#include <vector>
57
58#define DEBUG_TYPE "hexagon-vc"
59
60using namespace llvm;
61
62namespace {
63cl::opt<bool> DumpModule("hvc-dump-module", cl::Hidden);
64cl::opt<bool> VAEnabled("hvc-va", cl::Hidden, cl::init(true)); // Align
65cl::opt<bool> VIEnabled("hvc-vi", cl::Hidden, cl::init(true)); // Idioms
66cl::opt<bool> VADoFullStores("hvc-va-full-stores", cl::Hidden);
67
68cl::opt<unsigned> VAGroupCountLimit("hvc-va-group-count-limit", cl::Hidden,
69 cl::init(~0));
70cl::opt<unsigned> VAGroupSizeLimit("hvc-va-group-size-limit", cl::Hidden,
71 cl::init(~0));
72
73class HexagonVectorCombine {
74public:
75 HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
76 DominatorTree &DT_, ScalarEvolution &SE_,
77 TargetLibraryInfo &TLI_, const TargetMachine &TM_)
78 : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
79 SE(SE_), TLI(TLI_),
80 HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}
81
82 bool run();
83
84 // Common integer type.
85 IntegerType *getIntTy(unsigned Width = 32) const;
86 // Byte type: either scalar (when ElemCount = 0), or vector with given
87 // element count.
88 Type *getByteTy(int ElemCount = 0) const;
89 // Boolean type: either scalar (when ElemCount = 0), or vector with given
90 // element count.
91 Type *getBoolTy(int ElemCount = 0) const;
92 // Create a ConstantInt of type returned by getIntTy with the value Val.
93 ConstantInt *getConstInt(int Val, unsigned Width = 32) const;
94 // Get the integer value of V, if it exists.
95 std::optional<APInt> getIntValue(const Value *Val) const;
96 // Is Val a constant 0, or a vector of 0s?
97 bool isZero(const Value *Val) const;
98 // Is Val an undef value?
99 bool isUndef(const Value *Val) const;
100 // Is Val a scalar (i1 true) or a vector of (i1 true)?
101 bool isTrue(const Value *Val) const;
102 // Is Val a scalar (i1 false) or a vector of (i1 false)?
103 bool isFalse(const Value *Val) const;
104
105 // Get HVX vector type with the given element type.
106 VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;
107
108 enum SizeKind {
109 Store, // Store size
110 Alloc, // Alloc size
111 };
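// For illustration (not tied to any particular target layout): Store and
// Alloc mirror DataLayout's getTypeStoreSize and getTypeAllocSize. A
// <3 x i32>, for example, has a store size of 12 bytes, while its alloc size
// is rounded up to the type's ABI alignment, typically 16 bytes.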
112 int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
113 int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
114 int getTypeAlignment(Type *Ty) const;
115 size_t length(Value *Val) const;
116 size_t length(Type *Ty) const;
117
118 Constant *getNullValue(Type *Ty) const;
119 Constant *getFullValue(Type *Ty) const;
120 Constant *getConstSplat(Type *Ty, int Val) const;
121
122 Value *simplify(Value *Val) const;
123
124 Value *insertb(IRBuilderBase &Builder, Value *Dest, Value *Src, int Start,
125 int Length, int Where) const;
126 Value *vlalignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
127 Value *Amt) const;
128 Value *vralignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
129 Value *Amt) const;
130 Value *concat(IRBuilderBase &Builder, ArrayRef<Value *> Vecs) const;
131 Value *vresize(IRBuilderBase &Builder, Value *Val, int NewSize,
132 Value *Pad) const;
133 Value *rescale(IRBuilderBase &Builder, Value *Mask, Type *FromTy,
134 Type *ToTy) const;
135 Value *vlsb(IRBuilderBase &Builder, Value *Val) const;
136 Value *vbytes(IRBuilderBase &Builder, Value *Val) const;
137 Value *subvector(IRBuilderBase &Builder, Value *Val, unsigned Start,
138 unsigned Length) const;
139 Value *sublo(IRBuilderBase &Builder, Value *Val) const;
140 Value *subhi(IRBuilderBase &Builder, Value *Val) const;
141 Value *vdeal(IRBuilderBase &Builder, Value *Val0, Value *Val1) const;
142 Value *vshuff(IRBuilderBase &Builder, Value *Val0, Value *Val1) const;
143
144 Value *createHvxIntrinsic(IRBuilderBase &Builder, Intrinsic::ID IntID,
145 Type *RetTy, ArrayRef<Value *> Args,
146 ArrayRef<Type *> ArgTys = {},
147 ArrayRef<Value *> MDSources = {}) const;
148 SmallVector<Value *> splitVectorElements(IRBuilderBase &Builder, Value *Vec,
149 unsigned ToWidth) const;
150 Value *joinVectorElements(IRBuilderBase &Builder, ArrayRef<Value *> Values,
151 VectorType *ToType) const;
152
153 std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
154
155 unsigned getNumSignificantBits(const Value *V,
156 const Instruction *CtxI = nullptr) const;
157 KnownBits getKnownBits(const Value *V,
158 const Instruction *CtxI = nullptr) const;
159
160 bool isSafeToClone(const Instruction &In) const;
161
162 template <typename T = std::vector<Instruction *>>
163 bool isSafeToMoveBeforeInBB(const Instruction &In,
164 BasicBlock::iterator To,
165 const T &IgnoreInsts = {}) const;
166
167 // This function is only used for assertions at the moment.
168 [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
169
170 Function &F;
171 const DataLayout &DL;
172 AliasAnalysis &AA;
173 AssumptionCache &AC;
174 DominatorTree &DT;
175 ScalarEvolution &SE;
176 TargetLibraryInfo &TLI;
177 const HexagonSubtarget &HST;
178
179private:
180 Value *getElementRange(IRBuilderBase &Builder, Value *Lo, Value *Hi,
181 int Start, int Length) const;
182};
183
184class AlignVectors {
185 // This code tries to replace unaligned vector loads/stores with aligned
186 // ones.
187 // Consider unaligned load:
188 // %v = original_load %some_addr, align <bad>
189 // %user = %v
190 // It will generate
191 // = load ..., align <good>
192 // = load ..., align <good>
193 // = valign
194 // etc.
195 // %synthesize = combine/shuffle the loaded data so that it looks
196 // exactly like what "original_load" has loaded.
197 // %user = %synthesize
198 // Similarly for stores.
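// A rough sketch of the rewrite in IR terms (illustrative only; the exact
// instructions and alignments are produced by realignLoadGroup below):
// %a0 = load ..., align <good> ; sector 0 at the down-aligned address
// %a1 = load ..., align <good> ; sector 1, one sector further
// %v0 = valign(%a0, %a1, <residue of the original address>)
// %synthesize = extract the bytes of the original value from %v0 (and
// possibly from neighboring sectors)
// %user = %synthesize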
199public:
200 AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
201
202 bool run();
203
204private:
205 using InstList = std::vector<Instruction *>;
206 using InstMap = DenseMap<Instruction *, Instruction *>;
207
208 struct AddrInfo {
209 AddrInfo(const AddrInfo &) = default;
210 AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T,
211 Align H)
212 : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
213 NeedAlign(HVC.getTypeAlignment(ValTy)) {}
214 AddrInfo &operator=(const AddrInfo &) = default;
215
216 // XXX: add Size member?
217 Instruction *Inst;
218 Value *Addr;
219 Type *ValTy;
220 Align HaveAlign;
221 Align NeedAlign;
222 int Offset = 0; // Offset (in bytes) from the first member of the
223 // containing AddrList.
224 };
225 using AddrList = std::vector<AddrInfo>;
226
227 struct InstrLess {
228 bool operator()(const Instruction *A, const Instruction *B) const {
229 return A->comesBefore(B);
230 }
231 };
232 using DepList = std::set<Instruction *, InstrLess>;
233
234 struct MoveGroup {
235 MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
236 : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
237 MoveGroup() = default;
238 Instruction *Base; // Base instruction of the parent address group.
239 InstList Main; // Main group of instructions.
240 InstList Deps; // List of dependencies.
241 InstMap Clones; // Map from original Deps to cloned ones.
242 bool IsHvx; // Is this a group of HVX instructions?
243 bool IsLoad; // Is this a load group?
244 };
245 using MoveList = std::vector<MoveGroup>;
246
247 struct ByteSpan {
248 // A representation of "interesting" bytes within a given span of memory.
249 // These bytes are those that are loaded or stored, and they don't have
250 // to cover the entire span of memory.
251 //
252 // The representation works by picking a contiguous sequence of bytes
253 // from somewhere within a llvm::Value, and placing it at a given offset
254 // within the span.
255 //
256 // The sequence of bytes from llvm::Value is represented by Segment.
257 // Block is Segment, plus where it goes in the span.
258 //
259 // An important feature of ByteSpan is being able to make a "section",
260 // i.e. creating another ByteSpan corresponding to a range of offsets
261 // relative to the source span.
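//
// Example (values made up for illustration): a span holding %a (8 bytes)
// at offset 0 and %b (8 bytes) at offset 6 has
// Blocks = { {%a, 0, 8} @0, {%b, 0, 8} @6 } and extent() == 14.
// section(4, 4) keeps only the overlapping pieces:
// { {%a, 4, 4} @4, {%b, 0, 2} @6 }.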
262
263 struct Segment {
264 // Segment of a Value: 'Len' bytes starting at byte 'Begin'.
265 Segment(Value *Val, int Begin, int Len)
266 : Val(Val), Start(Begin), Size(Len) {}
267 Segment(const Segment &Seg) = default;
268 Segment &operator=(const Segment &Seg) = default;
269 Value *Val; // Value representable as a sequence of bytes.
270 int Start; // First byte of the value that belongs to the segment.
271 int Size; // Number of bytes in the segment.
272 };
273
274 struct Block {
275 Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
276 Block(Value *Val, int Off, int Len, int Pos)
277 : Seg(Val, Off, Len), Pos(Pos) {}
278 Block(const Block &Blk) = default;
279 Block &operator=(const Block &Blk) = default;
280 Segment Seg; // Value segment.
281 int Pos; // Position (offset) of the block in the span.
282 };
283
284 int extent() const;
285 ByteSpan section(int Start, int Length) const;
286 ByteSpan &shift(int Offset);
287 SmallVector<Value *, 8> values() const;
288
289 int size() const { return Blocks.size(); }
290 Block &operator[](int i) { return Blocks[i]; }
291 const Block &operator[](int i) const { return Blocks[i]; }
292
293 std::vector<Block> Blocks;
294
295 using iterator = decltype(Blocks)::iterator;
296 iterator begin() { return Blocks.begin(); }
297 iterator end() { return Blocks.end(); }
298 using const_iterator = decltype(Blocks)::const_iterator;
299 const_iterator begin() const { return Blocks.begin(); }
300 const_iterator end() const { return Blocks.end(); }
301 };
302
303 std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
304 bool isHvx(const AddrInfo &AI) const;
305 // This function is only used for assertions at the moment.
306 [[maybe_unused]] bool isSectorTy(Type *Ty) const;
307
308 Value *getPayload(Value *Val) const;
309 Value *getMask(Value *Val) const;
310 Value *getPassThrough(Value *Val) const;
311
312 Value *createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
313 int Adjust,
314 const InstMap &CloneMap = InstMap()) const;
315 Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
316 int Alignment,
317 const InstMap &CloneMap = InstMap()) const;
318
319 Value *createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
320 Value *Predicate, int Alignment, Value *Mask,
321 Value *PassThru, ArrayRef<Value *> MDSources = {}) const;
322 Value *createSimpleLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
323 int Alignment,
324 ArrayRef<Value *> MDSources = {}) const;
325
326 Value *createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
327 Value *Predicate, int Alignment, Value *Mask,
328 ArrayRef<Value *> MDSources = {}) const;
329 Value *createSimpleStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
330 int Alignment,
331 ArrayRef<Value *> MDSources = {}) const;
332
333 Value *createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
334 Value *Predicate, int Alignment,
335 ArrayRef<Value *> MDSources = {}) const;
336 Value *createPredicatedStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
337 Value *Predicate, int Alignment,
338 ArrayRef<Value *> MDSources = {}) const;
339
340 DepList getUpwardDeps(Instruction *In, Instruction *Base) const;
341 bool createAddressGroups();
342 MoveList createLoadGroups(const AddrList &Group) const;
343 MoveList createStoreGroups(const AddrList &Group) const;
344 bool moveTogether(MoveGroup &Move) const;
345 template <typename T>
346 InstMap cloneBefore(BasicBlock::iterator To, T &&Insts) const;
347
348 void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
349 int ScLen, Value *AlignVal, Value *AlignAddr) const;
350 void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
351 int ScLen, Value *AlignVal, Value *AlignAddr) const;
352 bool realignGroup(const MoveGroup &Move) const;
353
354 Value *makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
355 int Alignment) const;
356
357 friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
358 friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
359 friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan::Block &B);
360 friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS);
361
362 std::map<Instruction *, AddrList> AddrGroups;
363 const HexagonVectorCombine &HVC;
364};
365
366[[maybe_unused]]
367raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
368 OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
369 OS << "Addr: " << *AI.Addr << '\n';
370 OS << "Type: " << *AI.ValTy << '\n';
371 OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
372 OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
373 OS << "Offset: " << AI.Offset;
374 return OS;
375}
376
377[[maybe_unused]]
378raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
379 OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
380 OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
381 OS << "Main\n";
382 for (Instruction *I : MG.Main)
383 OS << " " << *I << '\n';
384 OS << "Deps\n";
385 for (Instruction *I : MG.Deps)
386 OS << " " << *I << '\n';
387 OS << "Clones\n";
388 for (auto [K, V] : MG.Clones) {
389 OS << " ";
390 K->printAsOperand(OS, false);
391 OS << "\t-> " << *V << '\n';
392 }
393 return OS;
394}
395
396[[maybe_unused]]
397 raw_ostream &operator<<(raw_ostream &OS,
398 const AlignVectors::ByteSpan::Block &B) {
399 OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
400 if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
401 OS << "(self:" << B.Seg.Val << ')';
402 } else if (B.Seg.Val != nullptr) {
403 OS << *B.Seg.Val;
404 } else {
405 OS << "(null)";
406 }
407 return OS;
408}
409
410[[maybe_unused]]
411raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
412 OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
413 for (const AlignVectors::ByteSpan::Block &B : BS)
414 OS << B << '\n';
415 OS << ']';
416 return OS;
417}
418
419class HvxIdioms {
420public:
421 HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
422 auto *Int32Ty = HVC.getIntTy(32);
423 HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
424 HvxP32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/true);
425 }
426
427 bool run();
428
429private:
430 enum Signedness { Positive, Signed, Unsigned };
431
432 // Value + sign
433 // This is to keep track of whether the value should be treated as signed
434 // or unsigned, or is known to be positive.
435 struct SValue {
436 Value *Val;
437 Signedness Sgn;
438 };
439
440 struct FxpOp {
441 unsigned Opcode;
442 unsigned Frac; // Number of fraction bits
443 SValue X, Y;
444 // If present, add 1 << RoundAt before shift:
445 std::optional<unsigned> RoundAt;
446 VectorType *ResTy;
447 };
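// Example of the shape this describes (illustrative, not a promise of what
// matchFxpMul accepts): a rounding Q15 multiply
// ((sext(X) * sext(Y)) + (1 << 14)) >> 15
// corresponds to Opcode = Mul, Frac = 15, RoundAt = 14, with X and Y Signed.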
448
449 auto getNumSignificantBits(Value *V, Instruction *In) const
450 -> std::pair<unsigned, Signedness>;
451 auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
452
453 auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
454 auto processFxpMul(Instruction &In, const FxpOp &Op) const -> Value *;
455
456 auto processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
457 const FxpOp &Op) const -> Value *;
458 auto createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
459 bool Rounding) const -> Value *;
460 auto createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
461 bool Rounding) const -> Value *;
462 // Return {Result, Carry}, where Carry is a vector predicate.
463 auto createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
464 Value *CarryIn = nullptr) const
465 -> std::pair<Value *, Value *>;
466 auto createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const -> Value *;
467 auto createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
468 -> Value *;
469 auto createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
470 -> std::pair<Value *, Value *>;
471 auto createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
472 ArrayRef<Value *> WordY) const -> SmallVector<Value *>;
473 auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
474 Signedness SgnX, ArrayRef<Value *> WordY,
475 Signedness SgnY) const -> SmallVector<Value *>;
476
477 VectorType *HvxI32Ty;
478 VectorType *HvxP32Ty;
479 const HexagonVectorCombine &HVC;
480
481 friend raw_ostream &operator<<(raw_ostream &, const FxpOp &);
482};
483
484[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
485 const HvxIdioms::FxpOp &Op) {
486 static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
487 OS << Instruction::getOpcodeName(Op.Opcode) << '.' << Op.Frac;
488 if (Op.RoundAt.has_value()) {
489 if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
490 OS << ":rnd";
491 } else {
492 OS << " + 1<<" << *Op.RoundAt;
493 }
494 }
495 OS << "\n X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
496 << " Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
497 return OS;
498}
499
500} // namespace
501
502namespace {
503
504template <typename T> T *getIfUnordered(T *MaybeT) {
505 return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
506}
507template <typename T> T *isCandidate(Instruction *In) {
508 return dyn_cast<T>(In);
509}
510 template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
511 return getIfUnordered(dyn_cast<LoadInst>(In));
512 }
513 template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
514 return getIfUnordered(dyn_cast<StoreInst>(In));
515}
516
517#if !defined(_MSC_VER) || _MSC_VER >= 1926
518// VS2017 and some versions of VS2019 have trouble compiling this:
519// error C2976: 'std::map': too few template arguments
520// VS 2019 16.x is known to work, except for 16.4/16.5 (MSC_VER 1924/1925)
521template <typename Pred, typename... Ts>
522void erase_if(std::map<Ts...> &map, Pred p)
523#else
524template <typename Pred, typename T, typename U>
525void erase_if(std::map<T, U> &map, Pred p)
526#endif
527{
528 for (auto i = map.begin(), e = map.end(); i != e;) {
529 if (p(*i))
530 i = map.erase(i);
531 else
532 i = std::next(i);
533 }
534}
535
536// Forward other erase_ifs to the LLVM implementations.
537template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
538 llvm::erase_if(std::forward<T>(container), p);
539}
540
541} // namespace
542
543// --- Begin AlignVectors
544
545// For brevity, only consider loads. We identify a group of loads where we
546// know the relative differences between their addresses, so we know how they
547// are laid out in memory (relative to one another). These loads can overlap,
548 // and can be shorter or longer than the desired vector length.
549// Ultimately we want to generate a sequence of aligned loads that will load
550// every byte that the original loads loaded, and have the program use these
551// loaded values instead of the original loads.
552// We consider the contiguous memory area spanned by all these loads.
553//
554// Let's say that a single aligned vector load can load 16 bytes at a time.
555// If the program wanted to use a byte at offset 13 from the beginning of the
556// original span, it will be a byte at offset 13+x in the aligned data for
557// some x>=0. This may happen to be in the first aligned load, or in the load
558 // following it. Since we generally don't know what that alignment value
559 // is at compile time, we proactively do valigns on the aligned loads, so that
560 // the byte that was at offset 13 is still at offset 13 after the valigns.
561//
562// This will be the starting point for making the rest of the program use the
563// data loaded by the new loads.
564// For each original load, and its users:
565// %v = load ...
566// ... = %v
567// ... = %v
568// we create
569// %new_v = extract/combine/shuffle data from loaded/valigned vectors so
570// it contains the same value as %v did before
571// then replace all users of %v with %new_v.
572// ... = %new_v
573// ... = %new_v
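//
// A small worked example (numbers are made up): with 16-byte sectors and a
// run-time residue r = (Base+Start) & 15, we load the 16-byte sectors at
// align_down(Base+Start), +16, +32, ..., and valign each adjacent pair of
// sectors by r. After that, byte 13 of the original span is again byte 13
// of the valigned data, whatever r happens to be at run time.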
574
575auto AlignVectors::ByteSpan::extent() const -> int {
576 if (size() == 0)
577 return 0;
578 int Min = Blocks[0].Pos;
579 int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
580 for (int i = 1, e = size(); i != e; ++i) {
581 Min = std::min(Min, Blocks[i].Pos);
582 Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
583 }
584 return Max - Min;
585}
586
587auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
588 ByteSpan Section;
589 for (const ByteSpan::Block &B : Blocks) {
590 int L = std::max(B.Pos, Start); // Left end.
591 int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1.
592 if (L < R) {
593 // How much to chop off the beginning of the segment:
594 int Off = L > B.Pos ? L - B.Pos : 0;
595 Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
596 }
597 }
598 return Section;
599}
600
601auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
602 for (Block &B : Blocks)
603 B.Pos += Offset;
604 return *this;
605}
606
607auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
608 SmallVector<Value *, 8> Values(Blocks.size());
609 for (int i = 0, e = Blocks.size(); i != e; ++i)
610 Values[i] = Blocks[i].Seg.Val;
611 return Values;
612}
613
614auto AlignVectors::getAddrInfo(Instruction &In) const
615 -> std::optional<AddrInfo> {
616 if (auto *L = isCandidate<LoadInst>(&In))
617 return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
618 L->getAlign());
619 if (auto *S = isCandidate<StoreInst>(&In))
620 return AddrInfo(HVC, S, S->getPointerOperand(),
621 S->getValueOperand()->getType(), S->getAlign());
622 if (auto *II = isCandidate<IntrinsicInst>(&In)) {
623 Intrinsic::ID ID = II->getIntrinsicID();
624 switch (ID) {
625 case Intrinsic::masked_load:
626 return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
627 II->getParamAlign(0).valueOrOne());
628 case Intrinsic::masked_store:
629 return AddrInfo(HVC, II, II->getArgOperand(1),
630 II->getArgOperand(0)->getType(),
631 II->getParamAlign(1).valueOrOne());
632 }
633 }
634 return std::nullopt;
635}
636
637auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
638 return HVC.HST.isTypeForHVX(AI.ValTy);
639}
640
641auto AlignVectors::getPayload(Value *Val) const -> Value * {
642 if (auto *In = dyn_cast<Instruction>(Val)) {
643 Intrinsic::ID ID = 0;
644 if (auto *II = dyn_cast<IntrinsicInst>(In))
645 ID = II->getIntrinsicID();
646 if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
647 return In->getOperand(0);
648 }
649 return Val;
650}
651
652auto AlignVectors::getMask(Value *Val) const -> Value * {
653 if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
654 switch (II->getIntrinsicID()) {
655 case Intrinsic::masked_load:
656 return II->getArgOperand(1);
657 case Intrinsic::masked_store:
658 return II->getArgOperand(2);
659 }
660 }
661
662 Type *ValTy = getPayload(Val)->getType();
663 if (auto *VecTy = dyn_cast<VectorType>(ValTy))
664 return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
665 return HVC.getFullValue(HVC.getBoolTy());
666}
667
668auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
669 if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
670 if (II->getIntrinsicID() == Intrinsic::masked_load)
671 return II->getArgOperand(2);
672 }
673 return UndefValue::get(getPayload(Val)->getType());
674}
675
676auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
677 Type *ValTy, int Adjust,
678 const InstMap &CloneMap) const
679 -> Value * {
680 if (auto *I = dyn_cast<Instruction>(Ptr))
681 if (Instruction *New = CloneMap.lookup(I))
682 Ptr = New;
683 return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");
684}
685
686auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
687 Type *ValTy, int Alignment,
688 const InstMap &CloneMap) const
689 -> Value * {
690 auto remap = [&](Value *V) -> Value * {
691 if (auto *I = dyn_cast<Instruction>(V)) {
692 for (auto [Old, New] : CloneMap)
693 I->replaceUsesOfWith(Old, New);
694 return I;
695 }
696 return V;
697 };
698 Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
699 Value *Mask = HVC.getConstInt(-Alignment);
700 Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
701 return Builder.CreateIntToPtr(
702 And, PointerType::getUnqual(ValTy->getContext()), "itp");
703}
704
705auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
706 Value *Predicate, int Alignment, Value *Mask,
707 Value *PassThru,
708 ArrayRef<Value *> MDSources) const -> Value * {
709 bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
710 // Predicate is nullptr if not creating predicated load
711 if (Predicate) {
712 assert(!Predicate->getType()->isVectorTy() &&
713 "Expectning scalar predicate");
714 if (HVC.isFalse(Predicate))
715 return UndefValue::get(ValTy);
716 if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
717 Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
718 Alignment, MDSources);
719 return Builder.CreateSelect(Mask, Load, PassThru);
720 }
721 // Predicate == true here.
722 }
723 assert(!HVC.isUndef(Mask)); // Should this be allowed?
724 if (HVC.isZero(Mask))
725 return PassThru;
726 if (HVC.isTrue(Mask))
727 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
728
729 Instruction *Load = Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment),
730 Mask, PassThru, "mld");
731 propagateMetadata(Load, MDSources);
732 return Load;
733}
734
735auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
736 Value *Ptr, int Alignment,
737 ArrayRef<Value *> MDSources) const
738 -> Value * {
739 Instruction *Load =
740 Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
741 propagateMetadata(Load, MDSources);
742 return Load;
743}
744
745auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
746 Value *Ptr, Value *Predicate,
747 int Alignment,
748 ArrayRef<Value *> MDSources) const
749 -> Value * {
750 assert(HVC.HST.isTypeForHVX(ValTy) &&
751 "Predicates 'scalar' vector loads not yet supported");
752 assert(Predicate);
753 assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
754 assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
755 if (HVC.isFalse(Predicate))
756 return UndefValue::get(ValTy);
757 if (HVC.isTrue(Predicate))
758 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
759
760 auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
761 // FIXME: This may not put the offset from Ptr into the vmem offset.
762 return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
763 {Predicate, Ptr, HVC.getConstInt(0)}, {},
764 MDSources);
765}
766
767auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
768 Value *Predicate, int Alignment, Value *Mask,
769 ArrayRef<Value *> MDSources) const -> Value * {
770 if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
771 return UndefValue::get(Val->getType());
772 assert(!Predicate || (!Predicate->getType()->isVectorTy() &&
773 "Expectning scalar predicate"));
774 if (Predicate) {
775 if (HVC.isFalse(Predicate))
776 return UndefValue::get(Val->getType());
777 if (HVC.isTrue(Predicate))
778 Predicate = nullptr;
779 }
780 // Here both Predicate and Mask are true or unknown.
781
782 if (HVC.isTrue(Mask)) {
783 if (Predicate) { // Predicate unknown
784 return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
785 MDSources);
786 }
787 // Predicate is true:
788 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
789 }
790
791 // Mask is unknown
792 if (!Predicate) {
793 Instruction *Store =
794 Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
795 propagateMetadata(Store, MDSources);
796 return Store;
797 }
798
799 // Both Predicate and Mask are unknown.
800 // Emulate masked store with predicated-load + mux + predicated-store.
801 Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
802 Predicate, Alignment, MDSources);
803 Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
804 return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
805 MDSources);
806}
807
808auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
809 Value *Ptr, int Alignment,
810 ArrayRef<Value *> MDSources) const
811 -> Value * {
812 Instruction *Store = Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
813 propagateMetadata(Store, MDSources);
814 return Store;
815}
816
817auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
818 Value *Ptr, Value *Predicate,
819 int Alignment,
820 ArrayRef<Value *> MDSources) const
821 -> Value * {
822 assert(HVC.HST.isTypeForHVX(Val->getType()) &&
823 "Predicates 'scalar' vector stores not yet supported");
824 assert(Predicate);
825 if (HVC.isFalse(Predicate))
826 return UndefValue::get(Val->getType());
827 if (HVC.isTrue(Predicate))
828 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
829
830 assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
831 auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
832 // FIXME: This may not put the offset from Ptr into the vmem offset.
833 return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
834 {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
835 MDSources);
836}
837
838auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
839 -> DepList {
840 BasicBlock *Parent = Base->getParent();
841 assert(In->getParent() == Parent &&
842 "Base and In should be in the same block");
843 assert(Base->comesBefore(In) && "Base should come before In");
844
845 DepList Deps;
846 std::deque<Instruction *> WorkQ = {In};
847 while (!WorkQ.empty()) {
848 Instruction *D = WorkQ.front();
849 WorkQ.pop_front();
850 if (D != In)
851 Deps.insert(D);
852 for (Value *Op : D->operands()) {
853 if (auto *I = dyn_cast<Instruction>(Op)) {
854 if (I->getParent() == Parent && Base->comesBefore(I))
855 WorkQ.push_back(I);
856 }
857 }
858 }
859 return Deps;
860}
861
862auto AlignVectors::createAddressGroups() -> bool {
863 // An address group created here may contain instructions spanning
864 // multiple basic blocks.
865 AddrList WorkStack;
866
867 auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
868 for (AddrInfo &W : WorkStack) {
869 if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
870 return std::make_pair(W.Inst, *D);
871 }
872 return std::make_pair(nullptr, 0);
873 };
874
875 auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
876 BasicBlock &Block = *DomN->getBlock();
877 for (Instruction &I : Block) {
878 auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
879 if (!AI)
880 continue;
881 auto F = findBaseAndOffset(*AI);
882 Instruction *GroupInst;
883 if (Instruction *BI = F.first) {
884 AI->Offset = F.second;
885 GroupInst = BI;
886 } else {
887 WorkStack.push_back(*AI);
888 GroupInst = AI->Inst;
889 }
890 AddrGroups[GroupInst].push_back(*AI);
891 }
892
893 for (DomTreeNode *C : DomN->children())
894 Visit(C, Visit);
895
896 while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
897 WorkStack.pop_back();
898 };
899
900 traverseBlock(HVC.DT.getRootNode(), traverseBlock);
901 assert(WorkStack.empty());
902
903 // AddrGroups are formed.
904
905 // Remove groups of size 1.
906 erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
907 // Remove groups that don't use HVX types.
908 erase_if(AddrGroups, [&](auto &G) {
909 return llvm::none_of(
910 G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
911 });
912
913 return !AddrGroups.empty();
914}
915
916auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
917 // Form load groups.
918 // To avoid complications with moving code across basic blocks, only form
919 // groups that are contained within a single basic block.
920 unsigned SizeLimit = VAGroupSizeLimit;
921 if (SizeLimit == 0)
922 return {};
923
924 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
925 assert(!Move.Main.empty() && "Move group should have non-empty Main");
926 if (Move.Main.size() >= SizeLimit)
927 return false;
928 // Don't mix HVX and non-HVX instructions.
929 if (Move.IsHvx != isHvx(Info))
930 return false;
931 // Leading instruction in the load group.
932 Instruction *Base = Move.Main.front();
933 if (Base->getParent() != Info.Inst->getParent())
934 return false;
935 // Check if it's safe to move the load.
936 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
937 return false;
938 // And if it's safe to clone the dependencies.
939 auto isSafeToCopyAtBase = [&](const Instruction *I) {
940 return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
941 HVC.isSafeToClone(*I);
942 };
943 DepList Deps = getUpwardDeps(Info.Inst, Base);
944 if (!llvm::all_of(Deps, isSafeToCopyAtBase))
945 return false;
946
947 Move.Main.push_back(Info.Inst);
948 llvm::append_range(Move.Deps, Deps);
949 return true;
950 };
951
952 MoveList LoadGroups;
953
954 for (const AddrInfo &Info : Group) {
955 if (!Info.Inst->mayReadFromMemory())
956 continue;
957 if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
958 LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
959 }
960
961 // Erase singleton groups.
962 erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
963
964 // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
965 if (!HVC.HST.useHVXV62Ops())
966 erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
967
968 return LoadGroups;
969}
970
971auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
972 // Form store groups.
973 // To avoid complications with moving code across basic blocks, only form
974 // groups that are contained within a single basic block.
975 unsigned SizeLimit = VAGroupSizeLimit;
976 if (SizeLimit == 0)
977 return {};
978
979 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
980 assert(!Move.Main.empty() && "Move group should have non-empty Main");
981 if (Move.Main.size() >= SizeLimit)
982 return false;
983 // For stores with return values we'd have to collect downward dependencies.
984 // There are no such stores that we handle at the moment, so omit that.
985 assert(Info.Inst->getType()->isVoidTy() &&
986 "Not handling stores with return values");
987 // Don't mix HVX and non-HVX instructions.
988 if (Move.IsHvx != isHvx(Info))
989 return false;
990 // For stores we need to be careful whether it's safe to move them.
991 // Stores that are otherwise safe to move together may not appear safe
992 // to move over one another (i.e. isSafeToMoveBefore may return false).
993 Instruction *Base = Move.Main.front();
994 if (Base->getParent() != Info.Inst->getParent())
995 return false;
996 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
997 return false;
998 Move.Main.push_back(Info.Inst);
999 return true;
1000 };
1001
1002 MoveList StoreGroups;
1003
1004 for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
1005 const AddrInfo &Info = *I;
1006 if (!Info.Inst->mayWriteToMemory())
1007 continue;
1008 if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
1009 StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
1010 }
1011
1012 // Erase singleton groups.
1013 erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
1014
1015 // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
1016 if (!HVC.HST.useHVXV62Ops())
1017 erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
1018
1019 // Erase groups where every store is a full HVX vector. The reason is that
1020 // aligning predicated stores generates complex code that may be less
1021 // efficient than a sequence of unaligned vector stores.
1022 if (!VADoFullStores) {
1023 erase_if(StoreGroups, [this](const MoveGroup &G) {
1024 return G.IsHvx && llvm::all_of(G.Main, [this](Instruction *S) {
1025 auto MaybeInfo = this->getAddrInfo(*S);
1026 assert(MaybeInfo.has_value());
1027 return HVC.HST.isHVXVectorType(
1028 EVT::getEVT(MaybeInfo->ValTy, false));
1029 });
1030 });
1031 }
1032
1033 return StoreGroups;
1034}
1035
1036auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
1037 // Move all instructions to be adjacent.
1038 assert(!Move.Main.empty() && "Move group should have non-empty Main");
1039 Instruction *Where = Move.Main.front();
1040
1041 if (Move.IsLoad) {
1042 // Move all the loads (and dependencies) to where the first load is.
1043 // Clone all deps to before Where, keeping order.
1044 Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
1045 // Move all main instructions to after Where, keeping order.
1046 ArrayRef<Instruction *> Main(Move.Main);
1047 for (Instruction *M : Main) {
1048 if (M != Where)
1049 M->moveAfter(Where);
1050 for (auto [Old, New] : Move.Clones)
1051 M->replaceUsesOfWith(Old, New);
1052 Where = M;
1053 }
1054 // Replace Deps with the clones.
1055 for (int i = 0, e = Move.Deps.size(); i != e; ++i)
1056 Move.Deps[i] = Move.Clones[Move.Deps[i]];
1057 } else {
1058 // Move all the stores to where the last store is.
1059 // NOTE: Deps are empty for "store" groups. If they need to be
1060 // non-empty, decide on the order.
1061 assert(Move.Deps.empty());
1062 // Move all main instructions to before Where, inverting order.
1063 ArrayRef<Instruction *> Main(Move.Main);
1064 for (Instruction *M : Main.drop_front(1)) {
1065 M->moveBefore(Where->getIterator());
1066 Where = M;
1067 }
1068 }
1069
1070 return Move.Main.size() + Move.Deps.size() > 1;
1071}
1072
1073template <typename T>
1074auto AlignVectors::cloneBefore(BasicBlock::iterator To, T &&Insts) const
1075 -> InstMap {
1076 InstMap Map;
1077
1078 for (Instruction *I : Insts) {
1079 assert(HVC.isSafeToClone(*I));
1080 Instruction *C = I->clone();
1081 C->setName(Twine("c.") + I->getName() + ".");
1082 C->insertBefore(To);
1083
1084 for (auto [Old, New] : Map)
1085 C->replaceUsesOfWith(Old, New);
1086 Map.insert(std::make_pair(I, C));
1087 }
1088 return Map;
1089}
1090
1091auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
1092 const ByteSpan &VSpan, int ScLen,
1093 Value *AlignVal, Value *AlignAddr) const
1094 -> void {
1095 LLVM_DEBUG(dbgs() << __func__ << "\n");
1096
1097 Type *SecTy = HVC.getByteTy(ScLen);
1098 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1099 bool DoAlign = !HVC.isZero(AlignVal);
1100 BasicBlock::iterator BasePos = Builder.GetInsertPoint();
1101 BasicBlock *BaseBlock = Builder.GetInsertBlock();
1102
1103 ByteSpan ASpan;
1104 auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
1105 auto *Undef = UndefValue::get(SecTy);
1106
1107 // Created load does not have to be "Instruction" (e.g. "undef").
1108 SmallVector<Value *> Loads(NumSectors + DoAlign, nullptr);
1109
1110 // We could create all of the aligned loads, and generate the valigns
1111 // at the location of the first load, but for large load groups, this
1112 // could create highly suboptimal code (there have been groups of 140+
1113 // loads in real code).
1114 // Instead, place the loads/valigns as close to the users as possible.
1115 // In any case we need to have a mapping from the blocks of VSpan (the
1116 // span covered by the pre-existing loads) to ASpan (the span covered
1117 // by the aligned loads). There is a small problem, though: ASpan needs
1118 // to have pointers to the loads/valigns, but we don't have these loads
1119 // because we don't know where to put them yet. We find out by creating
1120 // a section of ASpan that corresponds to values (blocks) from VSpan,
1121 // and checking where the new load should be placed. We need to attach
1122 // this location information to each block in ASpan somehow, so we put
1123 // distinct values for Seg.Val in each ASpan.Blocks[i], and use a map
1124 // to store the location for each Seg.Val.
1125 // The distinct values happen to be Blocks[i].Seg.Val = &Blocks[i],
1126 // which helps with printing ByteSpans without crashing when printing
1127 // Segments with these temporary identifiers in place of Val.
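// For example (made-up sizes): with ScLen = 64 and a VSpan block covering
// bytes [10, 70), the block sections into both ASpan.Blocks[0] and
// ASpan.Blocks[1], so the earliest user of that block's value updates
// EarliestUser[&ASpan.Blocks[0]] and EarliestUser[&ASpan.Blocks[1]].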
1128
1129 // Populate the blocks first, to avoid reallocations of the vector
1130 // interfering with generating the placeholder addresses.
1131 for (int Index = 0; Index != NumSectors; ++Index)
1132 ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
1133 for (int Index = 0; Index != NumSectors; ++Index) {
1134 ASpan.Blocks[Index].Seg.Val =
1135 reinterpret_cast<Value *>(&ASpan.Blocks[Index]);
1136 }
1137
1138 // Multiple values from VSpan can map to the same value in ASpan. Since we
1139 // try to create loads lazily, we need to find the earliest use for each
1140 // value from ASpan.
1141 DenseMap<void *, Instruction *> EarliestUser;
1142 auto isEarlier = [](Instruction *A, Instruction *B) {
1143 if (B == nullptr)
1144 return true;
1145 if (A == nullptr)
1146 return false;
1147 assert(A->getParent() == B->getParent());
1148 return A->comesBefore(B);
1149 };
1150 auto earliestUser = [&](const auto &Uses) {
1151 Instruction *User = nullptr;
1152 for (const Use &U : Uses) {
1153 auto *I = dyn_cast<Instruction>(U.getUser());
1154 assert(I != nullptr && "Load used in a non-instruction?");
1155 // Make sure we only consider users in this block, but we need
1156 // to remember if there were users outside the block too. This is
1157 // because if no users are found, aligned loads will not be created.
1158 if (I->getParent() == BaseBlock) {
1159 if (!isa<PHINode>(I))
1160 User = std::min(User, I, isEarlier);
1161 } else {
1162 User = std::min(User, BaseBlock->getTerminator(), isEarlier);
1163 }
1164 }
1165 return User;
1166 };
1167
1168 for (const ByteSpan::Block &B : VSpan) {
1169 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
1170 for (const ByteSpan::Block &S : ASection) {
1171 auto &EU = EarliestUser[S.Seg.Val];
1172 EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);
1173 }
1174 }
1175
1176 LLVM_DEBUG({
1177 dbgs() << "ASpan:\n" << ASpan << '\n';
1178 dbgs() << "Earliest users of ASpan:\n";
1179 for (auto &[Val, User] : EarliestUser) {
1180 dbgs() << Val << "\n ->" << *User << '\n';
1181 }
1182 });
1183
1184 auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
1185 int Index, bool MakePred) {
1186 Value *Ptr =
1187 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1188 Value *Predicate =
1189 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1190
1191 // If vector shifting is potentially needed, accumulate metadata
1192 // from source sections of twice the load width.
1193 int Start = (Index - DoAlign) * ScLen;
1194 int Width = (1 + DoAlign) * ScLen;
1195 return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
1196 VSpan.section(Start, Width).values());
1197 };
1198
1199 auto moveBefore = [this](BasicBlock::iterator In, BasicBlock::iterator To) {
1200 // Move In and its upward dependencies to before To.
1201 assert(In->getParent() == To->getParent());
1202 DepList Deps = getUpwardDeps(&*In, &*To);
1203 In->moveBefore(To);
1204 // DepList is sorted with respect to positions in the basic block.
1205 InstMap Map = cloneBefore(In, Deps);
1206 for (auto [Old, New] : Map)
1207 In->replaceUsesOfWith(Old, New);
1208 };
1209
1210 // Generate necessary loads at appropriate locations.
1211 LLVM_DEBUG(dbgs() << "Creating loads for ASpan sectors\n");
1212 for (int Index = 0; Index != NumSectors + 1; ++Index) {
1213 // In ASpan, each block will be either a single aligned load, or a
1214 // valign of a pair of loads. In the latter case, an aligned load j
1215 // will belong to the current valign, and the one in the previous
1216 // block (for j > 0).
1217 // Place the load at a location which will dominate the valign, assuming
1218 // the valign will be placed right before the earliest user.
1219 Instruction *PrevAt =
1220 DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
1221 Instruction *ThisAt =
1222 Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
1223 if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
1224 Builder.SetInsertPoint(Where);
1225 Loads[Index] =
1226 createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
1227 // We know it's safe to put the load at BasePos, but we'd prefer to put
1228 // it at "Where". To see if the load is safe to be placed at Where, put
1229 // it there first and then check if it's safe to move it to BasePos.
1230 // If not, then the load needs to be placed at BasePos.
1231 // We can't do this check proactively because we need the load to exist
1232 // in order to check legality.
1233 if (auto *Load = dyn_cast<Instruction>(Loads[Index])) {
1234 if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
1235 moveBefore(Load->getIterator(), BasePos);
1236 }
1237 LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');
1238 }
1239 }
1240
1241 // Generate valigns if needed, and fill in proper values in ASpan
1242 LLVM_DEBUG(dbgs() << "Creating values for ASpan sectors\n");
1243 for (int Index = 0; Index != NumSectors; ++Index) {
1244 ASpan[Index].Seg.Val = nullptr;
1245 if (auto *Where = EarliestUser[&ASpan[Index]]) {
1246 Builder.SetInsertPoint(Where);
1247 Value *Val = Loads[Index];
1248 assert(Val != nullptr);
1249 if (DoAlign) {
1250 Value *NextLoad = Loads[Index + 1];
1251 assert(NextLoad != nullptr);
1252 Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
1253 }
1254 ASpan[Index].Seg.Val = Val;
1255 LLVM_DEBUG(dbgs() << "ASpan[" << Index << "]:" << *Val << '\n');
1256 }
1257 }
1258
1259 for (const ByteSpan::Block &B : VSpan) {
1260 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
1261 Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
1262 Builder.SetInsertPoint(cast<Instruction>(B.Seg.Val));
1263
1264 // We're generating a reduction, where each instruction depends on
1265 // the previous one, so we need to order them according to the position
1266 // of their inputs in the code.
1267 std::vector<ByteSpan::Block *> ABlocks;
1268 for (ByteSpan::Block &S : ASection) {
1269 if (S.Seg.Val != nullptr)
1270 ABlocks.push_back(&S);
1271 }
1272 llvm::sort(ABlocks,
1273 [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
1274 return isEarlier(cast<Instruction>(A->Seg.Val),
1275 cast<Instruction>(B->Seg.Val));
1276 });
1277 for (ByteSpan::Block *S : ABlocks) {
1278 // The processing of the data loaded by the aligned loads
1279 // needs to be inserted after the data is available.
1280 Instruction *SegI = cast<Instruction>(S->Seg.Val);
1281 Builder.SetInsertPoint(&*std::next(SegI->getIterator()));
1282 Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
1283 Accum =
1284 HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
1285 }
1286 // Instead of casting everything to bytes for the vselect, cast to the
1287 // original value type. This will avoid complications with casting masks.
1288 // For example, in cases when the original mask applied to i32, it could
1289 // be converted to a mask applicable to i8 via pred_typecast intrinsic,
1290 // but if the mask is not exactly of HVX length, extra handling would be
1291 // needed to make it work.
1292 Type *ValTy = getPayload(B.Seg.Val)->getType();
1293 Value *Cast = Builder.CreateBitCast(Accum, ValTy, "cst");
1294 Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
1295 getPassThrough(B.Seg.Val), "sel");
1296 B.Seg.Val->replaceAllUsesWith(Sel);
1297 }
1298}
1299
1300auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
1301 const ByteSpan &VSpan, int ScLen,
1302 Value *AlignVal, Value *AlignAddr) const
1303 -> void {
1304 LLVM_DEBUG(dbgs() << __func__ << "\n");
1305
1306 Type *SecTy = HVC.getByteTy(ScLen);
1307 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1308 bool DoAlign = !HVC.isZero(AlignVal);
1309
1310 // Stores.
1311 ByteSpan ASpanV, ASpanM;
1312
1313 // Return a vector value corresponding to the input value Val:
1314 // either <1 x Val> for scalar Val, or Val itself for vector Val.
1315 auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
1316 Type *Ty = Val->getType();
1317 if (Ty->isVectorTy())
1318 return Val;
1319 auto *VecTy = VectorType::get(Ty, 1, /*Scalable=*/false);
1320 return Builder.CreateBitCast(Val, VecTy, "cst");
1321 };
1322
1323 // Create an extra "undef" sector at the beginning and at the end.
1324 // They will be used as the left/right filler in the vlalign step.
1325 for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
1326 // For stores, the size of each section is an aligned vector length.
1327 // Adjust the store offsets relative to the section start offset.
1328 ByteSpan VSection =
1329 VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
1330 Value *Undef = UndefValue::get(SecTy);
1331 Value *Zero = HVC.getNullValue(SecTy);
1332 Value *AccumV = Undef;
1333 Value *AccumM = Zero;
1334 for (ByteSpan::Block &S : VSection) {
1335 Value *Pay = getPayload(S.Seg.Val);
1336 Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
1337 Pay->getType(), HVC.getByteTy());
1338 Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
1339 S.Seg.Start, S.Seg.Size, S.Pos);
1340 AccumM = Builder.CreateOr(AccumM, PartM);
1341
1342 Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
1343 S.Seg.Start, S.Seg.Size, S.Pos);
1344
1345 AccumV = Builder.CreateSelect(
1346 Builder.CreateICmp(CmpInst::ICMP_NE, PartM, Zero), PartV, AccumV);
1347 }
1348 ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
1349 ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
1350 }
1351
1352 LLVM_DEBUG({
1353 dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
1354 dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';
1355 });
1356
1357 // vlalign
1358 if (DoAlign) {
1359 for (int Index = 1; Index != NumSectors + 2; ++Index) {
1360 Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
1361 Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
1362 assert(isSectorTy(PrevV->getType()) && isSectorTy(PrevM->getType()));
1363 ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
1364 ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
1365 }
1366 }
1367
1368 LLVM_DEBUG({
1369 dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
1370 dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';
1371 });
1372
1373 auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
1374 const ByteSpan &ASpanM, int Index, bool MakePred) {
1375 Value *Val = ASpanV[Index].Seg.Val;
1376 Value *Mask = ASpanM[Index].Seg.Val; // bytes
1377 if (HVC.isUndef(Val) || HVC.isZero(Mask))
1378 return;
1379 Value *Ptr =
1380 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1381 Value *Predicate =
1382 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1383
1384 // If vector shifting is potentially needed, accumulate metadata
1385 // from source sections of twice the store width.
1386 int Start = (Index - DoAlign) * ScLen;
1387 int Width = (1 + DoAlign) * ScLen;
1388 this->createStore(Builder, Val, Ptr, Predicate, ScLen,
1389 HVC.vlsb(Builder, Mask),
1390 VSpan.section(Start, Width).values());
1391 };
1392
1393 for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
1394 createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
1395 }
1396}
1397
1398auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
1399 LLVM_DEBUG(dbgs() << "Realigning group:\n" << Move << '\n');
1400
1401 // TODO: Needs support for masked loads/stores of "scalar" vectors.
1402 if (!Move.IsHvx)
1403 return false;
1404
1405 // Return the element with the maximum alignment from Range,
1406 // where GetValue obtains the value to compare from an element.
1407 auto getMaxOf = [](auto Range, auto GetValue) {
1408 return *llvm::max_element(Range, [&GetValue](auto &A, auto &B) {
1409 return GetValue(A) < GetValue(B);
1410 });
1411 };
1412
1413 const AddrList &BaseInfos = AddrGroups.at(Move.Base);
1414
1415 // Conceptually, there is a vector of N bytes covering the addresses
1416 // starting from the minimum offset (i.e. Base.Addr+Start). This vector
1417 // represents a contiguous memory region that spans all accessed memory
1418 // locations.
1419 // The correspondence between loaded or stored values will be expressed
1420 // in terms of this vector. For example, the 0th element of the vector
1421 // from the Base address info will start at byte Start from the beginning
1422 // of this conceptual vector.
1423 //
1424 // This vector will be loaded/stored starting at the nearest down-aligned
1425 // address and the amount of the down-alignment will be AlignVal:
1426 // valign(load_vector(align_down(Base+Start)), AlignVal)
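//
// Made-up example: if Base+Start == 0x1007 and the sector length is 16,
// the accesses are issued at align_down(0x1007, 16) == 0x1000 and the
// down-alignment amount is the residue 7, which the valign/vlalign steps
// below use to shift the data back into place.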
1427
1428 std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
1429 AddrList MoveInfos;
1430 llvm::copy_if(
1431 BaseInfos, std::back_inserter(MoveInfos),
1432 [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });
1433
1434 // Maximum alignment present in the whole address group.
1435 const AddrInfo &WithMaxAlign =
1436 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
1437 Align MaxGiven = WithMaxAlign.HaveAlign;
1438
1439 // Element with the minimum offset in the move address group.
1440 const AddrInfo &WithMinOffset =
1441 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
1442
1443 const AddrInfo &WithMaxNeeded =
1444 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
1445 Align MinNeeded = WithMaxNeeded.NeedAlign;
1446
1447 // Set the builder's insertion point right before the load group, or
1448 // immediately after the store group. (Instructions in a store group are
1449 // listed in reverse order.)
1450 Instruction *InsertAt = Move.Main.front();
1451 if (!Move.IsLoad) {
1452 // There should be a terminator (which store isn't, but check anyways).
1453 assert(InsertAt->getIterator() != InsertAt->getParent()->end());
1454 InsertAt = &*std::next(InsertAt->getIterator());
1455 }
1456
1457 IRBuilder Builder(InsertAt->getParent(), InsertAt->getIterator(),
1458 InstSimplifyFolder(HVC.DL));
1459 Value *AlignAddr = nullptr; // Actual aligned address.
1460 Value *AlignVal = nullptr; // Right-shift amount (for valign).
1461
1462 if (MinNeeded <= MaxGiven) {
1463 int Start = WithMinOffset.Offset;
1464 int OffAtMax = WithMaxAlign.Offset;
1465 // Shift the offset of the maximally aligned instruction (OffAtMax)
1466 // back by just enough multiples of the required alignment to cover the
1467 // distance from Start to OffAtMax.
1468 // Calculate the address adjustment amount based on the address with the
1469 // maximum alignment. This is to allow a simple gep instruction instead
1470 // of potential bitcasts to i8*.
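// Worked example with made-up numbers: Start = -4, OffAtMax = 24,
// MinNeeded = 16 gives Adjust = -alignTo(28, 16) = -32 and
// Diff = -4 - (24 - 32) = 4, which satisfies 0 <= Diff < MinNeeded.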
1471 int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
1472 AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
1473 WithMaxAlign.ValTy, Adjust, Move.Clones);
1474 int Diff = Start - (OffAtMax + Adjust);
1475 AlignVal = HVC.getConstInt(Diff);
1476 assert(Diff >= 0);
1477 assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
1478 } else {
1479 // WithMinOffset is the lowest address in the group,
1480 // WithMinOffset.Addr = Base+Start.
1481 // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
1482 // mask off unnecessary bits, so it's ok to just use the original pointer as
1483 // the alignment amount.
1484 // Do an explicit down-alignment of the address to avoid creating an
1485 // aligned instruction with an address that is not really aligned.
1486 AlignAddr =
1487 createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
1488 MinNeeded.value(), Move.Clones);
1489 AlignVal =
1490 Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
1491 if (auto *I = dyn_cast<Instruction>(AlignVal)) {
1492 for (auto [Old, New] : Move.Clones)
1493 I->replaceUsesOfWith(Old, New);
1494 }
1495 }
1496
1497 ByteSpan VSpan;
1498 for (const AddrInfo &AI : MoveInfos) {
1499 VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
1500 AI.Offset - WithMinOffset.Offset);
1501 }
1502
1503 // The aligned loads/stores will use blocks that are either scalars,
1504 // or HVX vectors. Let "sector" be the unified term for such a block.
1505 // blend(scalar, vector) -> sector...
1506 int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
1507 : std::max<int>(MinNeeded.value(), 4);
1508 assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
1509 assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
1510
1511 LLVM_DEBUG({
1512 dbgs() << "ScLen: " << ScLen << "\n";
1513 dbgs() << "AlignVal:" << *AlignVal << "\n";
1514 dbgs() << "AlignAddr:" << *AlignAddr << "\n";
1515 dbgs() << "VSpan:\n" << VSpan << '\n';
1516 });
1517
1518 if (Move.IsLoad)
1519 realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1520 else
1521 realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1522
1523 for (auto *Inst : Move.Main)
1524 Inst->eraseFromParent();
1525
1526 return true;
1527}
1528
1529auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
1530 int Alignment) const -> Value * {
1531 auto *AlignTy = AlignVal->getType();
1532 Value *And = Builder.CreateAnd(
1533 AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
1534 Value *Zero = ConstantInt::get(AlignTy, 0);
1535 return Builder.CreateICmpNE(And, Zero, "isz");
1536}
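
// A sketch of the IR produced by makeTestIfUnaligned, assuming a 32-bit
// AlignVal and Alignment == 128 (values chosen only for illustration):
//   %and = and i32 %alignval, 127
//   %isz = icmp ne i32 %and, 0
// i.e. the result is true exactly when the address is not 128-byte aligned.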
1537
1538auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
1539 if (!HVC.isByteVecTy(Ty))
1540 return false;
1541 int Size = HVC.getSizeOf(Ty);
1542 if (HVC.HST.isTypeForHVX(Ty))
1543 return Size == static_cast<int>(HVC.HST.getVectorLength());
1544 return Size == 4 || Size == 8;
1545}
1546
1547auto AlignVectors::run() -> bool {
1548 LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
1549 << '\n');
1550 if (!createAddressGroups())
1551 return false;
1552
1553 LLVM_DEBUG({
1554 dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
1555 for (auto &[In, AL] : AddrGroups) {
1556 for (const AddrInfo &AI : AL)
1557 dbgs() << "---\n" << AI << '\n';
1558 }
1559 });
1560
1561 bool Changed = false;
1562 MoveList LoadGroups, StoreGroups;
1563
1564 for (auto &G : AddrGroups) {
1565 llvm::append_range(LoadGroups, createLoadGroups(G.second));
1566 llvm::append_range(StoreGroups, createStoreGroups(G.second));
1567 }
1568
1569 LLVM_DEBUG({
1570 dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
1571 for (const MoveGroup &G : LoadGroups)
1572 dbgs() << G << "\n";
1573 dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
1574 for (const MoveGroup &G : StoreGroups)
1575 dbgs() << G << "\n";
1576 });
1577
1578 // Cumulative limit on the number of groups.
1579 unsigned CountLimit = VAGroupCountLimit;
1580 if (CountLimit == 0)
1581 return false;
1582
1583 if (LoadGroups.size() > CountLimit) {
1584 LoadGroups.resize(CountLimit);
1585 StoreGroups.clear();
1586 } else {
1587 unsigned StoreLimit = CountLimit - LoadGroups.size();
1588 if (StoreGroups.size() > StoreLimit)
1589 StoreGroups.resize(StoreLimit);
1590 }
1591
1592 for (auto &M : LoadGroups)
1593 Changed |= moveTogether(M);
1594 for (auto &M : StoreGroups)
1595 Changed |= moveTogether(M);
1596
1597 LLVM_DEBUG(dbgs() << "After moveTogether:\n" << HVC.F);
1598
1599 for (auto &M : LoadGroups)
1600 Changed |= realignGroup(M);
1601 for (auto &M : StoreGroups)
1602 Changed |= realignGroup(M);
1603
1604 return Changed;
1605}
1606
1607// --- End AlignVectors
1608
1609// --- Begin HvxIdioms
1610
1611auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
1612 -> std::pair<unsigned, Signedness> {
1613 unsigned Bits = HVC.getNumSignificantBits(V, In);
1614 // The significant bits are calculated including the sign bit. This may
1615 // add an extra bit for zero-extended values, e.g. (zext i32 to i64) may
1616 // result in 33 significant bits. To avoid extra words, skip the extra
1617 // sign bit, but keep information that the value is to be treated as
1618 // unsigned.
1619 KnownBits Known = HVC.getKnownBits(V, In);
1620 Signedness Sign = Signed;
1621 unsigned NumToTest = 0; // Number of bits used in test for unsignedness.
1622 if (isPowerOf2_32(Bits))
1623 NumToTest = Bits;
1624 else if (Bits > 1 && isPowerOf2_32(Bits - 1))
1625 NumToTest = Bits - 1;
1626
1627 if (NumToTest != 0 && Known.Zero.ashr(NumToTest).isAllOnes()) {
1628 Sign = Unsigned;
1629 Bits = NumToTest;
1630 }
1631
1632 // If the top bit of the nearest power-of-2 is zero, this value is
1633 // positive. It could be treated as either signed or unsigned.
1634 if (unsigned Pow2 = PowerOf2Ceil(Bits); Pow2 != Bits) {
1635 if (Known.Zero.ashr(Pow2 - 1).isAllOnes())
1636 Sign = Positive;
1637 }
1638 return {Bits, Sign};
1639}
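
// Worked example (assumed input): for (zext i32 %v to i64) the generic query
// reports 33 significant bits. Bits - 1 == 32 is a power of 2 and bits
// 32..63 are known to be zero, so the value is reclassified as {32, Unsigned}.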
1640
1641auto HvxIdioms::canonSgn(SValue X, SValue Y) const
1642 -> std::pair<SValue, SValue> {
1643 // Canonicalize the signedness of X and Y, so that the result is one of:
1644 // S, S
1645 // U/P, S
1646 // U/P, U/P
1647 if (X.Sgn == Signed && Y.Sgn != Signed)
1648 std::swap(X, Y);
1649 return {X, Y};
1650}
1651
1652// Match
1653// (X * Y) [>> N], or
1654// ((X * Y) + (1 << M)) >> N
1655auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
1656 using namespace PatternMatch;
1657 auto *Ty = In.getType();
1658
1659 if (!Ty->isVectorTy() || !Ty->getScalarType()->isIntegerTy())
1660 return std::nullopt;
1661
1662 unsigned Width = cast<IntegerType>(Ty->getScalarType())->getBitWidth();
1663
1664 FxpOp Op;
1665 Value *Exp = &In;
1666
1667 // Fixed-point multiplication is always shifted right (except when the
1668 // fraction is 0 bits).
1669 auto m_Shr = [](auto &&V, auto &&S) {
1670 return m_CombineOr(m_LShr(V, S), m_AShr(V, S));
1671 };
1672
1673 uint64_t Qn = 0;
1674 if (Value *T; match(Exp, m_Shr(m_Value(T), m_ConstantInt(Qn)))) {
1675 Op.Frac = Qn;
1676 Exp = T;
1677 } else {
1678 Op.Frac = 0;
1679 }
1680
1681 if (Op.Frac > Width)
1682 return std::nullopt;
1683
1684 // Check if there is rounding added.
1685 uint64_t CV;
1686 if (Value *T;
1687 Op.Frac > 0 && match(Exp, m_Add(m_Value(T), m_ConstantInt(CV)))) {
1688 if (CV != 0 && !isPowerOf2_64(CV))
1689 return std::nullopt;
1690 if (CV != 0)
1691 Op.RoundAt = Log2_64(CV);
1692 Exp = T;
1693 }
1694
1695 // Check if the rest is a multiplication.
1696 if (match(Exp, m_Mul(m_Value(Op.X.Val), m_Value(Op.Y.Val)))) {
1697 Op.Opcode = Instruction::Mul;
1698 // FIXME: The information below is recomputed.
1699 Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
1700 Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
1701 Op.ResTy = cast<VectorType>(Ty);
1702 return Op;
1703 }
1704
1705 return std::nullopt;
1706}
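
// Conceptual example of a match (illustration only): an expression of the
// form ((X * Y) + (1 << 14)) >> 15 on vector operands is captured as
// FxpOp{Opcode = Mul, Frac = 15, RoundAt = 14}, while a plain (X * Y) >> 15
// yields the same Frac with RoundAt left unset.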
1707
1708auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
1709 -> Value * {
1710 assert(Op.X.Val->getType() == Op.Y.Val->getType());
1711
1712 auto *VecTy = dyn_cast<VectorType>(Op.X.Val->getType());
1713 if (VecTy == nullptr)
1714 return nullptr;
1715 auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
1716 unsigned ElemWidth = ElemTy->getBitWidth();
1717
1718 // TODO: This can be relaxed after legalization is done pre-isel.
1719 if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
1720 return nullptr;
1721
1722 // There are no special intrinsics that should be used for multiplying
1723 // signed 8-bit values, so just skip them. Normal codegen should handle
1724 // this just fine.
1725 if (ElemWidth <= 8)
1726 return nullptr;
1727 // Similarly, if this is just a multiplication that can be handled without
1728 // intervention, then leave it alone.
1729 if (ElemWidth <= 32 && Op.Frac == 0)
1730 return nullptr;
1731
1732 auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
1733 auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
1734
1735 // TODO: Add multiplication of vectors by scalar registers (up to 4 bytes).
1736
1737 Value *X = Op.X.Val, *Y = Op.Y.Val;
1738 IRBuilder Builder(In.getParent(), In.getIterator(),
1739 InstSimplifyFolder(HVC.DL));
1740
1741 auto roundUpWidth = [](unsigned Width) -> unsigned {
1742 if (Width <= 32 && !isPowerOf2_32(Width)) {
1743 // If the element width is not a power of 2, round it up
1744 // to the next one. Do this for widths not exceeding 32.
1745 return PowerOf2Ceil(Width);
1746 }
1747 if (Width > 32 && Width % 32 != 0) {
1748 // For wider elements, round it up to the multiple of 32.
1749 return alignTo(Width, 32u);
1750 }
1751 return Width;
1752 };
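 // For example (hypothetical widths): 17 significant bits round up to 32,
 // and 40 bits round up to the next multiple of 32, i.e. 64.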
1753
1754 BitsX = roundUpWidth(BitsX);
1755 BitsY = roundUpWidth(BitsY);
1756
1757 // For elementwise multiplication vectors must have the same lengths, so
1758 // resize the elements of both inputs to the same width, the max of the
1759 // calculated significant bits.
1760 unsigned Width = std::max(BitsX, BitsY);
1761
1762 auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
1763 if (Width < ElemWidth) {
1764 X = Builder.CreateTrunc(X, ResizeTy, "trn");
1765 Y = Builder.CreateTrunc(Y, ResizeTy, "trn");
1766 } else if (Width > ElemWidth) {
1767 X = SignX == Signed ? Builder.CreateSExt(X, ResizeTy, "sxt")
1768 : Builder.CreateZExt(X, ResizeTy, "zxt");
1769 Y = SignY == Signed ? Builder.CreateSExt(Y, ResizeTy, "sxt")
1770 : Builder.CreateZExt(Y, ResizeTy, "zxt");
1771 }
1772
1773 assert(X->getType() == Y->getType() && X->getType() == ResizeTy);
1774
1775 unsigned VecLen = HVC.length(ResizeTy);
1776 unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
1777
1778 SmallVector<Value *> Results;
1779 FxpOp ChopOp = Op;
1780 ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
1781
1782 for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
1783 ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
1784 ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
1785 Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
1786 if (Results.back() == nullptr)
1787 break;
1788 }
1789
1790 if (Results.empty() || Results.back() == nullptr)
1791 return nullptr;
1792
1793 Value *Cat = HVC.concat(Builder, Results);
1794 Value *Ext = SignX == Signed || SignY == Signed
1795 ? Builder.CreateSExt(Cat, VecTy, "sxt")
1796 : Builder.CreateZExt(Cat, VecTy, "zxt");
1797 return Ext;
1798}
1799
1800auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
1801 const FxpOp &Op) const -> Value * {
1802 assert(Op.X.Val->getType() == Op.Y.Val->getType());
1803 auto *InpTy = cast<VectorType>(Op.X.Val->getType());
1804 unsigned Width = InpTy->getScalarSizeInBits();
1805 bool Rounding = Op.RoundAt.has_value();
1806
1807 if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
1808 // The fixed-point intrinsics do signed multiplication.
1809 if (Width == Op.Frac + 1 && Op.X.Sgn != Unsigned && Op.Y.Sgn != Unsigned) {
1810 Value *QMul = nullptr;
1811 if (Width == 16) {
1812 QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
1813 } else if (Width == 32) {
1814 QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
1815 }
1816 if (QMul != nullptr)
1817 return QMul;
1818 }
1819 }
1820
1821 assert(Width >= 32 || isPowerOf2_32(Width)); // Width <= 32 => Width is 2^n
1822 assert(Width < 32 || Width % 32 == 0); // Width > 32 => Width is 32*k
1823
1824 // If Width < 32, then it should really be 16.
1825 if (Width < 32) {
1826 if (Width < 16)
1827 return nullptr;
1828 // Getting here with Op.Frac == 0 isn't wrong, but suboptimal: here we
1829 // generate a full-precision product, which is unnecessary if there is
1830 // no shift.
1831 assert(Width == 16);
1832 assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
1833 if (Op.Frac == 16) {
1834 // Multiply high
1835 if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
1836 return MulH;
1837 }
1838 // Do full-precision multiply and shift.
1839 Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
1840 if (Rounding) {
1841 Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
1842 Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
1843 }
1844
1845 Value *ShiftAmt = HVC.getConstSplat(Prod32->getType(), Op.Frac);
1846 Value *Shifted = Op.X.Sgn == Signed || Op.Y.Sgn == Signed
1847 ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
1848 : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
1849 return Builder.CreateTrunc(Shifted, InpTy, "trn");
1850 }
1851
1852 // Width >= 32
1853
1854 // Break up the arguments Op.X and Op.Y into vectors of smaller widths
1855 // in preparation of doing the multiplication by 32-bit parts.
1856 auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, /*ToWidth=*/32);
1857 auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, /*ToWidth=*/32);
1858 auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);
1859
1860 auto *HvxWordTy = cast<VectorType>(WordP.front()->getType());
1861
1862 // Add the optional rounding to the proper word.
1863 if (Op.RoundAt.has_value()) {
1864 Value *Zero = HVC.getNullValue(WordX[0]->getType());
1865 SmallVector<Value *> RoundV(WordP.size(), Zero);
1866 RoundV[*Op.RoundAt / 32] =
1867 HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));
1868 WordP = createAddLong(Builder, WordP, RoundV);
1869 }
1870
1871 // createRightShiftLong?
1872
1873 // Shift all products right by Op.Frac.
1874 unsigned SkipWords = Op.Frac / 32;
1875 Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);
1876
1877 for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
1878 int Src = Dst + SkipWords;
1879 Value *Lo = WordP[Src];
1880 if (Src + 1 < End) {
1881 Value *Hi = WordP[Src + 1];
1882 WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,
1883 {Hi, Lo, ShiftAmt},
1884 /*FMFSource*/ nullptr, "int");
1885 } else {
1886 // The shift of the most significant word.
1887 WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt, "asr");
1888 }
1889 }
1890 if (SkipWords != 0)
1891 WordP.resize(WordP.size() - SkipWords);
1892
1893 return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
1894}
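
// Worked example for the word-wise shift above (hypothetical value): with
// 32-bit words and Op.Frac == 40, SkipWords == 1 and the in-word shift
// amount is 8, so result word D is taken from words D+1 (and D+2, via fshr)
// of the product, the topmost result word uses a plain arithmetic shift, and
// the word count shrinks by SkipWords at the end.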
1895
1896auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
1897 bool Rounding) const -> Value * {
1898 assert(X.Val->getType() == Y.Val->getType());
1899 assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
1900 assert(HVC.HST.isHVXVectorType(EVT::getEVT(X.Val->getType(), false)));
1901
1902 // There is no non-rounding intrinsic for i16.
1903 if (!Rounding || X.Sgn == Unsigned || Y.Sgn == Unsigned)
1904 return nullptr;
1905
1906 auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
1907 return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
1908 {X.Val, Y.Val});
1909}
1910
1911auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
1912 bool Rounding) const -> Value * {
1913 Type *InpTy = X.Val->getType();
1914 assert(InpTy == Y.Val->getType());
1915 assert(InpTy->getScalarType() == HVC.getIntTy(32));
1916 assert(HVC.HST.isHVXVectorType(EVT::getEVT(InpTy, false)));
1917
1918 if (X.Sgn == Unsigned || Y.Sgn == Unsigned)
1919 return nullptr;
1920
1921 auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
1922 auto V6_vmpyo_acc = Rounding
1923 ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
1924 : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
1925 Value *V1 =
1926 HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
1927 return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
1928 {V1, X.Val, Y.Val});
1929}
1930
1931auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
1932 Value *CarryIn) const
1933 -> std::pair<Value *, Value *> {
1934 assert(X->getType() == Y->getType());
1935 auto VecTy = cast<VectorType>(X->getType());
1936 if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
1937 SmallVector<Value *> Args = {X, Y};
1938 Intrinsic::ID AddCarry;
1939 if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {
1940 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
1941 } else {
1942 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
1943 if (CarryIn == nullptr)
1944 CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
1945 Args.push_back(CarryIn);
1946 }
1947 Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
1948 /*RetTy=*/nullptr, Args);
1949 Value *Result = Builder.CreateExtractValue(Ret, {0}, "ext");
1950 Value *CarryOut = Builder.CreateExtractValue(Ret, {1}, "ext");
1951 return {Result, CarryOut};
1952 }
1953
1954 // In other cases, do a regular add, and unsigned compare-less-than.
1955 // The carry-out can originate in two places: adding the carry-in or adding
1956 // the two input values.
1957 Value *Result1 = X; // Result1 = X + CarryIn
1958 if (CarryIn != nullptr) {
1959 unsigned Width = VecTy->getScalarSizeInBits();
1960 uint32_t Mask = 1;
1961 if (Width < 32) {
1962 for (unsigned i = 0, e = 32 / Width; i != e; ++i)
1963 Mask = (Mask << Width) | 1;
1964 }
1965 auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
1966 Value *ValueIn =
1967 HVC.createHvxIntrinsic(Builder, V6_vandqrt, /*RetTy=*/nullptr,
1968 {CarryIn, HVC.getConstInt(Mask)});
1969 Result1 = Builder.CreateAdd(X, ValueIn, "add");
1970 }
1971
1972 Value *CarryOut1 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result1, X, "cmp");
1973 Value *Result2 = Builder.CreateAdd(Result1, Y, "add");
1974 Value *CarryOut2 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result2, Y, "cmp");
1975 return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
1976}
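
// Worked example of the fallback path (hypothetical 8-bit lanes): for
// X == 250, Y == 10 and no carry-in, Result2 == (250 + 10) mod 256 == 4,
// and 4 <u 10 detects the wrap-around, so CarryOut2 is true. A carry caused
// by adding the carry-in is detected the same way via Result1 <u X.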
1977
1978auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
1979 -> Value * {
1980 Intrinsic::ID V6_vmpyh = 0;
1981 std::tie(X, Y) = canonSgn(X, Y);
1982
1983 if (X.Sgn == Signed) {
1984 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
1985 } else if (Y.Sgn == Signed) {
1986 // In vmpyhus the second operand is unsigned
1987 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
1988 } else {
1989 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);
1990 }
1991
1992 // i16*i16 -> i32 / interleaved
1993 Value *P =
1994 HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
1995 // Deinterleave
1996 return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
1997}
1998
1999auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
2000 -> Value * {
2001 Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false);
2002
2003 if (HVC.HST.useHVXV69Ops()) {
2004 if (X.Sgn != Signed && Y.Sgn != Signed) {
2005 auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
2006 return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
2007 {X.Val, Y.Val});
2008 }
2009 }
2010
2011 Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/true);
2012 Value *Pair16 =
2013 Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty, "cst");
2014 unsigned Len = HVC.length(HvxP16Ty) / 2;
2015
2016 SmallVector<int, 128> PickOdd(Len);
2017 for (int i = 0; i != static_cast<int>(Len); ++i)
2018 PickOdd[i] = 2 * i + 1;
2019
2020 return Builder.CreateShuffleVector(
2021 HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
2022}
2023
2024auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
2025 -> std::pair<Value *, Value *> {
2026 assert(X.Val->getType() == Y.Val->getType());
2027 assert(X.Val->getType() == HvxI32Ty);
2028
2029 Intrinsic::ID V6_vmpy_parts;
2030 std::tie(X, Y) = canonSgn(X, Y);
2031
2032 if (X.Sgn == Signed) {
2033 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
2034 } else if (Y.Sgn == Signed) {
2035 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
2036 } else {
2037 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
2038 }
2039
2040 Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
2041 {X.Val, Y.Val}, {HvxI32Ty});
2042 Value *Hi = Builder.CreateExtractValue(Parts, {0}, "ext");
2043 Value *Lo = Builder.CreateExtractValue(Parts, {1}, "ext");
2044 return {Lo, Hi};
2045}
2046
2047auto HvxIdioms::createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
2048 ArrayRef<Value *> WordY) const
2049 -> SmallVector<Value *> {
2050 assert(WordX.size() == WordY.size());
2051 unsigned Idx = 0, Length = WordX.size();
2052 SmallVector<Value *> Sum(Length);
2053
2054 while (Idx != Length) {
2055 if (HVC.isZero(WordX[Idx]))
2056 Sum[Idx] = WordY[Idx];
2057 else if (HVC.isZero(WordY[Idx]))
2058 Sum[Idx] = WordX[Idx];
2059 else
2060 break;
2061 ++Idx;
2062 }
2063
2064 Value *Carry = nullptr;
2065 for (; Idx != Length; ++Idx) {
2066 std::tie(Sum[Idx], Carry) =
2067 createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);
2068 }
2069
2070 // This drops the final carry beyond the highest word.
2071 return Sum;
2072}
2073
2074auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
2075 Signedness SgnX, ArrayRef<Value *> WordY,
2076 Signedness SgnY) const -> SmallVector<Value *> {
2077 SmallVector<SmallVector<Value *>> Products(WordX.size() + WordY.size());
2078
2079 // WordX[i] * WordY[j] produces words i+j and i+j+1 of the results,
2080 // that is halves 2(i+j), 2(i+j)+1, 2(i+j)+2, 2(i+j)+3.
2081 for (int i = 0, e = WordX.size(); i != e; ++i) {
2082 for (int j = 0, f = WordY.size(); j != f; ++j) {
2083 // Check the 4 halves that this multiplication can generate.
2084 Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
2085 Signedness SY = (j + 1 == f) ? SgnY : Unsigned;
2086 auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
2087 Products[i + j + 0].push_back(Lo);
2088 Products[i + j + 1].push_back(Hi);
2089 }
2090 }
2091
2092 Value *Zero = HVC.getNullValue(WordX[0]->getType());
2093
2094 auto pop_back_or_zero = [Zero](auto &Vector) -> Value * {
2095 if (Vector.empty())
2096 return Zero;
2097 auto Last = Vector.back();
2098 Vector.pop_back();
2099 return Last;
2100 };
2101
2102 for (int i = 0, e = Products.size(); i != e; ++i) {
2103 while (Products[i].size() > 1) {
2104 Value *Carry = nullptr; // no carry-in
2105 for (int j = i; j != e; ++j) {
2106 auto &ProdJ = Products[j];
2107 auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
2108 pop_back_or_zero(ProdJ), Carry);
2109 ProdJ.insert(ProdJ.begin(), Sum);
2110 Carry = CarryOut;
2111 }
2112 }
2113 }
2114
2115 SmallVector<Value *> WordP;
2116 for (auto &P : Products) {
2117 assert(P.size() == 1 && "Should have been added together");
2118 WordP.push_back(P.front());
2119 }
2120
2121 return WordP;
2122}
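
// Worked example of the partial-product layout (two 32-bit words per input,
// i.e. a 64 x 64 -> 128 bit multiply): X0*Y0 feeds words 0 and 1, X0*Y1 and
// X1*Y0 feed words 1 and 2, and X1*Y1 feeds words 2 and 3; the column sums
// are then reduced with createAddCarry, propagating each column's carry into
// the next one.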
2123
2124auto HvxIdioms::run() -> bool {
2125 bool Changed = false;
2126
2127 for (BasicBlock &B : HVC.F) {
2128 for (auto It = B.rbegin(); It != B.rend(); ++It) {
2129 if (auto Fxm = matchFxpMul(*It)) {
2130 Value *New = processFxpMul(*It, *Fxm);
2131 // Always report "changed" for now.
2132 Changed = true;
2133 if (!New)
2134 continue;
2135 bool StartOver = !isa<Instruction>(New);
2136 It->replaceAllUsesWith(New);
2137 RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
2138 It = StartOver ? B.rbegin()
2139 : cast<Instruction>(New)->getReverseIterator();
2140 Changed = true;
2141 }
2142 }
2143 }
2144
2145 return Changed;
2146}
2147
2148// --- End HvxIdioms
2149
2150auto HexagonVectorCombine::run() -> bool {
2151 if (DumpModule)
2152 dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();
2153
2154 bool Changed = false;
2155 if (HST.useHVXOps()) {
2156 if (VAEnabled)
2157 Changed |= AlignVectors(*this).run();
2158 if (VIEnabled)
2159 Changed |= HvxIdioms(*this).run();
2160 }
2161
2162 if (DumpModule) {
2163 dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
2164 << " after HexagonVectorCombine\n"
2165 << *F.getParent();
2166 }
2167 return Changed;
2168}
2169
2170auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {
2171 return IntegerType::get(F.getContext(), Width);
2172}
2173
2174auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
2175 assert(ElemCount >= 0);
2176 IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
2177 if (ElemCount == 0)
2178 return ByteTy;
2179 return VectorType::get(ByteTy, ElemCount, /*Scalable=*/false);
2180}
2181
2182auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
2183 assert(ElemCount >= 0);
2184 IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
2185 if (ElemCount == 0)
2186 return BoolTy;
2187 return VectorType::get(BoolTy, ElemCount, /*Scalable=*/false);
2188}
2189
2190auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const
2191 -> ConstantInt * {
2192 return ConstantInt::getSigned(getIntTy(Width), Val);
2193}
2194
2195auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
2196 if (auto *C = dyn_cast<Constant>(Val))
2197 return C->isZeroValue();
2198 return false;
2199}
2200
2201auto HexagonVectorCombine::getIntValue(const Value *Val) const
2202 -> std::optional<APInt> {
2203 if (auto *CI = dyn_cast<ConstantInt>(Val))
2204 return CI->getValue();
2205 return std::nullopt;
2206}
2207
2208auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
2209 return isa<UndefValue>(Val);
2210}
2211
2212auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {
2213 return Val == ConstantInt::getTrue(Val->getType());
2214}
2215
2216auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
2217 return isZero(Val);
2218}
2219
2220auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
2221 -> VectorType * {
2222 EVT ETy = EVT::getEVT(ElemTy, false);
2223 assert(ETy.isSimple() && "Invalid HVX element type");
2224 // Do not allow boolean types here: they don't have a fixed length.
2225 assert(HST.isHVXElementType(ETy.getSimpleVT(), /*IncludeBool=*/false) &&
2226 "Invalid HVX element type");
2227 unsigned HwLen = HST.getVectorLength();
2228 unsigned NumElems = (8 * HwLen) / ETy.getSizeInBits();
2229 return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
2230 /*Scalable=*/false);
2231}
2232
2233auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
2234 -> int {
2235 return getSizeOf(Val->getType(), Kind);
2236}
2237
2238auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
2239 -> int {
2240 auto *NcTy = const_cast<Type *>(Ty);
2241 switch (Kind) {
2242 case Store:
2243 return DL.getTypeStoreSize(NcTy).getFixedValue();
2244 case Alloc:
2245 return DL.getTypeAllocSize(NcTy).getFixedValue();
2246 }
2247 llvm_unreachable("Unhandled SizeKind enum");
2248}
2249
2250auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
2251 // The actual type may be shorter than the HVX vector, so determine
2252 // the alignment based on subtarget info.
2253 if (HST.isTypeForHVX(Ty))
2254 return HST.getVectorLength();
2255 return DL.getABITypeAlign(Ty).value();
2256}
2257
2258auto HexagonVectorCombine::length(Value *Val) const -> size_t {
2259 return length(Val->getType());
2260}
2261
2262auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
2263 auto *VecTy = dyn_cast<VectorType>(Ty);
2264 assert(VecTy && "Must be a vector type");
2265 return VecTy->getElementCount().getFixedValue();
2266}
2267
2268auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
2269 assert(Ty->isIntOrIntVectorTy());
2270 auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
2271 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2272 return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
2273 return Zero;
2274}
2275
2276auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
2277 assert(Ty->isIntOrIntVectorTy());
2278 auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
2279 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2280 return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
2281 return Minus1;
2282}
2283
2284auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
2285 -> Constant * {
2286 assert(Ty->isVectorTy());
2287 auto VecTy = cast<VectorType>(Ty);
2288 Type *ElemTy = VecTy->getElementType();
2289 // Add support for floats if needed.
2290 auto *Splat = ConstantVector::getSplat(VecTy->getElementCount(),
2291 ConstantInt::get(ElemTy, Val));
2292 return Splat;
2293}
2294
2295auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
2296 if (auto *In = dyn_cast<Instruction>(V)) {
2297 SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
2298 return simplifyInstruction(In, Q);
2299 }
2300 return nullptr;
2301}
2302
2303// Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
2304auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
2305 Value *Src, int Start, int Length,
2306 int Where) const -> Value * {
2307 assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
2308 int SrcLen = getSizeOf(Src);
2309 int DstLen = getSizeOf(Dst);
2310 assert(0 <= Start && Start + Length <= SrcLen);
2311 assert(0 <= Where && Where + Length <= DstLen);
2312
2313 int P2Len = PowerOf2Ceil(SrcLen | DstLen);
2314 auto *Poison = PoisonValue::get(getByteTy());
2315 Value *P2Src = vresize(Builder, Src, P2Len, Poison);
2316 Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);
2317
2318 SmallVector<int, 256> SMask(P2Len);
2319 for (int i = 0; i != P2Len; ++i) {
2320 // If i is in [Where, Where+Length), pick Src[Start+(i-Where)].
2321 // Otherwise, pick Dst[i];
2322 SMask[i] =
2323 (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
2324 }
2325
2326 Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask, "shf");
2327 return vresize(Builder, P2Insert, DstLen, Poison);
2328}
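
// Worked example (hypothetical sizes): for DstLen == SrcLen == 8, Start == 2,
// Length == 3, Where == 4, P2Len is 8 and the shuffle mask is
// [0, 1, 2, 3, 10, 11, 12, 7]: positions 4..6 pick Src[2..4] (offset by
// P2Len == 8 in the two-operand shuffle), and the rest keep Dst.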
2329
2330auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
2331 Value *Hi, Value *Amt) const -> Value * {
2332 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
2333 if (isZero(Amt))
2334 return Hi;
2335 int VecLen = getSizeOf(Hi);
2336 if (auto IntAmt = getIntValue(Amt))
2337 return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
2338 VecLen);
2339
2340 if (HST.isTypeForHVX(Hi->getType())) {
2341 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
2342 "Expecting an exact HVX type");
2343 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
2344 Hi->getType(), {Hi, Lo, Amt});
2345 }
2346
2347 if (VecLen == 4) {
2348 Value *Pair = concat(Builder, {Lo, Hi});
2349 Value *Shift =
2350 Builder.CreateLShr(Builder.CreateShl(Pair, Amt, "shl"), 32, "lsr");
2351 Value *Trunc =
2352 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
2353 return Builder.CreateBitCast(Trunc, Hi->getType(), "cst");
2354 }
2355 if (VecLen == 8) {
2356 Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt, "sub");
2357 return vralignb(Builder, Lo, Hi, Sub);
2358 }
2359 llvm_unreachable("Unexpected vector length");
2360}
2361
2362auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
2363 Value *Hi, Value *Amt) const -> Value * {
2364 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
2365 if (isZero(Amt))
2366 return Lo;
2367 int VecLen = getSizeOf(Lo);
2368 if (auto IntAmt = getIntValue(Amt))
2369 return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
2370
2371 if (HST.isTypeForHVX(Lo->getType())) {
2372 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
2373 "Expecting an exact HVX type");
2374 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
2375 Lo->getType(), {Hi, Lo, Amt});
2376 }
2377
2378 if (VecLen == 4) {
2379 Value *Pair = concat(Builder, {Lo, Hi});
2380 Value *Shift = Builder.CreateLShr(Pair, Amt, "lsr");
2381 Value *Trunc =
2382 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
2383 return Builder.CreateBitCast(Trunc, Lo->getType(), "cst");
2384 }
2385 if (VecLen == 8) {
2386 Type *Int64Ty = Type::getInt64Ty(F.getContext());
2387 Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty, "cst");
2388 Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty, "cst");
2389 Value *Call = Builder.CreateIntrinsic(Intrinsic::hexagon_S2_valignrb,
2390 {Hi64, Lo64, Amt},
2391 /*FMFSource=*/nullptr, "cup");
2392 return Builder.CreateBitCast(Call, Lo->getType(), "cst");
2393 }
2394 llvm_unreachable("Unexpected vector length");
2395}
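
// Worked example (hypothetical 8-byte vectors): vralignb(Lo, Hi, 3) selects
// bytes 3..10 of the concatenation Lo:Hi, i.e. Lo[3..7] followed by Hi[0..2].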
2396
2397// Concatenates a sequence of vectors of the same type.
2398auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
2399 ArrayRef<Value *> Vecs) const -> Value * {
2400 assert(!Vecs.empty());
2401 SmallVector<int, 256> SMask;
2402 std::vector<Value *> Work[2];
2403 int ThisW = 0, OtherW = 1;
2404
2405 Work[ThisW].assign(Vecs.begin(), Vecs.end());
2406 while (Work[ThisW].size() > 1) {
2407 auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
2408 SMask.resize(length(Ty) * 2);
2409 std::iota(SMask.begin(), SMask.end(), 0);
2410
2411 Work[OtherW].clear();
2412 if (Work[ThisW].size() % 2 != 0)
2413 Work[ThisW].push_back(UndefValue::get(Ty));
2414 for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
2415 Value *Joined = Builder.CreateShuffleVector(
2416 Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
2417 Work[OtherW].push_back(Joined);
2418 }
2419 std::swap(ThisW, OtherW);
2420 }
2421
2422 // Since there may have been some undefs appended to make shuffle operands
2423 // have the same type, perform the last shuffle to only pick the original
2424 // elements.
2425 SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
2426 std::iota(SMask.begin(), SMask.end(), 0);
2427 Value *Total = Work[ThisW].front();
2428 return Builder.CreateShuffleVector(Total, SMask, "shf");
2429}
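
// Illustrative example (hypothetical inputs): concatenating three <16 x i8>
// vectors pads the work list to four with undef, joins pairs into <32 x i8>,
// joins those into <64 x i8>, and the final shuffle trims the result back to
// the 48 original elements.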
2430
2431auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
2432 int NewSize, Value *Pad) const -> Value * {
2433 assert(isa<VectorType>(Val->getType()));
2434 auto *ValTy = cast<VectorType>(Val->getType());
2435 assert(ValTy->getElementType() == Pad->getType());
2436
2437 int CurSize = length(ValTy);
2438 if (CurSize == NewSize)
2439 return Val;
2440 // Truncate?
2441 if (CurSize > NewSize)
2442 return getElementRange(Builder, Val, /*Ignored*/ Val, 0, NewSize);
2443 // Extend.
2444 SmallVector<int, 128> SMask(NewSize);
2445 std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
2446 std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
2447 Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad, "spt");
2448 return Builder.CreateShuffleVector(Val, PadVec, SMask, "shf");
2449}
2450
2451auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
2452 Type *FromTy, Type *ToTy) const -> Value * {
2453 // Mask is a vector <N x i1>, where each element corresponds to an
2454 // element of FromTy. Remap it so that each element will correspond
2455 // to an element of ToTy.
2456 assert(isa<VectorType>(Mask->getType()));
2457
2458 Type *FromSTy = FromTy->getScalarType();
2459 Type *ToSTy = ToTy->getScalarType();
2460 if (FromSTy == ToSTy)
2461 return Mask;
2462
2463 int FromSize = getSizeOf(FromSTy);
2464 int ToSize = getSizeOf(ToSTy);
2465 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
2466
2467 auto *MaskTy = cast<VectorType>(Mask->getType());
2468 int FromCount = length(MaskTy);
2469 int ToCount = (FromCount * FromSize) / ToSize;
2470 assert((FromCount * FromSize) % ToSize == 0);
2471
2472 auto *FromITy = getIntTy(FromSize * 8);
2473 auto *ToITy = getIntTy(ToSize * 8);
2474
2475 // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
2476 // -> trunc to <M x i1>.
2477 Value *Ext = Builder.CreateSExt(
2478 Mask, VectorType::get(FromITy, FromCount, /*Scalable=*/false), "sxt");
2479 Value *Cast = Builder.CreateBitCast(
2480 Ext, VectorType::get(ToITy, ToCount, /*Scalable=*/false), "cst");
2481 return Builder.CreateTrunc(
2482 Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable=*/false), "trn");
2483}
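
// Worked example (assumed types): remapping a mask <8 x i1> from i32 elements
// to i8 elements gives FromSize == 4, ToSize == 1 and ToCount == 32; each
// lane is sign-extended to i32 (all-ones or zero), bitcast to four i8 lanes,
// and truncated back to i1, so every original bit appears four times in the
// <32 x i1> result.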
2484
2485// Bitcast to bytes, and return least significant bits.
2486auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
2487 -> Value * {
2488 Type *ScalarTy = Val->getType()->getScalarType();
2489 if (ScalarTy == getBoolTy())
2490 return Val;
2491
2492 Value *Bytes = vbytes(Builder, Val);
2493 if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
2494 return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
2495 // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
2496 // <1 x i1>.
2497 return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");
2498}
2499
2500// Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
2501auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
2502 -> Value * {
2503 Type *ScalarTy = Val->getType()->getScalarType();
2504 if (ScalarTy == getByteTy())
2505 return Val;
2506
2507 if (ScalarTy != getBoolTy())
2508 return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
2509 // For bool, return a sext from i1 to i8.
2510 if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
2511 return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
2512 return Builder.CreateSExt(Val, getByteTy(), "sxt");
2513}
2514
2515auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
2516 unsigned Start, unsigned Length) const
2517 -> Value * {
2518 assert(Start + Length <= length(Val));
2519 return getElementRange(Builder, Val, /*Ignored*/ Val, Start, Length);
2520}
2521
2522auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
2523 -> Value * {
2524 size_t Len = length(Val);
2525 assert(Len % 2 == 0 && "Length should be even");
2526 return subvector(Builder, Val, 0, Len / 2);
2527}
2528
2529auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
2530 -> Value * {
2531 size_t Len = length(Val);
2532 assert(Len % 2 == 0 && "Length should be even");
2533 return subvector(Builder, Val, Len / 2, Len / 2);
2534}
2535
2536auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
2537 Value *Val1) const -> Value * {
2538 assert(Val0->getType() == Val1->getType());
2539 int Len = length(Val0);
2540 SmallVector<int, 128> Mask(2 * Len);
2541
2542 for (int i = 0; i != Len; ++i) {
2543 Mask[i] = 2 * i; // Even
2544 Mask[i + Len] = 2 * i + 1; // Odd
2545 }
2546 return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
2547}
2548
2549auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
2550 Value *Val1) const -> Value * { //
2551 assert(Val0->getType() == Val1->getType());
2552 int Len = length(Val0);
2553 SmallVector<int, 128> Mask(2 * Len);
2554
2555 for (int i = 0; i != Len; ++i) {
2556 Mask[2 * i + 0] = i; // Val0
2557 Mask[2 * i + 1] = i + Len; // Val1
2558 }
2559 return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
2560}
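
// Illustrative masks (hypothetical Len == 4): vdeal produces
// [0, 2, 4, 6, 1, 3, 5, 7] (even elements, then odd elements), while vshuff
// produces [0, 4, 1, 5, 2, 6, 3, 7] (interleaving Val0 and Val1).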
2561
2562auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
2563 Intrinsic::ID IntID, Type *RetTy,
2564 ArrayRef<Value *> Args,
2565 ArrayRef<Type *> ArgTys,
2566 ArrayRef<Value *> MDSources) const
2567 -> Value * {
2568 auto getCast = [&](IRBuilderBase &Builder, Value *Val,
2569 Type *DestTy) -> Value * {
2570 Type *SrcTy = Val->getType();
2571 if (SrcTy == DestTy)
2572 return Val;
2573
2574 // Non-HVX type. It should be a scalar, and it should already have
2575 // a valid type.
2576 assert(HST.isTypeForHVX(SrcTy, /*IncludeBool=*/true));
2577
2578 Type *BoolTy = Type::getInt1Ty(F.getContext());
2579 if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
2580 return Builder.CreateBitCast(Val, DestTy, "cst");
2581
2582 // Predicate HVX vector.
2583 unsigned HwLen = HST.getVectorLength();
2584 Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
2585 : Intrinsic::hexagon_V6_pred_typecast_128B;
2586 return Builder.CreateIntrinsic(TC, {DestTy, Val->getType()}, {Val},
2587 /*FMFSource=*/nullptr, "cup");
2588 };
2589
2590 Function *IntrFn =
2591 Intrinsic::getOrInsertDeclaration(F.getParent(), IntID, ArgTys);
2592 FunctionType *IntrTy = IntrFn->getFunctionType();
2593
2594 SmallVector<Value *, 4> IntrArgs;
2595 for (int i = 0, e = Args.size(); i != e; ++i) {
2596 Value *A = Args[i];
2597 Type *T = IntrTy->getParamType(i);
2598 if (A->getType() != T) {
2599 IntrArgs.push_back(getCast(Builder, A, T));
2600 } else {
2601 IntrArgs.push_back(A);
2602 }
2603 }
2604 StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
2605 CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);
2606
2607 MemoryEffects ME = Call->getAttributes().getMemoryEffects();
2608 if (!ME.doesNotAccessMemory() && !ME.onlyAccessesInaccessibleMem())
2609 propagateMetadata(Call, MDSources);
2610
2611 Type *CallTy = Call->getType();
2612 if (RetTy == nullptr || CallTy == RetTy)
2613 return Call;
2614 // Scalar types should have RetTy matching the call return type.
2615 assert(HST.isTypeForHVX(CallTy, /*IncludeBool=*/true));
2616 return getCast(Builder, Call, RetTy);
2617}
2618
2619auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
2620 Value *Vec,
2621 unsigned ToWidth) const
2622 -> SmallVector<Value *> {
2623 // Break a vector of wide elements into a series of vectors with narrow
2624 // elements:
2625 // (...c0:b0:a0, ...c1:b1:a1, ...c2:b2:a2, ...)
2626 // -->
2627 // (a0, a1, a2, ...) // lowest "ToWidth" bits
2628 // (b0, b1, b2, ...) // the next lowest...
2629 // (c0, c1, c2, ...) // ...
2630 // ...
2631 //
2632 // The number of elements in each resulting vector is the same as
2633 // in the original vector.
2634
2635 auto *VecTy = cast<VectorType>(Vec->getType());
2636 assert(VecTy->getElementType()->isIntegerTy());
2637 unsigned FromWidth = VecTy->getScalarSizeInBits();
2638 assert(isPowerOf2_32(ToWidth) && isPowerOf2_32(FromWidth));
2639 assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
2640 unsigned NumResults = FromWidth / ToWidth;
2641
2642 SmallVector<Value *> Results(NumResults);
2643 Results[0] = Vec;
2644 unsigned Length = length(VecTy);
2645
2646 // Do it by splitting in half, since those operations correspond to deal
2647 // instructions.
2648 auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
2649 // Take V = Results[Begin], split it in L, H.
2650 // Store Results[Begin] = L, Results[(Begin+End)/2] = H
2651 // Call itself recursively: split(Begin, Half), split(Half, End)
2652 if (Begin + 1 == End)
2653 return;
2654
2655 Value *Val = Results[Begin];
2656 unsigned Width = Val->getType()->getScalarSizeInBits();
2657
2658 auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
2659 Value *VVal = Builder.CreateBitCast(Val, VTy, "cst");
2660
2661 Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
2662
2663 unsigned Half = (Begin + End) / 2;
2664 Results[Begin] = sublo(Builder, Res);
2665 Results[Half] = subhi(Builder, Res);
2666
2667 splitFunc(Begin, Half, splitFunc);
2668 splitFunc(Half, End, splitFunc);
2669 };
2670
2671 splitInHalf(0, NumResults, splitInHalf);
2672 return Results;
2673}
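
// Worked example (assumed types): splitting <32 x i64> down to ToWidth == 32
// produces two <32 x i32> vectors (all low words, then all high words);
// splitting to ToWidth == 16 recurses once more and produces four <32 x i16>
// vectors ordered from least significant halfword to most significant.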
2674
2675auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
2676 ArrayRef<Value *> Values,
2677 VectorType *ToType) const
2678 -> Value * {
2679 assert(ToType->getElementType()->isIntegerTy());
2680
2681 // If the list of values does not have a power-of-2 number of elements,
2682 // append copies of the sign bit to it, to make the size be 2^n.
2683 // The reason for this is that the values will be joined in pairs, because
2684 // otherwise the shuffles will result in convoluted code. With pairwise
2685 // joins, the shuffles will hopefully be folded into a perfect shuffle.
2686 // The output will need to be sign-extended to a type with element width
2687 // being a power of 2 anyway.
2688 SmallVector<Value *> Inputs(Values);
2689
2690 unsigned ToWidth = ToType->getScalarSizeInBits();
2691 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
2692 assert(Width <= ToWidth);
2693 assert(isPowerOf2_32(Width) && isPowerOf2_32(ToWidth));
2694 unsigned Length = length(Inputs.front()->getType());
2695
2696 unsigned NeedInputs = ToWidth / Width;
2697 if (Inputs.size() != NeedInputs) {
2698 // Having too many inputs is ok: drop the high bits (usual wrap-around).
2699 // If there are too few, fill them with the sign bit.
2700 Value *Last = Inputs.back();
2701 Value *Sign = Builder.CreateAShr(
2702 Last, getConstSplat(Last->getType(), Width - 1), "asr");
2703 Inputs.resize(NeedInputs, Sign);
2704 }
2705
2706 while (Inputs.size() > 1) {
2707 Width *= 2;
2708 auto *VTy = VectorType::get(getIntTy(Width), Length, false);
2709 for (int i = 0, e = Inputs.size(); i < e; i += 2) {
2710 Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
2711 Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");
2712 }
2713 Inputs.resize(Inputs.size() / 2);
2714 }
2715
2716 assert(Inputs.front()->getType() == ToType);
2717 return Inputs.front();
2718}
2719
2720auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
2721 Value *Ptr1) const
2722 -> std::optional<int> {
2723 // Try SCEV first.
2724 const SCEV *Scev0 = SE.getSCEV(Ptr0);
2725 const SCEV *Scev1 = SE.getSCEV(Ptr1);
2726 const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
2727 if (auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
2728 APInt V = Const->getAPInt();
2729 if (V.isSignedIntN(8 * sizeof(int)))
2730 return static_cast<int>(V.getSExtValue());
2731 }
2732
2733 struct Builder : IRBuilder<> {
2734 Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
2735 ~Builder() {
2736 for (Instruction *I : llvm::reverse(ToErase))
2737 I->eraseFromParent();
2738 }
2739 SmallVector<Instruction *, 8> ToErase;
2740 };
2741
2742#define CallBuilder(B, F) \
2743 [&](auto &B_) { \
2744 Value *V = B_.F; \
2745 if (auto *I = dyn_cast<Instruction>(V)) \
2746 B_.ToErase.push_back(I); \
2747 return V; \
2748 }(B)
2749
2750 auto Simplify = [this](Value *V) {
2751 if (Value *S = simplify(V))
2752 return S;
2753 return V;
2754 };
2755
2756 auto StripBitCast = [](Value *V) {
2757 while (auto *C = dyn_cast<BitCastInst>(V))
2758 V = C->getOperand(0);
2759 return V;
2760 };
2761
2762 Ptr0 = StripBitCast(Ptr0);
2763 Ptr1 = StripBitCast(Ptr1);
2764 if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
2765 return std::nullopt;
2766
2767 auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
2768 auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
2769 if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
2770 return std::nullopt;
2771 if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
2772 return std::nullopt;
2773
2774 Builder B(Gep0->getParent());
2775 int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);
2776
2777 // FIXME: for now only check GEPs with a single index.
2778 if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
2779 return std::nullopt;
2780
2781 Value *Idx0 = Gep0->getOperand(1);
2782 Value *Idx1 = Gep1->getOperand(1);
2783
2784 // First, try to simplify the subtraction directly.
2785 if (auto *Diff = dyn_cast<ConstantInt>(
2786 Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
2787 return Diff->getSExtValue() * Scale;
2788
2789 KnownBits Known0 = getKnownBits(Idx0, Gep0);
2790 KnownBits Known1 = getKnownBits(Idx1, Gep1);
2791 APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
2792 if (Unknown.isAllOnes())
2793 return std::nullopt;
2794
2795 Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
2796 Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
2797 Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
2798 Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
2799 int Diff0 = 0;
2800 if (auto *C = dyn_cast<ConstantInt>(SubU)) {
2801 Diff0 = C->getSExtValue();
2802 } else {
2803 return std::nullopt;
2804 }
2805
2806 Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
2807 Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
2808 Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
2809 Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
2810 int Diff1 = 0;
2811 if (auto *C = dyn_cast<ConstantInt>(SubK)) {
2812 Diff1 = C->getSExtValue();
2813 } else {
2814 return std::nullopt;
2815 }
2816
2817 return (Diff0 + Diff1) * Scale;
2818
2819#undef CallBuilder
2820}
2821
2822auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
2823 const Instruction *CtxI) const
2824 -> unsigned {
2825 return ComputeMaxSignificantBits(V, DL, &AC, CtxI, &DT);
2826}
2827
2828auto HexagonVectorCombine::getKnownBits(const Value *V,
2829 const Instruction *CtxI) const
2830 -> KnownBits {
2831 return computeKnownBits(V, DL, &AC, CtxI, &DT);
2832}
2833
2834auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
2835 if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
2836 In.isFenceLike() || In.mayReadOrWriteMemory()) {
2837 return false;
2838 }
2839 if (isa<CallBase>(In) || isa<AllocaInst>(In))
2840 return false;
2841 return true;
2842}
2843
2844template <typename T>
2845auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
2846 BasicBlock::const_iterator To,
2847 const T &IgnoreInsts) const
2848 -> bool {
2849 auto getLocOrNone =
2850 [this](const Instruction &I) -> std::optional<MemoryLocation> {
2851 if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
2852 switch (II->getIntrinsicID()) {
2853 case Intrinsic::masked_load:
2854 return MemoryLocation::getForArgument(II, 0, TLI);
2855 case Intrinsic::masked_store:
2856 return MemoryLocation::getForArgument(II, 1, TLI);
2857 }
2858 }
2859 return std::nullopt;
2860 };
2861
2862 // The source and the destination must be in the same basic block.
2863 const BasicBlock &Block = *In.getParent();
2864 assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
2865 // No PHIs.
2866 if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
2867 return false;
2868
2869 if (!In.mayReadOrWriteMemory())
2870 return true;
2871 bool MayWrite = In.mayWriteToMemory();
2872 auto MaybeLoc = getLocOrNone(In);
2873
2874 auto From = In.getIterator();
2875 if (From == To)
2876 return true;
2877 bool MoveUp = (To != Block.end() && To->comesBefore(&In));
2878 auto Range =
2879 MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
2880 for (auto It = Range.first; It != Range.second; ++It) {
2881 const Instruction &I = *It;
2882 if (llvm::is_contained(IgnoreInsts, &I))
2883 continue;
2884 // assume intrinsic can be ignored
2885 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
2886 if (II->getIntrinsicID() == Intrinsic::assume)
2887 continue;
2888 }
2889 // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
2890 if (I.mayThrow())
2891 return false;
2892 if (auto *CB = dyn_cast<CallBase>(&I)) {
2893 if (!CB->hasFnAttr(Attribute::WillReturn))
2894 return false;
2895 if (!CB->hasFnAttr(Attribute::NoSync))
2896 return false;
2897 }
2898 if (I.mayReadOrWriteMemory()) {
2899 auto MaybeLocI = getLocOrNone(I);
2900 if (MayWrite || I.mayWriteToMemory()) {
2901 if (!MaybeLoc || !MaybeLocI)
2902 return false;
2903 if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
2904 return false;
2905 }
2906 }
2907 }
2908 return true;
2909}
2910
2911auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
2912 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2913 return VecTy->getElementType() == getByteTy();
2914 return false;
2915}
2916
2917auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
2918 Value *Hi, int Start,
2919 int Length) const -> Value * {
2920 assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
2921 SmallVector<int, 128> SMask(Length);
2922 std::iota(SMask.begin(), SMask.end(), Start);
2923 return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
2924}
2925
2926// Pass management.
2927
2928namespace {
2929class HexagonVectorCombineLegacy : public FunctionPass {
2930public:
2931 static char ID;
2932
2933 HexagonVectorCombineLegacy() : FunctionPass(ID) {}
2934
2935 StringRef getPassName() const override { return "Hexagon Vector Combine"; }
2936
2937 void getAnalysisUsage(AnalysisUsage &AU) const override {
2938 AU.setPreservesCFG();
2939 AU.addRequired<AAResultsWrapperPass>();
2940 AU.addRequired<AssumptionCacheTracker>();
2941 AU.addRequired<DominatorTreeWrapperPass>();
2942 AU.addRequired<ScalarEvolutionWrapperPass>();
2943 AU.addRequired<TargetLibraryInfoWrapperPass>();
2944 AU.addRequired<TargetPassConfig>();
2945 FunctionPass::getAnalysisUsage(AU);
2946 }
2947
2948 bool runOnFunction(Function &F) override {
2949 if (skipFunction(F))
2950 return false;
2951 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2952 AssumptionCache &AC =
2953 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2954 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2955 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2956 TargetLibraryInfo &TLI =
2957 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2958 auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
2959 HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
2960 return HVC.run();
2961 }
2962};
2963} // namespace
2964
2965char HexagonVectorCombineLegacy::ID = 0;
2966
2967INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
2968 "Hexagon Vector Combine", false, false)
2969 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2970 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2971 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2972 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2973 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2974 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
2975 INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
2976 "Hexagon Vector Combine", false, false)
2977
2978 FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
2979 return new HexagonVectorCombineLegacy();
2980}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Prepare AGPR Alloc
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static IntegerType * getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
hexagon bit simplify
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
#define CallBuilder(B, F)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
#define H(x, y, z)
Definition MD5.cpp:57
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
static bool isUndef(const MachineInstr &MI)
This file contains the declarations for metadata subclasses.
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
Remove Loads Into Fake Uses
static ConstantInt * getConstInt(MDNode *MD, unsigned NumOp)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
Target-Independent Code Generator Pass Configuration Options pass.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
AttributeList getAttributes() const
Return the attributes for this call.
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
iterator_range< iterator > children()
NodeT * getBlock() const
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool empty() const
Definition Function.h:857
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
const BasicBlock & back() const
Definition Function.h:860
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
unsigned getVectorLength() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
Intrinsic::ID getIntrinsicId(unsigned Opc) const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2626
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2097
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2336
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2466
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2207
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1492
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2085
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2601
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1551
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2197
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1532
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
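The IRBuilder calls listed above are the building blocks the pass uses to emit straight-line IR. A standalone sketch (names invented) that packs two i16 values into an i32 with CreateZExt, CreateShl and CreateOr:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Hypothetical helper: widen two i16 halves and pack them into an i32.
    static Value *packHalves(IRBuilder<> &B, Value *Lo, Value *Hi) {
      Type *Int32Ty = B.getInt32Ty();
      Value *LoZ = B.CreateZExt(Lo, Int32Ty, "lo32");  // zero-extend low half
      Value *HiZ = B.CreateZExt(Hi, Int32Ty, "hi32");  // zero-extend high half
      Value *HiS = B.CreateShl(HiZ, B.getInt32(16));   // move into upper bits
      return B.CreateOr(HiS, LoZ, "packed");           // combine the halves
    }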
const char * getOpcodeName() const
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
An instruction for reading from memory.
bool doesNotAccessMemory() const
Whether this function accesses no memory.
Definition ModRef.h:215
bool onlyAccessesInaccessibleMem() const
Whether this function only (at most) accesses inaccessible memory.
Definition ModRef.h:234
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
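A minimal sketch of the alias query these MemoryLocation helpers enable; the wrapper name is hypothetical, and falling back to "may alias" when no location is available is an assumption of this example:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/Instruction.h"
    #include <optional>
    using namespace llvm;

    // Hypothetical helper: conservatively decide whether two memory
    // instructions may access overlapping locations.
    static bool mayAliasConservatively(AAResults &AA, const Instruction *A,
                                       const Instruction *B) {
      std::optional<MemoryLocation> LocA = MemoryLocation::getOrNone(A);
      std::optional<MemoryLocation> LocB = MemoryLocation::getOrNone(B);
      if (!LocA || !LocB)
        return true; // no location available: assume the worst
      return !AA.isNoAlias(*LocA, *LocB);
    }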
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
The main scalar evolution driver.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInfo-derived per-function subtarget object.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:246
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
Rounding
Possible values of current rounding mode, which is specified in bits 23:22 of FPCR.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
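A hedged sketch of the declare-then-call pattern; llvm.umax stands in for whatever intrinsic ID a caller actually needs (the pass itself emits Hexagon HVX intrinsic IDs):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Hypothetical helper: materialize the overloaded llvm.umax declaration
    // for X's type and call it.
    static Value *emitUMax(IRBuilder<> &B, Module *M, Value *X, Value *Y) {
      Function *Decl =
          Intrinsic::getOrInsertDeclaration(M, Intrinsic::umax, {X->getType()});
      return B.CreateCall(Decl, {X, Y}, "umax");
    }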
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
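A standalone sketch of the PatternMatch idiom built from the matchers above: recognize a multiply whose result is right-shifted by a constant amount. The helper name and bound variables are illustrative:

    #include "llvm/IR/PatternMatch.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Hypothetical helper: match V against (X * Y) >> C, binding the
    // operands and the constant shift amount on success.
    static bool isShiftedMul(Value *V, Value *&X, Value *&Y, ConstantInt *&C) {
      return match(V, m_Shr(m_Mul(m_Value(X), m_Value(Y)), m_ConstantInt(C)));
    }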
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI Instruction * getTerminator() const
LLVM_ABI Instruction & front() const
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createHexagonVectorCombineLegacyPass()
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
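A tiny sketch of the range-based wrapper in use; the helper and the notion of a "group" of instructions are illustrative only:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Hypothetical helper: check that every instruction in a group is a load.
    static bool allLoads(const SmallVectorImpl<Instruction *> &Group) {
      return all_of(Group, [](const Instruction *I) { return isa<LoadInst>(I); });
    }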
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
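A hedged sketch of the usual cleanup sequence around this utility: redirect uses with replaceAllUsesWith, then let the now-dead chain be deleted. The helper name is invented:

    #include "llvm/IR/Instruction.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    // Hypothetical helper: replace every use of Old with New, then delete
    // Old (and any operands that became dead) if nothing uses it anymore.
    static void replaceAndClean(Instruction *Old, Value *New) {
      Old->replaceAllUsesWith(New);
      RecursivelyDeleteTriviallyDeadInstructions(Old);
    }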
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:296
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1777
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
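A hedged sketch of how simplifyInstruction is typically consulted after building new IR; the wrapper name is invented and the SimplifyQuery is assumed to be set up by the caller:

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Hypothetical helper: return a simpler equivalent value if one exists,
    // otherwise the instruction itself. The caller may then RAUW and erase I.
    static Value *foldIfPossible(Instruction *I, const SimplifyQuery &Q) {
      if (Value *Simplified = simplifyInstruction(I, Q))
        return Simplified;
      return I;
    }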
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:95
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
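A standalone sketch (helper name invented) of a typical KnownBits query: check that the low NumBits bits of a value are provably zero. The optional AssumptionCache/context/DominatorTree arguments are omitted here but can sharpen the result:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // Hypothetical helper: true if the NumBits lowest bits of V are known zero.
    static bool lowBitsKnownZero(const Value *V, unsigned NumBits,
                                 const DataLayout &DL) {
      KnownBits Known(DL.getTypeSizeInBits(V->getType()).getFixedValue());
      computeKnownBits(V, Known, DL);
      return Known.countMinTrailingZeros() >= NumBits;
    }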
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
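A small illustrative sketch of this alignment arithmetic, rounding a byte count up to a power-of-two vector length; the function and the example values are made up:

    #include "llvm/Support/Alignment.h"
    #include "llvm/Support/MathExtras.h"
    #include <cassert>
    using namespace llvm;

    // Hypothetical helper: round Bytes up to the next multiple of VecLen.
    static uint64_t roundToVector(uint64_t Bytes, uint64_t VecLen) {
      assert(isPowerOf2_64(VecLen) && "vector length must be a power of two");
      return alignTo(Bytes, Align(VecLen)); // e.g. roundToVector(70, 128) == 128
    }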
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2030
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if, which is equivalent to: C.erase(remove_if(C, pred), C.end());
Definition STLExtras.h:2120
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instruction I depend on values not reachable through the def-use graph.
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316