1//===-- HexagonVectorCombine.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// HexagonVectorCombine is a utility class implementing a variety of functions
9// that assist in vector-based optimizations.
10//
11// AlignVectors: replace unaligned vector loads and stores with aligned ones.
12// HvxIdioms: recognize various opportunities to generate HVX intrinsic code.
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/IRBuilder.h"
33#include "llvm/IR/Intrinsics.h"
34#include "llvm/IR/IntrinsicsHexagon.h"
35#include "llvm/IR/Metadata.h"
38#include "llvm/Pass.h"
45
46#include "Hexagon.h"
47#include "HexagonSubtarget.h"
49
50#include <algorithm>
51#include <deque>
52#include <map>
53#include <optional>
54#include <set>
55#include <utility>
56#include <vector>
57
58#define DEBUG_TYPE "hexagon-vc"
59
60using namespace llvm;
61
62namespace {
63cl::opt<bool> DumpModule("hvc-dump-module", cl::Hidden);
64cl::opt<bool> VAEnabled("hvc-va", cl::Hidden, cl::init(true)); // Align
65cl::opt<bool> VIEnabled("hvc-vi", cl::Hidden, cl::init(true)); // Idioms
66cl::opt<bool> VADoFullStores("hvc-va-full-stores", cl::Hidden);
67
68cl::opt<unsigned> VAGroupCountLimit("hvc-va-group-count-limit", cl::Hidden,
69 cl::init(~0));
70cl::opt<unsigned> VAGroupSizeLimit("hvc-va-group-size-limit", cl::Hidden,
71 cl::init(~0));
72
73class HexagonVectorCombine {
74public:
75 HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
76 DominatorTree &DT_, ScalarEvolution &SE_,
77 TargetLibraryInfo &TLI_, const TargetMachine &TM_)
78 : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
79 SE(SE_), TLI(TLI_),
80 HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}
81
82 bool run();
83
84 // Common integer type.
85 IntegerType *getIntTy(unsigned Width = 32) const;
86 // Byte type: either scalar (when ElemCount = 0), or vector with given
87 // element count.
88 Type *getByteTy(int ElemCount = 0) const;
89 // Boolean type: either scalar (when ElemCount = 0), or vector with given
90 // element count.
91 Type *getBoolTy(int ElemCount = 0) const;
92 // Create a ConstantInt of type returned by getIntTy with the value Val.
93 ConstantInt *getConstInt(int Val, unsigned Width = 32) const;
94 // Get the integer value of V, if it exists.
95 std::optional<APInt> getIntValue(const Value *Val) const;
96 // Is Val a constant 0, or a vector of 0s?
97 bool isZero(const Value *Val) const;
98 // Is Val an undef value?
99 bool isUndef(const Value *Val) const;
100 // Is Val a scalar (i1 true) or a vector of (i1 true)?
101 bool isTrue(const Value *Val) const;
102 // Is Val a scalar (i1 false) or a vector of (i1 false)?
103 bool isFalse(const Value *Val) const;
104
105 // Get HVX vector type with the given element type.
106 VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;
107
108 enum SizeKind {
109 Store, // Store size
110 Alloc, // Alloc size
111 };
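// Illustrative only (exact values depend on the DataLayout): for i32 both
// size kinds are 4 bytes, while for an odd-sized type such as i19 the store
// size is 3 bytes (ceil(19/8)) and the alloc size is that rounded up to the
// type's ABI alignment, e.g. 4 bytes.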
112 int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
113 int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
114 int getTypeAlignment(Type *Ty) const;
115 size_t length(Value *Val) const;
116 size_t length(Type *Ty) const;
117
118 Constant *getNullValue(Type *Ty) const;
119 Constant *getFullValue(Type *Ty) const;
120 Constant *getConstSplat(Type *Ty, int Val) const;
121
122 Value *simplify(Value *Val) const;
123
124 Value *insertb(IRBuilderBase &Builder, Value *Dest, Value *Src, int Start,
125 int Length, int Where) const;
126 Value *vlalignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
127 Value *Amt) const;
128 Value *vralignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
129 Value *Amt) const;
130 Value *concat(IRBuilderBase &Builder, ArrayRef<Value *> Vecs) const;
131 Value *vresize(IRBuilderBase &Builder, Value *Val, int NewSize,
132 Value *Pad) const;
133 Value *rescale(IRBuilderBase &Builder, Value *Mask, Type *FromTy,
134 Type *ToTy) const;
135 Value *vlsb(IRBuilderBase &Builder, Value *Val) const;
136 Value *vbytes(IRBuilderBase &Builder, Value *Val) const;
137 Value *subvector(IRBuilderBase &Builder, Value *Val, unsigned Start,
138 unsigned Length) const;
139 Value *sublo(IRBuilderBase &Builder, Value *Val) const;
140 Value *subhi(IRBuilderBase &Builder, Value *Val) const;
141 Value *vdeal(IRBuilderBase &Builder, Value *Val0, Value *Val1) const;
142 Value *vshuff(IRBuilderBase &Builder, Value *Val0, Value *Val1) const;
143
144 Value *createHvxIntrinsic(IRBuilderBase &Builder, Intrinsic::ID IntID,
145 Type *RetTy, ArrayRef<Value *> Args,
146 ArrayRef<Type *> ArgTys = {},
147 ArrayRef<Value *> MDSources = {}) const;
148 SmallVector<Value *> splitVectorElements(IRBuilderBase &Builder, Value *Vec,
149 unsigned ToWidth) const;
150 Value *joinVectorElements(IRBuilderBase &Builder, ArrayRef<Value *> Values,
151 VectorType *ToType) const;
152
153 std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
154
155 unsigned getNumSignificantBits(const Value *V,
156 const Instruction *CtxI = nullptr) const;
157 KnownBits getKnownBits(const Value *V,
158 const Instruction *CtxI = nullptr) const;
159
160 bool isSafeToClone(const Instruction &In) const;
161
162 template <typename T = std::vector<Instruction *>>
163 bool isSafeToMoveBeforeInBB(const Instruction &In,
164 BasicBlock::const_iterator To,
165 const T &IgnoreInsts = {}) const;
166
167 // This function is only used for assertions at the moment.
168 [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
169
170 Function &F;
171 const DataLayout &DL;
172 AliasAnalysis &AA;
173 AssumptionCache &AC;
174 DominatorTree &DT;
175 ScalarEvolution &SE;
176 TargetLibraryInfo &TLI;
177 const HexagonSubtarget &HST;
178
179private:
180 Value *getElementRange(IRBuilderBase &Builder, Value *Lo, Value *Hi,
181 int Start, int Length) const;
182};
183
184class AlignVectors {
185 // This code tries to replace unaligned vector loads/stores with aligned
186 // ones.
187 // Consider unaligned load:
188 // %v = original_load %some_addr, align <bad>
189 // %user = %v
190 // It will generate
191 // = load ..., align <good>
192 // = load ..., align <good>
193 // = valign
194 // etc.
195 // %synthesize = combine/shuffle the loaded data so that it looks
196 // exactly like what "original_load" has loaded.
197 // %user = %synthesize
198 // Similarly for stores.
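// A concrete sketch of the rewrite (illustrative IR; actual vector types and
// alignments depend on the subtarget):
//   %v = load <64 x i8>, ptr %p, align 1
// becomes approximately
//   %a  = align_down(%p, 64)
//   %lo = load <64 x i8>, ptr %a,      align 64
//   %hi = load <64 x i8>, ptr %a + 64, align 64
//   %v.new = valign of %lo/%hi by the run-time misalignment (%p & 63)
// after which every user of %v is rewritten to use %v.new instead.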
199public:
200 AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
201
202 bool run();
203
204private:
205 using InstList = std::vector<Instruction *>;
206 using InstMap = DenseMap<Instruction *, Instruction *>;
207
208 struct AddrInfo {
209 AddrInfo(const AddrInfo &) = default;
210 AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T,
211 Align H)
212 : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
213 NeedAlign(HVC.getTypeAlignment(ValTy)) {}
214 AddrInfo &operator=(const AddrInfo &) = default;
215
216 // XXX: add Size member?
217 Instruction *Inst;
218 Value *Addr;
219 Type *ValTy;
220 Align HaveAlign;
221 Align NeedAlign;
222 int Offset = 0; // Offset (in bytes) from the first member of the
223 // containing AddrList.
224 };
225 using AddrList = std::vector<AddrInfo>;
226
227 struct InstrLess {
228 bool operator()(const Instruction *A, const Instruction *B) const {
229 return A->comesBefore(B);
230 }
231 };
232 using DepList = std::set<Instruction *, InstrLess>;
233
234 struct MoveGroup {
235 MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
236 : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
237 MoveGroup() = default;
238 Instruction *Base; // Base instruction of the parent address group.
239 InstList Main; // Main group of instructions.
240 InstList Deps; // List of dependencies.
241 InstMap Clones; // Map from original Deps to cloned ones.
242 bool IsHvx; // Is this a group of HVX instructions?
243 bool IsLoad; // Is this a load group?
244 };
245 using MoveList = std::vector<MoveGroup>;
246
247 struct ByteSpan {
248 // A representation of "interesting" bytes within a given span of memory.
249 // These bytes are those that are loaded or stored, and they don't have
250 // to cover the entire span of memory.
251 //
252 // The representation works by picking a contiguous sequence of bytes
253 // from somewhere within a llvm::Value, and placing it at a given offset
254 // within the span.
255 //
256 // The sequence of bytes from llvm::Value is represented by Segment.
257 // Block is Segment, plus where it goes in the span.
258 //
259 // An important feature of ByteSpan is being able to make a "section",
260 // i.e. creating another ByteSpan corresponding to a range of offsets
261 // relative to the source span.
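// Worked example (hypothetical values): a Block {Seg = {V, Start = 2,
// Size = 8}, Pos = 16} says "bytes 2..9 of value V occupy offsets 16..23 of
// the span". Calling section(20, 8) on a span containing that block returns
// a span holding {Seg = {V, 6, 4}, Pos = 20}, i.e. only the overlapping bytes.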
262
263 struct Segment {
264 // Segment of a Value: 'Len' bytes starting at byte 'Begin'.
265 Segment(Value *Val, int Begin, int Len)
266 : Val(Val), Start(Begin), Size(Len) {}
267 Segment(const Segment &Seg) = default;
268 Segment &operator=(const Segment &Seg) = default;
269 Value *Val; // Value representable as a sequence of bytes.
270 int Start; // First byte of the value that belongs to the segment.
271 int Size; // Number of bytes in the segment.
272 };
273
274 struct Block {
275 Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
276 Block(Value *Val, int Off, int Len, int Pos)
277 : Seg(Val, Off, Len), Pos(Pos) {}
278 Block(const Block &Blk) = default;
279 Block &operator=(const Block &Blk) = default;
280 Segment Seg; // Value segment.
281 int Pos; // Position (offset) of the block in the span.
282 };
283
284 int extent() const;
285 ByteSpan section(int Start, int Length) const;
286 ByteSpan &shift(int Offset);
287 SmallVector<Value *, 8> values() const;
288
289 int size() const { return Blocks.size(); }
290 Block &operator[](int i) { return Blocks[i]; }
291 const Block &operator[](int i) const { return Blocks[i]; }
292
293 std::vector<Block> Blocks;
294
295 using iterator = decltype(Blocks)::iterator;
296 iterator begin() { return Blocks.begin(); }
297 iterator end() { return Blocks.end(); }
298 using const_iterator = decltype(Blocks)::const_iterator;
299 const_iterator begin() const { return Blocks.begin(); }
300 const_iterator end() const { return Blocks.end(); }
301 };
302
303 Align getAlignFromValue(const Value *V) const;
304 std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
305 bool isHvx(const AddrInfo &AI) const;
306 // This function is only used for assertions at the moment.
307 [[maybe_unused]] bool isSectorTy(Type *Ty) const;
308
309 Value *getPayload(Value *Val) const;
310 Value *getMask(Value *Val) const;
311 Value *getPassThrough(Value *Val) const;
312
313 Value *createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
314 int Adjust,
315 const InstMap &CloneMap = InstMap()) const;
316 Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
317 int Alignment,
318 const InstMap &CloneMap = InstMap()) const;
319
320 Value *createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
321 Value *Predicate, int Alignment, Value *Mask,
322 Value *PassThru, ArrayRef<Value *> MDSources = {}) const;
323 Value *createSimpleLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
324 int Alignment,
325 ArrayRef<Value *> MDSources = {}) const;
326
327 Value *createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
328 Value *Predicate, int Alignment, Value *Mask,
329 ArrayRef<Value *> MDSources = {}) const;
330 Value *createSimpleStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
331 int Alignment,
332 ArrayRef<Value *> MDSources = {}) const;
333
334 Value *createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
335 Value *Predicate, int Alignment,
336 ArrayRef<Value *> MDSources = {}) const;
337 Value *createPredicatedStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
338 Value *Predicate, int Alignment,
339 ArrayRef<Value *> MDSources = {}) const;
340
341 DepList getUpwardDeps(Instruction *In, Instruction *Base) const;
342 bool createAddressGroups();
343 MoveList createLoadGroups(const AddrList &Group) const;
344 MoveList createStoreGroups(const AddrList &Group) const;
345 bool moveTogether(MoveGroup &Move) const;
346 template <typename T>
347 InstMap cloneBefore(BasicBlock::iterator To, T &&Insts) const;
348
349 void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
350 int ScLen, Value *AlignVal, Value *AlignAddr) const;
351 void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
352 int ScLen, Value *AlignVal, Value *AlignAddr) const;
353 bool realignGroup(const MoveGroup &Move) const;
354
355 Value *makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
356 int Alignment) const;
357
358 friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
359 friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
360 friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan::Block &B);
361 friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS);
362
363 std::map<Instruction *, AddrList> AddrGroups;
364 const HexagonVectorCombine &HVC;
365};
366
368raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
369 OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
370 OS << "Addr: " << *AI.Addr << '\n';
371 OS << "Type: " << *AI.ValTy << '\n';
372 OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
373 OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
374 OS << "Offset: " << AI.Offset;
375 return OS;
376}
377
379raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
380 OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
381 OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
382 OS << "Main\n";
383 for (Instruction *I : MG.Main)
384 OS << " " << *I << '\n';
385 OS << "Deps\n";
386 for (Instruction *I : MG.Deps)
387 OS << " " << *I << '\n';
388 OS << "Clones\n";
389 for (auto [K, V] : MG.Clones) {
390 OS << " ";
391 K->printAsOperand(OS, false);
392 OS << "\t-> " << *V << '\n';
393 }
394 return OS;
395}
396
398raw_ostream &operator<<(raw_ostream &OS,
399 const AlignVectors::ByteSpan::Block &B) {
400 OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
401 if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
402 OS << "(self:" << B.Seg.Val << ')';
403 } else if (B.Seg.Val != nullptr) {
404 OS << *B.Seg.Val;
405 } else {
406 OS << "(null)";
407 }
408 return OS;
409}
410
412raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
413 OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
414 for (const AlignVectors::ByteSpan::Block &B : BS)
415 OS << B << '\n';
416 OS << ']';
417 return OS;
418}
419
420class HvxIdioms {
421public:
422 HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
423 auto *Int32Ty = HVC.getIntTy(32);
424 HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
425 HvxP32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/true);
426 }
427
428 bool run();
429
430private:
431 enum Signedness { Positive, Signed, Unsigned };
432
433 // Value + sign
434 // This is to keep track of whether the value should be treated as signed
435 // or unsigned, or is known to be positive.
436 struct SValue {
437 Value *Val;
438 Signedness Sgn;
439 };
440
441 struct FxpOp {
442 unsigned Opcode;
443 unsigned Frac; // Number of fraction bits
444 SValue X, Y;
445 // If present, add 1 << RoundAt before shift:
446 std::optional<unsigned> RoundAt;
447 VectorType *ResTy;
448 };
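// Example of this representation (assumed source pattern): a Q15 fixed-point
// multiply written as (int32_t(a) * b + (1 << 14)) >> 15 is captured as
// Opcode = Mul, Frac = 15, RoundAt = 14, with X and Y carrying the signedness
// of a and b; RoundAt == Frac - 1 is the ":rnd" case in the printer below.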
449
450 auto getNumSignificantBits(Value *V, Instruction *In) const
451 -> std::pair<unsigned, Signedness>;
452 auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
453
454 auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
455 auto processFxpMul(Instruction &In, const FxpOp &Op) const -> Value *;
456
457 auto processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
458 const FxpOp &Op) const -> Value *;
459 auto createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
460 bool Rounding) const -> Value *;
461 auto createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
462 bool Rounding) const -> Value *;
463 // Return {Result, Carry}, where Carry is a vector predicate.
464 auto createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
465 Value *CarryIn = nullptr) const
466 -> std::pair<Value *, Value *>;
467 auto createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const -> Value *;
468 auto createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
469 -> Value *;
470 auto createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
471 -> std::pair<Value *, Value *>;
472 auto createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
473 ArrayRef<Value *> WordY) const -> SmallVector<Value *>;
474 auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
475 Signedness SgnX, ArrayRef<Value *> WordY,
476 Signedness SgnY) const -> SmallVector<Value *>;
477
478 VectorType *HvxI32Ty;
479 VectorType *HvxP32Ty;
480 const HexagonVectorCombine &HVC;
481
482 friend raw_ostream &operator<<(raw_ostream &, const FxpOp &);
483};
484
485[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
486 const HvxIdioms::FxpOp &Op) {
487 static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
488 OS << Instruction::getOpcodeName(Op.Opcode) << '.' << Op.Frac;
489 if (Op.RoundAt.has_value()) {
490 if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
491 OS << ":rnd";
492 } else {
493 OS << " + 1<<" << *Op.RoundAt;
494 }
495 }
496 OS << "\n X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
497 << " Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
498 return OS;
499}
500
501} // namespace
502
503namespace {
504
505template <typename T> T *getIfUnordered(T *MaybeT) {
506 return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
507}
508template <typename T> T *isCandidate(Instruction *In) {
509 return dyn_cast<T>(In);
510}
511template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
512 return getIfUnordered(dyn_cast<LoadInst>(In));
513}
514template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
515 return getIfUnordered(dyn_cast<StoreInst>(In));
516}
517
518#if !defined(_MSC_VER) || _MSC_VER >= 1926
519// VS2017 and some versions of VS2019 have trouble compiling this:
520// error C2976: 'std::map': too few template arguments
521// VS 2019 16.x is known to work, except for 16.4/16.5 (MSC_VER 1924/1925)
522template <typename Pred, typename... Ts>
523void erase_if(std::map<Ts...> &map, Pred p)
524#else
525template <typename Pred, typename T, typename U>
526void erase_if(std::map<T, U> &map, Pred p)
527#endif
528{
529 for (auto i = map.begin(), e = map.end(); i != e;) {
530 if (p(*i))
531 i = map.erase(i);
532 else
533 i = std::next(i);
534 }
535}
536
537// Forward other erase_ifs to the LLVM implementations.
538template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
539 llvm::erase_if(std::forward<T>(container), p);
540}
541
542} // namespace
543
544// --- Begin AlignVectors
545
546// For brevity, only consider loads. We identify a group of loads where we
547// know the relative differences between their addresses, so we know how they
548// are laid out in memory (relative to one another). These loads can overlap,
549// can be shorter or longer than the desired vector length.
550// Ultimately we want to generate a sequence of aligned loads that will load
551// every byte that the original loads loaded, and have the program use these
552// loaded values instead of the original loads.
553// We consider the contiguous memory area spanned by all these loads.
554//
555// Let's say that a single aligned vector load can load 16 bytes at a time.
556// If the program wanted to use a byte at offset 13 from the beginning of the
557// original span, it will be a byte at offset 13+x in the aligned data for
558// some x>=0. This may happen to be in the first aligned load, or in the load
559 // following it. Since we generally don't know what that alignment value
560// is at compile time, we proactively do valigns on the aligned loads, so that
561// byte that was at offset 13 is still at offset 13 after the valigns.
562//
563// This will be the starting point for making the rest of the program use the
564// data loaded by the new loads.
565// For each original load, and its users:
566// %v = load ...
567// ... = %v
568// ... = %v
569// we create
570// %new_v = extract/combine/shuffle data from loaded/valigned vectors so
571// it contains the same value as %v did before
572// then replace all users of %v with %new_v.
573// ... = %new_v
574// ... = %new_v
575
576auto AlignVectors::ByteSpan::extent() const -> int {
577 if (size() == 0)
578 return 0;
579 int Min = Blocks[0].Pos;
580 int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
581 for (int i = 1, e = size(); i != e; ++i) {
582 Min = std::min(Min, Blocks[i].Pos);
583 Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
584 }
585 return Max - Min;
586}
587
588auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
589 ByteSpan Section;
590 for (const ByteSpan::Block &B : Blocks) {
591 int L = std::max(B.Pos, Start); // Left end.
592 int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1.
593 if (L < R) {
594 // How much to chop off the beginning of the segment:
595 int Off = L > B.Pos ? L - B.Pos : 0;
596 Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
597 }
598 }
599 return Section;
600}
601
602auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
603 for (Block &B : Blocks)
604 B.Pos += Offset;
605 return *this;
606}
607
608auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
609 SmallVector<Value *, 8> Values(Blocks.size());
610 for (int i = 0, e = Blocks.size(); i != e; ++i)
611 Values[i] = Blocks[i].Seg.Val;
612 return Values;
613}
614
615auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
616 const auto *C = dyn_cast<ConstantInt>(V);
617 assert(C && "Alignment must be a compile-time constant integer");
618 return C->getAlignValue();
619}
620
621auto AlignVectors::getAddrInfo(Instruction &In) const
622 -> std::optional<AddrInfo> {
623 if (auto *L = isCandidate<LoadInst>(&In))
624 return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
625 L->getAlign());
626 if (auto *S = isCandidate<StoreInst>(&In))
627 return AddrInfo(HVC, S, S->getPointerOperand(),
628 S->getValueOperand()->getType(), S->getAlign());
629 if (auto *II = isCandidate<IntrinsicInst>(&In)) {
630 Intrinsic::ID ID = II->getIntrinsicID();
631 switch (ID) {
632 case Intrinsic::masked_load:
633 return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
634 getAlignFromValue(II->getArgOperand(1)));
635 case Intrinsic::masked_store:
636 return AddrInfo(HVC, II, II->getArgOperand(1),
637 II->getArgOperand(0)->getType(),
638 getAlignFromValue(II->getArgOperand(2)));
639 }
640 }
641 return std::nullopt;
642}
643
644auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
645 return HVC.HST.isTypeForHVX(AI.ValTy);
646}
647
648auto AlignVectors::getPayload(Value *Val) const -> Value * {
649 if (auto *In = dyn_cast<Instruction>(Val)) {
650 Intrinsic::ID ID = 0;
651 if (auto *II = dyn_cast<IntrinsicInst>(In))
652 ID = II->getIntrinsicID();
653 if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
654 return In->getOperand(0);
655 }
656 return Val;
657}
658
659auto AlignVectors::getMask(Value *Val) const -> Value * {
660 if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
661 switch (II->getIntrinsicID()) {
662 case Intrinsic::masked_load:
663 return II->getArgOperand(2);
664 case Intrinsic::masked_store:
665 return II->getArgOperand(3);
666 }
667 }
668
669 Type *ValTy = getPayload(Val)->getType();
670 if (auto *VecTy = dyn_cast<VectorType>(ValTy))
671 return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
672 return HVC.getFullValue(HVC.getBoolTy());
673}
674
675auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
676 if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
677 if (II->getIntrinsicID() == Intrinsic::masked_load)
678 return II->getArgOperand(3);
679 }
680 return UndefValue::get(getPayload(Val)->getType());
681}
682
683auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
684 Type *ValTy, int Adjust,
685 const InstMap &CloneMap) const
686 -> Value * {
687 if (auto *I = dyn_cast<Instruction>(Ptr))
688 if (Instruction *New = CloneMap.lookup(I))
689 Ptr = New;
690 return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");
691}
692
693auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
694 Type *ValTy, int Alignment,
695 const InstMap &CloneMap) const
696 -> Value * {
697 auto remap = [&](Value *V) -> Value * {
698 if (auto *I = dyn_cast<Instruction>(V)) {
699 for (auto [Old, New] : CloneMap)
700 I->replaceUsesOfWith(Old, New);
701 return I;
702 }
703 return V;
704 };
705 Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
706 Value *Mask = HVC.getConstInt(-Alignment);
707 Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
708 return Builder.CreateIntToPtr(
709 And, PointerType::getUnqual(ValTy->getContext()), "itp");
710}
711
712auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
713 Value *Predicate, int Alignment, Value *Mask,
714 Value *PassThru,
715 ArrayRef<Value *> MDSources) const -> Value * {
716 bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
717 // Predicate is nullptr if not creating predicated load
718 if (Predicate) {
719 assert(!Predicate->getType()->isVectorTy() &&
720 "Expecting scalar predicate");
721 if (HVC.isFalse(Predicate))
722 return UndefValue::get(ValTy);
723 if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
724 Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
725 Alignment, MDSources);
726 return Builder.CreateSelect(Mask, Load, PassThru);
727 }
728 // Predicate == true here.
729 }
730 assert(!HVC.isUndef(Mask)); // Should this be allowed?
731 if (HVC.isZero(Mask))
732 return PassThru;
733 if (HVC.isTrue(Mask))
734 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
735
736 Instruction *Load = Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment),
737 Mask, PassThru, "mld");
738 propagateMetadata(Load, MDSources);
739 return Load;
740}
741
742auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
743 Value *Ptr, int Alignment,
744 ArrayRef<Value *> MDSources) const
745 -> Value * {
746 Instruction *Load =
747 Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
748 propagateMetadata(Load, MDSources);
749 return Load;
750}
751
752auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
753 Value *Ptr, Value *Predicate,
754 int Alignment,
755 ArrayRef<Value *> MDSources) const
756 -> Value * {
757 assert(HVC.HST.isTypeForHVX(ValTy) &&
758 "Predicated 'scalar' vector loads not yet supported");
759 assert(Predicate);
760 assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
761 assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
762 if (HVC.isFalse(Predicate))
763 return UndefValue::get(ValTy);
764 if (HVC.isTrue(Predicate))
765 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
766
767 auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
768 // FIXME: This may not put the offset from Ptr into the vmem offset.
769 return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
770 {Predicate, Ptr, HVC.getConstInt(0)}, {},
771 MDSources);
772}
773
774auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
775 Value *Predicate, int Alignment, Value *Mask,
776 ArrayRef<Value *> MDSources) const -> Value * {
777 if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
778 return UndefValue::get(Val->getType());
779 assert(!Predicate || (!Predicate->getType()->isVectorTy() &&
780 "Expecting scalar predicate"));
781 if (Predicate) {
782 if (HVC.isFalse(Predicate))
783 return UndefValue::get(Val->getType());
784 if (HVC.isTrue(Predicate))
785 Predicate = nullptr;
786 }
787 // Here both Predicate and Mask are true or unknown.
788
789 if (HVC.isTrue(Mask)) {
790 if (Predicate) { // Predicate unknown
791 return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
792 MDSources);
793 }
794 // Predicate is true:
795 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
796 }
797
798 // Mask is unknown
799 if (!Predicate) {
800 Instruction *Store =
801 Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
802 propagateMetadata(Store, MDSources);
803 return Store;
804 }
805
806 // Both Predicate and Mask are unknown.
807 // Emulate masked store with predicated-load + mux + predicated-store.
808 Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
809 Predicate, Alignment, MDSources);
810 Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
811 return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
812 MDSources);
813}
814
815auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
816 Value *Ptr, int Alignment,
817 ArrayRef<Value *> MDSources) const
818 -> Value * {
819 Instruction *Store = Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
820 propagateMetadata(Store, MDSources);
821 return Store;
822}
823
824auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
825 Value *Ptr, Value *Predicate,
826 int Alignment,
827 ArrayRef<Value *> MDSources) const
828 -> Value * {
829 assert(HVC.HST.isTypeForHVX(Val->getType()) &&
830 "Predicated 'scalar' vector stores not yet supported");
831 assert(Predicate);
832 if (HVC.isFalse(Predicate))
833 return UndefValue::get(Val->getType());
834 if (HVC.isTrue(Predicate))
835 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
836
837 assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
838 auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
839 // FIXME: This may not put the offset from Ptr into the vmem offset.
840 return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
841 {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
842 MDSources);
843}
844
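// getUpwardDeps: walk the operand chains of In and collect the instructions
// that live in the same block as Base and come after Base. For example
// (hypothetical), if In is a load whose address comes from a GEP defined
// between Base and In, the GEP (and, transitively, any of its in-block
// operands defined after Base) is returned as a dependency that has to be
// cloned or moved together with In.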
845auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
846 -> DepList {
847 BasicBlock *Parent = Base->getParent();
848 assert(In->getParent() == Parent &&
849 "Base and In should be in the same block");
850 assert(Base->comesBefore(In) && "Base should come before In");
851
852 DepList Deps;
853 std::deque<Instruction *> WorkQ = {In};
854 while (!WorkQ.empty()) {
855 Instruction *D = WorkQ.front();
856 WorkQ.pop_front();
857 if (D != In)
858 Deps.insert(D);
859 for (Value *Op : D->operands()) {
860 if (auto *I = dyn_cast<Instruction>(Op)) {
861 if (I->getParent() == Parent && Base->comesBefore(I))
862 WorkQ.push_back(I);
863 }
864 }
865 }
866 return Deps;
867}
868
869auto AlignVectors::createAddressGroups() -> bool {
870 // An address group created here may contain instructions spanning
871 // multiple basic blocks.
872 AddrList WorkStack;
873
874 auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
875 for (AddrInfo &W : WorkStack) {
876 if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
877 return std::make_pair(W.Inst, *D);
878 }
879 return std::make_pair(nullptr, 0);
880 };
881
882 auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
883 BasicBlock &Block = *DomN->getBlock();
884 for (Instruction &I : Block) {
885 auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
886 if (!AI)
887 continue;
888 auto F = findBaseAndOffset(*AI);
889 Instruction *GroupInst;
890 if (Instruction *BI = F.first) {
891 AI->Offset = F.second;
892 GroupInst = BI;
893 } else {
894 WorkStack.push_back(*AI);
895 GroupInst = AI->Inst;
896 }
897 AddrGroups[GroupInst].push_back(*AI);
898 }
899
900 for (DomTreeNode *C : DomN->children())
901 Visit(C, Visit);
902
903 while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
904 WorkStack.pop_back();
905 };
906
907 traverseBlock(HVC.DT.getRootNode(), traverseBlock);
908 assert(WorkStack.empty());
909
910 // AddrGroups are formed.
911
912 // Remove groups of size 1.
913 erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
914 // Remove groups that don't use HVX types.
915 erase_if(AddrGroups, [&](auto &G) {
916 return llvm::none_of(
917 G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
918 });
919
920 return !AddrGroups.empty();
921}
922
923auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
924 // Form load groups.
925 // To avoid complications with moving code across basic blocks, only form
926 // groups that are contained within a single basic block.
927 unsigned SizeLimit = VAGroupSizeLimit;
928 if (SizeLimit == 0)
929 return {};
930
931 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
932 assert(!Move.Main.empty() && "Move group should have non-empty Main");
933 if (Move.Main.size() >= SizeLimit)
934 return false;
935 // Don't mix HVX and non-HVX instructions.
936 if (Move.IsHvx != isHvx(Info))
937 return false;
938 // Leading instruction in the load group.
939 Instruction *Base = Move.Main.front();
940 if (Base->getParent() != Info.Inst->getParent())
941 return false;
942 // Check if it's safe to move the load.
943 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
944 return false;
945 // And if it's safe to clone the dependencies.
946 auto isSafeToCopyAtBase = [&](const Instruction *I) {
947 return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
948 HVC.isSafeToClone(*I);
949 };
950 DepList Deps = getUpwardDeps(Info.Inst, Base);
951 if (!llvm::all_of(Deps, isSafeToCopyAtBase))
952 return false;
953
954 Move.Main.push_back(Info.Inst);
955 llvm::append_range(Move.Deps, Deps);
956 return true;
957 };
958
959 MoveList LoadGroups;
960
961 for (const AddrInfo &Info : Group) {
962 if (!Info.Inst->mayReadFromMemory())
963 continue;
964 if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
965 LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
966 }
967
968 // Erase singleton groups.
969 erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
970
971 // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
972 if (!HVC.HST.useHVXV62Ops())
973 erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
974
975 return LoadGroups;
976}
977
978auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
979 // Form store groups.
980 // To avoid complications with moving code across basic blocks, only form
981 // groups that are contained within a single basic block.
982 unsigned SizeLimit = VAGroupSizeLimit;
983 if (SizeLimit == 0)
984 return {};
985
986 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
987 assert(!Move.Main.empty() && "Move group should have non-empty Main");
988 if (Move.Main.size() >= SizeLimit)
989 return false;
990 // For stores with return values we'd have to collect downward dependencies.
991 // There are no such stores that we handle at the moment, so omit that.
992 assert(Info.Inst->getType()->isVoidTy() &&
993 "Not handling stores with return values");
994 // Don't mix HVX and non-HVX instructions.
995 if (Move.IsHvx != isHvx(Info))
996 return false;
997 // For stores we need to be careful whether it's safe to move them.
998 // Stores that are otherwise safe to move together may not appear safe
999 // to move over one another (i.e. isSafeToMoveBefore may return false).
1000 Instruction *Base = Move.Main.front();
1001 if (Base->getParent() != Info.Inst->getParent())
1002 return false;
1003 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
1004 return false;
1005 Move.Main.push_back(Info.Inst);
1006 return true;
1007 };
1008
1009 MoveList StoreGroups;
1010
1011 for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
1012 const AddrInfo &Info = *I;
1013 if (!Info.Inst->mayWriteToMemory())
1014 continue;
1015 if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
1016 StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
1017 }
1018
1019 // Erase singleton groups.
1020 erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
1021
1022 // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
1023 if (!HVC.HST.useHVXV62Ops())
1024 erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
1025
1026 // Erase groups where every store is a full HVX vector. The reason is that
1027 // aligning predicated stores generates complex code that may be less
1028 // efficient than a sequence of unaligned vector stores.
1029 if (!VADoFullStores) {
1030 erase_if(StoreGroups, [this](const MoveGroup &G) {
1031 return G.IsHvx && llvm::all_of(G.Main, [this](Instruction *S) {
1032 auto MaybeInfo = this->getAddrInfo(*S);
1033 assert(MaybeInfo.has_value());
1034 return HVC.HST.isHVXVectorType(
1035 EVT::getEVT(MaybeInfo->ValTy, false));
1036 });
1037 });
1038 }
1039
1040 return StoreGroups;
1041}
1042
1043auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
1044 // Move all instructions to be adjacent.
1045 assert(!Move.Main.empty() && "Move group should have non-empty Main");
1046 Instruction *Where = Move.Main.front();
1047
1048 if (Move.IsLoad) {
1049 // Move all the loads (and dependencies) to where the first load is.
1050 // Clone all deps to before Where, keeping order.
1051 Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
1052 // Move all main instructions to after Where, keeping order.
1053 ArrayRef<Instruction *> Main(Move.Main);
1054 for (Instruction *M : Main) {
1055 if (M != Where)
1056 M->moveAfter(Where);
1057 for (auto [Old, New] : Move.Clones)
1058 M->replaceUsesOfWith(Old, New);
1059 Where = M;
1060 }
1061 // Replace Deps with the clones.
1062 for (int i = 0, e = Move.Deps.size(); i != e; ++i)
1063 Move.Deps[i] = Move.Clones[Move.Deps[i]];
1064 } else {
1065 // Move all the stores to where the last store is.
1066 // NOTE: Deps are empty for "store" groups. If they need to be
1067 // non-empty, decide on the order.
1068 assert(Move.Deps.empty());
1069 // Move all main instructions to before Where, inverting order.
1070 ArrayRef<Instruction *> Main(Move.Main);
1071 for (Instruction *M : Main.drop_front(1)) {
1072 M->moveBefore(Where->getIterator());
1073 Where = M;
1074 }
1075 }
1076
1077 return Move.Main.size() + Move.Deps.size() > 1;
1078}
1079
1080template <typename T>
1081auto AlignVectors::cloneBefore(BasicBlock::iterator To, T &&Insts) const
1082 -> InstMap {
1083 InstMap Map;
1084
1085 for (Instruction *I : Insts) {
1086 assert(HVC.isSafeToClone(*I));
1087 Instruction *C = I->clone();
1088 C->setName(Twine("c.") + I->getName() + ".");
1089 C->insertBefore(To);
1090
1091 for (auto [Old, New] : Map)
1092 C->replaceUsesOfWith(Old, New);
1093 Map.insert(std::make_pair(I, C));
1094 }
1095 return Map;
1096}
1097
1098auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
1099 const ByteSpan &VSpan, int ScLen,
1100 Value *AlignVal, Value *AlignAddr) const
1101 -> void {
1102 LLVM_DEBUG(dbgs() << __func__ << "\n");
1103
1104 Type *SecTy = HVC.getByteTy(ScLen);
1105 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1106 bool DoAlign = !HVC.isZero(AlignVal);
1107 BasicBlock::iterator BasePos = Builder.GetInsertPoint();
1108 BasicBlock *BaseBlock = Builder.GetInsertBlock();
1109
1110 ByteSpan ASpan;
1111 auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
1112 auto *Undef = UndefValue::get(SecTy);
1113
1114 // Created load does not have to be "Instruction" (e.g. "undef").
1115 SmallVector<Value *> Loads(NumSectors + DoAlign, nullptr);
1116
1117 // We could create all of the aligned loads, and generate the valigns
1118 // at the location of the first load, but for large load groups, this
1119 // could create highly suboptimal code (there have been groups of 140+
1120 // loads in real code).
1121 // Instead, place the loads/valigns as close to the users as possible.
1122 // In any case we need to have a mapping from the blocks of VSpan (the
1123 // span covered by the pre-existing loads) to ASpan (the span covered
1124 // by the aligned loads). There is a small problem, though: ASpan needs
1125 // to have pointers to the loads/valigns, but we don't have these loads
1126 // because we don't know where to put them yet. We find out by creating
1127 // a section of ASpan that corresponds to values (blocks) from VSpan,
1128 // and checking where the new load should be placed. We need to attach
1129 // this location information to each block in ASpan somehow, so we put
1130 // distinct values for Seg.Val in each ASpan.Blocks[i], and use a map
1131 // to store the location for each Seg.Val.
1132 // The distinct values happen to be Blocks[i].Seg.Val = &Blocks[i],
1133 // which helps with printing ByteSpans without crashing when printing
1134 // Segments with these temporary identifiers in place of Val.
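// In other words (illustrative numbers): if a VSpan block covers span
// offsets 40..103 and ScLen is 64, it maps to ASpan sectors 0 and 1; the
// earliest in-block user of that VSpan value becomes a candidate "earliest
// user" for both sectors, and each sector's load/valign is later emitted
// just before the earliest user recorded for it.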
1135
1136 // Populate the blocks first, to avoid reallocations of the vector
1137 // interfering with generating the placeholder addresses.
1138 for (int Index = 0; Index != NumSectors; ++Index)
1139 ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
1140 for (int Index = 0; Index != NumSectors; ++Index) {
1141 ASpan.Blocks[Index].Seg.Val =
1142 reinterpret_cast<Value *>(&ASpan.Blocks[Index]);
1143 }
1144
1145 // Multiple values from VSpan can map to the same value in ASpan. Since we
1146 // try to create loads lazily, we need to find the earliest use for each
1147 // value from ASpan.
1148 DenseMap<void *, Instruction *> EarliestUser;
1149 auto isEarlier = [](Instruction *A, Instruction *B) {
1150 if (B == nullptr)
1151 return true;
1152 if (A == nullptr)
1153 return false;
1154 assert(A->getParent() == B->getParent());
1155 return A->comesBefore(B);
1156 };
1157 auto earliestUser = [&](const auto &Uses) {
1158 Instruction *User = nullptr;
1159 for (const Use &U : Uses) {
1160 auto *I = dyn_cast<Instruction>(U.getUser());
1161 assert(I != nullptr && "Load used in a non-instruction?");
1162 // Make sure we only consider users in this block, but we need
1163 // to remember if there were users outside the block too. This is
1164 // because if no users are found, aligned loads will not be created.
1165 if (I->getParent() == BaseBlock) {
1166 if (!isa<PHINode>(I))
1167 User = std::min(User, I, isEarlier);
1168 } else {
1169 User = std::min(User, BaseBlock->getTerminator(), isEarlier);
1170 }
1171 }
1172 return User;
1173 };
1174
1175 for (const ByteSpan::Block &B : VSpan) {
1176 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
1177 for (const ByteSpan::Block &S : ASection) {
1178 auto &EU = EarliestUser[S.Seg.Val];
1179 EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);
1180 }
1181 }
1182
1183 LLVM_DEBUG({
1184 dbgs() << "ASpan:\n" << ASpan << '\n';
1185 dbgs() << "Earliest users of ASpan:\n";
1186 for (auto &[Val, User] : EarliestUser) {
1187 dbgs() << Val << "\n ->" << *User << '\n';
1188 }
1189 });
1190
1191 auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
1192 int Index, bool MakePred) {
1193 Value *Ptr =
1194 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1195 Value *Predicate =
1196 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1197
1198 // If vector shifting is potentially needed, accumulate metadata
1199 // from source sections of twice the load width.
1200 int Start = (Index - DoAlign) * ScLen;
1201 int Width = (1 + DoAlign) * ScLen;
1202 return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
1203 VSpan.section(Start, Width).values());
1204 };
1205
1206 auto moveBefore = [this](BasicBlock::iterator In, BasicBlock::iterator To) {
1207 // Move In and its upward dependencies to before To.
1208 assert(In->getParent() == To->getParent());
1209 DepList Deps = getUpwardDeps(&*In, &*To);
1210 In->moveBefore(To);
1211 // DepList is sorted with respect to positions in the basic block.
1212 InstMap Map = cloneBefore(In, Deps);
1213 for (auto [Old, New] : Map)
1214 In->replaceUsesOfWith(Old, New);
1215 };
1216
1217 // Generate necessary loads at appropriate locations.
1218 LLVM_DEBUG(dbgs() << "Creating loads for ASpan sectors\n");
1219 for (int Index = 0; Index != NumSectors + 1; ++Index) {
1220 // In ASpan, each block will be either a single aligned load, or a
1221 // valign of a pair of loads. In the latter case, an aligned load j
1222 // will belong to the current valign, and the one in the previous
1223 // block (for j > 0).
1224 // Place the load at a location which will dominate the valign, assuming
1225 // the valign will be placed right before the earliest user.
1226 Instruction *PrevAt =
1227 DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
1228 Instruction *ThisAt =
1229 Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
1230 if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
1231 Builder.SetInsertPoint(Where);
1232 Loads[Index] =
1233 createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
1234 // We know it's safe to put the load at BasePos, but we'd prefer to put
1235 // it at "Where". To see if the load is safe to be placed at Where, put
1236 // it there first and then check if it's safe to move it to BasePos.
1237 // If not, then the load needs to be placed at BasePos.
1238 // We can't do this check proactively because we need the load to exist
1239 // in order to check legality.
1240 if (auto *Load = dyn_cast<Instruction>(Loads[Index])) {
1241 if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
1242 moveBefore(Load->getIterator(), BasePos);
1243 }
1244 LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');
1245 }
1246 }
1247
1248 // Generate valigns if needed, and fill in proper values in ASpan
1249 LLVM_DEBUG(dbgs() << "Creating values for ASpan sectors\n");
1250 for (int Index = 0; Index != NumSectors; ++Index) {
1251 ASpan[Index].Seg.Val = nullptr;
1252 if (auto *Where = EarliestUser[&ASpan[Index]]) {
1253 Builder.SetInsertPoint(Where);
1254 Value *Val = Loads[Index];
1255 assert(Val != nullptr);
1256 if (DoAlign) {
1257 Value *NextLoad = Loads[Index + 1];
1258 assert(NextLoad != nullptr);
1259 Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
1260 }
1261 ASpan[Index].Seg.Val = Val;
1262 LLVM_DEBUG(dbgs() << "ASpan[" << Index << "]:" << *Val << '\n');
1263 }
1264 }
1265
1266 for (const ByteSpan::Block &B : VSpan) {
1267 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
1268 Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
1269 Builder.SetInsertPoint(cast<Instruction>(B.Seg.Val));
1270
1271 // We're generating a reduction, where each instruction depends on
1272 // the previous one, so we need to order them according to the position
1273 // of their inputs in the code.
1274 std::vector<ByteSpan::Block *> ABlocks;
1275 for (ByteSpan::Block &S : ASection) {
1276 if (S.Seg.Val != nullptr)
1277 ABlocks.push_back(&S);
1278 }
1279 llvm::sort(ABlocks,
1280 [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
1281 return isEarlier(cast<Instruction>(A->Seg.Val),
1282 cast<Instruction>(B->Seg.Val));
1283 });
1284 for (ByteSpan::Block *S : ABlocks) {
1285 // The processing of the data loaded by the aligned loads
1286 // needs to be inserted after the data is available.
1287 Instruction *SegI = cast<Instruction>(S->Seg.Val);
1288 Builder.SetInsertPoint(&*std::next(SegI->getIterator()));
1289 Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
1290 Accum =
1291 HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
1292 }
1293 // Instead of casting everything to bytes for the vselect, cast to the
1294 // original value type. This will avoid complications with casting masks.
1295 // For example, in cases when the original mask applied to i32, it could
1296 // be converted to a mask applicable to i8 via pred_typecast intrinsic,
1297 // but if the mask is not exactly of HVX length, extra handling would be
1298 // needed to make it work.
1299 Type *ValTy = getPayload(B.Seg.Val)->getType();
1300 Value *Cast = Builder.CreateBitCast(Accum, ValTy, "cst");
1301 Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
1302 getPassThrough(B.Seg.Val), "sel");
1303 B.Seg.Val->replaceAllUsesWith(Sel);
1304 }
1305}
1306
1307auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
1308 const ByteSpan &VSpan, int ScLen,
1309 Value *AlignVal, Value *AlignAddr) const
1310 -> void {
1311 LLVM_DEBUG(dbgs() << __func__ << "\n");
1312
1313 Type *SecTy = HVC.getByteTy(ScLen);
1314 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1315 bool DoAlign = !HVC.isZero(AlignVal);
1316
1317 // Stores.
1318 ByteSpan ASpanV, ASpanM;
1319
1320 // Return a vector value corresponding to the input value Val:
1321 // either <1 x Val> for scalar Val, or Val itself for vector Val.
1322 auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
1323 Type *Ty = Val->getType();
1324 if (Ty->isVectorTy())
1325 return Val;
1326 auto *VecTy = VectorType::get(Ty, 1, /*Scalable=*/false);
1327 return Builder.CreateBitCast(Val, VecTy, "cst");
1328 };
1329
1330 // Create an extra "undef" sector at the beginning and at the end.
1331 // They will be used as the left/right filler in the vlalign step.
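// Example of why the filler sectors are needed (assumed numbers): with
// ScLen = 64 and a misalignment of 5 bytes, the first aligned store covers
// 5 bytes that precede the original span and the last one covers bytes past
// its end; those positions come from the undef value / zero mask of the
// filler sectors, so the generated masked or predicated stores leave the
// surrounding memory untouched.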
1332 for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
1333 // For stores, the size of each section is an aligned vector length.
1334 // Adjust the store offsets relative to the section start offset.
1335 ByteSpan VSection =
1336 VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
1337 Value *Undef = UndefValue::get(SecTy);
1338 Value *Zero = HVC.getNullValue(SecTy);
1339 Value *AccumV = Undef;
1340 Value *AccumM = Zero;
1341 for (ByteSpan::Block &S : VSection) {
1342 Value *Pay = getPayload(S.Seg.Val);
1343 Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
1344 Pay->getType(), HVC.getByteTy());
1345 Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
1346 S.Seg.Start, S.Seg.Size, S.Pos);
1347 AccumM = Builder.CreateOr(AccumM, PartM);
1348
1349 Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
1350 S.Seg.Start, S.Seg.Size, S.Pos);
1351
1352 AccumV = Builder.CreateSelect(
1353 Builder.CreateICmp(CmpInst::ICMP_NE, PartM, Zero), PartV, AccumV);
1354 }
1355 ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
1356 ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
1357 }
1358
1359 LLVM_DEBUG({
1360 dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
1361 dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';
1362 });
1363
1364 // vlalign
1365 if (DoAlign) {
1366 for (int Index = 1; Index != NumSectors + 2; ++Index) {
1367 Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
1368 Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
1369 assert(isSectorTy(PrevV->getType()) && isSectorTy(PrevM->getType()));
1370 ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
1371 ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
1372 }
1373 }
1374
1375 LLVM_DEBUG({
1376 dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
1377 dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';
1378 });
1379
1380 auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
1381 const ByteSpan &ASpanM, int Index, bool MakePred) {
1382 Value *Val = ASpanV[Index].Seg.Val;
1383 Value *Mask = ASpanM[Index].Seg.Val; // bytes
1384 if (HVC.isUndef(Val) || HVC.isZero(Mask))
1385 return;
1386 Value *Ptr =
1387 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1388 Value *Predicate =
1389 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1390
1391 // If vector shifting is potentially needed, accumulate metadata
1392 // from source sections of twice the store width.
1393 int Start = (Index - DoAlign) * ScLen;
1394 int Width = (1 + DoAlign) * ScLen;
1395 this->createStore(Builder, Val, Ptr, Predicate, ScLen,
1396 HVC.vlsb(Builder, Mask),
1397 VSpan.section(Start, Width).values());
1398 };
1399
1400 for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
1401 createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
1402 }
1403}
1404
1405auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
1406 LLVM_DEBUG(dbgs() << "Realigning group:\n" << Move << '\n');
1407
1408 // TODO: Needs support for masked loads/stores of "scalar" vectors.
1409 if (!Move.IsHvx)
1410 return false;
1411
1412 // Return the element with the maximum alignment from Range,
1413 // where GetValue obtains the value to compare from an element.
1414 auto getMaxOf = [](auto Range, auto GetValue) {
1415 return *llvm::max_element(Range, [&GetValue](auto &A, auto &B) {
1416 return GetValue(A) < GetValue(B);
1417 });
1418 };
1419
1420 const AddrList &BaseInfos = AddrGroups.at(Move.Base);
1421
1422 // Conceptually, there is a vector of N bytes covering the addresses
1423 // starting from the minimum offset (i.e. Base.Addr+Start). This vector
1424 // represents a contiguous memory region that spans all accessed memory
1425 // locations.
1426 // The correspondence between loaded or stored values will be expressed
1427 // in terms of this vector. For example, the 0th element of the vector
1428 // from the Base address info will start at byte Start from the beginning
1429 // of this conceptual vector.
1430 //
1431 // This vector will be loaded/stored starting at the nearest down-aligned
1432 // address and the amount of the down-alignment will be AlignVal:
1433 // valign(load_vector(align_down(Base+Start)), AlignVal)
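// Numeric sketch (assumed values): with Base+Start = 0x1005 and a 64-byte
// HVX vector, align_down gives 0x1000 and AlignVal = 5, so the conceptual
// vector is recovered by valigning the aligned 64-byte loads at 0x1000,
// 0x1040, ... by 5 bytes.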
1434
1435 std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
1436 AddrList MoveInfos;
1437 llvm::copy_if(
1438 BaseInfos, std::back_inserter(MoveInfos),
1439 [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });
1440
1441 // Maximum alignment present in the whole address group.
1442 const AddrInfo &WithMaxAlign =
1443 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
1444 Align MaxGiven = WithMaxAlign.HaveAlign;
1445
1446 // Address info with the minimum offset in the move address group.
1447 const AddrInfo &WithMinOffset =
1448 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
1449
1450 const AddrInfo &WithMaxNeeded =
1451 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
1452 Align MinNeeded = WithMaxNeeded.NeedAlign;
1453
1454 // Set the builder's insertion point right before the load group, or
1455 // immediately after the store group. (Instructions in a store group are
1456 // listed in reverse order.)
1457 Instruction *InsertAt = Move.Main.front();
1458 if (!Move.IsLoad) {
1459 // There should be a terminator (which store isn't, but check anyway).
1460 assert(InsertAt->getIterator() != InsertAt->getParent()->end());
1461 InsertAt = &*std::next(InsertAt->getIterator());
1462 }
1463
1464 IRBuilder Builder(InsertAt->getParent(), InsertAt->getIterator(),
1465 InstSimplifyFolder(HVC.DL));
1466 Value *AlignAddr = nullptr; // Actual aligned address.
1467 Value *AlignVal = nullptr; // Right-shift amount (for valign).
1468
1469 if (MinNeeded <= MaxGiven) {
1470 int Start = WithMinOffset.Offset;
1471 int OffAtMax = WithMaxAlign.Offset;
1472 // Shift the offset of the maximally aligned instruction (OffAtMax)
1473 // back by just enough multiples of the required alignment to cover the
1474 // distance from Start to OffAtMax.
1475 // Calculate the address adjustment amount based on the address with the
1476 // maximum alignment. This is to allow a simple gep instruction instead
1477 // of potential bitcasts to i8*.
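// For example (assumed numbers): Start = -8, OffAtMax = 40, MinNeeded = 64
// gives Adjust = -alignTo(48, 64) = -64, so AlignAddr points 64 bytes below
// the maximally aligned address, and Diff = -8 - (40 - 64) = 16 satisfies
// 0 <= Diff < MinNeeded.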
1478 int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
1479 AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
1480 WithMaxAlign.ValTy, Adjust, Move.Clones);
1481 int Diff = Start - (OffAtMax + Adjust);
1482 AlignVal = HVC.getConstInt(Diff);
1483 assert(Diff >= 0);
1484 assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
1485 } else {
1486 // WithMinOffset is the lowest address in the group,
1487 // WithMinOffset.Addr = Base+Start.
1488 // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
1489 // mask off unnecessary bits, so it's ok to just use the original pointer as
1490 // the alignment amount.
1491 // Do an explicit down-alignment of the address to avoid creating an
1492 // aligned instruction with an address that is not really aligned.
1493 AlignAddr =
1494 createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
1495 MinNeeded.value(), Move.Clones);
1496 AlignVal =
1497 Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
1498 if (auto *I = dyn_cast<Instruction>(AlignVal)) {
1499 for (auto [Old, New] : Move.Clones)
1500 I->replaceUsesOfWith(Old, New);
1501 }
1502 }
1503
1504 ByteSpan VSpan;
1505 for (const AddrInfo &AI : MoveInfos) {
1506 VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
1507 AI.Offset - WithMinOffset.Offset);
1508 }
1509
1510 // The aligned loads/stores will use blocks that are either scalars,
1511 // or HVX vectors. Let "sector" be the unified term for such a block.
1512 // blend(scalar, vector) -> sector...
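  // E.g. ScLen is 64 or 128 bytes (one full HVX vector) for HVX groups,
  // and 4 or 8 bytes for scalar groups.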
1513 int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
1514 : std::max<int>(MinNeeded.value(), 4);
1515 assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
1516 assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
1517
1518 LLVM_DEBUG({
1519 dbgs() << "ScLen: " << ScLen << "\n";
1520 dbgs() << "AlignVal:" << *AlignVal << "\n";
1521 dbgs() << "AlignAddr:" << *AlignAddr << "\n";
1522 dbgs() << "VSpan:\n" << VSpan << '\n';
1523 });
1524
1525 if (Move.IsLoad)
1526 realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1527 else
1528 realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1529
1530 for (auto *Inst : Move.Main)
1531 Inst->eraseFromParent();
1532
1533 return true;
1534}
1535
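// Create a runtime check that is true when AlignVal is not a multiple of
// Alignment, i.e. (AlignVal & (Alignment - 1)) != 0. Alignment is expected
// to be a power of 2.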
1536auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
1537 int Alignment) const -> Value * {
1538 auto *AlignTy = AlignVal->getType();
1539 Value *And = Builder.CreateAnd(
1540 AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
1541 Value *Zero = ConstantInt::get(AlignTy, 0);
1542 return Builder.CreateICmpNE(And, Zero, "isz");
1543}
1544
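// A "sector" type is a byte vector whose size matches the sector length
// used during realignment: exactly one HVX vector, or 4/8 bytes in the
// scalar case.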
1545auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
1546 if (!HVC.isByteVecTy(Ty))
1547 return false;
1548 int Size = HVC.getSizeOf(Ty);
1549 if (HVC.HST.isTypeForHVX(Ty))
1550 return Size == static_cast<int>(HVC.HST.getVectorLength());
1551 return Size == 4 || Size == 8;
1552}
1553
1554auto AlignVectors::run() -> bool {
1555 LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
1556 << '\n');
1557 if (!createAddressGroups())
1558 return false;
1559
1560 LLVM_DEBUG({
1561 dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
1562 for (auto &[In, AL] : AddrGroups) {
1563 for (const AddrInfo &AI : AL)
1564 dbgs() << "---\n" << AI << '\n';
1565 }
1566 });
1567
1568 bool Changed = false;
1569 MoveList LoadGroups, StoreGroups;
1570
1571 for (auto &G : AddrGroups) {
1572 llvm::append_range(LoadGroups, createLoadGroups(G.second));
1573 llvm::append_range(StoreGroups, createStoreGroups(G.second));
1574 }
1575
1576 LLVM_DEBUG({
1577 dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
1578 for (const MoveGroup &G : LoadGroups)
1579 dbgs() << G << "\n";
1580 dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
1581 for (const MoveGroup &G : StoreGroups)
1582 dbgs() << G << "\n";
1583 });
1584
1585 // Cumulative limit on the number of groups.
1586 unsigned CountLimit = VAGroupCountLimit;
1587 if (CountLimit == 0)
1588 return false;
1589
1590 if (LoadGroups.size() > CountLimit) {
1591 LoadGroups.resize(CountLimit);
1592 StoreGroups.clear();
1593 } else {
1594 unsigned StoreLimit = CountLimit - LoadGroups.size();
1595 if (StoreGroups.size() > StoreLimit)
1596 StoreGroups.resize(StoreLimit);
1597 }
1598
1599 for (auto &M : LoadGroups)
1600 Changed |= moveTogether(M);
1601 for (auto &M : StoreGroups)
1602 Changed |= moveTogether(M);
1603
1604 LLVM_DEBUG(dbgs() << "After moveTogether:\n" << HVC.F);
1605
1606 for (auto &M : LoadGroups)
1607 Changed |= realignGroup(M);
1608 for (auto &M : StoreGroups)
1609 Changed |= realignGroup(M);
1610
1611 return Changed;
1612}
1613
1614// --- End AlignVectors
1615
1616// --- Begin HvxIdioms
1617
1618auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
1619 -> std::pair<unsigned, Signedness> {
1620 unsigned Bits = HVC.getNumSignificantBits(V, In);
1621 // The significant bits are calculated including the sign bit. This may
1622 // add an extra bit for zero-extended values, e.g. (zext i32 to i64) may
1623 // result in 33 significant bits. To avoid extra words, skip the extra
1624 // sign bit, but keep information that the value is to be treated as
1625 // unsigned.
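  // For example (illustrative): a value produced by (zext i16 %x to i32)
  // has 17 significant bits when treated as signed; since bits 16 and up
  // are known to be zero, it is reported here as 16 bits with Unsigned.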
1626 KnownBits Known = HVC.getKnownBits(V, In);
1627 Signedness Sign = Signed;
1628 unsigned NumToTest = 0; // Number of bits used in test for unsignedness.
1629 if (isPowerOf2_32(Bits))
1630 NumToTest = Bits;
1631 else if (Bits > 1 && isPowerOf2_32(Bits - 1))
1632 NumToTest = Bits - 1;
1633
1634 if (NumToTest != 0 && Known.Zero.ashr(NumToTest).isAllOnes()) {
1635 Sign = Unsigned;
1636 Bits = NumToTest;
1637 }
1638
1639 // If the top bit of the nearest power-of-2 is zero, this value is
1640 // positive. It could be treated as either signed or unsigned.
1641 if (unsigned Pow2 = PowerOf2Ceil(Bits); Pow2 != Bits) {
1642 if (Known.Zero.ashr(Pow2 - 1).isAllOnes())
1643 Sign = Positive;
1644 }
1645 return {Bits, Sign};
1646}
1647
1648auto HvxIdioms::canonSgn(SValue X, SValue Y) const
1649 -> std::pair<SValue, SValue> {
1650 // Canonicalize the signedness of X and Y, so that the result is one of:
1651 // S, S
1652 // U/P, S
1653 // U/P, U/P
1654 if (X.Sgn == Signed && Y.Sgn != Signed)
1655 std::swap(X, Y);
1656 return {X, Y};
1657}
1658
1659// Match
1660// (X * Y) [>> N], or
1661// ((X * Y) + (1 << M)) >> N
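// For example, a rounding Q15 multiply appears as
// ((X * Y) + (1 << 14)) >> 15, which yields Frac == 15 and RoundAt == 14.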
1662auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
1663 using namespace PatternMatch;
1664 auto *Ty = In.getType();
1665
1666 if (!Ty->isVectorTy() || !Ty->getScalarType()->isIntegerTy())
1667 return std::nullopt;
1668
1669 unsigned Width = cast<IntegerType>(Ty->getScalarType())->getBitWidth();
1670
1671 FxpOp Op;
1672 Value *Exp = &In;
1673
1674 // Fixed-point multiplication is always shifted right (except when the
1675 // fraction is 0 bits).
1676 auto m_Shr = [](auto &&V, auto &&S) {
1677 return m_CombineOr(m_LShr(V, S), m_AShr(V, S));
1678 };
1679
1680 uint64_t Qn = 0;
1681 if (Value *T; match(Exp, m_Shr(m_Value(T), m_ConstantInt(Qn)))) {
1682 Op.Frac = Qn;
1683 Exp = T;
1684 } else {
1685 Op.Frac = 0;
1686 }
1687
1688 if (Op.Frac > Width)
1689 return std::nullopt;
1690
1691 // Check if there is rounding added.
1692 uint64_t CV;
1693 if (Value *T;
1694 Op.Frac > 0 && match(Exp, m_Add(m_Value(T), m_ConstantInt(CV)))) {
1695 if (CV != 0 && !isPowerOf2_64(CV))
1696 return std::nullopt;
1697 if (CV != 0)
1698 Op.RoundAt = Log2_64(CV);
1699 Exp = T;
1700 }
1701
1702 // Check if the rest is a multiplication.
1703 if (match(Exp, m_Mul(m_Value(Op.X.Val), m_Value(Op.Y.Val)))) {
1704 Op.Opcode = Instruction::Mul;
1705 // FIXME: The information below is recomputed.
1706 Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
1707 Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
1708 Op.ResTy = cast<VectorType>(Ty);
1709 return Op;
1710 }
1711
1712 return std::nullopt;
1713}
1714
1715auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
1716 -> Value * {
1717 assert(Op.X.Val->getType() == Op.Y.Val->getType());
1718
1719 auto *VecTy = dyn_cast<VectorType>(Op.X.Val->getType());
1720 if (VecTy == nullptr)
1721 return nullptr;
1722 auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
1723 unsigned ElemWidth = ElemTy->getBitWidth();
1724
1725 // TODO: This can be relaxed after legalization is done pre-isel.
1726 if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
1727 return nullptr;
1728
1729 // There are no special intrinsics that should be used for multiplying
1730 // signed 8-bit values, so just skip them. Normal codegen should handle
1731 // this just fine.
1732 if (ElemWidth <= 8)
1733 return nullptr;
1734 // Similarly, if this is just a multiplication that can be handled without
1735 // intervention, then leave it alone.
1736 if (ElemWidth <= 32 && Op.Frac == 0)
1737 return nullptr;
1738
1739 auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
1740 auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
1741
1742 // TODO: Add multiplication of vectors by scalar registers (up to 4 bytes).
1743
1744 Value *X = Op.X.Val, *Y = Op.Y.Val;
1745 IRBuilder Builder(In.getParent(), In.getIterator(),
1746 InstSimplifyFolder(HVC.DL));
1747
1748 auto roundUpWidth = [](unsigned Width) -> unsigned {
1749 if (Width <= 32 && !isPowerOf2_32(Width)) {
1750 // If the element width is not a power of 2, round it up
1751 // to the next one. Do this for widths not exceeding 32.
1752 return PowerOf2Ceil(Width);
1753 }
1754 if (Width > 32 && Width % 32 != 0) {
1755 // For wider elements, round it up to the multiple of 32.
1756 return alignTo(Width, 32u);
1757 }
1758 return Width;
1759 };
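  // E.g. roundUpWidth(9) == 16, roundUpWidth(24) == 32,
  // roundUpWidth(33) == 64.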
1760
1761 BitsX = roundUpWidth(BitsX);
1762 BitsY = roundUpWidth(BitsY);
1763
1764 // For elementwise multiplication vectors must have the same lengths, so
1765 // resize the elements of both inputs to the same width, the max of the
1766 // calculated significant bits.
1767 unsigned Width = std::max(BitsX, BitsY);
1768
1769 auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
1770 if (Width < ElemWidth) {
1771 X = Builder.CreateTrunc(X, ResizeTy, "trn");
1772 Y = Builder.CreateTrunc(Y, ResizeTy, "trn");
1773 } else if (Width > ElemWidth) {
1774 X = SignX == Signed ? Builder.CreateSExt(X, ResizeTy, "sxt")
1775 : Builder.CreateZExt(X, ResizeTy, "zxt");
1776 Y = SignY == Signed ? Builder.CreateSExt(Y, ResizeTy, "sxt")
1777 : Builder.CreateZExt(Y, ResizeTy, "zxt");
1778 }
1779
1780 assert(X->getType() == Y->getType() && X->getType() == ResizeTy);
1781
1782 unsigned VecLen = HVC.length(ResizeTy);
1783 unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
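  // E.g. with 128-byte HVX vectors and 32-bit (or wider) working elements,
  // ChopLen == (8 * 128) / 32 == 32 elements per chopped multiplication.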
1784
1785 SmallVector<Value *> Results;
1786 FxpOp ChopOp = Op;
1787 ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
1788
1789 for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
1790 ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
1791 ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
1792 Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
1793 if (Results.back() == nullptr)
1794 break;
1795 }
1796
1797 if (Results.empty() || Results.back() == nullptr)
1798 return nullptr;
1799
1800 Value *Cat = HVC.concat(Builder, Results);
1801 Value *Ext = SignX == Signed || SignY == Signed
1802 ? Builder.CreateSExt(Cat, VecTy, "sxt")
1803 : Builder.CreateZExt(Cat, VecTy, "zxt");
1804 return Ext;
1805}
1806
1807auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
1808 const FxpOp &Op) const -> Value * {
1809 assert(Op.X.Val->getType() == Op.Y.Val->getType());
1810 auto *InpTy = cast<VectorType>(Op.X.Val->getType());
1811 unsigned Width = InpTy->getScalarSizeInBits();
1812 bool Rounding = Op.RoundAt.has_value();
1813
1814 if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
1815 // The fixed-point intrinsics do signed multiplication.
1816 if (Width == Op.Frac + 1 && Op.X.Sgn != Unsigned && Op.Y.Sgn != Unsigned) {
1817 Value *QMul = nullptr;
1818 if (Width == 16) {
1819 QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
1820 } else if (Width == 32) {
1821 QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
1822 }
1823 if (QMul != nullptr)
1824 return QMul;
1825 }
1826 }
1827
1828 assert(Width >= 32 || isPowerOf2_32(Width)); // Width <= 32 => Width is 2^n
1829 assert(Width < 32 || Width % 32 == 0); // Width > 32 => Width is 32*k
1830
1831 // If Width < 32, then it should really be 16.
1832 if (Width < 32) {
1833 if (Width < 16)
1834 return nullptr;
1835 // Getting here with Op.Frac == 0 isn't wrong, but suboptimal: here we
1836 // generate full-precision products, which is unnecessary if there is
1837 // no shift.
1838 assert(Width == 16);
1839 assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
1840 if (Op.Frac == 16) {
1841 // Multiply high
1842 if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
1843 return MulH;
1844 }
1845 // Do full-precision multiply and shift.
1846 Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
1847 if (Rounding) {
1848 Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
1849 Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
1850 }
1851
1852 Value *ShiftAmt = HVC.getConstSplat(Prod32->getType(), Op.Frac);
1853 Value *Shifted = Op.X.Sgn == Signed || Op.Y.Sgn == Signed
1854 ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
1855 : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
1856 return Builder.CreateTrunc(Shifted, InpTy, "trn");
1857 }
1858
1859 // Width >= 32
1860
1861 // Break up the arguments Op.X and Op.Y into vectors of smaller widths
1862 // in preparation of doing the multiplication by 32-bit parts.
1863 auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, /*ToWidth=*/32);
1864 auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, /*ToWidth=*/32);
1865 auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);
1866
1867 auto *HvxWordTy = cast<VectorType>(WordP.front()->getType());
1868
1869 // Add the optional rounding to the proper word.
1870 if (Op.RoundAt.has_value()) {
1871 Value *Zero = HVC.getNullValue(WordX[0]->getType());
1872 SmallVector<Value *> RoundV(WordP.size(), Zero);
1873 RoundV[*Op.RoundAt / 32] =
1874 HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));
1875 WordP = createAddLong(Builder, WordP, RoundV);
1876 }
1877
1878 // createRightShiftLong?
1879
1880 // Shift all products right by Op.Frac.
1881 unsigned SkipWords = Op.Frac / 32;
1882 Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);
1883
1884 for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
1885 int Src = Dst + SkipWords;
1886 Value *Lo = WordP[Src];
1887 if (Src + 1 < End) {
1888 Value *Hi = WordP[Src + 1];
1889 WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,
1890 {Hi, Lo, ShiftAmt},
1891 /*FMFSource*/ nullptr, "int");
1892 } else {
1893 // The shift of the most significant word.
1894 WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt, "asr");
1895 }
1896 }
1897 if (SkipWords != 0)
1898 WordP.resize(WordP.size() - SkipWords);
1899
1900 return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
1901}
1902
1903auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
1904 bool Rounding) const -> Value * {
1905 assert(X.Val->getType() == Y.Val->getType());
1906 assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
1907 assert(HVC.HST.isHVXVectorType(EVT::getEVT(X.Val->getType(), false)));
1908
1909 // There is no non-rounding intrinsic for i16.
1910 if (!Rounding || X.Sgn == Unsigned || Y.Sgn == Unsigned)
1911 return nullptr;
1912
1913 auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
1914 return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
1915 {X.Val, Y.Val});
1916}
1917
1918auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
1919 bool Rounding) const -> Value * {
1920 Type *InpTy = X.Val->getType();
1921 assert(InpTy == Y.Val->getType());
1922 assert(InpTy->getScalarType() == HVC.getIntTy(32));
1923 assert(HVC.HST.isHVXVectorType(EVT::getEVT(InpTy, false)));
1924
1925 if (X.Sgn == Unsigned || Y.Sgn == Unsigned)
1926 return nullptr;
1927
1928 auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
1929 auto V6_vmpyo_acc = Rounding
1930 ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
1931 : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
1932 Value *V1 =
1933 HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
1934 return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
1935 {V1, X.Val, Y.Val});
1936}
1937
1938auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
1939 Value *CarryIn) const
1940 -> std::pair<Value *, Value *> {
1941 assert(X->getType() == Y->getType());
1942 auto VecTy = cast<VectorType>(X->getType());
1943 if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
1944 SmallVector<Value *> Args = {X, Y};
1945 Intrinsic::ID AddCarry;
1946 if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {
1947 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
1948 } else {
1949 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
1950 if (CarryIn == nullptr)
1951 CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
1952 Args.push_back(CarryIn);
1953 }
1954 Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
1955 /*RetTy=*/nullptr, Args);
1956 Value *Result = Builder.CreateExtractValue(Ret, {0}, "ext");
1957 Value *CarryOut = Builder.CreateExtractValue(Ret, {1}, "ext");
1958 return {Result, CarryOut};
1959 }
1960
1961 // In other cases, do a regular add, and unsigned compare-less-than.
1962 // The carry-out can originate in two places: adding the carry-in or adding
1963 // the two input values.
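  // An unsigned add wraps around exactly when the result is smaller than
  // an operand, so each of the two adds below checks its result against
  // one of its inputs with an unsigned compare.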
1964 Value *Result1 = X; // Result1 = X + CarryIn
1965 if (CarryIn != nullptr) {
1966 unsigned Width = VecTy->getScalarSizeInBits();
1967 uint32_t Mask = 1;
1968 if (Width < 32) {
1969 for (unsigned i = 0, e = 32 / Width; i != e; ++i)
1970 Mask = (Mask << Width) | 1;
1971 }
1972 auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
1973 Value *ValueIn =
1974 HVC.createHvxIntrinsic(Builder, V6_vandqrt, /*RetTy=*/nullptr,
1975 {CarryIn, HVC.getConstInt(Mask)});
1976 Result1 = Builder.CreateAdd(X, ValueIn, "add");
1977 }
1978
1979 Value *CarryOut1 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result1, X, "cmp");
1980 Value *Result2 = Builder.CreateAdd(Result1, Y, "add");
1981 Value *CarryOut2 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result2, Y, "cmp");
1982 return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
1983}
1984
1985auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
1986 -> Value * {
1987 Intrinsic::ID V6_vmpyh = 0;
1988 std::tie(X, Y) = canonSgn(X, Y);
1989
1990 if (X.Sgn == Signed) {
1991 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
1992 } else if (Y.Sgn == Signed) {
1993 // In vmpyhus the second operand is unsigned
1994 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
1995 } else {
1996 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);
1997 }
1998
1999 // i16*i16 -> i32 / interleaved
2000 Value *P =
2001 HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
2002 // Deinterleave
2003 return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
2004}
2005
2006auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
2007 -> Value * {
2008 Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false);
2009
2010 if (HVC.HST.useHVXV69Ops()) {
2011 if (X.Sgn != Signed && Y.Sgn != Signed) {
2012 auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
2013 return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
2014 {X.Val, Y.Val});
2015 }
2016 }
2017
2018 Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/true);
2019 Value *Pair16 =
2020 Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty, "cst");
2021 unsigned Len = HVC.length(HvxP16Ty) / 2;
2022
2023 SmallVector<int, 128> PickOdd(Len);
2024 for (int i = 0; i != static_cast<int>(Len); ++i)
2025 PickOdd[i] = 2 * i + 1;
2026
2027 return Builder.CreateShuffleVector(
2028 HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
2029}
2030
2031auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
2032 -> std::pair<Value *, Value *> {
2033 assert(X.Val->getType() == Y.Val->getType());
2034 assert(X.Val->getType() == HvxI32Ty);
2035
2036 Intrinsic::ID V6_vmpy_parts;
2037 std::tie(X, Y) = canonSgn(X, Y);
2038
2039 if (X.Sgn == Signed) {
2040 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
2041 } else if (Y.Sgn == Signed) {
2042 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
2043 } else {
2044 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
2045 }
2046
2047 Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
2048 {X.Val, Y.Val}, {HvxI32Ty});
2049 Value *Hi = Builder.CreateExtractValue(Parts, {0}, "ext");
2050 Value *Lo = Builder.CreateExtractValue(Parts, {1}, "ext");
2051 return {Lo, Hi};
2052}
2053
2054auto HvxIdioms::createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
2055 ArrayRef<Value *> WordY) const
2056 -> SmallVector<Value *> {
2057 assert(WordX.size() == WordY.size());
2058 unsigned Idx = 0, Length = WordX.size();
2059 SmallVector<Value *> Sum(Length);
2060
2061 while (Idx != Length) {
2062 if (HVC.isZero(WordX[Idx]))
2063 Sum[Idx] = WordY[Idx];
2064 else if (HVC.isZero(WordY[Idx]))
2065 Sum[Idx] = WordX[Idx];
2066 else
2067 break;
2068 ++Idx;
2069 }
2070
2071 Value *Carry = nullptr;
2072 for (; Idx != Length; ++Idx) {
2073 std::tie(Sum[Idx], Carry) =
2074 createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);
2075 }
2076
2077 // This drops the final carry beyond the highest word.
2078 return Sum;
2079}
2080
2081auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
2082 Signedness SgnX, ArrayRef<Value *> WordY,
2083 Signedness SgnY) const -> SmallVector<Value *> {
2084 SmallVector<SmallVector<Value *>> Products(WordX.size() + WordY.size());
2085
2086 // WordX[i] * WordY[j] produces words i+j and i+j+1 of the results,
2087 // that is halves 2(i+j), 2(i+j)+1, 2(i+j)+2, 2(i+j)+3.
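  // For example (illustrative), with two 32-bit words per input (64-bit
  // elements): X0*Y0 contributes to words 0 and 1, X0*Y1 and X1*Y0 to
  // words 1 and 2, and X1*Y1 to words 2 and 3; the per-word columns are
  // then summed with carry propagation below.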
2088 for (int i = 0, e = WordX.size(); i != e; ++i) {
2089 for (int j = 0, f = WordY.size(); j != f; ++j) {
2090 // Check the 4 halves that this multiplication can generate.
2091 Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
2092 Signedness SY = (j + 1 == f) ? SgnY : Unsigned;
2093 auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
2094 Products[i + j + 0].push_back(Lo);
2095 Products[i + j + 1].push_back(Hi);
2096 }
2097 }
2098
2099 Value *Zero = HVC.getNullValue(WordX[0]->getType());
2100
2101 auto pop_back_or_zero = [Zero](auto &Vector) -> Value * {
2102 if (Vector.empty())
2103 return Zero;
2104 auto Last = Vector.back();
2105 Vector.pop_back();
2106 return Last;
2107 };
2108
2109 for (int i = 0, e = Products.size(); i != e; ++i) {
2110 while (Products[i].size() > 1) {
2111 Value *Carry = nullptr; // no carry-in
2112 for (int j = i; j != e; ++j) {
2113 auto &ProdJ = Products[j];
2114 auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
2115 pop_back_or_zero(ProdJ), Carry);
2116 ProdJ.insert(ProdJ.begin(), Sum);
2117 Carry = CarryOut;
2118 }
2119 }
2120 }
2121
2122 SmallVector<Value *> WordP;
2123 for (auto &P : Products) {
2124 assert(P.size() == 1 && "Should have been added together");
2125 WordP.push_back(P.front());
2126 }
2127
2128 return WordP;
2129}
2130
2131auto HvxIdioms::run() -> bool {
2132 bool Changed = false;
2133
2134 for (BasicBlock &B : HVC.F) {
2135 for (auto It = B.rbegin(); It != B.rend(); ++It) {
2136 if (auto Fxm = matchFxpMul(*It)) {
2137 Value *New = processFxpMul(*It, *Fxm);
2138 // Always report "changed" for now.
2139 Changed = true;
2140 if (!New)
2141 continue;
2142 bool StartOver = !isa<Instruction>(New);
2143 It->replaceAllUsesWith(New);
2144 RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
2145 It = StartOver ? B.rbegin()
2146 : cast<Instruction>(New)->getReverseIterator();
2147 Changed = true;
2148 }
2149 }
2150 }
2151
2152 return Changed;
2153}
2154
2155// --- End HvxIdioms
2156
2157auto HexagonVectorCombine::run() -> bool {
2158 if (DumpModule)
2159 dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();
2160
2161 bool Changed = false;
2162 if (HST.useHVXOps()) {
2163 if (VAEnabled)
2164 Changed |= AlignVectors(*this).run();
2165 if (VIEnabled)
2166 Changed |= HvxIdioms(*this).run();
2167 }
2168
2169 if (DumpModule) {
2170 dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
2171 << " after HexagonVectorCombine\n"
2172 << *F.getParent();
2173 }
2174 return Changed;
2175}
2176
2177auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {
2178 return IntegerType::get(F.getContext(), Width);
2179}
2180
2181auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
2182 assert(ElemCount >= 0);
2183 IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
2184 if (ElemCount == 0)
2185 return ByteTy;
2186 return VectorType::get(ByteTy, ElemCount, /*Scalable=*/false);
2187}
2188
2189auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
2190 assert(ElemCount >= 0);
2191 IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
2192 if (ElemCount == 0)
2193 return BoolTy;
2194 return VectorType::get(BoolTy, ElemCount, /*Scalable=*/false);
2195}
2196
2197auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const
2198 -> ConstantInt * {
2199 return ConstantInt::getSigned(getIntTy(Width), Val);
2200}
2201
2202auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
2203 if (auto *C = dyn_cast<Constant>(Val))
2204 return C->isZeroValue();
2205 return false;
2206}
2207
2208auto HexagonVectorCombine::getIntValue(const Value *Val) const
2209 -> std::optional<APInt> {
2210 if (auto *CI = dyn_cast<ConstantInt>(Val))
2211 return CI->getValue();
2212 return std::nullopt;
2213}
2214
2215auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
2216 return isa<UndefValue>(Val);
2217}
2218
2219auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {
2220 return Val == ConstantInt::getTrue(Val->getType());
2221}
2222
2223auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
2224 return isZero(Val);
2225}
2226
2227auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
2228 -> VectorType * {
2229 EVT ETy = EVT::getEVT(ElemTy, false);
2230 assert(ETy.isSimple() && "Invalid HVX element type");
2231 // Do not allow boolean types here: they don't have a fixed length.
2232 assert(HST.isHVXElementType(ETy.getSimpleVT(), /*IncludeBool=*/false) &&
2233 "Invalid HVX element type");
2234 unsigned HwLen = HST.getVectorLength();
2235 unsigned NumElems = (8 * HwLen) / ETy.getSizeInBits();
2236 return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
2237 /*Scalable=*/false);
2238}
2239
2240auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
2241 -> int {
2242 return getSizeOf(Val->getType(), Kind);
2243}
2244
2245auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
2246 -> int {
2247 auto *NcTy = const_cast<Type *>(Ty);
2248 switch (Kind) {
2249 case Store:
2250 return DL.getTypeStoreSize(NcTy).getFixedValue();
2251 case Alloc:
2252 return DL.getTypeAllocSize(NcTy).getFixedValue();
2253 }
2254 llvm_unreachable("Unhandled SizeKind enum");
2255}
2256
2257auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
2258 // The actual type may be shorter than the HVX vector, so determine
2259 // the alignment based on subtarget info.
2260 if (HST.isTypeForHVX(Ty))
2261 return HST.getVectorLength();
2262 return DL.getABITypeAlign(Ty).value();
2263}
2264
2265auto HexagonVectorCombine::length(Value *Val) const -> size_t {
2266 return length(Val->getType());
2267}
2268
2269auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
2270 auto *VecTy = dyn_cast<VectorType>(Ty);
2271 assert(VecTy && "Must be a vector type");
2272 return VecTy->getElementCount().getFixedValue();
2273}
2274
2275auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
2276 assert(Ty->isIntOrIntVectorTy());
2277 auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
2278 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2279 return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
2280 return Zero;
2281}
2282
2283auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
2284 assert(Ty->isIntOrIntVectorTy());
2285 auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
2286 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2287 return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
2288 return Minus1;
2289}
2290
2291auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
2292 -> Constant * {
2293 assert(Ty->isVectorTy());
2294 auto VecTy = cast<VectorType>(Ty);
2295 Type *ElemTy = VecTy->getElementType();
2296 // Add support for floats if needed.
2297 auto *Splat = ConstantVector::getSplat(VecTy->getElementCount(),
2298 ConstantInt::get(ElemTy, Val));
2299 return Splat;
2300}
2301
2302auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
2303 if (auto *In = dyn_cast<Instruction>(V)) {
2304 SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
2305 return simplifyInstruction(In, Q);
2306 }
2307 return nullptr;
2308}
2309
2310// Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
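// For example (illustrative): insertb(Dst, Src, /*Start=*/0, /*Length=*/4,
// /*Where=*/8) overwrites bytes 8..11 of Dst with bytes 0..3 of Src.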
2311auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
2312 Value *Src, int Start, int Length,
2313 int Where) const -> Value * {
2314 assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
2315 int SrcLen = getSizeOf(Src);
2316 int DstLen = getSizeOf(Dst);
2317 assert(0 <= Start && Start + Length <= SrcLen);
2318 assert(0 <= Where && Where + Length <= DstLen);
2319
2320 int P2Len = PowerOf2Ceil(SrcLen | DstLen);
2321 auto *Poison = PoisonValue::get(getByteTy());
2322 Value *P2Src = vresize(Builder, Src, P2Len, Poison);
2323 Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);
2324
2325 SmallVector<int, 256> SMask(P2Len);
2326 for (int i = 0; i != P2Len; ++i) {
2327 // If i is in [Where, Where+Length), pick Src[Start+(i-Where)].
2328 // Otherwise, pick Dst[i];
2329 SMask[i] =
2330 (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
2331 }
2332
2333 Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask, "shf");
2334 return vresize(Builder, P2Insert, DstLen, Poison);
2335}
2336
2337auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
2338 Value *Hi, Value *Amt) const -> Value * {
2339 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
2340 if (isZero(Amt))
2341 return Hi;
2342 int VecLen = getSizeOf(Hi);
2343 if (auto IntAmt = getIntValue(Amt))
2344 return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
2345 VecLen);
2346
2347 if (HST.isTypeForHVX(Hi->getType())) {
2348 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
2349 "Expecting an exact HVX type");
2350 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
2351 Hi->getType(), {Hi, Lo, Amt});
2352 }
2353
2354 if (VecLen == 4) {
2355 Value *Pair = concat(Builder, {Lo, Hi});
2356 Value *Shift =
2357 Builder.CreateLShr(Builder.CreateShl(Pair, Amt, "shl"), 32, "lsr");
2358 Value *Trunc =
2359 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
2360 return Builder.CreateBitCast(Trunc, Hi->getType(), "cst");
2361 }
2362 if (VecLen == 8) {
2363 Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt, "sub");
2364 return vralignb(Builder, Lo, Hi, Sub);
2365 }
2366 llvm_unreachable("Unexpected vector length");
2367}
2368
2369auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
2370 Value *Hi, Value *Amt) const -> Value * {
2371 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
2372 if (isZero(Amt))
2373 return Lo;
2374 int VecLen = getSizeOf(Lo);
2375 if (auto IntAmt = getIntValue(Amt))
2376 return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
2377
2378 if (HST.isTypeForHVX(Lo->getType())) {
2379 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
2380 "Expecting an exact HVX type");
2381 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
2382 Lo->getType(), {Hi, Lo, Amt});
2383 }
2384
2385 if (VecLen == 4) {
2386 Value *Pair = concat(Builder, {Lo, Hi});
2387 Value *Shift = Builder.CreateLShr(Pair, Amt, "lsr");
2388 Value *Trunc =
2389 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
2390 return Builder.CreateBitCast(Trunc, Lo->getType(), "cst");
2391 }
2392 if (VecLen == 8) {
2393 Type *Int64Ty = Type::getInt64Ty(F.getContext());
2394 Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty, "cst");
2395 Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty, "cst");
2396 Value *Call = Builder.CreateIntrinsic(Intrinsic::hexagon_S2_valignrb,
2397 {Hi64, Lo64, Amt},
2398 /*FMFSource=*/nullptr, "cup");
2399 return Builder.CreateBitCast(Call, Lo->getType(), "cst");
2400 }
2401 llvm_unreachable("Unexpected vector length");
2402}
2403
2404// Concatenates a sequence of vectors of the same type.
2405auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
2406 ArrayRef<Value *> Vecs) const -> Value * {
2407 assert(!Vecs.empty());
2408 SmallVector<int, 256> SMask;
2409 std::vector<Value *> Work[2];
2410 int ThisW = 0, OtherW = 1;
2411
2412 Work[ThisW].assign(Vecs.begin(), Vecs.end());
2413 while (Work[ThisW].size() > 1) {
2414 auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
2415 SMask.resize(length(Ty) * 2);
2416 std::iota(SMask.begin(), SMask.end(), 0);
2417
2418 Work[OtherW].clear();
2419 if (Work[ThisW].size() % 2 != 0)
2420 Work[ThisW].push_back(UndefValue::get(Ty));
2421 for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
2422 Value *Joined = Builder.CreateShuffleVector(
2423 Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
2424 Work[OtherW].push_back(Joined);
2425 }
2426 std::swap(ThisW, OtherW);
2427 }
2428
2429 // Since there may have been some undefs appended to make shuffle operands
2430 // have the same type, perform the last shuffle to only pick the original
2431 // elements.
2432 SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
2433 std::iota(SMask.begin(), SMask.end(), 0);
2434 Value *Total = Work[ThisW].front();
2435 return Builder.CreateShuffleVector(Total, SMask, "shf");
2436}
2437
2438auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
2439 int NewSize, Value *Pad) const -> Value * {
2440 assert(isa<VectorType>(Val->getType()));
2441 auto *ValTy = cast<VectorType>(Val->getType());
2442 assert(ValTy->getElementType() == Pad->getType());
2443
2444 int CurSize = length(ValTy);
2445 if (CurSize == NewSize)
2446 return Val;
2447 // Truncate?
2448 if (CurSize > NewSize)
2449 return getElementRange(Builder, Val, /*Ignored*/ Val, 0, NewSize);
2450 // Extend.
2451 SmallVector<int, 128> SMask(NewSize);
2452 std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
2453 std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
2454 Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad, "spt");
2455 return Builder.CreateShuffleVector(Val, PadVec, SMask, "shf");
2456}
2457
2458auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
2459 Type *FromTy, Type *ToTy) const -> Value * {
2460 // Mask is a vector <N x i1>, where each element corresponds to an
2461 // element of FromTy. Remap it so that each element will correspond
2462 // to an element of ToTy.
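  // For example (illustrative): rescaling a <64 x i1> mask from i16
  // elements to i8 elements produces a <128 x i1> mask, with each original
  // lane duplicated for the two bytes it covers.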
2463 assert(isa<VectorType>(Mask->getType()));
2464
2465 Type *FromSTy = FromTy->getScalarType();
2466 Type *ToSTy = ToTy->getScalarType();
2467 if (FromSTy == ToSTy)
2468 return Mask;
2469
2470 int FromSize = getSizeOf(FromSTy);
2471 int ToSize = getSizeOf(ToSTy);
2472 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
2473
2474 auto *MaskTy = cast<VectorType>(Mask->getType());
2475 int FromCount = length(MaskTy);
2476 int ToCount = (FromCount * FromSize) / ToSize;
2477 assert((FromCount * FromSize) % ToSize == 0);
2478
2479 auto *FromITy = getIntTy(FromSize * 8);
2480 auto *ToITy = getIntTy(ToSize * 8);
2481
2482 // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
2483 // -> trunc to <M x i1>.
2484 Value *Ext = Builder.CreateSExt(
2485 Mask, VectorType::get(FromITy, FromCount, /*Scalable=*/false), "sxt");
2486 Value *Cast = Builder.CreateBitCast(
2487 Ext, VectorType::get(ToITy, ToCount, /*Scalable=*/false), "cst");
2488 return Builder.CreateTrunc(
2489 Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable=*/false), "trn");
2490}
2491
2492// Bitcast to bytes, and return least significant bits.
2493auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
2494 -> Value * {
2495 Type *ScalarTy = Val->getType()->getScalarType();
2496 if (ScalarTy == getBoolTy())
2497 return Val;
2498
2499 Value *Bytes = vbytes(Builder, Val);
2500 if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
2501 return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
2502 // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
2503 // <1 x i1>.
2504 return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");
2505}
2506
2507// Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
2508auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
2509 -> Value * {
2510 Type *ScalarTy = Val->getType()->getScalarType();
2511 if (ScalarTy == getByteTy())
2512 return Val;
2513
2514 if (ScalarTy != getBoolTy())
2515 return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
2516 // For bool, return a sext from i1 to i8.
2517 if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
2518 return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
2519 return Builder.CreateSExt(Val, getByteTy(), "sxt");
2520}
2521
2522auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
2523 unsigned Start, unsigned Length) const
2524 -> Value * {
2525 assert(Start + Length <= length(Val));
2526 return getElementRange(Builder, Val, /*Ignored*/ Val, Start, Length);
2527}
2528
2529auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
2530 -> Value * {
2531 size_t Len = length(Val);
2532 assert(Len % 2 == 0 && "Length should be even");
2533 return subvector(Builder, Val, 0, Len / 2);
2534}
2535
2536auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
2537 -> Value * {
2538 size_t Len = length(Val);
2539 assert(Len % 2 == 0 && "Length should be even");
2540 return subvector(Builder, Val, Len / 2, Len / 2);
2541}
2542
2543auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
2544 Value *Val1) const -> Value * {
2545 assert(Val0->getType() == Val1->getType());
2546 int Len = length(Val0);
2547 SmallVector<int, 128> Mask(2 * Len);
2548
2549 for (int i = 0; i != Len; ++i) {
2550 Mask[i] = 2 * i; // Even
2551 Mask[i + Len] = 2 * i + 1; // Odd
2552 }
2553 return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
2554}
2555
2556auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
2557 Value *Val1) const -> Value * {
2558 assert(Val0->getType() == Val1->getType());
2559 int Len = length(Val0);
2560 SmallVector<int, 128> Mask(2 * Len);
2561
2562 for (int i = 0; i != Len; ++i) {
2563 Mask[2 * i + 0] = i; // Val0
2564 Mask[2 * i + 1] = i + Len; // Val1
2565 }
2566 return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
2567}
2568
2569auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
2570 Intrinsic::ID IntID, Type *RetTy,
2571 ArrayRef<Value *> Args,
2572 ArrayRef<Type *> ArgTys,
2573 ArrayRef<Value *> MDSources) const
2574 -> Value * {
2575 auto getCast = [&](IRBuilderBase &Builder, Value *Val,
2576 Type *DestTy) -> Value * {
2577 Type *SrcTy = Val->getType();
2578 if (SrcTy == DestTy)
2579 return Val;
2580
2581 // Non-HVX type. It should be a scalar, and it should already have
2582 // a valid type.
2583 assert(HST.isTypeForHVX(SrcTy, /*IncludeBool=*/true));
2584
2585 Type *BoolTy = Type::getInt1Ty(F.getContext());
2586 if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
2587 return Builder.CreateBitCast(Val, DestTy, "cst");
2588
2589 // Predicate HVX vector.
2590 unsigned HwLen = HST.getVectorLength();
2591 Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
2592 : Intrinsic::hexagon_V6_pred_typecast_128B;
2593 return Builder.CreateIntrinsic(TC, {DestTy, Val->getType()}, {Val},
2594 /*FMFSource=*/nullptr, "cup");
2595 };
2596
2597 Function *IntrFn =
2598 Intrinsic::getOrInsertDeclaration(F.getParent(), IntID, ArgTys);
2599 FunctionType *IntrTy = IntrFn->getFunctionType();
2600
2601 SmallVector<Value *, 4> IntrArgs;
2602 for (int i = 0, e = Args.size(); i != e; ++i) {
2603 Value *A = Args[i];
2604 Type *T = IntrTy->getParamType(i);
2605 if (A->getType() != T) {
2606 IntrArgs.push_back(getCast(Builder, A, T));
2607 } else {
2608 IntrArgs.push_back(A);
2609 }
2610 }
2611 StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
2612 CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);
2613
2614 MemoryEffects ME = Call->getAttributes().getMemoryEffects();
2615 if (!ME.doesNotAccessMemory() && !ME.onlyAccessesInaccessibleMem())
2616 propagateMetadata(Call, MDSources);
2617
2618 Type *CallTy = Call->getType();
2619 if (RetTy == nullptr || CallTy == RetTy)
2620 return Call;
2621 // Scalar types should have RetTy matching the call return type.
2622 assert(HST.isTypeForHVX(CallTy, /*IncludeBool=*/true));
2623 return getCast(Builder, Call, RetTy);
2624}
2625
2626auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
2627 Value *Vec,
2628 unsigned ToWidth) const
2629 -> SmallVector<Value *> {
2630 // Break a vector of wide elements into a series of vectors with narrow
2631 // elements:
2632 // (...c0:b0:a0, ...c1:b1:a1, ...c2:b2:a2, ...)
2633 // -->
2634 // (a0, a1, a2, ...) // lowest "ToWidth" bits
2635 // (b0, b1, b2, ...) // the next lowest...
2636 // (c0, c1, c2, ...) // ...
2637 // ...
2638 //
2639 // The number of elements in each resulting vector is the same as
2640 // in the original vector.
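  // For example (illustrative): splitting a <32 x i64> vector with
  // ToWidth == 32 produces two <32 x i32> vectors: first the low words of
  // each element, then the high words.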
2641
2642 auto *VecTy = cast<VectorType>(Vec->getType());
2643 assert(VecTy->getElementType()->isIntegerTy());
2644 unsigned FromWidth = VecTy->getScalarSizeInBits();
2645 assert(isPowerOf2_32(ToWidth) && isPowerOf2_32(FromWidth));
2646 assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
2647 unsigned NumResults = FromWidth / ToWidth;
2648
2649 SmallVector<Value *> Results(NumResults);
2650 Results[0] = Vec;
2651 unsigned Length = length(VecTy);
2652
2653 // Do it by splitting in half, since those operations correspond to deal
2654 // instructions.
2655 auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
2656 // Take V = Results[Begin], split it in L, H.
2657 // Store Results[Begin] = L, Results[(Begin+End)/2] = H
2658 // Call itself recursively split(Begin, Half), split(Half+1, End)
2659 if (Begin + 1 == End)
2660 return;
2661
2662 Value *Val = Results[Begin];
2663 unsigned Width = Val->getType()->getScalarSizeInBits();
2664
2665 auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
2666 Value *VVal = Builder.CreateBitCast(Val, VTy, "cst");
2667
2668 Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
2669
2670 unsigned Half = (Begin + End) / 2;
2671 Results[Begin] = sublo(Builder, Res);
2672 Results[Half] = subhi(Builder, Res);
2673
2674 splitFunc(Begin, Half, splitFunc);
2675 splitFunc(Half, End, splitFunc);
2676 };
2677
2678 splitInHalf(0, NumResults, splitInHalf);
2679 return Results;
2680}
2681
2682auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
2683 ArrayRef<Value *> Values,
2684 VectorType *ToType) const
2685 -> Value * {
2686 assert(ToType->getElementType()->isIntegerTy());
2687
2688 // If the list of values does not have power-of-2 elements, append copies
2689 // of the sign bit to it, to make the size be 2^n.
2690 // The reason for this is that the values will be joined in pairs, because
2691 // otherwise the shuffles will result in convoluted code. With pairwise
2692 // joins, the shuffles will hopefully be folded into a perfect shuffle.
2693 // The output will need to be sign-extended to a type with element width
2694 // being a power-of-2 anyways.
2695 SmallVector<Value *> Inputs(Values);
2696
2697 unsigned ToWidth = ToType->getScalarSizeInBits();
2698 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
2699 assert(Width <= ToWidth);
2700 assert(isPowerOf2_32(Width) && isPowerOf2_32(ToWidth));
2701 unsigned Length = length(Inputs.front()->getType());
2702
2703 unsigned NeedInputs = ToWidth / Width;
2704 if (Inputs.size() != NeedInputs) {
2705 // Having too many inputs is ok: drop the high bits (usual wrap-around).
2706 // If there are too few, fill them with the sign bit.
2707 Value *Last = Inputs.back();
2708 Value *Sign = Builder.CreateAShr(
2709 Last, getConstSplat(Last->getType(), Width - 1), "asr");
2710 Inputs.resize(NeedInputs, Sign);
2711 }
2712
2713 while (Inputs.size() > 1) {
2714 Width *= 2;
2715 auto *VTy = VectorType::get(getIntTy(Width), Length, false);
2716 for (int i = 0, e = Inputs.size(); i < e; i += 2) {
2717 Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
2718 Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");
2719 }
2720 Inputs.resize(Inputs.size() / 2);
2721 }
2722
2723 assert(Inputs.front()->getType() == ToType);
2724 return Inputs.front();
2725}
2726
2727auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
2728 Value *Ptr1) const
2729 -> std::optional<int> {
2730 // Try SCEV first.
2731 const SCEV *Scev0 = SE.getSCEV(Ptr0);
2732 const SCEV *Scev1 = SE.getSCEV(Ptr1);
2733 const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
2734 if (auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
2735 APInt V = Const->getAPInt();
2736 if (V.isSignedIntN(8 * sizeof(int)))
2737 return static_cast<int>(V.getSExtValue());
2738 }
2739
2740 struct Builder : IRBuilder<> {
2741 Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
2742 ~Builder() {
2743 for (Instruction *I : llvm::reverse(ToErase))
2744 I->eraseFromParent();
2745 }
2746 SmallVector<Instruction *, 8> ToErase;
2747 };
2748
2749#define CallBuilder(B, F) \
2750 [&](auto &B_) { \
2751 Value *V = B_.F; \
2752 if (auto *I = dyn_cast<Instruction>(V)) \
2753 B_.ToErase.push_back(I); \
2754 return V; \
2755 }(B)
2756
2757 auto Simplify = [this](Value *V) {
2758 if (Value *S = simplify(V))
2759 return S;
2760 return V;
2761 };
2762
2763 auto StripBitCast = [](Value *V) {
2764 while (auto *C = dyn_cast<BitCastInst>(V))
2765 V = C->getOperand(0);
2766 return V;
2767 };
2768
2769 Ptr0 = StripBitCast(Ptr0);
2770 Ptr1 = StripBitCast(Ptr1);
2771 if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
2772 return std::nullopt;
2773
2774 auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
2775 auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
2776 if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
2777 return std::nullopt;
2778 if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
2779 return std::nullopt;
2780
2781 Builder B(Gep0->getParent());
2782 int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);
2783
2784 // FIXME: for now only check GEPs with a single index.
2785 if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
2786 return std::nullopt;
2787
2788 Value *Idx0 = Gep0->getOperand(1);
2789 Value *Idx1 = Gep1->getOperand(1);
2790
2791 // First, try to simplify the subtraction directly.
2792 if (auto *Diff = dyn_cast<ConstantInt>(
2793 Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
2794 return Diff->getSExtValue() * Scale;
2795
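  // Otherwise, split each index into its known and unknown bits (using a
  // bitmask common to both), and require that both partial differences
  // fold to constants; the result is their sum scaled by the element size.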
2796 KnownBits Known0 = getKnownBits(Idx0, Gep0);
2797 KnownBits Known1 = getKnownBits(Idx1, Gep1);
2798 APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
2799 if (Unknown.isAllOnes())
2800 return std::nullopt;
2801
2802 Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
2803 Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
2804 Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
2805 Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
2806 int Diff0 = 0;
2807 if (auto *C = dyn_cast<ConstantInt>(SubU)) {
2808 Diff0 = C->getSExtValue();
2809 } else {
2810 return std::nullopt;
2811 }
2812
2813 Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
2814 Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
2815 Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
2816 Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
2817 int Diff1 = 0;
2818 if (auto *C = dyn_cast<ConstantInt>(SubK)) {
2819 Diff1 = C->getSExtValue();
2820 } else {
2821 return std::nullopt;
2822 }
2823
2824 return (Diff0 + Diff1) * Scale;
2825
2826#undef CallBuilder
2827}
2828
2829auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
2830 const Instruction *CtxI) const
2831 -> unsigned {
2832 return ComputeMaxSignificantBits(V, DL, &AC, CtxI, &DT);
2833}
2834
2835auto HexagonVectorCombine::getKnownBits(const Value *V,
2836 const Instruction *CtxI) const
2837 -> KnownBits {
2838 return computeKnownBits(V, DL, &AC, CtxI, &DT);
2839}
2840
2841auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
2842 if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
2843 In.isFenceLike() || In.mayReadOrWriteMemory()) {
2844 return false;
2845 }
2846 if (isa<CallBase>(In) || isa<AllocaInst>(In))
2847 return false;
2848 return true;
2849}
2850
2851template <typename T>
2852auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
2853 BasicBlock::const_iterator To,
2854 const T &IgnoreInsts) const
2855 -> bool {
2856 auto getLocOrNone =
2857 [this](const Instruction &I) -> std::optional<MemoryLocation> {
2858 if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
2859 switch (II->getIntrinsicID()) {
2860 case Intrinsic::masked_load:
2861 return MemoryLocation::getForArgument(II, 0, TLI);
2862 case Intrinsic::masked_store:
2863 return MemoryLocation::getForArgument(II, 1, TLI);
2864 }
2865 }
2866 return std::nullopt;
2867 };
2868
2869 // The source and the destination must be in the same basic block.
2870 const BasicBlock &Block = *In.getParent();
2871 assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
2872 // No PHIs.
2873 if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
2874 return false;
2875
2876 if (!mayHaveNonDefUseDependency(In))
2877 return true;
2878 bool MayWrite = In.mayWriteToMemory();
2879 auto MaybeLoc = getLocOrNone(In);
2880
2881 auto From = In.getIterator();
2882 if (From == To)
2883 return true;
2884 bool MoveUp = (To != Block.end() && To->comesBefore(&In));
2885 auto Range =
2886 MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
2887 for (auto It = Range.first; It != Range.second; ++It) {
2888 const Instruction &I = *It;
2889 if (llvm::is_contained(IgnoreInsts, &I))
2890 continue;
2891 // assume intrinsic can be ignored
2892 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
2893 if (II->getIntrinsicID() == Intrinsic::assume)
2894 continue;
2895 }
2896 // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
2897 if (I.mayThrow())
2898 return false;
2899 if (auto *CB = dyn_cast<CallBase>(&I)) {
2900 if (!CB->hasFnAttr(Attribute::WillReturn))
2901 return false;
2902 if (!CB->hasFnAttr(Attribute::NoSync))
2903 return false;
2904 }
2905 if (I.mayReadOrWriteMemory()) {
2906 auto MaybeLocI = getLocOrNone(I);
2907 if (MayWrite || I.mayWriteToMemory()) {
2908 if (!MaybeLoc || !MaybeLocI)
2909 return false;
2910 if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
2911 return false;
2912 }
2913 }
2914 }
2915 return true;
2916}
2917
2918auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
2919 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2920 return VecTy->getElementType() == getByteTy();
2921 return false;
2922}
2923
2924auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
2925 Value *Hi, int Start,
2926 int Length) const -> Value * {
2927 assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
2928 SmallVector<int, 128> SMask(Length);
2929 std::iota(SMask.begin(), SMask.end(), Start);
2930 return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
2931}
2932
2933// Pass management.
2934
2935namespace {
2936class HexagonVectorCombineLegacy : public FunctionPass {
2937public:
2938 static char ID;
2939
2940 HexagonVectorCombineLegacy() : FunctionPass(ID) {}
2941
2942 StringRef getPassName() const override { return "Hexagon Vector Combine"; }
2943
2944 void getAnalysisUsage(AnalysisUsage &AU) const override {
2945 AU.setPreservesCFG();
2946 AU.addRequired<AAResultsWrapperPass>();
2947 AU.addRequired<AssumptionCacheTracker>();
2948 AU.addRequired<DominatorTreeWrapperPass>();
2949 AU.addRequired<ScalarEvolutionWrapperPass>();
2950 AU.addRequired<TargetLibraryInfoWrapperPass>();
2951 AU.addRequired<TargetPassConfig>();
2952 FunctionPass::getAnalysisUsage(AU);
2953 }
2954
2955 bool runOnFunction(Function &F) override {
2956 if (skipFunction(F))
2957 return false;
2958 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2959 AssumptionCache &AC =
2960 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2961 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2962 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2963 TargetLibraryInfo &TLI =
2964 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2965 auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
2966 HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
2967 return HVC.run();
2968 }
2969};
2970} // namespace
2971
2972char HexagonVectorCombineLegacy::ID = 0;
2973
2974INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
2975 "Hexagon Vector Combine", false, false)
2976 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2977 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2978 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2979 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2980 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2981 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
2982 INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
2983 "Hexagon Vector Combine", false, false)
2984
2985 FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
2986 return new HexagonVectorCombineLegacy();
2987}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Prepare AGPR Alloc
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static IntegerType * getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define LLVM_ATTRIBUTE_UNUSED
Definition Compiler.h:298
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
hexagon bit simplify
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
#define CallBuilder(B, F)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:546
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
#define H(x, y, z)
Definition MD5.cpp:57
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
static bool isUndef(const MachineInstr &MI)
This file contains the declarations for metadata subclasses.
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
Remove Loads Into Fake Uses
static ConstantInt * getConstInt(MDNode *MD, unsigned NumOp)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
Target-Independent Code Generator Pass Configuration Options pass.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
AttributeList getAttributes() const
Return the attributes for this call.
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ ICMP_NE
not equal
Definition InstrTypes.h:700
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string and methods for querying it. (Definition: DataLayout.h:63)
iterator_range< iterator > children()
NodeT * getBlock() const
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
Legacy analysis pass which computes a DominatorTree. (Definition: Dominators.h:322)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree. (Definition: Dominators.h:165)
FunctionPass class - This class is used to implement most global optimizations. (Definition: Pass.h:314)
bool empty() const (Definition: Function.h:857)
FunctionType * getFunctionType() const - Returns the FunctionType for me. (Definition: Function.h:209)
const BasicBlock & back() const (Definition: Function.h:860)
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
unsigned getVectorLength() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
Intrinsic::ID getIntrinsicId(unsigned Opc) const
Common base class shared among various IRBuilders. (Definition: IRBuilder.h:114)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="") - Return a vector value that contains V broadcasted to NumElts elements.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="") (Definition: IRBuilder.h:2618)
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="") (Definition: IRBuilder.h:2094)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false) (Definition: IRBuilder.h:1513)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="") (Definition: IRBuilder.h:2333)
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="") - Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr) (Definition: IRBuilder.h:2463)
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false) (Definition: IRBuilder.h:1420)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="") (Definition: IRBuilder.h:2204)
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false) (Definition: IRBuilder.h:1492)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false) (Definition: IRBuilder.h:2082)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="") (Definition: IRBuilder.h:2593)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="") (Definition: IRBuilder.h:1551)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false) (Definition: IRBuilder.h:1403)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="") (Definition: IRBuilder.h:2194)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr) (Definition: IRBuilder.h:2508)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false) (Definition: IRBuilder.h:2068)
void SetInsertPoint(BasicBlock *TheBB) - This specifies that created instructions should be appended to the end of the specified block. (Definition: IRBuilder.h:207)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false) (Definition: IRBuilder.h:1532)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="") (Definition: IRBuilder.h:2439)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false) (Definition: IRBuilder.h:1573)
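The IRBuilder entries above are the instruction-creation calls this pass leans on. A self-contained sketch of the general usage pattern; the widening multiply-and-shift computed here is only an illustration, not the pass's actual HVX lowering:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"

// Emit trunc((sext A * sext B) >> Shift) at the end of BB and return it.
// Illustrative helper; names and the computed expression are made up.
llvm::Value *emitWidenedMulShift(llvm::BasicBlock *BB, llvm::Value *A,
                                 llvm::Value *B, unsigned Shift) {
  llvm::IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB); // append new instructions to the end of BB
  llvm::Type *WideTy = A->getType()->getWithNewBitWidth(
      2 * A->getType()->getScalarSizeInBits());
  llvm::Value *WA = Builder.CreateSExt(A, WideTy, "wa");
  llvm::Value *WB = Builder.CreateSExt(B, WideTy, "wb");
  llvm::Value *Mul = Builder.CreateMul(WA, WB, "mul");
  llvm::Value *Shr =
      Builder.CreateAShr(Mul, llvm::ConstantInt::get(WideTy, Shift), "shr");
  return Builder.CreateTrunc(Shr, A->getType(), "res");
}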
const char * getOpcodeName() const
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits) - This static method is the primary way of constructing an IntegerType. (Definition: Type.cpp:319)
An instruction for reading from memory.
bool doesNotAccessMemory() const - Whether this function accesses no memory. (Definition: ModRef.h:215)
bool onlyAccessesInaccessibleMem() const - Whether this function only (at most) accesses inaccessible memory. (Definition: ModRef.h:234)
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI) - Return a location representing a particular argument of a call.
static LLVM_ABI PoisonValue * get(Type *T) - Static factory methods - Return a 'poison' object of the specified type.
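The AliasAnalysis and MemoryLocation entries above are the building blocks for the interference checks AlignVectors performs when it regroups loads and stores. A hedged sketch of that style of query; the helper name is made up and the logic is deliberately conservative:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instruction.h"
#include <optional>

// Conservatively report whether two memory instructions may touch the same
// memory. Illustrative only; not the exact aliasing logic of this pass.
bool mayInterfere(llvm::AAResults &AA, const llvm::Instruction *A,
                  const llvm::Instruction *B) {
  std::optional<llvm::MemoryLocation> LocA = llvm::MemoryLocation::getOrNone(A);
  std::optional<llvm::MemoryLocation> LocB = llvm::MemoryLocation::getOrNone(B);
  if (!LocA || !LocB)
    return true; // unknown location: assume interference
  return !AA.isNoAlias(*LocA, *LocB);
}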
The main scalar evolution driver.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInfo-derived member variable.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed. (Definition: Type.h:45)
bool isVectorTy() const - True if this is an instance of VectorType. (Definition: Type.h:273)
bool isIntOrIntVectorTy() const - Return true if this is an integer type or a vector of integer types. (Definition: Type.h:246)
Type * getScalarType() const - If this is a vector type, return the element type, otherwise return 'this'. (Definition: Type.h:352)
LLVMContext & getContext() const - Return the LLVMContext in which this type was uniqued. (Definition: Type.h:128)
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY - If this is a vector type, return the getPrimitiveSizeInBits value for the element type. (Definition: Type.cpp:231)
bool isIntegerTy() const - True if this is an instance of IntegerType. (Definition: Type.h:240)
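A brief illustration of the Type queries listed above; the 512-bit threshold and the helper name are made up for the example, not values taken from this pass:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"

// True if Ty is a fixed-width integer vector of at most MaxBits total bits.
bool isSmallIntVector(llvm::Type *Ty, unsigned MaxBits = 512) {
  if (!Ty->isVectorTy() || !Ty->isIntOrIntVectorTy())
    return false;
  auto *VecTy = llvm::dyn_cast<llvm::FixedVectorType>(Ty);
  if (!VecTy)
    return false; // scalable vectors are not considered here
  unsigned Bits = VecTy->getNumElements() * Ty->getScalarSizeInBits();
  return Bits <= MaxBits;
}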
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
LLVM Value Representation. (Definition: Value.h:75)
Type * getType() const - All values are typed, get the type of this value. (Definition: Value.h:256)
LLVM_ABI void replaceAllUsesWith(Value *V) - Change all uses of this to point to a new Value. (Definition: Value.cpp:546)
LLVM_ABI StringRef getName() const - Return a constant reference to the value's name. (Definition: Value.cpp:322)
const ParentTy * getParent() const (Definition: ilist_node.h:34)
self_iterator getIterator() (Definition: ilist_node.h:134)
This class implements an extremely fast bulk output stream that can only output to a stream. (Definition: raw_ostream.h:53)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
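The PatternMatch helpers above are how HvxIdioms-style recognition is usually written. A minimal sketch of the matching idiom; the particular shape matched here (a multiply feeding a right shift by a constant) is illustrative and not necessarily one of the idioms this file recognizes verbatim:

#include "llvm/IR/Constants.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

// Match V against (X * Y) >> ShAmt, where the shift may be lshr or ashr.
// Returns true and binds the out-parameters on success. Illustrative only.
bool matchMulThenShift(llvm::Value *V, llvm::Value *&X, llvm::Value *&Y,
                       llvm::ConstantInt *&ShAmt) {
  using namespace llvm::PatternMatch;
  return match(V, m_Shr(m_Mul(m_Value(X), m_Value(Y)), m_ConstantInt(ShAmt)));
}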
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
friend class Instruction - Iterator for Instructions in a BasicBlock. (Definition: BasicBlock.h:73)
LLVM_ABI Instruction * getTerminator() const
LLVM_ABI Instruction & front() const
FunctionPass * createHexagonVectorCombineLegacyPass()
bool all_of(R &&range, UnaryPredicate P) - Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly. (Definition: STLExtras.h:1727)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr) - Get the size of a range. (Definition: STLExtras.h:1685)
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>()) - If the specified value is a trivially dead instruction, delete it. (Definition: Local.cpp:533)
decltype(auto) dyn_cast(const From &Val) - dyn_cast<X> - Return the argument parameter cast to the specified type. (Definition: Casting.h:649)
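dyn_cast together with RecursivelyDeleteTriviallyDeadInstructions is the standard cleanup idiom once a value has been rewritten. A hedged sketch; the helper name is made up, and this only approximates how the pass performs its replacements:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/Local.h"

// Redirect all uses of Old to New, then delete Old (and any operands that
// become dead) if it is now trivially dead.
void replaceAndClean(llvm::Value *Old, llvm::Value *New) {
  Old->replaceAllUsesWith(New);
  if (auto *I = llvm::dyn_cast<llvm::Instruction>(Old))
    llvm::RecursivelyDeleteTriviallyDeadInstructions(I);
}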
void append_range(Container &C, Range &&R) - Wrapper function to append range R to container C. (Definition: STLExtras.h:2138)
constexpr bool isPowerOf2_64(uint64_t Value) - Return true if the argument is a power of two > 0 (64 bit edition.) (Definition: MathExtras.h:293)
MemoryEffectsBase< IRMemLocation > MemoryEffects - Summary of how a function affects memory in the program. (Definition: ModRef.h:296)
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P) - Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly. (Definition: STLExtras.h:1779)
unsigned Log2_64(uint64_t Value) - Return the floor log base 2 of the specified value, -1 if the value is zero. (Definition: MathExtras.h:342)
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges) - Returns a concatenated range across two or more ranges. (Definition: STLExtras.h:1180)
uint64_t PowerOf2Ceil(uint64_t A) - Returns the power of two which is greater than or equal to the given value. (Definition: MathExtras.h:390)
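The power-of-two helpers above (isPowerOf2_64, Log2_64, PowerOf2Ceil, plus isPowerOf2_32 below) are the usual tools for size calculations in this kind of pass. A tiny standalone sketch with made-up example values:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

// Round Bytes up to a power of two and return the log2 of the result.
// Assumes Bytes != 0; illustrative helper only.
unsigned log2RoundedUp(uint64_t Bytes) {
  uint64_t Rounded = llvm::PowerOf2Ceil(Bytes); // e.g. 100 -> 128
  assert(llvm::isPowerOf2_64(Rounded) && "expected a power of two");
  return llvm::Log2_64(Rounded);                // e.g. 128 -> 7
}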
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
DomTreeNodeBase< BasicBlock > DomTreeNode (Definition: Dominators.h:95)
auto reverse(ContainerTy &&C) (Definition: STLExtras.h:420)
constexpr bool isPowerOf2_32(uint32_t Value) - Return true if the argument is a power of two > 0. (Definition: MathExtras.h:288)
void sort(IteratorTy Start, IteratorTy End) (Definition: STLExtras.h:1652)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0) - Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
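computeKnownBits (together with ComputeMaxSignificantBits further down) backs the value-range reasoning this kind of pass performs. A hedged sketch of a typical query; the helper is illustrative:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"

// True if at least NumBits low bits of V are known to be zero.
bool lowBitsKnownZero(const llvm::Value *V, unsigned NumBits,
                      const llvm::DataLayout &DL) {
  llvm::KnownBits Known(V->getType()->getScalarSizeInBits());
  llvm::computeKnownBits(V, Known, DL); // optional AC/CxtI/DT omitted here
  return Known.countMinTrailingZeros() >= NumBits;
}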
LLVM_ABI raw_ostream & dbgs() - dbgs() - This returns a reference to a raw_ostream for debugging messages. (Definition: Debug.cpp:207)
bool none_of(R &&Range, UnaryPredicate P) - Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly. (Definition: STLExtras.h:1741)
class LLVM_GSL_OWNER SmallVector - Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val) - isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments. (Definition: Casting.h:548)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
uint64_t alignTo(uint64_t Size, Align A) - Returns a multiple of A needed to store Size bytes. (Definition: Alignment.h:155)
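alignTo, used with the Align type described near the end of this list, is the usual way padded sizes are computed. A brief sketch; the 128-byte alignment is just an example value, not necessarily what the pass uses:

#include "llvm/Support/Alignment.h"
#include <cstdint>

// Pad Size bytes out to a multiple of a 128-byte alignment.
uint64_t paddedSize(uint64_t Size) {
  llvm::Align VecAlign(128);            // must be a non-zero power of two
  return llvm::alignTo(Size, VecAlign); // e.g. 200 -> 256
}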
auto max_element(R &&Range) - Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly. (Definition: STLExtras.h:2032)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val) - cast<X> - Return the argument parameter cast to the specified type. (Definition: Casting.h:565)
void erase_if(Container &C, UnaryPredicate P) - Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to ... (Definition: STLExtras.h:2122)
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool mayHaveNonDefUseDependency(const Instruction &I) - Returns true if the result or effects of the given instruction I depend on values not reachable through the def-use graph.
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS) - Implement std::swap in terms of BitVector swap. (Definition: BitVector.h:853)
This struct is a compact representation of a valid (non-zero power of two) alignment. (Definition: Alignment.h:39)
uint64_t value() const - This is a hole in the type system and should not be abused. (Definition: Alignment.h:85)
bool isSimple() const - Test if the given EVT is simple (as opposed to being extended). (Definition: ValueTypes.h:137)
TypeSize getSizeInBits() const - Return the size of the specified value type in bits. (Definition: ValueTypes.h:368)
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false) - Return the value type corresponding to the specified type.
MVT getSimpleVT() const - Return the SimpleValueType held in the specified simple EVT. (Definition: ValueTypes.h:311)
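Finally, the EVT/MVT entries are the bridge between IR types and the subtarget's notion of a legal HVX register (see isHVXVectorType/isTypeForHVX above). A hedged sketch of that style of query; the helper name and the bit threshold are illustrative, not taken from this file:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"

// True if Ty corresponds to a simple, fixed-width vector value type of at
// least MinBits bits. Stand-in for a subtarget HVX legality check.
bool isWideSimpleVector(llvm::Type *Ty, unsigned MinBits) {
  llvm::EVT VT = llvm::EVT::getEVT(Ty, /*HandleUnknown=*/true);
  if (!VT.isSimple() || !VT.isVector() || VT.isScalableVector())
    return false;
  return VT.getSizeInBits().getFixedValue() >= MinBits;
}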