File: llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
Warning: line 372, column 10: Called C++ object pointer is null
1 | //===-- HexagonVectorCombine.cpp ------------------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // HexagonVectorCombine is a utility class implementing a variety of functions | |||
9 | // that assist in vector-based optimizations. | |||
10 | // | |||
11 | // AlignVectors: replace unaligned vector loads and stores with aligned ones. | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "llvm/ADT/APInt.h" | |||
15 | #include "llvm/ADT/ArrayRef.h" | |||
16 | #include "llvm/ADT/DenseMap.h" | |||
17 | #include "llvm/ADT/Optional.h" | |||
18 | #include "llvm/ADT/STLExtras.h" | |||
19 | #include "llvm/ADT/SmallVector.h" | |||
20 | #include "llvm/Analysis/AliasAnalysis.h" | |||
21 | #include "llvm/Analysis/AssumptionCache.h" | |||
22 | #include "llvm/Analysis/InstructionSimplify.h" | |||
23 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
24 | #include "llvm/Analysis/ValueTracking.h" | |||
25 | #include "llvm/Analysis/VectorUtils.h" | |||
26 | #include "llvm/CodeGen/TargetPassConfig.h" | |||
27 | #include "llvm/IR/Dominators.h" | |||
28 | #include "llvm/IR/IRBuilder.h" | |||
29 | #include "llvm/IR/IntrinsicInst.h" | |||
30 | #include "llvm/IR/Intrinsics.h" | |||
31 | #include "llvm/IR/IntrinsicsHexagon.h" | |||
32 | #include "llvm/IR/Metadata.h" | |||
33 | #include "llvm/InitializePasses.h" | |||
34 | #include "llvm/Pass.h" | |||
35 | #include "llvm/Support/KnownBits.h" | |||
36 | #include "llvm/Support/MathExtras.h" | |||
37 | #include "llvm/Support/raw_ostream.h" | |||
38 | #include "llvm/Target/TargetMachine.h" | |||
39 | ||||
40 | #include "HexagonSubtarget.h" | |||
41 | #include "HexagonTargetMachine.h" | |||
42 | ||||
43 | #include <algorithm> | |||
44 | #include <deque> | |||
45 | #include <map> | |||
46 | #include <set> | |||
47 | #include <utility> | |||
48 | #include <vector> | |||
49 | ||||
50 | #define DEBUG_TYPE "hexagon-vc"
51 | ||||
52 | using namespace llvm; | |||
53 | ||||
54 | namespace { | |||
55 | class HexagonVectorCombine { | |||
56 | public: | |||
57 | HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_, | |||
58 | DominatorTree &DT_, TargetLibraryInfo &TLI_, | |||
59 | const TargetMachine &TM_) | |||
60 | : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_), | |||
61 | TLI(TLI_), | |||
62 | HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {} | |||
63 | ||||
64 | bool run(); | |||
65 | ||||
66 | // Common integer type. | |||
67 | IntegerType *getIntTy() const; | |||
68 | // Byte type: either scalar (when ElemCount = 0), or vector with given
69 | // element count.
70 | Type *getByteTy(int ElemCount = 0) const;
71 | // Boolean type: either scalar (when ElemCount = 0), or vector with given
72 | // element count.
73 | Type *getBoolTy(int ElemCount = 0) const;
74 | // Create a ConstantInt of type returned by getIntTy with the value Val. | |||
75 | ConstantInt *getConstInt(int Val) const; | |||
76 | // Get the integer value of Val, if it exists.
77 | Optional<APInt> getIntValue(const Value *Val) const;
78 | // Is Val a constant 0, or a vector of 0s?
79 | bool isZero(const Value *Val) const;
80 | // Is Val an undef value?
81 | bool isUndef(const Value *Val) const;
82 | ||||
83 | int getSizeOf(const Value *Val) const; | |||
84 | int getSizeOf(const Type *Ty) const; | |||
85 | int getTypeAlignment(Type *Ty) const; | |||
86 | ||||
87 | VectorType *getByteVectorTy(int ScLen) const; | |||
88 | Constant *getNullValue(Type *Ty) const; | |||
89 | Constant *getFullValue(Type *Ty) const; | |||
90 | ||||
91 | Value *insertb(IRBuilder<> &Builder, Value *Dest, Value *Src, int Start, | |||
92 | int Length, int Where) const; | |||
93 | Value *vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const; | |||
94 | Value *vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const; | |||
95 | Value *concat(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) const; | |||
96 | Value *vresize(IRBuilder<> &Builder, Value *Val, int NewSize, | |||
97 | Value *Pad) const; | |||
98 | Value *rescale(IRBuilder<> &Builder, Value *Mask, Type *FromTy, | |||
99 | Type *ToTy) const; | |||
100 | Value *vlsb(IRBuilder<> &Builder, Value *Val) const; | |||
101 | Value *vbytes(IRBuilder<> &Builder, Value *Val) const; | |||
102 | ||||
103 | Value *createHvxIntrinsic(IRBuilder<> &Builder, Intrinsic::ID IntID, | |||
104 | Type *RetTy, ArrayRef<Value *> Args) const; | |||
105 | ||||
106 | Optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const; | |||
107 | ||||
108 | template <typename T = std::vector<Instruction *>> | |||
109 | bool isSafeToMoveBeforeInBB(const Instruction &In, | |||
110 | BasicBlock::const_iterator To, | |||
111 | const T &Ignore = {}) const; | |||
112 | ||||
113 | Function &F; | |||
114 | const DataLayout &DL; | |||
115 | AliasAnalysis &AA; | |||
116 | AssumptionCache &AC; | |||
117 | DominatorTree &DT; | |||
118 | TargetLibraryInfo &TLI; | |||
119 | const HexagonSubtarget &HST; | |||
120 | ||||
121 | private: | |||
122 | #ifndef NDEBUG
123 | // These two functions are only used for assertions at the moment. | |||
124 | bool isByteVecTy(Type *Ty) const; | |||
125 | bool isSectorTy(Type *Ty) const; | |||
126 | #endif | |||
127 | Value *getElementRange(IRBuilder<> &Builder, Value *Lo, Value *Hi, int Start, | |||
128 | int Length) const; | |||
129 | }; | |||
130 | ||||
131 | class AlignVectors { | |||
132 | public: | |||
133 | AlignVectors(HexagonVectorCombine &HVC_) : HVC(HVC_) {} | |||
134 | ||||
135 | bool run(); | |||
136 | ||||
137 | private: | |||
138 | using InstList = std::vector<Instruction *>; | |||
139 | ||||
140 | struct Segment { | |||
141 | void *Data; | |||
142 | int Start; | |||
143 | int Size; | |||
144 | }; | |||
145 | ||||
146 | struct AddrInfo { | |||
147 | AddrInfo(const AddrInfo &) = default; | |||
148 | AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T, | |||
149 | Align H) | |||
150 | : Inst(I), Addr(A), ValTy(T), HaveAlign(H), | |||
151 | NeedAlign(HVC.getTypeAlignment(ValTy)) {} | |||
152 | ||||
153 | // XXX: add Size member? | |||
154 | Instruction *Inst; | |||
155 | Value *Addr; | |||
156 | Type *ValTy; | |||
157 | Align HaveAlign; | |||
158 | Align NeedAlign; | |||
159 | int Offset = 0; // Offset (in bytes) from the first member of the | |||
160 | // containing AddrList. | |||
161 | }; | |||
162 | using AddrList = std::vector<AddrInfo>; | |||
163 | ||||
164 | struct InstrLess { | |||
165 | bool operator()(const Instruction *A, const Instruction *B) const { | |||
166 | return A->comesBefore(B); | |||
167 | } | |||
168 | }; | |||
169 | using DepList = std::set<Instruction *, InstrLess>; | |||
170 | ||||
171 | struct MoveGroup { | |||
172 | MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load) | |||
173 | : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {} | |||
174 | Instruction *Base; // Base instruction of the parent address group. | |||
175 | InstList Main; // Main group of instructions. | |||
176 | InstList Deps; // List of dependencies. | |||
177 | bool IsHvx; // Is this a group of HVX instructions?
178 | bool IsLoad; // Is this a load group? | |||
179 | }; | |||
180 | using MoveList = std::vector<MoveGroup>; | |||
181 | ||||
182 | struct ByteSpan { | |||
183 | struct Segment { | |||
184 | // Segment of a Value: 'Len' bytes starting at byte 'Begin'. | |||
185 | Segment(Value *Val, int Begin, int Len) | |||
186 | : Val(Val), Start(Begin), Size(Len) {} | |||
187 | Segment(const Segment &Seg) = default; | |||
188 | Value *Val; // Value representable as a sequence of bytes. | |||
189 | int Start; // First byte of the value that belongs to the segment. | |||
190 | int Size; // Number of bytes in the segment. | |||
191 | }; | |||
192 | ||||
193 | struct Block { | |||
194 | Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {} | |||
195 | Block(Value *Val, int Off, int Len, int Pos) | |||
196 | : Seg(Val, Off, Len), Pos(Pos) {} | |||
197 | Block(const Block &Blk) = default; | |||
198 | Segment Seg; // Value segment. | |||
199 | int Pos; // Position (offset) of the segment in the Block. | |||
200 | }; | |||
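// (Illustrative note, not from the original source.) A Block places a slice
// of a Value into the span's byte coordinate system: assuming a hypothetical
// 8-byte value V, Block(V, /*Off=*/2, /*Len=*/4, /*Pos=*/16) says that bytes
// [2..6) of V occupy bytes [16..20) of the enclosing ByteSpan.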
201 | ||||
202 | int extent() const; | |||
203 | ByteSpan section(int Start, int Length) const; | |||
204 | ByteSpan &shift(int Offset); | |||
205 | SmallVector<Value *, 8> values() const; | |||
206 | ||||
207 | int size() const { return Blocks.size(); } | |||
208 | Block &operator[](int i) { return Blocks[i]; } | |||
209 | ||||
210 | std::vector<Block> Blocks; | |||
211 | ||||
212 | using iterator = decltype(Blocks)::iterator; | |||
213 | iterator begin() { return Blocks.begin(); } | |||
214 | iterator end() { return Blocks.end(); } | |||
215 | using const_iterator = decltype(Blocks)::const_iterator; | |||
216 | const_iterator begin() const { return Blocks.begin(); } | |||
217 | const_iterator end() const { return Blocks.end(); } | |||
218 | }; | |||
219 | ||||
220 | Align getAlignFromValue(const Value *V) const; | |||
221 | Optional<MemoryLocation> getLocation(const Instruction &In) const; | |||
222 | Optional<AddrInfo> getAddrInfo(Instruction &In) const; | |||
223 | bool isHvx(const AddrInfo &AI) const; | |||
224 | ||||
225 | Value *getPayload(Value *Val) const; | |||
226 | Value *getMask(Value *Val) const; | |||
227 | Value *getPassThrough(Value *Val) const; | |||
228 | ||||
229 | Value *createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy, | |||
230 | int Adjust) const; | |||
231 | Value *createAlignedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy, | |||
232 | int Alignment) const; | |||
233 | Value *createAlignedLoad(IRBuilder<> &Builder, Type *ValTy, Value *Ptr, | |||
234 | int Alignment, Value *Mask, Value *PassThru) const; | |||
235 | Value *createAlignedStore(IRBuilder<> &Builder, Value *Val, Value *Ptr, | |||
236 | int Alignment, Value *Mask) const; | |||
237 | ||||
238 | bool createAddressGroups(); | |||
239 | MoveList createLoadGroups(const AddrList &Group) const; | |||
240 | MoveList createStoreGroups(const AddrList &Group) const; | |||
241 | bool move(const MoveGroup &Move) const; | |||
242 | bool realignGroup(const MoveGroup &Move) const; | |||
243 | ||||
244 | friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI); | |||
245 | friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG); | |||
246 | friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS); | |||
247 | ||||
248 | std::map<Instruction *, AddrList> AddrGroups; | |||
249 | HexagonVectorCombine &HVC; | |||
250 | }; | |||
251 | ||||
252 | LLVM_ATTRIBUTE_UNUSED
253 | raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { | |||
254 | OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n'; | |||
255 | OS << "Addr: " << *AI.Addr << '\n'; | |||
256 | OS << "Type: " << *AI.ValTy << '\n'; | |||
257 | OS << "HaveAlign: " << AI.HaveAlign.value() << '\n'; | |||
258 | OS << "NeedAlign: " << AI.NeedAlign.value() << '\n'; | |||
259 | OS << "Offset: " << AI.Offset; | |||
260 | return OS; | |||
261 | } | |||
262 | ||||
263 | LLVM_ATTRIBUTE_UNUSED
264 | raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { | |||
265 | OS << "Main\n"; | |||
266 | for (Instruction *I : MG.Main) | |||
267 | OS << " " << *I << '\n'; | |||
268 | OS << "Deps\n"; | |||
269 | for (Instruction *I : MG.Deps) | |||
270 | OS << " " << *I << '\n'; | |||
271 | return OS; | |||
272 | } | |||
273 | ||||
274 | LLVM_ATTRIBUTE_UNUSED
275 | raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { | |||
276 | OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n'; | |||
277 | for (const AlignVectors::ByteSpan::Block &B : BS) { | |||
278 | OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] " | |||
279 | << *B.Seg.Val << '\n'; | |||
280 | } | |||
281 | OS << ']'; | |||
282 | return OS; | |||
283 | } | |||
284 | ||||
285 | } // namespace | |||
286 | ||||
287 | namespace { | |||
288 | ||||
289 | template <typename T> T *getIfUnordered(T *MaybeT) { | |||
290 | return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr; | |||
291 | } | |||
292 | template <typename T> T *isCandidate(Instruction *In) { | |||
293 | return dyn_cast<T>(In); | |||
294 | } | |||
295 | template <> LoadInst *isCandidate<LoadInst>(Instruction *In) { | |||
296 | return getIfUnordered(dyn_cast<LoadInst>(In)); | |||
297 | } | |||
298 | template <> StoreInst *isCandidate<StoreInst>(Instruction *In) { | |||
299 | return getIfUnordered(dyn_cast<StoreInst>(In)); | |||
300 | } | |||
301 | ||||
302 | #if !defined(_MSC_VER) || _MSC_VER >= 1926 | |||
303 | // VS2017 and some versions of VS2019 have trouble compiling this: | |||
304 | // error C2976: 'std::map': too few template arguments | |||
305 | // VS 2019 16.x is known to work, except for 16.4/16.5 (MSC_VER 1924/1925) | |||
306 | template <typename Pred, typename... Ts> | |||
307 | void erase_if(std::map<Ts...> &map, Pred p) | |||
308 | #else | |||
309 | template <typename Pred, typename T, typename U> | |||
310 | void erase_if(std::map<T, U> &map, Pred p) | |||
311 | #endif | |||
312 | { | |||
313 | for (auto i = map.begin(), e = map.end(); i != e;) { | |||
314 | if (p(*i)) | |||
315 | i = map.erase(i); | |||
316 | else | |||
317 | i = std::next(i); | |||
318 | } | |||
319 | } | |||
320 | ||||
321 | // Forward other erase_ifs to the LLVM implementations. | |||
322 | template <typename Pred, typename T> void erase_if(T &&container, Pred p) { | |||
323 | llvm::erase_if(std::forward<T>(container), p); | |||
324 | } | |||
325 | ||||
326 | } // namespace | |||
327 | ||||
328 | // --- Begin AlignVectors | |||
329 | ||||
330 | auto AlignVectors::ByteSpan::extent() const -> int { | |||
331 | if (size() == 0) | |||
332 | return 0; | |||
333 | int Min = Blocks[0].Pos; | |||
334 | int Max = Blocks[0].Pos + Blocks[0].Seg.Size; | |||
335 | for (int i = 1, e = size(); i != e; ++i) { | |||
336 | Min = std::min(Min, Blocks[i].Pos); | |||
337 | Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size); | |||
338 | } | |||
339 | return Max - Min; | |||
340 | } | |||
341 | ||||
342 | auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan { | |||
343 | ByteSpan Section; | |||
344 | for (const ByteSpan::Block &B : Blocks) { | |||
345 | int L = std::max(B.Pos, Start); // Left end. | |||
346 | int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1. | |||
347 | if (L < R) { | |||
348 | // How much to chop off the beginning of the segment: | |||
349 | int Off = L > B.Pos ? L - B.Pos : 0; | |||
350 | Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L); | |||
351 | } | |||
352 | } | |||
353 | return Section; | |||
354 | } | |||
355 | ||||
356 | auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & { | |||
357 | for (Block &B : Blocks) | |||
358 | B.Pos += Offset; | |||
359 | return *this; | |||
360 | } | |||
361 | ||||
362 | auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> { | |||
363 | SmallVector<Value *, 8> Values(Blocks.size()); | |||
364 | for (int i = 0, e = Blocks.size(); i != e; ++i) | |||
365 | Values[i] = Blocks[i].Seg.Val; | |||
366 | return Values; | |||
367 | } | |||
368 | ||||
369 | auto AlignVectors::getAlignFromValue(const Value *V) const -> Align { | |||
370 | const auto *C = dyn_cast<ConstantInt>(V); | |||
371 | assert(C && "Alignment must be a compile-time constant integer");
372 | return C->getAlignValue(); | |||
// [analyzer warning] Called C++ object pointer is null ('C' above may be null when asserts are compiled out)
373 | } | |||
374 | ||||
375 | auto AlignVectors::getAddrInfo(Instruction &In) const -> Optional<AddrInfo> { | |||
376 | if (auto *L = isCandidate<LoadInst>(&In)) | |||
377 | return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(), | |||
378 | L->getAlign()); | |||
379 | if (auto *S = isCandidate<StoreInst>(&In)) | |||
380 | return AddrInfo(HVC, S, S->getPointerOperand(), | |||
381 | S->getValueOperand()->getType(), S->getAlign()); | |||
382 | if (auto *II = dyn_cast<IntrinsicInst>(&In)) {
383 | Intrinsic::ID ID = II->getIntrinsicID(); | |||
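// (Clarifying note.) Per the LLVM LangRef, the masked intrinsics carry
// their operands as
//   llvm.masked.load(ptr, align, mask, passthru)
//   llvm.masked.store(value, ptr, align, mask)
// which is why the pointer is argument 0 for loads but argument 1 for stores.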
384 | switch (ID) { | |||
385 | case Intrinsic::masked_load: | |||
386 | return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(), | |||
387 | getAlignFromValue(II->getArgOperand(1))); | |||
388 | case Intrinsic::masked_store: | |||
389 | return AddrInfo(HVC, II, II->getArgOperand(1), | |||
390 | II->getArgOperand(0)->getType(), | |||
391 | getAlignFromValue(II->getArgOperand(2))); | |||
392 | } | |||
393 | } | |||
394 | return Optional<AddrInfo>(); | |||
395 | } | |||
396 | ||||
397 | auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool { | |||
398 | return HVC.HST.isTypeForHVX(AI.ValTy); | |||
399 | } | |||
400 | ||||
401 | auto AlignVectors::getPayload(Value *Val) const -> Value * { | |||
402 | if (auto *In = dyn_cast<Instruction>(Val)) { | |||
403 | Intrinsic::ID ID = 0; | |||
404 | if (auto *II = dyn_cast<IntrinsicInst>(In)) | |||
405 | ID = II->getIntrinsicID(); | |||
406 | if (isa<StoreInst>(In) || ID == Intrinsic::masked_store) | |||
407 | return In->getOperand(0); | |||
408 | } | |||
409 | return Val; | |||
410 | } | |||
411 | ||||
412 | auto AlignVectors::getMask(Value *Val) const -> Value * { | |||
413 | if (auto *II = dyn_cast<IntrinsicInst>(Val)) { | |||
414 | switch (II->getIntrinsicID()) { | |||
415 | case Intrinsic::masked_load: | |||
416 | return II->getArgOperand(2); | |||
417 | case Intrinsic::masked_store: | |||
418 | return II->getArgOperand(3); | |||
419 | } | |||
420 | } | |||
421 | ||||
422 | Type *ValTy = getPayload(Val)->getType(); | |||
423 | if (auto *VecTy = dyn_cast<VectorType>(ValTy)) { | |||
424 | int ElemCount = VecTy->getElementCount().getFixedValue(); | |||
425 | return HVC.getFullValue(HVC.getBoolTy(ElemCount)); | |||
426 | } | |||
427 | return HVC.getFullValue(HVC.getBoolTy()); | |||
428 | } | |||
429 | ||||
430 | auto AlignVectors::getPassThrough(Value *Val) const -> Value * { | |||
431 | if (auto *II = dyn_cast<IntrinsicInst>(Val)) { | |||
432 | if (II->getIntrinsicID() == Intrinsic::masked_load) | |||
433 | return II->getArgOperand(3); | |||
434 | } | |||
435 | return UndefValue::get(getPayload(Val)->getType()); | |||
436 | } | |||
437 | ||||
438 | auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr, | |||
439 | Type *ValTy, int Adjust) const | |||
440 | -> Value * { | |||
441 | // The adjustment is in bytes, but if it's a multiple of the type size, | |||
442 | // we don't need to do pointer casts. | |||
443 | auto *PtrTy = cast<PointerType>(Ptr->getType()); | |||
444 | if (!PtrTy->isOpaque()) { | |||
445 | Type *ElemTy = PtrTy->getElementType(); | |||
446 | int ElemSize = HVC.getSizeOf(ElemTy); | |||
447 | if (Adjust % ElemSize == 0) { | |||
448 | Value *Tmp0 = | |||
449 | Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize)); | |||
450 | return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo()); | |||
451 | } | |||
452 | } | |||
453 | ||||
454 | PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext()); | |||
455 | Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy); | |||
456 | Value *Tmp1 = Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), Tmp0, | |||
457 | HVC.getConstInt(Adjust)); | |||
458 | return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo()); | |||
459 | } | |||
460 | ||||
461 | auto AlignVectors::createAlignedPointer(IRBuilder<> &Builder, Value *Ptr, | |||
462 | Type *ValTy, int Alignment) const | |||
463 | -> Value * { | |||
464 | Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy()); | |||
465 | Value *Mask = HVC.getConstInt(-Alignment); | |||
466 | Value *And = Builder.CreateAnd(AsInt, Mask); | |||
467 | return Builder.CreateIntToPtr(And, ValTy->getPointerTo()); | |||
468 | } | |||
469 | ||||
470 | auto AlignVectors::createAlignedLoad(IRBuilder<> &Builder, Type *ValTy, | |||
471 | Value *Ptr, int Alignment, Value *Mask, | |||
472 | Value *PassThru) const -> Value * { | |||
473 | assert(!HVC.isUndef(Mask)); // Should this be allowed?
474 | if (HVC.isZero(Mask)) | |||
475 | return PassThru; | |||
476 | if (Mask == ConstantInt::getTrue(Mask->getType())) | |||
477 | return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment)); | |||
478 | return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru); | |||
479 | } | |||
480 | ||||
481 | auto AlignVectors::createAlignedStore(IRBuilder<> &Builder, Value *Val, | |||
482 | Value *Ptr, int Alignment, | |||
483 | Value *Mask) const -> Value * { | |||
484 | if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask)) | |||
485 | return UndefValue::get(Val->getType()); | |||
486 | if (Mask == ConstantInt::getTrue(Mask->getType())) | |||
487 | return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment)); | |||
488 | return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask); | |||
489 | } | |||
490 | ||||
491 | auto AlignVectors::createAddressGroups() -> bool { | |||
492 | // An address group created here may contain instructions spanning | |||
493 | // multiple basic blocks. | |||
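// (Illustrative example, hypothetical IR.) A load from %p and a load from
// %p plus a constant 128 bytes let calculatePointerDifference recover the
// offset 128, so both accesses join the group keyed by the first of them,
// with AddrInfo::Offset recorded relative to that first member.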
494 | AddrList WorkStack; | |||
495 | ||||
496 | auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> { | |||
497 | for (AddrInfo &W : WorkStack) { | |||
498 | if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr)) | |||
499 | return std::make_pair(W.Inst, *D); | |||
500 | } | |||
501 | return std::make_pair(nullptr, 0); | |||
502 | }; | |||
503 | ||||
504 | auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void { | |||
505 | BasicBlock &Block = *DomN->getBlock(); | |||
506 | for (Instruction &I : Block) { | |||
507 | auto AI = this->getAddrInfo(I); // Use this-> for gcc6. | |||
508 | if (!AI) | |||
509 | continue; | |||
510 | auto F = findBaseAndOffset(*AI); | |||
511 | Instruction *GroupInst; | |||
512 | if (Instruction *BI = F.first) { | |||
513 | AI->Offset = F.second; | |||
514 | GroupInst = BI; | |||
515 | } else { | |||
516 | WorkStack.push_back(*AI); | |||
517 | GroupInst = AI->Inst; | |||
518 | } | |||
519 | AddrGroups[GroupInst].push_back(*AI); | |||
520 | } | |||
521 | ||||
522 | for (DomTreeNode *C : DomN->children()) | |||
523 | Visit(C, Visit); | |||
524 | ||||
525 | while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block) | |||
526 | WorkStack.pop_back(); | |||
527 | }; | |||
528 | ||||
529 | traverseBlock(HVC.DT.getRootNode(), traverseBlock); | |||
530 | assert(WorkStack.empty());
531 | ||||
532 | // AddrGroups are formed. | |||
533 | ||||
534 | // Remove groups of size 1. | |||
535 | erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; }); | |||
536 | // Remove groups that don't use HVX types. | |||
537 | erase_if(AddrGroups, [&](auto &G) { | |||
538 | return !llvm::any_of( | |||
539 | G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); }); | |||
540 | }); | |||
541 | ||||
542 | return !AddrGroups.empty(); | |||
543 | } | |||
544 | ||||
545 | auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList { | |||
546 | // Form load groups. | |||
547 | // To avoid complications with moving code across basic blocks, only form | |||
548 | // groups that are contained within a single basic block. | |||
549 | ||||
550 | auto getUpwardDeps = [](Instruction *In, Instruction *Base) { | |||
551 | BasicBlock *Parent = Base->getParent(); | |||
552 | assert(In->getParent() == Parent &&
553 | "Base and In should be in the same block");
554 | assert(Base->comesBefore(In) && "Base should come before In");
555 | ||||
556 | DepList Deps; | |||
557 | std::deque<Instruction *> WorkQ = {In}; | |||
558 | while (!WorkQ.empty()) { | |||
559 | Instruction *D = WorkQ.front(); | |||
560 | WorkQ.pop_front(); | |||
561 | Deps.insert(D); | |||
562 | for (Value *Op : D->operands()) { | |||
563 | if (auto *I = dyn_cast<Instruction>(Op)) { | |||
564 | if (I->getParent() == Parent && Base->comesBefore(I)) | |||
565 | WorkQ.push_back(I); | |||
566 | } | |||
567 | } | |||
568 | } | |||
569 | return Deps; | |||
570 | }; | |||
571 | ||||
572 | auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) { | |||
573 | assert(!Move.Main.empty() && "Move group should have non-empty Main");
574 | // Don't mix HVX and non-HVX instructions. | |||
575 | if (Move.IsHvx != isHvx(Info)) | |||
576 | return false; | |||
577 | // Leading instruction in the load group. | |||
578 | Instruction *Base = Move.Main.front(); | |||
579 | if (Base->getParent() != Info.Inst->getParent()) | |||
580 | return false; | |||
581 | ||||
582 | auto isSafeToMoveToBase = [&](const Instruction *I) { | |||
583 | return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()); | |||
584 | }; | |||
585 | DepList Deps = getUpwardDeps(Info.Inst, Base); | |||
586 | if (!llvm::all_of(Deps, isSafeToMoveToBase)) | |||
587 | return false; | |||
588 | ||||
589 | // The dependencies will be moved together with the load, so make sure | |||
590 | // that none of them could be moved independently in another group. | |||
591 | Deps.erase(Info.Inst); | |||
592 | auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; }; | |||
593 | if (llvm::any_of(Deps, inAddrMap)) | |||
594 | return false; | |||
595 | Move.Main.push_back(Info.Inst); | |||
596 | llvm::append_range(Move.Deps, Deps); | |||
597 | return true; | |||
598 | }; | |||
599 | ||||
600 | MoveList LoadGroups; | |||
601 | ||||
602 | for (const AddrInfo &Info : Group) { | |||
603 | if (!Info.Inst->mayReadFromMemory()) | |||
604 | continue; | |||
605 | if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back())) | |||
606 | LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true); | |||
607 | } | |||
608 | ||||
609 | // Erase singleton groups. | |||
610 | erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; }); | |||
611 | return LoadGroups; | |||
612 | } | |||
613 | ||||
614 | auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList { | |||
615 | // Form store groups. | |||
616 | // To avoid complications with moving code across basic blocks, only form | |||
617 | // groups that are contained within a single basic block. | |||
618 | ||||
619 | auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) { | |||
620 | assert(!Move.Main.empty() && "Move group should have non-empty Main");
621 | // For stores with return values we'd have to collect downward dependencies.
622 | // There are no such stores that we handle at the moment, so omit that.
623 | assert(Info.Inst->getType()->isVoidTy() &&
624 | "Not handling stores with return values");
625 | // Don't mix HVX and non-HVX instructions. | |||
626 | if (Move.IsHvx != isHvx(Info)) | |||
627 | return false; | |||
628 | // For stores we need to be careful whether it's safe to move them. | |||
629 | // Stores that are otherwise safe to move together may not appear safe | |||
630 | // to move over one another (i.e. isSafeToMoveBefore may return false). | |||
631 | Instruction *Base = Move.Main.front(); | |||
632 | if (Base->getParent() != Info.Inst->getParent()) | |||
633 | return false; | |||
634 | if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main)) | |||
635 | return false; | |||
636 | Move.Main.push_back(Info.Inst); | |||
637 | return true; | |||
638 | }; | |||
639 | ||||
640 | MoveList StoreGroups; | |||
641 | ||||
642 | for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) { | |||
643 | const AddrInfo &Info = *I; | |||
644 | if (!Info.Inst->mayWriteToMemory()) | |||
645 | continue; | |||
646 | if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back())) | |||
647 | StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false); | |||
648 | } | |||
649 | ||||
650 | // Erase singleton groups. | |||
651 | erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; }); | |||
652 | return StoreGroups; | |||
653 | } | |||
654 | ||||
655 | auto AlignVectors::move(const MoveGroup &Move) const -> bool { | |||
656 | assert(!Move.Main.empty() && "Move group should have non-empty Main");
657 | Instruction *Where = Move.Main.front(); | |||
658 | ||||
659 | if (Move.IsLoad) { | |||
660 | // Move all deps to before Where, keeping order. | |||
661 | for (Instruction *D : Move.Deps) | |||
662 | D->moveBefore(Where); | |||
663 | // Move all main instructions to after Where, keeping order. | |||
664 | ArrayRef<Instruction *> Main(Move.Main); | |||
665 | for (Instruction *M : Main.drop_front(1)) { | |||
666 | M->moveAfter(Where); | |||
667 | Where = M; | |||
668 | } | |||
669 | } else { | |||
670 | // NOTE: Deps are empty for "store" groups. If they need to be | |||
671 | // non-empty, decide on the order. | |||
672 | assert(Move.Deps.empty());
673 | // Move all main instructions to before Where, inverting order. | |||
674 | ArrayRef<Instruction *> Main(Move.Main); | |||
675 | for (Instruction *M : Main.drop_front(1)) { | |||
676 | M->moveBefore(Where); | |||
677 | Where = M; | |||
678 | } | |||
679 | } | |||
680 | ||||
681 | return Move.Main.size() + Move.Deps.size() > 1; | |||
682 | } | |||
683 | ||||
684 | auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool { | |||
685 | // TODO: Needs support for masked loads/stores of "scalar" vectors. | |||
686 | if (!Move.IsHvx) | |||
687 | return false; | |||
688 | ||||
689 | // Return the element with the maximum alignment from Range, | |||
690 | // where GetValue obtains the value to compare from an element. | |||
691 | auto getMaxOf = [](auto Range, auto GetValue) { | |||
692 | return *std::max_element( | |||
693 | Range.begin(), Range.end(), | |||
694 | [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); }); | |||
695 | }; | |||
696 | ||||
697 | const AddrList &BaseInfos = AddrGroups.at(Move.Base); | |||
698 | ||||
699 | // Conceptually, there is a vector of N bytes covering the addresses | |||
700 | // starting from the minimum offset (i.e. Base.Addr+Start). This vector | |||
701 | // represents a contiguous memory region that spans all accessed memory | |||
702 | // locations. | |||
703 | // The correspondence between loaded or stored values will be expressed | |||
704 | // in terms of this vector. For example, the 0th element of the vector | |||
705 | // from the Base address info will start at byte Start from the beginning | |||
706 | // of this conceptual vector. | |||
707 | // | |||
708 | // This vector will be loaded/stored starting at the nearest down-aligned | |||
709 | // address and the amount of the down-alignment will be AlignVal:
710 | // valign(load_vector(align_down(Base+Start)), AlignVal) | |||
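// (Worked example with assumed numbers.) Two 64-byte HVX loads at Base+5
// and Base+69 give Start = 5 and a conceptual vector covering bytes
// [5..133) of Base. It is fetched in 64-byte sectors starting at
// align_down(Base+5), and each sector is shifted right by
// AlignVal = (Base+5) - align_down(Base+5) bytes.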
711 | ||||
712 | std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end()); | |||
713 | AddrList MoveInfos; | |||
714 | llvm::copy_if( | |||
715 | BaseInfos, std::back_inserter(MoveInfos), | |||
716 | [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); }); | |||
717 | ||||
718 | // Maximum alignment present in the whole address group. | |||
719 | const AddrInfo &WithMaxAlign = | |||
720 | getMaxOf(BaseInfos, [](const AddrInfo &AI) { return AI.HaveAlign; }); | |||
721 | Align MaxGiven = WithMaxAlign.HaveAlign; | |||
722 | ||||
723 | // Minimum alignment present in the move address group. | |||
724 | const AddrInfo &WithMinOffset = | |||
725 | getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; }); | |||
726 | ||||
727 | const AddrInfo &WithMaxNeeded = | |||
728 | getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; }); | |||
729 | Align MinNeeded = WithMaxNeeded.NeedAlign; | |||
730 | ||||
731 | // Set the builder at the top instruction in the move group. | |||
732 | Instruction *TopIn = Move.IsLoad ? Move.Main.front() : Move.Main.back(); | |||
733 | IRBuilder<> Builder(TopIn); | |||
734 | Value *AlignAddr = nullptr; // Actual aligned address. | |||
735 | Value *AlignVal = nullptr; // Right-shift amount (for valign). | |||
736 | ||||
737 | if (MinNeeded <= MaxGiven) { | |||
738 | int Start = WithMinOffset.Offset; | |||
739 | int OffAtMax = WithMaxAlign.Offset; | |||
740 | // Shift the offset of the maximally aligned instruction (OffAtMax) | |||
741 | // back by just enough multiples of the required alignment to cover the | |||
742 | // distance from Start to OffAtMax. | |||
743 | // Calculate the address adjustment amount based on the address with the | |||
744 | // maximum alignment. This is to allow a simple gep instruction instead | |||
745 | // of potential bitcasts to i8*. | |||
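// (Worked numbers, assumed for illustration.) With Start = -11,
// OffAtMax = 0 and MinNeeded = 8: Adjust = -alignTo(11, 8) = -16, so
// AlignAddr sits 16 bytes below the maximally aligned address, and
// Diff = -11 - (0 - 16) = 5, satisfying 0 <= Diff < MinNeeded as the
// asserts below verify.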
746 | int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value()); | |||
747 | AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr, | |||
748 | WithMaxAlign.ValTy, Adjust); | |||
749 | int Diff = Start - (OffAtMax + Adjust); | |||
750 | AlignVal = HVC.getConstInt(Diff); | |||
751 | // Sanity. | |||
752 | assert(Diff >= 0);
753 | assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
754 | } else { | |||
755 | // WithMinOffset is the lowest address in the group, | |||
756 | // WithMinOffset.Addr = Base+Start. | |||
757 | // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb) | |||
758 | // mask off unnecessary bits, so it's ok to just use the original pointer as
759 | // the alignment amount. | |||
760 | // Do an explicit down-alignment of the address to avoid creating an | |||
761 | // aligned instruction with an address that is not really aligned. | |||
762 | AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr, | |||
763 | WithMinOffset.ValTy, MinNeeded.value()); | |||
764 | AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy()); | |||
765 | } | |||
766 | ||||
767 | ByteSpan VSpan; | |||
768 | for (const AddrInfo &AI : MoveInfos) { | |||
769 | VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy), | |||
770 | AI.Offset - WithMinOffset.Offset); | |||
771 | } | |||
772 | ||||
773 | // The aligned loads/stores will use blocks that are either scalars, | |||
774 | // or HVX vectors. Let "sector" be the unified term for such a block. | |||
775 | // blend(scalar, vector) -> sector... | |||
776 | int ScLen = Move.IsHvx ? HVC.HST.getVectorLength() | |||
777 | : std::max<int>(MinNeeded.value(), 4); | |||
778 | assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
779 | assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
780 | ||||
781 | Type *SecTy = HVC.getByteTy(ScLen); | |||
782 | int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen; | |||
783 | bool DoAlign = !HVC.isZero(AlignVal); | |||
784 | ||||
785 | if (Move.IsLoad) { | |||
786 | ByteSpan ASpan; | |||
787 | auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen)); | |||
788 | auto *Undef = UndefValue::get(SecTy); | |||
789 | ||||
790 | for (int i = 0; i != NumSectors + DoAlign; ++i) { | |||
791 | Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen); | |||
792 | // FIXME: generate a predicated load? | |||
793 | Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef); | |||
794 | // If vector shifting is potentially needed, accumulate metadata | |||
795 | // from source sections of twice the load width. | |||
796 | int Start = (i - DoAlign) * ScLen; | |||
797 | int Width = (1 + DoAlign) * ScLen; | |||
798 | propagateMetadata(cast<Instruction>(Load), | |||
799 | VSpan.section(Start, Width).values()); | |||
800 | ASpan.Blocks.emplace_back(Load, ScLen, i * ScLen); | |||
801 | } | |||
802 | ||||
803 | if (DoAlign) { | |||
804 | for (int j = 0; j != NumSectors; ++j) { | |||
805 | ASpan[j].Seg.Val = HVC.vralignb(Builder, ASpan[j].Seg.Val, | |||
806 | ASpan[j + 1].Seg.Val, AlignVal); | |||
807 | } | |||
808 | } | |||
809 | ||||
810 | for (ByteSpan::Block &B : VSpan) { | |||
811 | ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos); | |||
812 | Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size)); | |||
813 | for (ByteSpan::Block &S : ASection) { | |||
814 | Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val)); | |||
815 | Accum = | |||
816 | HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos); | |||
817 | } | |||
818 | // Instead of casting everything to bytes for the vselect, cast to the | |||
819 | // original value type. This will avoid complications with casting masks. | |||
820 | // For example, in cases when the original mask applied to i32, it could | |||
821 | // be converted to a mask applicable to i8 via pred_typecast intrinsic, | |||
822 | // but if the mask is not exactly of HVX length, extra handling would be | |||
823 | // needed to make it work. | |||
824 | Type *ValTy = getPayload(B.Seg.Val)->getType(); | |||
825 | Value *Cast = Builder.CreateBitCast(Accum, ValTy); | |||
826 | Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast, | |||
827 | getPassThrough(B.Seg.Val)); | |||
828 | B.Seg.Val->replaceAllUsesWith(Sel); | |||
829 | } | |||
830 | } else { | |||
831 | // Stores. | |||
832 | ByteSpan ASpanV, ASpanM; | |||
833 | ||||
834 | // Return a vector value corresponding to the input value Val: | |||
835 | // either <1 x Val> for scalar Val, or Val itself for vector Val. | |||
836 | auto MakeVec = [](IRBuilder<> &Builder, Value *Val) -> Value * { | |||
837 | Type *Ty = Val->getType(); | |||
838 | if (Ty->isVectorTy()) | |||
839 | return Val; | |||
840 | auto *VecTy = VectorType::get(Ty, 1, /*Scalable*/ false); | |||
841 | return Builder.CreateBitCast(Val, VecTy); | |||
842 | }; | |||
843 | ||||
844 | // Create an extra "undef" sector at the beginning and at the end. | |||
845 | // They will be used as the left/right filler in the vlalign step. | |||
846 | for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) { | |||
847 | // For stores, the size of each section is an aligned vector length. | |||
848 | // Adjust the store offsets relative to the section start offset. | |||
849 | ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen); | |||
850 | Value *AccumV = UndefValue::get(SecTy); | |||
851 | Value *AccumM = HVC.getNullValue(SecTy); | |||
852 | for (ByteSpan::Block &S : VSection) { | |||
853 | Value *Pay = getPayload(S.Seg.Val); | |||
854 | Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)), | |||
855 | Pay->getType(), HVC.getByteTy()); | |||
856 | AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask), | |||
857 | S.Seg.Start, S.Seg.Size, S.Pos); | |||
858 | AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay), | |||
859 | S.Seg.Start, S.Seg.Size, S.Pos); | |||
860 | } | |||
861 | ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen); | |||
862 | ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen); | |||
863 | } | |||
864 | ||||
865 | // vlalign | |||
866 | if (DoAlign) { | |||
867 | for (int j = 1; j != NumSectors + 2; ++j) { | |||
868 | ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanV[j - 1].Seg.Val, | |||
869 | ASpanV[j].Seg.Val, AlignVal); | |||
870 | ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanM[j - 1].Seg.Val, | |||
871 | ASpanM[j].Seg.Val, AlignVal); | |||
872 | } | |||
873 | } | |||
874 | ||||
875 | for (int i = 0; i != NumSectors + DoAlign; ++i) { | |||
876 | Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen); | |||
877 | Value *Val = ASpanV[i].Seg.Val; | |||
878 | Value *Mask = ASpanM[i].Seg.Val; // bytes | |||
879 | if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) { | |||
880 | Value *Store = createAlignedStore(Builder, Val, Ptr, ScLen, | |||
881 | HVC.vlsb(Builder, Mask)); | |||
882 | // If vector shifting is potentially needed, accumulate metadata | |||
883 | // from source sections of twice the store width. | |||
884 | int Start = (i - DoAlign) * ScLen; | |||
885 | int Width = (1 + DoAlign) * ScLen; | |||
886 | propagateMetadata(cast<Instruction>(Store), | |||
887 | VSpan.section(Start, Width).values()); | |||
888 | } | |||
889 | } | |||
890 | } | |||
891 | ||||
892 | for (auto *Inst : Move.Main) | |||
893 | Inst->eraseFromParent(); | |||
894 | ||||
895 | return true; | |||
896 | } | |||
897 | ||||
898 | auto AlignVectors::run() -> bool { | |||
899 | if (!createAddressGroups()) | |||
900 | return false; | |||
901 | ||||
902 | bool Changed = false; | |||
903 | MoveList LoadGroups, StoreGroups; | |||
904 | ||||
905 | for (auto &G : AddrGroups) { | |||
906 | llvm::append_range(LoadGroups, createLoadGroups(G.second)); | |||
907 | llvm::append_range(StoreGroups, createStoreGroups(G.second)); | |||
908 | } | |||
909 | ||||
910 | for (auto &M : LoadGroups) | |||
911 | Changed |= move(M); | |||
912 | for (auto &M : StoreGroups) | |||
913 | Changed |= move(M); | |||
914 | ||||
915 | for (auto &M : LoadGroups) | |||
916 | Changed |= realignGroup(M); | |||
917 | for (auto &M : StoreGroups) | |||
918 | Changed |= realignGroup(M); | |||
919 | ||||
920 | return Changed; | |||
921 | } | |||
922 | ||||
923 | // --- End AlignVectors | |||
924 | ||||
925 | auto HexagonVectorCombine::run() -> bool { | |||
926 | if (!HST.useHVXOps()) | |||
927 | return false; | |||
928 | ||||
929 | bool Changed = AlignVectors(*this).run(); | |||
930 | return Changed; | |||
931 | } | |||
932 | ||||
933 | auto HexagonVectorCombine::getIntTy() const -> IntegerType * { | |||
934 | return Type::getInt32Ty(F.getContext()); | |||
935 | } | |||
936 | ||||
937 | auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * { | |||
938 | assert(ElemCount >= 0);
939 | IntegerType *ByteTy = Type::getInt8Ty(F.getContext()); | |||
940 | if (ElemCount == 0) | |||
941 | return ByteTy; | |||
942 | return VectorType::get(ByteTy, ElemCount, /*Scalable*/ false); | |||
943 | } | |||
944 | ||||
945 | auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * { | |||
946 | assert(ElemCount >= 0);
947 | IntegerType *BoolTy = Type::getInt1Ty(F.getContext()); | |||
948 | if (ElemCount == 0) | |||
949 | return BoolTy; | |||
950 | return VectorType::get(BoolTy, ElemCount, /*Scalable*/ false); | |||
951 | } | |||
952 | ||||
953 | auto HexagonVectorCombine::getConstInt(int Val) const -> ConstantInt * { | |||
954 | return ConstantInt::getSigned(getIntTy(), Val); | |||
955 | } | |||
956 | ||||
957 | auto HexagonVectorCombine::isZero(const Value *Val) const -> bool { | |||
958 | if (auto *C = dyn_cast<Constant>(Val)) | |||
959 | return C->isZeroValue(); | |||
960 | return false; | |||
961 | } | |||
962 | ||||
963 | auto HexagonVectorCombine::getIntValue(const Value *Val) const | |||
964 | -> Optional<APInt> { | |||
965 | if (auto *CI = dyn_cast<ConstantInt>(Val)) | |||
966 | return CI->getValue(); | |||
967 | return None; | |||
968 | } | |||
969 | ||||
970 | auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool { | |||
971 | return isa<UndefValue>(Val); | |||
972 | } | |||
973 | ||||
974 | auto HexagonVectorCombine::getSizeOf(const Value *Val) const -> int { | |||
975 | return getSizeOf(Val->getType()); | |||
976 | } | |||
977 | ||||
978 | auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int { | |||
979 | return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue(); | |||
980 | } | |||
981 | ||||
982 | auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int { | |||
983 | // The actual type may be shorter than the HVX vector, so determine | |||
984 | // the alignment based on subtarget info. | |||
985 | if (HST.isTypeForHVX(Ty)) | |||
986 | return HST.getVectorLength(); | |||
987 | return DL.getABITypeAlign(Ty).value(); | |||
988 | } | |||
989 | ||||
990 | auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * { | |||
991 | assert(Ty->isIntOrIntVectorTy());
992 | auto Zero = ConstantInt::get(Ty->getScalarType(), 0); | |||
993 | if (auto *VecTy = dyn_cast<VectorType>(Ty)) | |||
994 | return ConstantVector::getSplat(VecTy->getElementCount(), Zero); | |||
995 | return Zero; | |||
996 | } | |||
997 | ||||
998 | auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * { | |||
999 | assert(Ty->isIntOrIntVectorTy());
1000 | auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1); | |||
1001 | if (auto *VecTy = dyn_cast<VectorType>(Ty)) | |||
1002 | return ConstantVector::getSplat(VecTy->getElementCount(), Minus1); | |||
1003 | return Minus1; | |||
1004 | } | |||
1005 | ||||
1006 | // Insert bytes [Start..Start+Length) of Src into Dst at byte Where. | |||
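// (Illustrative example with assumed sizes.) For 8-byte Dst and Src,
// insertb(Builder, Dst, Src, /*Start=*/1, /*Length=*/3, /*Where=*/4)
// produces {Dst[0..3], Src[1..3], Dst[7]}; the shuffle mask computed
// below is {0,1,2,3,9,10,11,7}, where indices >= P2Len = 8 select Src.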
1007 | auto HexagonVectorCombine::insertb(IRBuilder<> &Builder, Value *Dst, Value *Src, | |||
1008 | int Start, int Length, int Where) const | |||
1009 | -> Value * { | |||
1010 | assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
1011 | int SrcLen = getSizeOf(Src); | |||
1012 | int DstLen = getSizeOf(Dst); | |||
1013 | assert(0 <= Start && Start + Length <= SrcLen);
1014 | assert(0 <= Where && Where + Length <= DstLen);
1015 | ||||
1016 | int P2Len = PowerOf2Ceil(SrcLen | DstLen); | |||
1017 | auto *Undef = UndefValue::get(getByteTy()); | |||
1018 | Value *P2Src = vresize(Builder, Src, P2Len, Undef); | |||
1019 | Value *P2Dst = vresize(Builder, Dst, P2Len, Undef); | |||
1020 | ||||
1021 | SmallVector<int, 256> SMask(P2Len); | |||
1022 | for (int i = 0; i != P2Len; ++i) { | |||
1023 | // If i is in [Where, Where+Length), pick Src[Start+(i-Where)]. | |||
1024 | // Otherwise, pick Dst[i]; | |||
1025 | SMask[i] = | |||
1026 | (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i; | |||
1027 | } | |||
1028 | ||||
1029 | Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask); | |||
1030 | return vresize(Builder, P2Insert, DstLen, Undef); | |||
1031 | } | |||
1032 | ||||
1033 | auto HexagonVectorCombine::vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, | |||
1034 | Value *Amt) const -> Value * { | |||
1035 | assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
1036 | assert(isSectorTy(Hi->getType()));
1037 | if (isZero(Amt)) | |||
1038 | return Hi; | |||
1039 | int VecLen = getSizeOf(Hi); | |||
1040 | if (auto IntAmt = getIntValue(Amt)) | |||
1041 | return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(), | |||
1042 | VecLen); | |||
1043 | ||||
1044 | if (HST.isTypeForHVX(Hi->getType())) { | |||
1045 | int HwLen = HST.getVectorLength(); | |||
1046 | assert(VecLen == HwLen && "Expecting an exact HVX type");
1047 | Intrinsic::ID V6_vlalignb = HwLen == 64 | |||
1048 | ? Intrinsic::hexagon_V6_vlalignb | |||
1049 | : Intrinsic::hexagon_V6_vlalignb_128B; | |||
1050 | return createHvxIntrinsic(Builder, V6_vlalignb, Hi->getType(), | |||
1051 | {Hi, Lo, Amt}); | |||
1052 | } | |||
1053 | ||||
1054 | if (VecLen == 4) { | |||
1055 | Value *Pair = concat(Builder, {Lo, Hi}); | |||
1056 | Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32); | |||
1057 | Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext())); | |||
1058 | return Builder.CreateBitCast(Trunc, Hi->getType()); | |||
1059 | } | |||
1060 | if (VecLen == 8) { | |||
1061 | Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt); | |||
1062 | return vralignb(Builder, Lo, Hi, Sub); | |||
1063 | } | |||
1064 | llvm_unreachable("Unexpected vector length");
1065 | } | |||
1066 | ||||
1067 | auto HexagonVectorCombine::vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, | |||
1068 | Value *Amt) const -> Value * { | |||
1069 | assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
1070 | assert(isSectorTy(Lo->getType()));
1071 | if (isZero(Amt)) | |||
1072 | return Lo; | |||
1073 | int VecLen = getSizeOf(Lo); | |||
1074 | if (auto IntAmt = getIntValue(Amt)) | |||
1075 | return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen); | |||
1076 | ||||
1077 | if (HST.isTypeForHVX(Lo->getType())) { | |||
1078 | int HwLen = HST.getVectorLength(); | |||
1079 | assert(VecLen == HwLen && "Expecting an exact HVX type");
1080 | Intrinsic::ID V6_valignb = HwLen == 64 ? Intrinsic::hexagon_V6_valignb | |||
1081 | : Intrinsic::hexagon_V6_valignb_128B; | |||
1082 | return createHvxIntrinsic(Builder, V6_valignb, Lo->getType(), | |||
1083 | {Hi, Lo, Amt}); | |||
1084 | } | |||
1085 | ||||
1086 | if (VecLen == 4) { | |||
1087 | Value *Pair = concat(Builder, {Lo, Hi}); | |||
1088 | Value *Shift = Builder.CreateLShr(Pair, Amt); | |||
1089 | Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext())); | |||
1090 | return Builder.CreateBitCast(Trunc, Lo->getType()); | |||
1091 | } | |||
1092 | if (VecLen == 8) { | |||
1093 | Type *Int64Ty = Type::getInt64Ty(F.getContext()); | |||
1094 | Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty); | |||
1095 | Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty); | |||
1096 | Function *FI = Intrinsic::getDeclaration(F.getParent(), | |||
1097 | Intrinsic::hexagon_S2_valignrb); | |||
1098 | Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt}); | |||
1099 | return Builder.CreateBitCast(Call, Lo->getType()); | |||
1100 | } | |||
1101 | llvm_unreachable("Unexpected vector length");
1102 | } | |||
1103 | ||||
1104 | // Concatenates a sequence of vectors of the same type. | |||
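// (Illustrative example with assumed types.) Concatenating three <16 x i8>
// vectors pads the worklist to four with an undef vector, joins pairs into
// two <32 x i8> shuffles, joins those into one <64 x i8>, and the final
// shuffle extracts the leading 3 * 16 = 48 original elements.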
1105 | auto HexagonVectorCombine::concat(IRBuilder<> &Builder, | |||
1106 | ArrayRef<Value *> Vecs) const -> Value * { | |||
1107 | assert(!Vecs.empty());
1108 | SmallVector<int, 256> SMask; | |||
1109 | std::vector<Value *> Work[2]; | |||
1110 | int ThisW = 0, OtherW = 1; | |||
1111 | ||||
1112 | Work[ThisW].assign(Vecs.begin(), Vecs.end()); | |||
1113 | while (Work[ThisW].size() > 1) { | |||
1114 | auto *Ty = cast<VectorType>(Work[ThisW].front()->getType()); | |||
1115 | int ElemCount = Ty->getElementCount().getFixedValue(); | |||
1116 | SMask.resize(ElemCount * 2); | |||
1117 | std::iota(SMask.begin(), SMask.end(), 0); | |||
1118 | ||||
1119 | Work[OtherW].clear(); | |||
1120 | if (Work[ThisW].size() % 2 != 0) | |||
1121 | Work[ThisW].push_back(UndefValue::get(Ty)); | |||
1122 | for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) { | |||
1123 | Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i], | |||
1124 | Work[ThisW][i + 1], SMask); | |||
1125 | Work[OtherW].push_back(Joined); | |||
1126 | } | |||
1127 | std::swap(ThisW, OtherW); | |||
1128 | } | |||
1129 | ||||
1130 | // Since there may have been some undefs appended to make shuffle operands | |||
1131 | // have the same type, perform the last shuffle to only pick the original | |||
1132 | // elements. | |||
1133 | SMask.resize(Vecs.size() * getSizeOf(Vecs.front()->getType())); | |||
1134 | std::iota(SMask.begin(), SMask.end(), 0); | |||
1135 | Value *Total = Work[OtherW].front(); | |||
1136 | return Builder.CreateShuffleVector(Total, SMask); | |||
1137 | } | |||
1138 | ||||
1139 | auto HexagonVectorCombine::vresize(IRBuilder<> &Builder, Value *Val, | |||
1140 | int NewSize, Value *Pad) const -> Value * { | |||
1141 | assert(isa<VectorType>(Val->getType()));
1142 | auto *ValTy = cast<VectorType>(Val->getType()); | |||
1143 | assert(ValTy->getElementType() == Pad->getType());
1144 | ||||
1145 | int CurSize = ValTy->getElementCount().getFixedValue(); | |||
1146 | if (CurSize == NewSize) | |||
1147 | return Val; | |||
1148 | // Truncate? | |||
1149 | if (CurSize > NewSize) | |||
1150 | return getElementRange(Builder, Val, /*Unused*/ Val, 0, NewSize); | |||
1151 | // Extend. | |||
1152 | SmallVector<int, 128> SMask(NewSize); | |||
1153 | std::iota(SMask.begin(), SMask.begin() + CurSize, 0); | |||
1154 | std::fill(SMask.begin() + CurSize, SMask.end(), CurSize); | |||
1155 | Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad); | |||
1156 | return Builder.CreateShuffleVector(Val, PadVec, SMask); | |||
1157 | } | |||
1158 | ||||
1159 | auto HexagonVectorCombine::rescale(IRBuilder<> &Builder, Value *Mask, | |||
1160 | Type *FromTy, Type *ToTy) const -> Value * { | |||
1161 | // Mask is a vector <N x i1>, where each element corresponds to an | |||
1162 | // element of FromTy. Remap it so that each element will correspond | |||
1163 | // to an element of ToTy. | |||
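// (Illustrative example with assumed types.) Rescaling a <4 x i1> mask
// from i32 elements to i8 elements sign-extends it to <4 x i32>, bitcasts
// to <16 x i8>, and truncates to <16 x i1>, so each original mask bit is
// replicated across the four bytes of its i32 element.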
1164 | assert(isa<VectorType>(Mask->getType()));
1165 | ||||
1166 | Type *FromSTy = FromTy->getScalarType(); | |||
1167 | Type *ToSTy = ToTy->getScalarType(); | |||
1168 | if (FromSTy == ToSTy) | |||
1169 | return Mask; | |||
1170 | ||||
1171 | int FromSize = getSizeOf(FromSTy); | |||
1172 | int ToSize = getSizeOf(ToSTy); | |||
1173 | assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
1174 | ||||
1175 | auto *MaskTy = cast<VectorType>(Mask->getType()); | |||
1176 | int FromCount = MaskTy->getElementCount().getFixedValue(); | |||
1177 | int ToCount = (FromCount * FromSize) / ToSize; | |||
1178 | assert((FromCount * FromSize) % ToSize == 0);
1179 | ||||
1180 | // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> -> | |||
1181 | // -> trunc to <M x i1>. | |||
1182 | Value *Ext = Builder.CreateSExt( | |||
1183 | Mask, VectorType::get(FromSTy, FromCount, /*Scalable*/ false)); | |||
1184 | Value *Cast = Builder.CreateBitCast( | |||
1185 | Ext, VectorType::get(ToSTy, ToCount, /*Scalable*/ false)); | |||
1186 | return Builder.CreateTrunc( | |||
1187 | Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false)); | |||
1188 | } | |||
1189 | ||||
1190 | // Bitcast to bytes, and return least significant bits. | |||
1191 | auto HexagonVectorCombine::vlsb(IRBuilder<> &Builder, Value *Val) const | |||
1192 | -> Value * { | |||
1193 | Type *ScalarTy = Val->getType()->getScalarType(); | |||
1194 | if (ScalarTy == getBoolTy()) | |||
1195 | return Val; | |||
1196 | ||||
1197 | Value *Bytes = vbytes(Builder, Val); | |||
1198 | if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType())) | |||
1199 | return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy))); | |||
1200 | // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not | |||
1201 | // <1 x i1>. | |||
1202 | return Builder.CreateTrunc(Bytes, getBoolTy()); | |||
1203 | } | |||
1204 | ||||
1205 | // Bitcast to bytes for non-bool. For bool, convert i1 -> i8. | |||
1206 | auto HexagonVectorCombine::vbytes(IRBuilder<> &Builder, Value *Val) const | |||
1207 | -> Value * { | |||
1208 | Type *ScalarTy = Val->getType()->getScalarType(); | |||
1209 | if (ScalarTy == getByteTy()) | |||
1210 | return Val; | |||
1211 | ||||
1212 | if (ScalarTy != getBoolTy()) | |||
1213 | return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val))); | |||
1214 | // For bool, return a sext from i1 to i8. | |||
1215 | if (auto *VecTy = dyn_cast<VectorType>(Val->getType())) | |||
1216 | return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy)); | |||
1217 | return Builder.CreateSExt(Val, getByteTy()); | |||
1218 | } | |||
1219 | ||||
1220 | auto HexagonVectorCombine::createHvxIntrinsic(IRBuilder<> &Builder, | |||
1221 | Intrinsic::ID IntID, Type *RetTy, | |||
1222 | ArrayRef<Value *> Args) const | |||
1223 | -> Value * { | |||
1224 | int HwLen = HST.getVectorLength(); | |||
1225 | Type *BoolTy = Type::getInt1Ty(F.getContext()); | |||
1226 | Type *Int32Ty = Type::getInt32Ty(F.getContext()); | |||
1227 | // HVX vector -> v16i32/v32i32 | |||
1228 | // HVX vector predicate -> v512i1/v1024i1 | |||
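// (Worked example, assuming a 64-byte HVX mode.) With HwLen = 64, a
// <64 x i8> vector argument becomes <16 x i32> (HwLen / 4 words), and a
// <64 x i1> predicate becomes <512 x i1> (8 * HwLen bits).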
  auto getTypeForIntrin = [&](Type *Ty) -> Type * {
    if (HST.isTypeForHVX(Ty, /*IncludeBool*/ true)) {
      Type *ElemTy = cast<VectorType>(Ty)->getElementType();
      if (ElemTy == Int32Ty)
        return Ty;
      if (ElemTy == BoolTy)
        return VectorType::get(BoolTy, 8 * HwLen, /*Scalable*/ false);
      return VectorType::get(Int32Ty, HwLen / 4, /*Scalable*/ false);
    }
    // Non-HVX type. It should be a scalar.
    assert(Ty == Int32Ty || Ty->isIntegerTy(64));
    return Ty;
  };

  auto getCast = [&](IRBuilder<> &Builder, Value *Val,
                     Type *DestTy) -> Value * {
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
      return Val;
    if (HST.isTypeForHVX(SrcTy, /*IncludeBool*/ true)) {
      if (cast<VectorType>(SrcTy)->getElementType() == BoolTy) {
        // This should take care of casts the other way too, for example
        // v1024i1 -> v32i1.
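        // A plain bitcast would be invalid here, since the two i1 vector
        // types generally have different bit widths; the conversion has
        // to go through the predicate typecast intrinsic.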
        Intrinsic::ID TC = HwLen == 64
                               ? Intrinsic::hexagon_V6_pred_typecast
                               : Intrinsic::hexagon_V6_pred_typecast_128B;
        Function *FI = Intrinsic::getDeclaration(F.getParent(), TC,
                                                 {DestTy, Val->getType()});
        return Builder.CreateCall(FI, {Val});
      }
      // Non-predicate HVX vector.
      return Builder.CreateBitCast(Val, DestTy);
    }
    // Non-HVX type. It should be a scalar, and it should already have
    // a valid type.
    llvm_unreachable("Unexpected type");
  };

  SmallVector<Value *, 4> IntOps;
  for (Value *A : Args)
    IntOps.push_back(getCast(Builder, A, getTypeForIntrin(A->getType())));
  Function *FI = Intrinsic::getDeclaration(F.getParent(), IntID);
  Value *Call = Builder.CreateCall(FI, IntOps);

  Type *CallTy = Call->getType();
  if (CallTy == RetTy)
    return Call;
  // Scalar types should have RetTy matching the call return type.
  assert(HST.isTypeForHVX(CallTy, /*IncludeBool*/ true));
  if (cast<VectorType>(CallTy)->getElementType() == BoolTy)
    return getCast(Builder, Call, RetTy);
  return Builder.CreateBitCast(Call, RetTy);
}

auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> Optional<int> {
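  // Local IRBuilder whose destructor erases every instruction it created:
  // the instructions built below exist only to be fed to
  // SimplifyInstruction and must not be left behind in the function.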
  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

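// CallBuilder invokes a builder method and, if the result is a newly
// created instruction, queues it on ToErase so that the Builder above
// cleans it up.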
#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [&](Value *V) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
      if (Value *S = SimplifyInstruction(I, Q))
        return S;
    }
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return None;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return None;

  Builder B(Gep0->getParent());
  int Scale = DL.getTypeStoreSize(Gep0->getSourceElementType());

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return None;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

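  // Otherwise, split each index into its "known" and "unknown" bit parts
  // (per computeKnownBits). Since the two parts partition the bits,
  // Idx0 - Idx1 equals the sum of the differences of the parts; if both
  // differences fold to constants, the total difference is known even
  // though neither index is constant by itself.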
  KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT);
  KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnesValue())
    return None;

  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();
  } else {
    return None;
  }

  Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();
  } else {
    return None;
  }

  return (Diff0 + Diff1) * Scale;

#undef CallBuilder
}

template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &Ignore) const
    -> bool {
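  // Get the memory location accessed by an instruction, if known. Masked
  // loads and stores are intrinsic calls, which MemoryLocation::getOrNone
  // does not classify; their pointer argument is operand 0 (masked_load)
  // and operand 1 (masked_store), respectively.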
  auto getLocOrNone = [this](const Instruction &I) -> Optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
  // No PHIs.
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
    return false;

  if (!mayBeMemoryDependent(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
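  // Determine the range of instructions that In would be moved across:
  // [To, In) when moving up, (In, To) when moving down.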
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(Ignore, &I))
      continue;
    // The llvm.assume intrinsic can be ignored.
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}

#ifndef NDEBUG
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}

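// Check if Ty is a byte vector covering exactly one aligned access unit:
// a full HVX vector for HVX types, otherwise 4 or 8 bytes.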
auto HexagonVectorCombine::isSectorTy(Type *Ty) const -> bool {
  if (!isByteVecTy(Ty))
    return false;
  int Size = getSizeOf(Ty);
  if (HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HST.getVectorLength());
  return Size == 4 || Size == 8;
}
#endif

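// Select Length consecutive elements starting at index Start from the
// logical concatenation of Lo and Hi, using a single shufflevector.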
auto HexagonVectorCombine::getElementRange(IRBuilder<> &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && Start < Length);
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask);
}

// Pass management.

namespace llvm {
void initializeHexagonVectorCombineLegacyPass(PassRegistry &);
FunctionPass *createHexagonVectorCombineLegacyPass();
} // namespace llvm

namespace {
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
    return HVC.run();
  }
};
} // namespace

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}