LLVM 20.0.0git
HexagonISelDAGToDAGHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonISelLowering.h"
11#include "llvm/ADT/BitVector.h"
12#include "llvm/ADT/SetVector.h"
14#include "llvm/IR/Intrinsics.h"
15#include "llvm/IR/IntrinsicsHexagon.h"
16#include "llvm/Support/Debug.h"
18
19#include <algorithm>
20#include <cmath>
21#include <deque>
22#include <functional>
23#include <map>
24#include <optional>
25#include <set>
26#include <unordered_map>
27#include <utility>
28#include <vector>
29
30#define DEBUG_TYPE "hexagon-isel"
31using namespace llvm;
32
33namespace {
34
35// --------------------------------------------------------------------
36// Implementation of permutation networks.
37
38// Implementation of the node routing through butterfly networks:
39// - Forward delta.
40// - Reverse delta.
41// - Benes.
42//
43//
44// Forward delta network consists of log(N) steps, where N is the number
45// of inputs. In each step, an input can stay in place, or it can get
46// routed to another position[1]. The step after that consists of two
47// networks, each half in size in terms of the number of nodes. In those
48// terms, in the given step, an input can go to either the upper or the
49// lower network in the next step.
50//
51// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
52// positions as long as there is no conflict.
53
54// Here's a delta network for 8 inputs, only the switching routes are
55// shown:
56//
57// Steps:
58// |- 1 ---------------|- 2 -----|- 3 -|
59//
60// Inp[0] *** *** *** *** Out[0]
61// \ / \ / \ /
62// \ / \ / X
63// \ / \ / / \
64// Inp[1] *** \ / *** X *** *** Out[1]
65// \ \ / / \ / \ /
66// \ \ / / X X
67// \ \ / / / \ / \
68// Inp[2] *** \ \ / / *** X *** *** Out[2]
69// \ \ X / / / \ \ /
70// \ \ / \ / / / \ X
71// \ X X / / \ / \
72// Inp[3] *** \ / \ / \ / *** *** *** Out[3]
73// \ X X X /
74// \ / \ / \ / \ /
75// X X X X
76// / \ / \ / \ / \
77// / X X X \
78// Inp[4] *** / \ / \ / \ *** *** *** Out[4]
79// / X X \ \ / \ /
80// / / \ / \ \ \ / X
81// / / X \ \ \ / / \
82// Inp[5] *** / / \ \ *** X *** *** Out[5]
83// / / \ \ \ / \ /
84// / / \ \ X X
85// / / \ \ / \ / \
86// Inp[6] *** / \ *** X *** *** Out[6]
87// / \ / \ \ /
88// / \ / \ X
89// / \ / \ / \
90// Inp[7] *** *** *** *** Out[7]
91//
92//
93// Reverse delta network is same as delta network, with the steps in
94// the opposite order.
95//
96//
97// Benes network is a forward delta network immediately followed by
98// a reverse delta network.
99
100enum class ColorKind { None, Red, Black };
101
102// Graph coloring utility used to partition nodes into two groups:
103// they will correspond to nodes routed to the upper and lower networks.
104struct Coloring {
105 using Node = int;
106 using MapType = std::map<Node, ColorKind>;
107 static constexpr Node Ignore = Node(-1);
108
109 Coloring(ArrayRef<Node> Ord) : Order(Ord) {
110 build();
111 if (!color())
112 Colors.clear();
113 }
114
115 const MapType &colors() const {
116 return Colors;
117 }
118
119 ColorKind other(ColorKind Color) {
120 if (Color == ColorKind::None)
121 return ColorKind::Red;
122 return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
123 }
124
125 LLVM_DUMP_METHOD void dump() const;
126
127private:
128 ArrayRef<Node> Order;
129 MapType Colors;
130 std::set<Node> Needed;
131
132 using NodeSet = std::set<Node>;
133 std::map<Node,NodeSet> Edges;
134
135 Node conj(Node Pos) {
136 Node Num = Order.size();
137 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
138 }
139
140 ColorKind getColor(Node N) {
141 auto F = Colors.find(N);
142 return F != Colors.end() ? F->second : ColorKind::None;
143 }
144
145 std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
146
147 void build();
148 bool color();
149};
150} // namespace
151
152std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
153 auto Color = ColorKind::None;
154 for (Node N : Nodes) {
155 ColorKind ColorN = getColor(N);
156 if (ColorN == ColorKind::None)
157 continue;
158 if (Color == ColorKind::None)
159 Color = ColorN;
160 else if (Color != ColorKind::None && Color != ColorN)
161 return { false, ColorKind::None };
162 }
163 return { true, Color };
164}
165
166void Coloring::build() {
167 // Add Order[P] and Order[conj(P)] to Edges.
168 for (unsigned P = 0; P != Order.size(); ++P) {
169 Node I = Order[P];
170 if (I != Ignore) {
171 Needed.insert(I);
172 Node PC = Order[conj(P)];
173 if (PC != Ignore && PC != I)
174 Edges[I].insert(PC);
175 }
176 }
177 // Add I and conj(I) to Edges.
178 for (unsigned I = 0; I != Order.size(); ++I) {
179 if (!Needed.count(I))
180 continue;
181 Node C = conj(I);
182 // This will create an entry in the edge table, even if I is not
183 // connected to any other node. This is necessary, because it still
184 // needs to be colored.
185 NodeSet &Is = Edges[I];
186 if (Needed.count(C))
187 Is.insert(C);
188 }
189}
190
191bool Coloring::color() {
192 SetVector<Node> FirstQ;
193 auto Enqueue = [this,&FirstQ] (Node N) {
195 Q.insert(N);
196 for (unsigned I = 0; I != Q.size(); ++I) {
197 NodeSet &Ns = Edges[Q[I]];
198 Q.insert(Ns.begin(), Ns.end());
199 }
200 FirstQ.insert(Q.begin(), Q.end());
201 };
202 for (Node N : Needed)
203 Enqueue(N);
204
205 for (Node N : FirstQ) {
206 if (Colors.count(N))
207 continue;
208 NodeSet &Ns = Edges[N];
209 auto P = getUniqueColor(Ns);
210 if (!P.first)
211 return false;
212 Colors[N] = other(P.second);
213 }
214
215 // First, color nodes that don't have any dups.
216 for (auto E : Edges) {
217 Node N = E.first;
218 if (!Needed.count(conj(N)) || Colors.count(N))
219 continue;
220 auto P = getUniqueColor(E.second);
221 if (!P.first)
222 return false;
223 Colors[N] = other(P.second);
224 }
225
226 // Now, nodes that are still uncolored. Since the graph can be modified
227 // in this step, create a work queue.
228 std::vector<Node> WorkQ;
229 for (auto E : Edges) {
230 Node N = E.first;
231 if (!Colors.count(N))
232 WorkQ.push_back(N);
233 }
234
235 for (Node N : WorkQ) {
236 NodeSet &Ns = Edges[N];
237 auto P = getUniqueColor(Ns);
238 if (P.first) {
239 Colors[N] = other(P.second);
240 continue;
241 }
242
243 // Coloring failed. Split this node.
244 Node C = conj(N);
245 ColorKind ColorN = other(ColorKind::None);
246 ColorKind ColorC = other(ColorN);
247 NodeSet &Cs = Edges[C];
248 NodeSet CopyNs = Ns;
249 for (Node M : CopyNs) {
250 ColorKind ColorM = getColor(M);
251 if (ColorM == ColorC) {
252 // Connect M with C, disconnect M from N.
253 Cs.insert(M);
254 Edges[M].insert(C);
255 Ns.erase(M);
256 Edges[M].erase(N);
257 }
258 }
259 Colors[N] = ColorN;
260 Colors[C] = ColorC;
261 }
262
263 // Explicitly assign "None" to all uncolored nodes.
264 for (unsigned I = 0; I != Order.size(); ++I)
265 if (Colors.count(I) == 0)
266 Colors[I] = ColorKind::None;
267
268 return true;
269}
270
271#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
272void Coloring::dump() const {
273 dbgs() << "{ Order: {";
274 for (Node P : Order) {
275 if (P != Ignore)
276 dbgs() << ' ' << P;
277 else
278 dbgs() << " -";
279 }
280 dbgs() << " }\n";
281 dbgs() << " Needed: {";
282 for (Node N : Needed)
283 dbgs() << ' ' << N;
284 dbgs() << " }\n";
285
286 dbgs() << " Edges: {\n";
287 for (auto E : Edges) {
288 dbgs() << " " << E.first << " -> {";
289 for (auto N : E.second)
290 dbgs() << ' ' << N;
291 dbgs() << " }\n";
292 }
293 dbgs() << " }\n";
294
295 auto ColorKindToName = [](ColorKind C) {
296 switch (C) {
297 case ColorKind::None:
298 return "None";
299 case ColorKind::Red:
300 return "Red";
301 case ColorKind::Black:
302 return "Black";
303 }
304 llvm_unreachable("all ColorKinds should be handled by the switch above");
305 };
306
307 dbgs() << " Colors: {\n";
308 for (auto C : Colors)
309 dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
310 dbgs() << " }\n}\n";
311}
312#endif
313
314namespace {
315// Base class of for reordering networks. They don't strictly need to be
316// permutations, as outputs with repeated occurrences of an input element
317// are allowed.
318struct PermNetwork {
319 using Controls = std::vector<uint8_t>;
320 using ElemType = int;
321 static constexpr ElemType Ignore = ElemType(-1);
322
323 enum : uint8_t {
324 None,
325 Pass,
326 Switch
327 };
328 enum : uint8_t {
329 Forward,
330 Reverse
331 };
332
333 PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
334 Order.assign(Ord.data(), Ord.data()+Ord.size());
335 Log = 0;
336
337 unsigned S = Order.size();
338 while (S >>= 1)
339 ++Log;
340
341 Table.resize(Order.size());
342 for (RowType &Row : Table)
343 Row.resize(Mult*Log, None);
344 }
345
346 void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
347 unsigned Size = Order.size();
348 V.resize(Size);
349 for (unsigned I = 0; I != Size; ++I) {
350 unsigned W = 0;
351 for (unsigned L = 0; L != Log; ++L) {
352 unsigned C = ctl(I, StartAt+L) == Switch;
353 if (Dir == Forward)
354 W |= C << (Log-1-L);
355 else
356 W |= C << L;
357 }
358 assert(isUInt<8>(W));
359 V[I] = uint8_t(W);
360 }
361 }
362
363 uint8_t ctl(ElemType Pos, unsigned Step) const {
364 return Table[Pos][Step];
365 }
366 unsigned size() const {
367 return Order.size();
368 }
369 unsigned steps() const {
370 return Log;
371 }
372
373protected:
374 unsigned Log;
375 std::vector<ElemType> Order;
376 using RowType = std::vector<uint8_t>;
377 std::vector<RowType> Table;
378};
379
380struct ForwardDeltaNetwork : public PermNetwork {
381 ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
382
383 bool run(Controls &V) {
384 if (!route(Order.data(), Table.data(), size(), 0))
385 return false;
386 getControls(V, 0, Forward);
387 return true;
388 }
389
390private:
391 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
392};
393
394struct ReverseDeltaNetwork : public PermNetwork {
395 ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
396
397 bool run(Controls &V) {
398 if (!route(Order.data(), Table.data(), size(), 0))
399 return false;
400 getControls(V, 0, Reverse);
401 return true;
402 }
403
404private:
405 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
406};
407
408struct BenesNetwork : public PermNetwork {
409 BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
410
411 bool run(Controls &F, Controls &R) {
412 if (!route(Order.data(), Table.data(), size(), 0))
413 return false;
414
415 getControls(F, 0, Forward);
416 getControls(R, Log, Reverse);
417 return true;
418 }
419
420private:
421 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
422};
423} // namespace
424
425bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
426 unsigned Step) {
427 bool UseUp = false, UseDown = false;
428 ElemType Num = Size;
429
430 // Cannot use coloring here, because coloring is used to determine
431 // the "big" switch, i.e. the one that changes halves, and in a forward
432 // network, a color can be simultaneously routed to both halves in the
433 // step we're working on.
434 for (ElemType J = 0; J != Num; ++J) {
435 ElemType I = P[J];
436 // I is the position in the input,
437 // J is the position in the output.
438 if (I == Ignore)
439 continue;
440 uint8_t S;
441 if (I < Num/2)
442 S = (J < Num/2) ? Pass : Switch;
443 else
444 S = (J < Num/2) ? Switch : Pass;
445
446 // U is the element in the table that needs to be updated.
447 ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
448 if (U < Num/2)
449 UseUp = true;
450 else
451 UseDown = true;
452 if (T[U][Step] != S && T[U][Step] != None)
453 return false;
454 T[U][Step] = S;
455 }
456
457 for (ElemType J = 0; J != Num; ++J)
458 if (P[J] != Ignore && P[J] >= Num/2)
459 P[J] -= Num/2;
460
461 if (Step+1 < Log) {
462 if (UseUp && !route(P, T, Size/2, Step+1))
463 return false;
464 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
465 return false;
466 }
467 return true;
468}
469
470bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
471 unsigned Step) {
472 unsigned Pets = Log-1 - Step;
473 bool UseUp = false, UseDown = false;
474 ElemType Num = Size;
475
476 // In this step half-switching occurs, so coloring can be used.
477 Coloring G({P,Size});
478 const Coloring::MapType &M = G.colors();
479 if (M.empty())
480 return false;
481
482 ColorKind ColorUp = ColorKind::None;
483 for (ElemType J = 0; J != Num; ++J) {
484 ElemType I = P[J];
485 // I is the position in the input,
486 // J is the position in the output.
487 if (I == Ignore)
488 continue;
489 ColorKind C = M.at(I);
490 if (C == ColorKind::None)
491 continue;
492 // During "Step", inputs cannot switch halves, so if the "up" color
493 // is still unknown, make sure that it is selected in such a way that
494 // "I" will stay in the same half.
495 bool InpUp = I < Num/2;
496 if (ColorUp == ColorKind::None)
497 ColorUp = InpUp ? C : G.other(C);
498 if ((C == ColorUp) != InpUp) {
499 // If I should go to a different half than where is it now, give up.
500 return false;
501 }
502
503 uint8_t S;
504 if (InpUp) {
505 S = (J < Num/2) ? Pass : Switch;
506 UseUp = true;
507 } else {
508 S = (J < Num/2) ? Switch : Pass;
509 UseDown = true;
510 }
511 T[J][Pets] = S;
512 }
513
514 // Reorder the working permutation according to the computed switch table
515 // for the last step (i.e. Pets).
516 for (ElemType J = 0, E = Size / 2; J != E; ++J) {
517 ElemType PJ = P[J]; // Current values of P[J]
518 ElemType PC = P[J+Size/2]; // and P[conj(J)]
519 ElemType QJ = PJ; // New values of P[J]
520 ElemType QC = PC; // and P[conj(J)]
521 if (T[J][Pets] == Switch)
522 QC = PJ;
523 if (T[J+Size/2][Pets] == Switch)
524 QJ = PC;
525 P[J] = QJ;
526 P[J+Size/2] = QC;
527 }
528
529 for (ElemType J = 0; J != Num; ++J)
530 if (P[J] != Ignore && P[J] >= Num/2)
531 P[J] -= Num/2;
532
533 if (Step+1 < Log) {
534 if (UseUp && !route(P, T, Size/2, Step+1))
535 return false;
536 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
537 return false;
538 }
539 return true;
540}
541
542bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
543 unsigned Step) {
544 Coloring G({P,Size});
545 const Coloring::MapType &M = G.colors();
546 if (M.empty())
547 return false;
548 ElemType Num = Size;
549
550 unsigned Pets = 2*Log-1 - Step;
551 bool UseUp = false, UseDown = false;
552
553 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
554 // result in different controls. Let's pick the one where the first
555 // control will be "Pass".
556 ColorKind ColorUp = ColorKind::None;
557 for (ElemType J = 0; J != Num; ++J) {
558 ElemType I = P[J];
559 if (I == Ignore)
560 continue;
561 ColorKind C = M.at(I);
562 if (C == ColorKind::None)
563 continue;
564 if (ColorUp == ColorKind::None) {
565 ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
566 }
567 unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
568 if (C == ColorUp) {
569 if (I < Num/2)
570 T[I][Step] = Pass;
571 else
572 T[CI][Step] = Switch;
573 T[J][Pets] = (J < Num/2) ? Pass : Switch;
574 UseUp = true;
575 } else { // Down
576 if (I < Num/2)
577 T[CI][Step] = Switch;
578 else
579 T[I][Step] = Pass;
580 T[J][Pets] = (J < Num/2) ? Switch : Pass;
581 UseDown = true;
582 }
583 }
584
585 // Reorder the working permutation according to the computed switch table
586 // for the last step (i.e. Pets).
587 for (ElemType J = 0; J != Num/2; ++J) {
588 ElemType PJ = P[J]; // Current values of P[J]
589 ElemType PC = P[J+Num/2]; // and P[conj(J)]
590 ElemType QJ = PJ; // New values of P[J]
591 ElemType QC = PC; // and P[conj(J)]
592 if (T[J][Pets] == Switch)
593 QC = PJ;
594 if (T[J+Num/2][Pets] == Switch)
595 QJ = PC;
596 P[J] = QJ;
597 P[J+Num/2] = QC;
598 }
599
600 for (ElemType J = 0; J != Num; ++J)
601 if (P[J] != Ignore && P[J] >= Num/2)
602 P[J] -= Num/2;
603
604 if (Step+1 < Log) {
605 if (UseUp && !route(P, T, Size/2, Step+1))
606 return false;
607 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
608 return false;
609 }
610 return true;
611}
612
613// --------------------------------------------------------------------
614// Support for building selection results (output instructions that are
615// parts of the final selection).
616
617namespace {
618struct OpRef {
619 OpRef(SDValue V) : OpV(V) {}
620 bool isValue() const { return OpV.getNode() != nullptr; }
621 bool isValid() const { return isValue() || !(OpN & Invalid); }
622 bool isUndef() const { return OpN & Undef; }
623 static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
624 static OpRef fail() { return OpRef(Invalid); }
625
626 static OpRef lo(const OpRef &R) {
627 assert(!R.isValue());
628 return OpRef(R.OpN & (Undef | Index | LoHalf));
629 }
630 static OpRef hi(const OpRef &R) {
631 assert(!R.isValue());
632 return OpRef(R.OpN & (Undef | Index | HiHalf));
633 }
634 static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
635
636 // Direct value.
637 SDValue OpV = SDValue();
638
639 // Reference to the operand of the input node:
640 // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
641 // operand index:
642 // If bit 30 is set, it's the high half of the operand.
643 // If bit 29 is set, it's the low half of the operand.
644 unsigned OpN = 0;
645
646 enum : unsigned {
647 Invalid = 0x10000000,
648 LoHalf = 0x20000000,
649 HiHalf = 0x40000000,
650 Whole = LoHalf | HiHalf,
651 Undef = 0x80000000,
652 Index = 0x0FFFFFFF, // Mask of the index value.
653 IndexBits = 28,
654 };
655
657 void print(raw_ostream &OS, const SelectionDAG &G) const;
658
659private:
660 OpRef(unsigned N) : OpN(N) {}
661};
662
663struct NodeTemplate {
664 NodeTemplate() = default;
665 unsigned Opc = 0;
666 MVT Ty = MVT::Other;
667 std::vector<OpRef> Ops;
668
669 LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
670};
671
672struct ResultStack {
673 ResultStack(SDNode *Inp)
674 : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
675 SDNode *InpNode;
676 MVT InpTy;
677 unsigned push(const NodeTemplate &Res) {
678 List.push_back(Res);
679 return List.size()-1;
680 }
681 unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
682 NodeTemplate Res;
683 Res.Opc = Opc;
684 Res.Ty = Ty;
685 Res.Ops = Ops;
686 return push(Res);
687 }
688 bool empty() const { return List.empty(); }
689 unsigned size() const { return List.size(); }
690 unsigned top() const { return size()-1; }
691 const NodeTemplate &operator[](unsigned I) const { return List[I]; }
692 unsigned reset(unsigned NewTop) {
693 List.resize(NewTop+1);
694 return NewTop;
695 }
696
697 using BaseType = std::vector<NodeTemplate>;
698 BaseType::iterator begin() { return List.begin(); }
699 BaseType::iterator end() { return List.end(); }
700 BaseType::const_iterator begin() const { return List.begin(); }
701 BaseType::const_iterator end() const { return List.end(); }
702
704
706 void print(raw_ostream &OS, const SelectionDAG &G) const;
707};
708} // namespace
709
710#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
711void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
712 if (isValue()) {
713 OpV.getNode()->print(OS, &G);
714 return;
715 }
716 if (OpN & Invalid) {
717 OS << "invalid";
718 return;
719 }
720 if (OpN & Undef) {
721 OS << "undef";
722 return;
723 }
724 if ((OpN & Whole) != Whole) {
725 assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
726 if (OpN & LoHalf)
727 OS << "lo ";
728 else
729 OS << "hi ";
730 }
731 OS << '#' << SignExtend32(OpN & Index, IndexBits);
732}
733
734void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
735 const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
736 OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
737 << TII.getName(Opc);
738 bool Comma = false;
739 for (const auto &R : Ops) {
740 if (Comma)
741 OS << ',';
742 Comma = true;
743 OS << ' ';
744 R.print(OS, G);
745 }
746}
747
748void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
749 OS << "Input node:\n";
750#ifndef NDEBUG
751 InpNode->dumpr(&G);
752#endif
753 OS << "Result templates:\n";
754 for (unsigned I = 0, E = List.size(); I != E; ++I) {
755 OS << '[' << I << "] ";
756 List[I].print(OS, G);
757 OS << '\n';
758 }
759}
760#endif
761
762namespace {
763struct ShuffleMask {
764 ShuffleMask(ArrayRef<int> M) : Mask(M) {
765 for (int M : Mask) {
766 if (M == -1)
767 continue;
768 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
769 MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
770 }
771 }
772
774 int MinSrc = -1, MaxSrc = -1;
775
776 ShuffleMask lo() const {
777 size_t H = Mask.size()/2;
778 return ShuffleMask(Mask.take_front(H));
779 }
780 ShuffleMask hi() const {
781 size_t H = Mask.size()/2;
782 return ShuffleMask(Mask.take_back(H));
783 }
784
785 void print(raw_ostream &OS) const {
786 OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
787 for (int M : Mask)
788 OS << ' ' << M;
789 OS << " }";
790 }
791};
792
794raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
795 SM.print(OS);
796 return OS;
797}
798} // namespace
799
800namespace shuffles {
802// Vdd = vshuffvdd(Vu, Vv, Rt)
803// Vdd = vdealvdd(Vu, Vv, Rt)
804// Vd = vpack(Vu, Vv, Size, TakeOdd)
805// Vd = vshuff(Vu, Vv, Size, TakeOdd)
806// Vd = vdeal(Vu, Vv, Size, TakeOdd)
807// Vd = vdealb4w(Vu, Vv)
808
809ArrayRef<int> lo(ArrayRef<int> Vuu) { return Vuu.take_front(Vuu.size() / 2); }
810ArrayRef<int> hi(ArrayRef<int> Vuu) { return Vuu.take_back(Vuu.size() / 2); }
811
813 int Len = Vu.size();
814 MaskT Vdd(2 * Len);
815 std::copy(Vv.begin(), Vv.end(), Vdd.begin());
816 std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
817
818 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
819 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
820
821 for (int Offset = 1; Offset < Len; Offset *= 2) {
822 if ((Rt & Offset) == 0)
823 continue;
824 for (int i = 0; i != Len; ++i) {
825 if ((i & Offset) == 0)
826 std::swap(Vd1[i], Vd0[i + Offset]);
827 }
828 }
829 return Vdd;
830}
831
833 int Len = Vu.size();
834 MaskT Vdd(2 * Len);
835 std::copy(Vv.begin(), Vv.end(), Vdd.begin());
836 std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
837
838 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
839 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
840
841 for (int Offset = Len / 2; Offset > 0; Offset /= 2) {
842 if ((Rt & Offset) == 0)
843 continue;
844 for (int i = 0; i != Len; ++i) {
845 if ((i & Offset) == 0)
846 std::swap(Vd1[i], Vd0[i + Offset]);
847 }
848 }
849 return Vdd;
850}
851
852MaskT vpack(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
853 int Len = Vu.size();
854 MaskT Vd(Len);
855 auto Odd = static_cast<int>(TakeOdd);
856 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
857 for (int b = 0; b != static_cast<int>(Size); ++b) {
858 // clang-format off
859 Vd[i * Size + b] = Vv[(2 * i + Odd) * Size + b];
860 Vd[i * Size + b + Len / 2] = Vu[(2 * i + Odd) * Size + b];
861 // clang-format on
862 }
863 }
864 return Vd;
865}
866
867MaskT vshuff(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
868 int Len = Vu.size();
869 MaskT Vd(Len);
870 auto Odd = static_cast<int>(TakeOdd);
871 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
872 for (int b = 0; b != static_cast<int>(Size); ++b) {
873 Vd[(2 * i + 0) * Size + b] = Vv[(2 * i + Odd) * Size + b];
874 Vd[(2 * i + 1) * Size + b] = Vu[(2 * i + Odd) * Size + b];
875 }
876 }
877 return Vd;
878}
879
880MaskT vdeal(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
881 int Len = Vu.size();
882 MaskT T = vdealvdd(Vu, Vv, Len - 2 * Size);
883 return vpack(hi(T), lo(T), Size, TakeOdd);
884}
885
887 int Len = Vu.size();
888 MaskT Vd(Len);
889 for (int i = 0, e = Len / 4; i != e; ++i) {
890 Vd[0 * (Len / 4) + i] = Vv[4 * i + 0];
891 Vd[1 * (Len / 4) + i] = Vv[4 * i + 2];
892 Vd[2 * (Len / 4) + i] = Vu[4 * i + 0];
893 Vd[3 * (Len / 4) + i] = Vu[4 * i + 2];
894 }
895 return Vd;
896}
897
898template <typename ShuffFunc, typename... OptArgs>
899auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT {
900 MaskT Vu(Length), Vv(Length);
901 std::iota(Vu.begin(), Vu.end(), Length); // High
902 std::iota(Vv.begin(), Vv.end(), 0); // Low
903 return S(Vu, Vv, args...);
904}
905
906} // namespace shuffles
907
908// --------------------------------------------------------------------
909// The HvxSelector class.
910
912 return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
913}
915 return G.getSubtarget<HexagonSubtarget>();
916}
917
918namespace llvm {
919 struct HvxSelector {
924 const unsigned HwLen;
925
927 : Lower(getHexagonLowering(G)), ISel(HS), DAG(G),
928 HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
929
930 MVT getSingleVT(MVT ElemTy) const {
931 assert(ElemTy != MVT::i1 && "Use getBoolVT for predicates");
932 unsigned NumElems = HwLen / (ElemTy.getSizeInBits() / 8);
933 return MVT::getVectorVT(ElemTy, NumElems);
934 }
935
936 MVT getPairVT(MVT ElemTy) const {
937 assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs.
938 unsigned NumElems = (2 * HwLen) / (ElemTy.getSizeInBits() / 8);
939 return MVT::getVectorVT(ElemTy, NumElems);
940 }
941
942 MVT getBoolVT() const {
943 // Return HwLen x i1.
944 return MVT::getVectorVT(MVT::i1, HwLen);
945 }
946
948 void selectShuffle(SDNode *N);
949 void selectRor(SDNode *N);
950 void selectVAlign(SDNode *N);
951
953 unsigned Width);
955 ArrayRef<uint32_t> Completions, unsigned Width);
956 static std::optional<int> rotationDistance(ShuffleMask SM, unsigned WrapAt);
957
958 private:
959 void select(SDNode *ISelN);
960 void materialize(const ResultStack &Results);
961
962 SDValue getConst32(int Val, const SDLoc &dl);
963 SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
964
965 enum : unsigned {
966 None,
967 PackMux,
968 };
969 OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
970 OpRef funnels(OpRef Va, OpRef Vb, int Amount, ResultStack &Results);
971
972 OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
973 MutableArrayRef<int> NewMask, unsigned Options = None);
974 OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
975 MutableArrayRef<int> NewMask);
976 OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
977 ResultStack &Results);
978 OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
979 ResultStack &Results);
980
981 OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
982 OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
983 OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
984 OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
985
986 OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
987 OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
988 OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
989 OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
990
991 bool selectVectorConstants(SDNode *N);
992 bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
993 SDValue Va, SDValue Vb, SDNode *N);
994 };
995} // namespace llvm
996
998 MutableArrayRef<int> MaskR) {
999 unsigned VecLen = Mask.size();
1000 assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
1001 for (unsigned I = 0; I != VecLen; ++I) {
1002 int M = Mask[I];
1003 if (M < 0) {
1004 MaskL[I] = MaskR[I] = -1;
1005 } else if (unsigned(M) < VecLen) {
1006 MaskL[I] = M;
1007 MaskR[I] = -1;
1008 } else {
1009 MaskL[I] = -1;
1010 MaskR[I] = M-VecLen;
1011 }
1012 }
1013}
1014
1015static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
1016 unsigned MaxLen) {
1017 assert(A.size() > 0 && A.size() >= MaxLen);
1018 int F = A[0];
1019 int E = F;
1020 for (unsigned I = 1; I != MaxLen; ++I) {
1021 if (A[I] - E != Inc)
1022 return { F, I };
1023 E = A[I];
1024 }
1025 return { F, MaxLen };
1026}
1027
1028static bool isUndef(ArrayRef<int> Mask) {
1029 for (int Idx : Mask)
1030 if (Idx != -1)
1031 return false;
1032 return true;
1033}
1034
1035static bool isIdentity(ArrayRef<int> Mask) {
1036 for (int I = 0, E = Mask.size(); I != E; ++I) {
1037 int M = Mask[I];
1038 if (M >= 0 && M != I)
1039 return false;
1040 }
1041 return true;
1042}
1043
1044static bool isLowHalfOnly(ArrayRef<int> Mask) {
1045 int L = Mask.size();
1046 assert(L % 2 == 0);
1047 // Check if the second half of the mask is all-undef.
1048 return llvm::all_of(Mask.drop_front(L / 2), [](int M) { return M < 0; });
1049}
1050
1052 unsigned SegLen) {
1053 assert(isPowerOf2_32(SegLen));
1055 if (SM.MaxSrc == -1)
1056 return SegList;
1057
1058 unsigned Shift = Log2_32(SegLen);
1059 BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
1060
1061 for (int M : SM.Mask) {
1062 if (M >= 0)
1063 Segs.set(M >> Shift);
1064 }
1065
1066 for (unsigned B : Segs.set_bits())
1067 SegList.push_back(B);
1068 return SegList;
1069}
1070
1072 unsigned SegLen) {
1073 // Calculate the layout of the output segments in terms of the input
1074 // segments.
1075 // For example [1,3,1,0] means that the output consists of 4 output
1076 // segments, where the first output segment has only elements of the
1077 // input segment at index 1. The next output segment only has elements
1078 // of the input segment 3, etc.
1079 // If an output segment only has undef elements, the value will be ~0u.
1080 // If an output segment has elements from more than one input segment,
1081 // the corresponding value will be ~1u.
1082 unsigned MaskLen = SM.Mask.size();
1083 assert(MaskLen % SegLen == 0);
1084 SmallVector<unsigned, 4> Map(MaskLen / SegLen);
1085
1086 for (int S = 0, E = Map.size(); S != E; ++S) {
1087 unsigned Idx = ~0u;
1088 for (int I = 0; I != static_cast<int>(SegLen); ++I) {
1089 int M = SM.Mask[S*SegLen + I];
1090 if (M < 0)
1091 continue;
1092 unsigned G = M / SegLen; // Input segment of this element.
1093 if (Idx == ~0u) {
1094 Idx = G;
1095 } else if (Idx != G) {
1096 Idx = ~1u;
1097 break;
1098 }
1099 }
1100 Map[S] = Idx;
1101 }
1102
1103 return Map;
1104}
1105
1107 unsigned SegLen, MutableArrayRef<int> PackedMask) {
1109 for (int I = OutSegMap.size() - 1; I >= 0; --I) {
1110 unsigned S = OutSegMap[I];
1111 assert(S != ~0u && "Unexpected undef");
1112 assert(S != ~1u && "Unexpected multi");
1113 if (InvMap.size() <= S)
1114 InvMap.resize(S+1);
1115 InvMap[S] = I;
1116 }
1117
1118 unsigned Shift = Log2_32(SegLen);
1119 for (int I = 0, E = Mask.size(); I != E; ++I) {
1120 int M = Mask[I];
1121 if (M >= 0) {
1122 int OutIdx = InvMap[M >> Shift];
1123 M = (M & (SegLen-1)) + SegLen*OutIdx;
1124 }
1125 PackedMask[I] = M;
1126 }
1127}
1128
1129bool HvxSelector::selectVectorConstants(SDNode *N) {
1130 // Constant vectors are generated as loads from constant pools or as
1131 // splats of a constant value. Since they are generated during the
1132 // selection process, the main selection algorithm is not aware of them.
1133 // Select them directly here.
1135 SetVector<SDNode*> WorkQ;
1136
1137 // The DAG can change (due to CSE) during selection, so cache all the
1138 // unselected nodes first to avoid traversing a mutating DAG.
1139 WorkQ.insert(N);
1140 for (unsigned i = 0; i != WorkQ.size(); ++i) {
1141 SDNode *W = WorkQ[i];
1142 if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
1143 Nodes.push_back(W);
1144 for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
1145 WorkQ.insert(W->getOperand(j).getNode());
1146 }
1147
1148 for (SDNode *L : Nodes)
1149 select(L);
1150
1151 return !Nodes.empty();
1152}
1153
1154void HvxSelector::materialize(const ResultStack &Results) {
1155 DEBUG_WITH_TYPE("isel", {
1156 dbgs() << "Materializing\n";
1157 Results.print(dbgs(), DAG);
1158 });
1159 if (Results.empty())
1160 return;
1161 const SDLoc &dl(Results.InpNode);
1162 std::vector<SDValue> Output;
1163
1164 for (unsigned I = 0, E = Results.size(); I != E; ++I) {
1165 const NodeTemplate &Node = Results[I];
1166 std::vector<SDValue> Ops;
1167 for (const OpRef &R : Node.Ops) {
1168 assert(R.isValid());
1169 if (R.isValue()) {
1170 Ops.push_back(R.OpV);
1171 continue;
1172 }
1173 if (R.OpN & OpRef::Undef) {
1174 MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index);
1175 Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
1176 continue;
1177 }
1178 // R is an index of a result.
1179 unsigned Part = R.OpN & OpRef::Whole;
1180 int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
1181 if (Idx < 0)
1182 Idx += I;
1183 assert(Idx >= 0 && unsigned(Idx) < Output.size());
1184 SDValue Op = Output[Idx];
1185 MVT OpTy = Op.getValueType().getSimpleVT();
1186 if (Part != OpRef::Whole) {
1187 assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1189 OpTy.getVectorNumElements()/2);
1190 unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1191 : Hexagon::vsub_hi;
1192 Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
1193 }
1194 Ops.push_back(Op);
1195 } // for (Node : Results)
1196
1197 assert(Node.Ty != MVT::Other);
1198 SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1199 ? Ops.front().getNode()
1200 : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
1201 Output.push_back(SDValue(ResN, 0));
1202 }
1203
1204 SDNode *OutN = Output.back().getNode();
1205 SDNode *InpN = Results.InpNode;
1206 DEBUG_WITH_TYPE("isel", {
1207 dbgs() << "Generated node:\n";
1208 OutN->dumpr(&DAG);
1209 });
1210
1211 ISel.ReplaceNode(InpN, OutN);
1212 selectVectorConstants(OutN);
1214}
1215
1216OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
1217 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1218 const SDLoc &dl(Results.InpNode);
1219 Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1220 getConst32(Hexagon::HvxWRRegClassID, dl),
1221 Lo, getConst32(Hexagon::vsub_lo, dl),
1222 Hi, getConst32(Hexagon::vsub_hi, dl),
1223 });
1224 return OpRef::res(Results.top());
1225}
1226
1227OpRef HvxSelector::funnels(OpRef Va, OpRef Vb, int Amount,
1228 ResultStack &Results) {
1229 // Do a funnel shift towards the low end (shift right) by Amount bytes.
1230 // If Amount < 0, treat it as shift left, i.e. do a shift right by
1231 // Amount + HwLen.
1232 auto VecLen = static_cast<int>(HwLen);
1233
1234 if (Amount == 0)
1235 return Va;
1236 if (Amount == VecLen)
1237 return Vb;
1238
1239 MVT Ty = getSingleVT(MVT::i8);
1240 const SDLoc &dl(Results.InpNode);
1241
1242 if (Amount < 0)
1243 Amount += VecLen;
1244 if (Amount > VecLen) {
1245 Amount -= VecLen;
1246 std::swap(Va, Vb);
1247 }
1248
1249 if (isUInt<3>(Amount)) {
1250 SDValue A = getConst32(Amount, dl);
1251 Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, A});
1252 } else if (isUInt<3>(VecLen - Amount)) {
1253 SDValue A = getConst32(VecLen - Amount, dl);
1254 Results.push(Hexagon::V6_vlalignbi, Ty, {Vb, Va, A});
1255 } else {
1256 SDValue A = getConst32(Amount, dl);
1257 Results.push(Hexagon::A2_tfrsi, Ty, {A});
1258 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(-1)});
1259 }
1260 return OpRef::res(Results.top());
1261}
1262
1263// Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1264// pack these halves into a single vector, and remap SM into NewMask to use
1265// the new vector instead.
1266OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1267 ResultStack &Results, MutableArrayRef<int> NewMask,
1268 unsigned Options) {
1269 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1270 if (!Va.isValid() || !Vb.isValid())
1271 return OpRef::fail();
1272
1273 if (Vb.isUndef()) {
1274 std::copy(SM.Mask.begin(), SM.Mask.end(), NewMask.begin());
1275 return Va;
1276 }
1277 if (Va.isUndef()) {
1278 std::copy(SM.Mask.begin(), SM.Mask.end(), NewMask.begin());
1280 return Vb;
1281 }
1282
1283 MVT Ty = getSingleVT(MVT::i8);
1284 MVT PairTy = getPairVT(MVT::i8);
1285 OpRef Inp[2] = {Va, Vb};
1286 unsigned VecLen = SM.Mask.size();
1287
1288 auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
1289 ResultStack &Results) {
1290 if (Amt == 0)
1291 return Lo;
1292 const SDLoc &dl(Results.InpNode);
1293 if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1294 bool IsRight = isUInt<3>(Amt); // Right align.
1295 SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1296 unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1297 Results.push(Opc, Ty, {Hi, Lo, S});
1298 return OpRef::res(Results.top());
1299 }
1300 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1301 OpRef A = OpRef::res(Results.top());
1302 Results.push(Hexagon::V6_valignb, Ty, {Hi, Lo, A});
1303 return OpRef::res(Results.top());
1304 };
1305
1306 // Segment is a vector half.
1307 unsigned SegLen = HwLen / 2;
1308
1309 // Check if we can shuffle vector halves around to get the used elements
1310 // into a single vector.
1311 shuffles::MaskT MaskH(SM.Mask);
1312 SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
1313 unsigned SegCount = SegList.size();
1314 SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
1315
1316 if (SegList.empty())
1317 return OpRef::undef(Ty);
1318
1319 // NOTE:
1320 // In the following part of the function, where the segments are rearranged,
1321 // the shuffle mask SM can be of any length that is a multiple of a vector
1322 // (i.e. a multiple of 2*SegLen), and non-zero.
1323 // The output segment map is computed, and it may have any even number of
1324 // entries, but the rearrangement of input segments will be done based only
1325 // on the first two (non-undef) entries in the segment map.
1326 // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1327 // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1328 // a single vector V = 3:1. The output mask will then be updated to use
1329 // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1330 //
1331 // Picking the segments based on the output map is an optimization. For
1332 // correctness it is only necessary that Seg0 and Seg1 are the two input
1333 // segments that are used in the output.
1334
1335 unsigned Seg0 = ~0u, Seg1 = ~0u;
1336 for (unsigned X : SegMap) {
1337 if (X == ~0u)
1338 continue;
1339 if (Seg0 == ~0u)
1340 Seg0 = X;
1341 else if (Seg1 != ~0u)
1342 break;
1343 if (X == ~1u || X != Seg0)
1344 Seg1 = X;
1345 }
1346
1347 if (SegCount == 1) {
1348 unsigned SrcOp = SegList[0] / 2;
1349 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1350 int M = SM.Mask[I];
1351 if (M >= 0) {
1352 M -= SrcOp * HwLen;
1353 assert(M >= 0);
1354 }
1355 NewMask[I] = M;
1356 }
1357 return Inp[SrcOp];
1358 }
1359
1360 if (SegCount == 2) {
1361 // Seg0 should not be undef here: this would imply a SegList
1362 // with <= 1 elements, which was checked earlier.
1363 assert(Seg0 != ~0u);
1364
1365 // If Seg0 or Seg1 are "multi-defined", pick them from the input
1366 // segment list instead.
1367 if (Seg0 == ~1u || Seg1 == ~1u) {
1368 if (Seg0 == Seg1) {
1369 Seg0 = SegList[0];
1370 Seg1 = SegList[1];
1371 } else if (Seg0 == ~1u) {
1372 Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1373 } else {
1374 assert(Seg1 == ~1u);
1375 Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1376 }
1377 }
1378 assert(Seg0 != ~1u && Seg1 != ~1u);
1379
1380 assert(Seg0 != Seg1 && "Expecting different segments");
1381 const SDLoc &dl(Results.InpNode);
1382 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1383 OpRef HL = OpRef::res(Results.top());
1384
1385 // Va = AB, Vb = CD
1386
1387 if (Seg0 / 2 == Seg1 / 2) {
1388 // Same input vector.
1389 Va = Inp[Seg0 / 2];
1390 if (Seg0 > Seg1) {
1391 // Swap halves.
1392 Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1393 Va = OpRef::res(Results.top());
1394 }
1395 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1396 } else if (Seg0 % 2 == Seg1 % 2) {
1397 // Picking AC, BD, CA, or DB.
1398 // vshuff(CD,AB,HL) -> BD:AC
1399 // vshuff(AB,CD,HL) -> DB:CA
1400 auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
1401 : std::make_pair(Va, Vb); // CA or DB
1402 Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
1403 OpRef P = OpRef::res(Results.top());
1404 Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
1405 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1406 } else {
1407 // Picking AD, BC, CB, or DA.
1408 if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1409 // AD or BC: this can be done using vmux.
1410 // Q = V6_pred_scalar2 SegLen
1411 // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1412 Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1413 OpRef Qt = OpRef::res(Results.top());
1414 auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
1415 : std::make_pair(Vb, Va); // CB
1416 Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1417 Va = OpRef::res(Results.top());
1418 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1419 } else {
1420 // BC or DA: this could be done via valign by SegLen.
1421 // Do nothing here, because valign (if possible) will be generated
1422 // later on (make sure the Seg0 values are as expected).
1423 assert(Seg0 == 1 || Seg0 == 3);
1424 }
1425 }
1426 }
1427
1428 // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1429 // FIXME: maybe remove this?
1430 ShuffleMask SMH(MaskH);
1431 assert(SMH.Mask.size() == VecLen);
1432 shuffles::MaskT MaskA(SMH.Mask);
1433
1434 if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1435 // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1436 shuffles::MaskT Swapped(SMH.Mask);
1438 ShuffleMask SW(Swapped);
1439 if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1440 MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1441 std::swap(Va, Vb);
1442 }
1443 }
1444 ShuffleMask SMA(MaskA);
1445 assert(SMA.Mask.size() == VecLen);
1446
1447 if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1448 int ShiftR = SMA.MinSrc;
1449 if (ShiftR >= static_cast<int>(HwLen)) {
1450 Va = Vb;
1451 Vb = OpRef::undef(Ty);
1452 ShiftR -= HwLen;
1453 }
1454 OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
1455
1456 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1457 int M = SMA.Mask[I];
1458 if (M != -1)
1459 M -= SMA.MinSrc;
1460 NewMask[I] = M;
1461 }
1462 return RetVal;
1463 }
1464
1465 // By here, packing by segment (half-vector) shuffling, and vector alignment
1466 // failed. Try vmux.
1467 // Note: since this is using the original mask, Va and Vb must not have been
1468 // modified.
1469
1470 if (Options & PackMux) {
1471 // If elements picked from Va and Vb have all different (source) indexes
1472 // (relative to the start of the argument), do a mux, and update the mask.
1473 BitVector Picked(HwLen);
1475 bool CanMux = true;
1476 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1477 int M = SM.Mask[I];
1478 if (M == -1)
1479 continue;
1480 if (M >= static_cast<int>(HwLen))
1481 M -= HwLen;
1482 else
1483 MuxBytes[M] = 0xFF;
1484 if (Picked[M]) {
1485 CanMux = false;
1486 break;
1487 }
1488 NewMask[I] = M;
1489 }
1490 if (CanMux)
1491 return vmuxs(MuxBytes, Va, Vb, Results);
1492 }
1493 return OpRef::fail();
1494}
1495
1496// Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1497// pack these vectors into a pair, and remap SM into NewMask to use the
1498// new pair instead.
1499OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1500 ResultStack &Results, MutableArrayRef<int> NewMask) {
1501 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1503 if (SegList.empty())
1504 return OpRef::undef(getPairVT(MVT::i8));
1505
1506 // If more than two halves are used, bail.
1507 // TODO: be more aggressive here?
1508 unsigned SegCount = SegList.size();
1509 if (SegCount > 2)
1510 return OpRef::fail();
1511
1512 MVT HalfTy = getSingleVT(MVT::i8);
1513
1514 OpRef Inp[2] = { Va, Vb };
1515 OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1516
1517 // Really make sure we have at most 2 vectors used in the mask.
1518 assert(SegCount <= 2);
1519
1520 for (int I = 0, E = SegList.size(); I != E; ++I) {
1521 unsigned S = SegList[I];
1522 OpRef Op = Inp[S / 2];
1523 Out[I] = (S & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1524 }
1525
1526 // NOTE: Using SegList as the packing map here (not SegMap). This works,
1527 // because we're not concerned here about the order of the segments (i.e.
1528 // single vectors) in the output pair. Changing the order of vectors is
1529 // free (as opposed to changing the order of vector halves as in packs),
1530 // and so there is no extra cost added in case the order needs to be
1531 // changed later.
1532 packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
1533 return concats(Out[0], Out[1], Results);
1534}
1535
1536OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1537 ResultStack &Results) {
1538 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1539 MVT ByteTy = getSingleVT(MVT::i8);
1540 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1541 const SDLoc &dl(Results.InpNode);
1542 SDValue B = getVectorConstant(Bytes, dl);
1543 Results.push(Hexagon::V6_vd0, ByteTy, {});
1544 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1545 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1546 return OpRef::res(Results.top());
1547}
1548
1549OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1550 ResultStack &Results) {
1551 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1552 size_t S = Bytes.size() / 2;
1553 OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
1554 OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1555 return concats(L, H, Results);
1556}
1557
1558OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1559 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1560 unsigned VecLen = SM.Mask.size();
1561 assert(HwLen == VecLen);
1562 (void)VecLen;
1563 assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1564
1565 if (isIdentity(SM.Mask))
1566 return Va;
1567 if (isUndef(SM.Mask))
1568 return OpRef::undef(getSingleVT(MVT::i8));
1569
1570 // First, check for rotations.
1571 if (auto Dist = rotationDistance(SM, VecLen)) {
1572 OpRef Rotate = funnels(Va, Va, *Dist, Results);
1573 if (Rotate.isValid())
1574 return Rotate;
1575 }
1576 unsigned HalfLen = HwLen / 2;
1577 assert(isPowerOf2_32(HalfLen));
1578
1579 // Handle special case where the output is the same half of the input
1580 // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1581 std::pair<int, unsigned> Strip1 = findStrip(SM.Mask, 1, HalfLen);
1582 if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1583 std::pair<int, unsigned> Strip2 =
1584 findStrip(SM.Mask.drop_front(HalfLen), 1, HalfLen);
1585 if (Strip1 == Strip2) {
1586 const SDLoc &dl(Results.InpNode);
1587 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1588 Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1589 {Va, Va, OpRef::res(Results.top())});
1590 OpRef S = OpRef::res(Results.top());
1591 return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::hi(S);
1592 }
1593 }
1594
1595 OpRef P = perfect(SM, Va, Results);
1596 if (P.isValid())
1597 return P;
1598 return butterfly(SM, Va, Results);
1599}
1600
1601OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1602 ResultStack &Results) {
1603 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1604 if (isUndef(SM.Mask))
1605 return OpRef::undef(getSingleVT(MVT::i8));
1606
1607 OpRef C = contracting(SM, Va, Vb, Results);
1608 if (C.isValid())
1609 return C;
1610
1611 int VecLen = SM.Mask.size();
1612 shuffles::MaskT PackedMask(VecLen);
1613 OpRef P = packs(SM, Va, Vb, Results, PackedMask);
1614 if (P.isValid())
1615 return shuffs1(ShuffleMask(PackedMask), P, Results);
1616
1617 // TODO: Before we split the mask, try perfect shuffle on concatenated
1618 // operands.
1619
1620 shuffles::MaskT MaskL(VecLen), MaskR(VecLen);
1621 splitMask(SM.Mask, MaskL, MaskR);
1622
1623 OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1624 OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1625 if (!L.isValid() || !R.isValid())
1626 return OpRef::fail();
1627
1628 SmallVector<uint8_t, 128> Bytes(VecLen);
1629 for (int I = 0; I != VecLen; ++I) {
1630 if (MaskL[I] != -1)
1631 Bytes[I] = 0xFF;
1632 }
1633 return vmuxs(Bytes, L, R, Results);
1634}
1635
1636OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1637 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1638 int VecLen = SM.Mask.size();
1639
1640 if (isIdentity(SM.Mask))
1641 return Va;
1642 if (isUndef(SM.Mask))
1643 return OpRef::undef(getPairVT(MVT::i8));
1644
1645 shuffles::MaskT PackedMask(VecLen);
1646 OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1647 if (P.isValid()) {
1648 ShuffleMask PM(PackedMask);
1649 OpRef E = expanding(PM, P, Results);
1650 if (E.isValid())
1651 return E;
1652
1653 OpRef L = shuffs1(PM.lo(), P, Results);
1654 OpRef H = shuffs1(PM.hi(), P, Results);
1655 if (L.isValid() && H.isValid())
1656 return concats(L, H, Results);
1657 }
1658
1659 if (!isLowHalfOnly(SM.Mask)) {
1660 // Doing a perfect shuffle on a low-half mask (i.e. where the upper half
1661 // is all-undef) may produce a perfect shuffle that generates legitimate
1662 // upper half. This isn't wrong, but if the perfect shuffle was possible,
1663 // then there is a good chance that a shorter (contracting) code may be
1664 // used as well (e.g. V6_vshuffeb, etc).
1665 OpRef R = perfect(SM, Va, Results);
1666 if (R.isValid())
1667 return R;
1668 // TODO commute the mask and try the opposite order of the halves.
1669 }
1670
1671 OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
1672 OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
1673 if (L.isValid() && H.isValid())
1674 return concats(L, H, Results);
1675
1676 return OpRef::fail();
1677}
1678
1679OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1680 ResultStack &Results) {
1681 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1682 if (isUndef(SM.Mask))
1683 return OpRef::undef(getPairVT(MVT::i8));
1684
1685 int VecLen = SM.Mask.size();
1686 SmallVector<int,256> PackedMask(VecLen);
1687 OpRef P = packp(SM, Va, Vb, Results, PackedMask);
1688 if (P.isValid())
1689 return shuffp1(ShuffleMask(PackedMask), P, Results);
1690
1691 SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
1692 splitMask(SM.Mask, MaskL, MaskR);
1693
1694 OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1695 OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1696 if (!L.isValid() || !R.isValid())
1697 return OpRef::fail();
1698
1699 // Mux the results.
1700 SmallVector<uint8_t,256> Bytes(VecLen);
1701 for (int I = 0; I != VecLen; ++I) {
1702 if (MaskL[I] != -1)
1703 Bytes[I] = 0xFF;
1704 }
1705 return vmuxp(Bytes, L, R, Results);
1706}
1707
1708namespace {
1709 struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
1710 template <typename T>
1711 Deleter(SelectionDAG &D, T &C)
1712 : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
1713 C.erase(N);
1714 }) {}
1715 };
1716
1717 template <typename T>
1718 struct NullifyingVector : public T {
1720 NullifyingVector(T &&V) : T(V) {
1721 for (unsigned i = 0, e = T::size(); i != e; ++i) {
1722 SDNode *&N = T::operator[](i);
1723 Refs[N] = &N;
1724 }
1725 }
1726 void erase(SDNode *N) {
1727 auto F = Refs.find(N);
1728 if (F != Refs.end())
1729 *F->second = nullptr;
1730 }
1731 };
1732}
1733
1734void HvxSelector::select(SDNode *ISelN) {
1735 // What's important here is to select the right set of nodes. The main
1736 // selection algorithm loops over nodes in a topological order, i.e. users
1737 // are visited before their operands.
1738 //
1739 // It is an error to have an unselected node with a selected operand, and
1740 // there is an assertion in the main selector code to enforce that.
1741 //
1742 // Such a situation could occur if we selected a node, which is both a
1743 // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1744 // node in the DAG.
1745 assert(ISelN->getOpcode() == HexagonISD::ISEL);
1746 SDNode *N0 = ISelN->getOperand(0).getNode();
1747
1748 // There could have been nodes created (i.e. inserted into the DAG)
1749 // that are now dead. Remove them, in case they use any of the nodes
1750 // to select (and make them look shared).
1752
1753 SetVector<SDNode *> SubNodes;
1754
1755 if (!N0->isMachineOpcode()) {
1756 // Don't want to select N0 if it's shared with another node, except if
1757 // it's shared with other ISELs.
1758 auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1759 if (llvm::all_of(N0->users(), IsISelN))
1760 SubNodes.insert(N0);
1761 }
1762 if (SubNodes.empty()) {
1763 ISel.ReplaceNode(ISelN, N0);
1764 return;
1765 }
1766
1767 // Need to manually select the nodes that are dominated by the ISEL. Other
1768 // nodes are reachable from the rest of the DAG, and so will be selected
1769 // by the DAG selection routine.
1770 SetVector<SDNode*> Dom, NonDom;
1771 Dom.insert(N0);
1772
1773 auto IsDomRec = [&Dom, &NonDom] (SDNode *T, auto Rec) -> bool {
1774 if (Dom.count(T))
1775 return true;
1776 if (T->use_empty() || NonDom.count(T))
1777 return false;
1778 for (SDNode *U : T->users()) {
1779 // If T is reachable from a known non-dominated node, then T itself
1780 // is non-dominated.
1781 if (!Rec(U, Rec)) {
1782 NonDom.insert(T);
1783 return false;
1784 }
1785 }
1786 Dom.insert(T);
1787 return true;
1788 };
1789
1790 auto IsDom = [&IsDomRec] (SDNode *T) { return IsDomRec(T, IsDomRec); };
1791
1792 // Add the rest of nodes dominated by ISEL to SubNodes.
1793 for (unsigned I = 0; I != SubNodes.size(); ++I) {
1794 for (SDValue Op : SubNodes[I]->ops()) {
1795 SDNode *O = Op.getNode();
1796 if (IsDom(O))
1797 SubNodes.insert(O);
1798 }
1799 }
1800
1801 // Do a topological sort of nodes from Dom.
1802 SetVector<SDNode*> TmpQ;
1803
1804 std::map<SDNode *, unsigned> OpCount;
1805 for (SDNode *T : Dom) {
1806 unsigned NumDomOps = llvm::count_if(T->ops(), [&Dom](const SDUse &U) {
1807 return Dom.count(U.getNode());
1808 });
1809
1810 OpCount.insert({T, NumDomOps});
1811 if (NumDomOps == 0)
1812 TmpQ.insert(T);
1813 }
1814
1815 for (unsigned I = 0; I != TmpQ.size(); ++I) {
1816 SDNode *S = TmpQ[I];
1817 for (SDNode *U : S->users()) {
1818 if (U == ISelN)
1819 continue;
1820 auto F = OpCount.find(U);
1821 assert(F != OpCount.end());
1822 if (F->second > 0 && !--F->second)
1823 TmpQ.insert(F->first);
1824 }
1825 }
1826
1827 // Remove the marker.
1828 ISel.ReplaceNode(ISelN, N0);
1829
1830 assert(SubNodes.size() == TmpQ.size());
1831 NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1832
1833 Deleter DUQ(DAG, Queue);
1834 for (SDNode *S : reverse(Queue)) {
1835 if (S == nullptr)
1836 continue;
1837 DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1838 ISel.Select(S);
1839 }
1840}
1841
1842bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
1843 MVT ResTy, SDValue Va, SDValue Vb,
1844 SDNode *N) {
1845 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1846 MVT ElemTy = ResTy.getVectorElementType();
1847 assert(ElemTy == MVT::i8);
1848 unsigned VecLen = Mask.size();
1849 bool HavePairs = (2*HwLen == VecLen);
1850 MVT SingleTy = getSingleVT(MVT::i8);
1851
1852 // The prior attempts to handle this shuffle may have left a bunch of
1853 // dead nodes in the DAG (such as constants). These nodes will be added
1854 // at the end of DAG's node list, which at that point had already been
1855 // sorted topologically. In the main selection loop, the node list is
1856 // traversed backwards from the root node, which means that any new
1857 // nodes (from the end of the list) will not be visited.
1858 // Scalarization will replace the shuffle node with the scalarized
1859 // expression, and if that expression reused any if the leftoever (dead)
1860 // nodes, these nodes would not be selected (since the "local" selection
1861 // only visits nodes that are not in AllNodes).
1862 // To avoid this issue, remove all dead nodes from the DAG now.
1863// DAG.RemoveDeadNodes();
1864
1866 LLVMContext &Ctx = *DAG.getContext();
1867 MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1868 for (int I : Mask) {
1869 if (I < 0) {
1870 Ops.push_back(ISel.selectUndef(dl, LegalTy));
1871 continue;
1872 }
1873 SDValue Vec;
1874 unsigned M = I;
1875 if (M < VecLen) {
1876 Vec = Va;
1877 } else {
1878 Vec = Vb;
1879 M -= VecLen;
1880 }
1881 if (HavePairs) {
1882 if (M < HwLen) {
1883 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1884 } else {
1885 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1886 M -= HwLen;
1887 }
1888 }
1889 SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1890 SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
1892 assert(L.getNode());
1893 Ops.push_back(L);
1894 }
1895
1896 SDValue LV;
1897 if (2*HwLen == VecLen) {
1898 SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
1899 SDValue L0 = Lower.LowerOperation(B0, DAG);
1900 SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
1901 SDValue L1 = Lower.LowerOperation(B1, DAG);
1902 // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1903 // functions may expect to be called only for illegal operations, so
1904 // make sure that they are not called for legal ones. Develop a better
1905 // mechanism for dealing with this.
1906 LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
1907 } else {
1908 SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
1909 LV = Lower.LowerOperation(BV, DAG);
1910 }
1911
1912 assert(!N->use_empty());
1913 SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1914 ISel.ReplaceNode(N, IS.getNode());
1915 select(IS.getNode());
1917 return true;
1918}
1919
1921 unsigned Width) {
1922 auto possibilities = [](ArrayRef<uint8_t> Bs, unsigned Width) -> uint32_t {
1923 unsigned Impossible = ~(1u << Width) + 1;
1924 for (unsigned I = 0, E = Bs.size(); I != E; ++I) {
1925 uint8_t B = Bs[I];
1926 if (B == 0xff)
1927 continue;
1928 if (~Impossible == 0)
1929 break;
1930 for (unsigned Log = 0; Log != Width; ++Log) {
1931 if (Impossible & (1u << Log))
1932 continue;
1933 unsigned Expected = (I >> Log) % 2;
1934 if (B != Expected)
1935 Impossible |= (1u << Log);
1936 }
1937 }
1938 return ~Impossible;
1939 };
1940
1941 SmallVector<uint32_t, 8> Worklist(Width);
1942
1943 for (unsigned BitIdx = 0; BitIdx != Width; ++BitIdx) {
1944 SmallVector<uint8_t> BitValues(SM.Mask.size());
1945 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
1946 int M = SM.Mask[i];
1947 if (M < 0)
1948 BitValues[i] = 0xff;
1949 else
1950 BitValues[i] = (M & (1u << BitIdx)) != 0;
1951 }
1952 Worklist[BitIdx] = possibilities(BitValues, Width);
1953 }
1954
1955 // If there is a word P in Worklist that matches multiple possibilities,
1956 // then if any other word Q matches any of the possibilities matched by P,
1957 // then Q matches all the possibilities matched by P. In fact, P == Q.
1958 // In other words, for each words P, Q, the sets of possibilities matched
1959 // by P and Q are either equal or disjoint (no partial overlap).
1960 //
1961 // Illustration: For 4-bit values there are 4 complete sequences:
1962 // a: 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
1963 // b: 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
1964 // c: 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1
1965 // d: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
1966 //
1967 // Words containing unknown bits that match two of the complete
1968 // sequences:
1969 // ab: 0 u u 1 0 u u 1 0 u u 1 0 u u 1
1970 // ac: 0 u 0 u u 1 u 1 0 u 0 u u 1 u 1
1971 // ad: 0 u 0 u 0 u 0 u u 1 u 1 u 1 u 1
1972 // bc: 0 0 u u u u 1 1 0 0 u u u u 1 1
1973 // bd: 0 0 u u 0 0 u u u u 1 1 u u 1 1
1974 // cd: 0 0 0 0 u u u u u u u u 1 1 1 1
1975 //
1976 // Proof of the claim above:
1977 // Let P be a word that matches s0 and s1. For that to happen, all known
1978 // bits in P must match s0 and s1 exactly.
1979 // Assume there is Q that matches s1. Note that since P and Q came from
1980 // the same shuffle mask, the positions of unknown bits in P and Q match
1981 // exactly, which makes the indices of known bits be exactly the same
1982 // between P and Q. Since P matches s0 and s1, the known bits of P much
1983 // match both s0 and s1. Also, since Q matches s1, the known bits in Q
1984 // are exactly the same as in s1, which means that they are exactly the
1985 // same as in P. This implies that P == Q.
1986
1987 // There can be a situation where there are more entries with the same
1988 // bits set than there are set bits (e.g. value 9 occuring more than 2
1989 // times). In such cases it will be impossible to complete this to a
1990 // perfect shuffle.
1991 SmallVector<uint32_t, 8> Sorted(Worklist);
1992 llvm::sort(Sorted.begin(), Sorted.end());
1993
1994 for (unsigned I = 0, E = Sorted.size(); I != E;) {
1995 unsigned P = Sorted[I], Count = 1;
1996 while (++I != E && P == Sorted[I])
1997 ++Count;
1998 if ((unsigned)llvm::popcount(P) < Count) {
1999 // Reset all occurences of P, if there are more occurrences of P
2000 // than there are bits in P.
2001 llvm::replace(Worklist, P, 0U);
2002 }
2003 }
2004
2005 return Worklist;
2006}
2007
2010 // Pick a completion if there are multiple possibilities. For now just
2011 // select any valid completion.
2012 SmallVector<uint32_t, 8> Comps(Completions);
2013
2014 for (unsigned I = 0; I != Width; ++I) {
2015 uint32_t P = Comps[I];
2016 assert(P != 0);
2017 if (isPowerOf2_32(P))
2018 continue;
2019 // T = least significant bit of P.
2020 uint32_t T = P ^ ((P - 1) & P);
2021 // Clear T in all remaining words matching P.
2022 for (unsigned J = I + 1; J != Width; ++J) {
2023 if (Comps[J] == P)
2024 Comps[J] ^= T;
2025 }
2026 Comps[I] = T;
2027 }
2028
2029#ifndef NDEBUG
2030 // Check that we have generated a valid completion.
2031 uint32_t OrAll = 0;
2032 for (uint32_t C : Comps) {
2034 OrAll |= C;
2035 }
2036 assert(OrAll == (1u << Width) -1);
2037#endif
2038
2039 return Comps;
2040}
2041
2042std::optional<int> HvxSelector::rotationDistance(ShuffleMask SM,
2043 unsigned WrapAt) {
2044 std::optional<int> Dist;
2045 for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
2046 int M = SM.Mask[I];
2047 if (M < 0)
2048 continue;
2049 if (Dist) {
2050 if ((I + *Dist) % static_cast<int>(WrapAt) != M)
2051 return std::nullopt;
2052 } else {
2053 // Integer a%b operator assumes rounding towards zero by /, so it
2054 // "misbehaves" when a crosses 0 (the remainder also changes sign).
2055 // Add WrapAt in an attempt to keep I+Dist non-negative.
2056 Dist = M - I;
2057 if (Dist < 0)
2058 Dist = *Dist + WrapAt;
2059 }
2060 }
2061 return Dist;
2062}
2063
2064OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
2065 ResultStack &Results) {
2066 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2067 if (!Va.isValid() || !Vb.isValid())
2068 return OpRef::fail();
2069
2070 // Contracting shuffles, i.e. instructions that always discard some bytes
2071 // from the operand vectors.
2072 //
2073 // Funnel shifts
2074 // V6_vshuff{e,o}b
2075 // V6_vshuf{e,o}h
2076 // V6_vdealb4w
2077 // V6_vpack{e,o}{b,h}
2078
2079 int VecLen = SM.Mask.size();
2080
2081 // First, check for funnel shifts.
2082 if (auto Dist = rotationDistance(SM, 2 * VecLen)) {
2083 OpRef Funnel = funnels(Va, Vb, *Dist, Results);
2084 if (Funnel.isValid())
2085 return Funnel;
2086 }
2087
2088 MVT SingleTy = getSingleVT(MVT::i8);
2089 MVT PairTy = getPairVT(MVT::i8);
2090
2091 auto same = [](ArrayRef<int> Mask1, ArrayRef<int> Mask2) -> bool {
2092 return Mask1 == Mask2;
2093 };
2094
2095 using PackConfig = std::pair<unsigned, bool>;
2096 PackConfig Packs[] = {
2097 {1, false}, // byte, even
2098 {1, true}, // byte, odd
2099 {2, false}, // half, even
2100 {2, true}, // half, odd
2101 };
2102
2103 { // Check vpack
2104 unsigned Opcodes[] = {
2105 Hexagon::V6_vpackeb,
2106 Hexagon::V6_vpackob,
2107 Hexagon::V6_vpackeh,
2108 Hexagon::V6_vpackoh,
2109 };
2110 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2111 auto [Size, Odd] = Packs[i];
2112 if (same(SM.Mask, shuffles::mask(shuffles::vpack, HwLen, Size, Odd))) {
2113 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2114 return OpRef::res(Results.top());
2115 }
2116 }
2117 }
2118
2119 { // Check vshuff
2120 unsigned Opcodes[] = {
2121 Hexagon::V6_vshuffeb,
2122 Hexagon::V6_vshuffob,
2123 Hexagon::V6_vshufeh,
2124 Hexagon::V6_vshufoh,
2125 };
2126 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2127 auto [Size, Odd] = Packs[i];
2128 if (same(SM.Mask, shuffles::mask(shuffles::vshuff, HwLen, Size, Odd))) {
2129 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2130 return OpRef::res(Results.top());
2131 }
2132 }
2133 }
2134
2135 { // Check vdeal
2136 // There is no "V6_vdealeb", etc, but the supposed behavior of vdealeb
2137 // is equivalent to "(V6_vpackeb (V6_vdealvdd Vu, Vv, -2))". Other such
2138 // variants of "deal" can be done similarly.
2139 unsigned Opcodes[] = {
2140 Hexagon::V6_vpackeb,
2141 Hexagon::V6_vpackob,
2142 Hexagon::V6_vpackeh,
2143 Hexagon::V6_vpackoh,
2144 };
2145 const SDLoc &dl(Results.InpNode);
2146
2147 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2148 auto [Size, Odd] = Packs[i];
2149 if (same(SM.Mask, shuffles::mask(shuffles::vdeal, HwLen, Size, Odd))) {
2150 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(-2 * Size, dl)});
2151 Results.push(Hexagon::V6_vdealvdd, PairTy, {Vb, Va, OpRef::res(-1)});
2152 auto vdeal = OpRef::res(Results.top());
2153 Results.push(Opcodes[i], SingleTy,
2154 {OpRef::hi(vdeal), OpRef::lo(vdeal)});
2155 return OpRef::res(Results.top());
2156 }
2157 }
2158 }
2159
2160 if (same(SM.Mask, shuffles::mask(shuffles::vdealb4w, HwLen))) {
2161 Results.push(Hexagon::V6_vdealb4w, SingleTy, {Vb, Va});
2162 return OpRef::res(Results.top());
2163 }
2164
2165 return OpRef::fail();
2166}
2167
2168OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2169 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2170 // Expanding shuffles (using all elements and inserting into larger vector):
2171 //
2172 // V6_vunpacku{b,h} [*]
2173 //
2174 // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
2175 //
2176 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
2177 // they are not shuffles.
2178 //
2179 // The argument is a single vector.
2180
2181 int VecLen = SM.Mask.size();
2182 assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
2183
2184 std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
2185
2186 // The patterns for the unpacks, in terms of the starting offsets of the
2187 // consecutive strips (L = length of the strip, N = VecLen):
2188 //
2189 // vunpacku: 0, -1, L, -1, 2L, -1 ...
2190
2191 if (Strip.first != 0)
2192 return OpRef::fail();
2193
2194 // The vunpackus only handle byte and half-word.
2195 if (Strip.second != 1 && Strip.second != 2)
2196 return OpRef::fail();
2197
2198 int N = VecLen;
2199 int L = Strip.second;
2200
2201 // First, check the non-ignored strips.
2202 for (int I = 2*L; I < N; I += 2*L) {
2203 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
2204 if (S.second != unsigned(L))
2205 return OpRef::fail();
2206 if (2*S.first != I)
2207 return OpRef::fail();
2208 }
2209 // Check the -1s.
2210 for (int I = L; I < N; I += 2*L) {
2211 auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
2212 if (S.first != -1 || S.second != unsigned(L))
2213 return OpRef::fail();
2214 }
2215
2216 unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
2217 : Hexagon::V6_vunpackuh;
2218 Results.push(Opc, getPairVT(MVT::i8), {Va});
2219 return OpRef::res(Results.top());
2220}
2221
2222OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2223 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2224 // V6_vdeal{b,h}
2225 // V6_vshuff{b,h}
2226
2227 // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
2228 // V6_vshuffvdd (V6_vshuff)
2229 // V6_dealvdd (V6_vdeal)
2230
2231 int VecLen = SM.Mask.size();
2232 assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
2233 unsigned LogLen = Log2_32(VecLen);
2234 unsigned HwLog = Log2_32(HwLen);
2235 // The result length must be the same as the length of a single vector,
2236 // or a vector pair.
2237 assert(LogLen == HwLog || LogLen == HwLog + 1);
2238 bool HavePairs = LogLen == HwLog + 1;
2239
2240 SmallVector<unsigned, 8> Perm(LogLen);
2241
2242 // Check if this could be a perfect shuffle, or a combination of perfect
2243 // shuffles.
2244 //
2245 // Consider this permutation (using hex digits to make the ASCII diagrams
2246 // easier to read):
2247 // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
2248 // This is a "deal" operation: divide the input into two halves, and
2249 // create the output by picking elements by alternating between these two
2250 // halves:
2251 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
2252 // 8 9 A B C D E F
2253 //
2254 // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
2255 // a somwehat different mechanism that could be used to perform shuffle/
2256 // deal operations: a 2x2 transpose.
2257 // Consider the halves of inputs again, they can be interpreted as a 2x8
2258 // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
2259 // together. Now, when considering 2 elements at a time, it will be a 2x4
2260 // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
2261 // 01 23 45 67
2262 // 89 AB CD EF
2263 // With groups of 4, this will become a single 2x2 matrix, and so on.
2264 //
2265 // The 2x2 transpose instruction works by transposing each of the 2x2
2266 // matrices (or "sub-matrices"), given a specific group size. For example,
2267 // if the group size is 1 (i.e. each element is its own group), there
2268 // will be four transposes of the four 2x2 matrices that form the 2x8.
2269 // For example, with the inputs as above, the result will be:
2270 // 0 8 2 A 4 C 6 E
2271 // 1 9 3 B 5 D 7 F
2272 // Now, this result can be tranposed again, but with the group size of 2:
2273 // 08 19 4C 5D
2274 // 2A 3B 6E 7F
2275 // If we then transpose that result, but with the group size of 4, we get:
2276 // 0819 2A3B
2277 // 4C5D 6E7F
2278 // If we concatenate these two rows, it will be
2279 // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
2280 // which is the same as the "deal" [*] above.
2281 //
2282 // In general, a "deal" of individual elements is a series of 2x2 transposes,
2283 // with changing group size. HVX has two instructions:
2284 // Vdd = V6_vdealvdd Vu, Vv, Rt
2285 // Vdd = V6_shufvdd Vu, Vv, Rt
2286 // that perform exactly that. The register Rt controls which transposes are
2287 // going to happen: a bit at position n (counting from 0) indicates that a
2288 // transpose with a group size of 2^n will take place. If multiple bits are
2289 // set, multiple transposes will happen: vdealvdd will perform them starting
2290 // with the largest group size, vshuffvdd will do them in the reverse order.
2291 //
2292 // The main observation is that each 2x2 transpose corresponds to swapping
2293 // columns of bits in the binary representation of the values.
2294 //
2295 // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
2296 // in a given column. The * denote the columns that will be swapped.
2297 // The transpose with the group size 2^n corresponds to swapping columns
2298 // 3 (the highest log) and log2(n):
2299 //
2300 // 3 2 1 0 0 2 1 3 0 2 3 1
2301 // * * * * * *
2302 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2303 // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
2304 // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
2305 // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
2306 // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
2307 // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
2308 // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
2309 // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
2310 // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
2311 // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
2312 // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
2313 // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
2314 // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
2315 // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
2316 // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
2317 // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
2318
2319 // There is one special case that is not a perfect shuffle, but can be
2320 // turned into one easily: when the shuffle operates on a vector pair,
2321 // but the two vectors in the pair are swapped. The code that identifies
2322 // perfect shuffles will reject it, unless the order is reversed.
2323 shuffles::MaskT MaskStorage(SM.Mask);
2324 bool InvertedPair = false;
2325 if (HavePairs && SM.Mask[0] >= int(HwLen)) {
2326 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
2327 int M = SM.Mask[i];
2328 MaskStorage[i] = M >= int(HwLen) ? M - HwLen : M + HwLen;
2329 }
2330 InvertedPair = true;
2331 SM = ShuffleMask(MaskStorage);
2332 }
2333
2334 auto Comps = getPerfectCompletions(SM, LogLen);
2335 if (llvm::is_contained(Comps, 0))
2336 return OpRef::fail();
2337
2338 auto Pick = completeToPerfect(Comps, LogLen);
2339 for (unsigned I = 0; I != LogLen; ++I)
2340 Perm[I] = Log2_32(Pick[I]);
2341
2342 // Once we have Perm, represent it as cycles. Denote the maximum log2
2343 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2344 // written as (M a1 a2 a3 ... an). That cycle can be broken up into
2345 // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
2346 // order being from left to right. Any (contiguous) segment where the
2347 // values ai, ai+1...aj are either all increasing or all decreasing,
2348 // can be implemented via a single vshuffvdd/vdealvdd respectively.
2349 //
2350 // If there is a cycle (a1 a2 ... an) that does not involve M, it can
2351 // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
2352 // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
2353 // can be used to generate a sequence of vshuffvdd/vdealvdd.
2354 //
2355 // Example:
2356 // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
2357 // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
2358 // (4 0 1)(4 0)(4 2 3)(4 2).
2359 // It can then be expanded into swaps as
2360 // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
2361 // and broken up into "increasing" segments as
2362 // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
2363 // This is equivalent to
2364 // (4 0 1)(4 0 2 3)(4 2),
2365 // which can be implemented as 3 vshufvdd instructions.
2366
2367 using CycleType = SmallVector<unsigned, 8>;
2368 std::set<CycleType> Cycles;
2369 std::set<unsigned> All;
2370
2371 for (unsigned I : Perm)
2372 All.insert(I);
2373
2374 // If the cycle contains LogLen-1, move it to the front of the cycle.
2375 // Otherwise, return the cycle unchanged.
2376 auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
2377 unsigned LogPos, N = C.size();
2378 for (LogPos = 0; LogPos != N; ++LogPos)
2379 if (C[LogPos] == LogLen - 1)
2380 break;
2381 if (LogPos == N)
2382 return C;
2383
2384 CycleType NewC(C.begin() + LogPos, C.end());
2385 NewC.append(C.begin(), C.begin() + LogPos);
2386 return NewC;
2387 };
2388
2389 auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
2390 // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
2391 // for bytes zero is included, for halfwords is not.
2392 if (Cs.size() != 1)
2393 return 0u;
2394 const CycleType &C = *Cs.begin();
2395 if (C[0] != Len - 1)
2396 return 0u;
2397 int D = Len - C.size();
2398 if (D != 0 && D != 1)
2399 return 0u;
2400
2401 bool IsDeal = true, IsShuff = true;
2402 for (unsigned I = 1; I != Len - D; ++I) {
2403 if (C[I] != Len - 1 - I)
2404 IsDeal = false;
2405 if (C[I] != I - (1 - D)) // I-1, I
2406 IsShuff = false;
2407 }
2408 // At most one, IsDeal or IsShuff, can be non-zero.
2409 assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
2410 static unsigned Deals[] = {Hexagon::V6_vdealb, Hexagon::V6_vdealh};
2411 static unsigned Shufs[] = {Hexagon::V6_vshuffb, Hexagon::V6_vshuffh};
2412 return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
2413 };
2414
2415 while (!All.empty()) {
2416 unsigned A = *All.begin();
2417 All.erase(A);
2418 CycleType C;
2419 C.push_back(A);
2420 for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
2421 C.push_back(B);
2422 All.erase(B);
2423 }
2424 if (C.size() <= 1)
2425 continue;
2426 Cycles.insert(canonicalize(C));
2427 }
2428
2429 MVT SingleTy = getSingleVT(MVT::i8);
2430 MVT PairTy = getPairVT(MVT::i8);
2431
2432 // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
2433 if (unsigned(VecLen) == HwLen) {
2434 if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
2435 Results.push(SingleOpc, SingleTy, {Va});
2436 return OpRef::res(Results.top());
2437 }
2438 }
2439
2440 // From the cycles, construct the sequence of values that will
2441 // then form the control values for vdealvdd/vshuffvdd, i.e.
2442 // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2443 // This essentially strips the M value from the cycles where
2444 // it's present, and performs the insertion of M (then stripping)
2445 // for cycles without M (as described in an earlier comment).
2446 SmallVector<unsigned, 8> SwapElems;
2447 // When the input is extended (i.e. single vector becomes a pair),
2448 // this is done by using an "undef" vector as the second input.
2449 // However, then we get
2450 // input 1: GOODBITS
2451 // input 2: ........
2452 // but we need
2453 // input 1: ....BITS
2454 // input 2: ....GOOD
2455 // Then at the end, this needs to be undone. To accomplish this,
2456 // artificially add "LogLen-1" at both ends of the sequence.
2457 if (!HavePairs)
2458 SwapElems.push_back(LogLen - 1);
2459 for (const CycleType &C : Cycles) {
2460 // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2461 unsigned First = (C[0] == LogLen - 1) ? 1 : 0;
2462 SwapElems.append(C.begin() + First, C.end());
2463 if (First == 0)
2464 SwapElems.push_back(C[0]);
2465 }
2466 if (!HavePairs)
2467 SwapElems.push_back(LogLen - 1);
2468
2469 const SDLoc &dl(Results.InpNode);
2470 OpRef Arg = HavePairs ? Va : concats(Va, OpRef::undef(SingleTy), Results);
2471 if (InvertedPair)
2472 Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg), Results);
2473
2474 for (unsigned I = 0, E = SwapElems.size(); I != E;) {
2475 bool IsInc = I == E - 1 || SwapElems[I] < SwapElems[I + 1];
2476 unsigned S = (1u << SwapElems[I]);
2477 if (I < E - 1) {
2478 while (++I < E - 1 && IsInc == (SwapElems[I] < SwapElems[I + 1]))
2479 S |= 1u << SwapElems[I];
2480 // The above loop will not add a bit for the final SwapElems[I+1],
2481 // so add it here.
2482 S |= 1u << SwapElems[I];
2483 }
2484 ++I;
2485
2486 NodeTemplate Res;
2487 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
2488 Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
2489 Res.Ty = PairTy;
2490 Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)};
2491 Results.push(Res);
2492 Arg = OpRef::res(Results.top());
2493 }
2494
2495 return HavePairs ? Arg : OpRef::lo(Arg);
2496}
2497
2498OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2499 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2500 // Butterfly shuffles.
2501 //
2502 // V6_vdelta
2503 // V6_vrdelta
2504 // V6_vror
2505
2506 // The assumption here is that all elements picked by Mask are in the
2507 // first operand to the vector_shuffle. This assumption is enforced
2508 // by the caller.
2509
2510 MVT ResTy = getSingleVT(MVT::i8);
2511 PermNetwork::Controls FC, RC;
2512 const SDLoc &dl(Results.InpNode);
2513 int VecLen = SM.Mask.size();
2514
2515 for (int M : SM.Mask) {
2516 if (M != -1 && M >= VecLen)
2517 return OpRef::fail();
2518 }
2519
2520 // Try the deltas/benes for both single vectors and vector pairs.
2521 ForwardDeltaNetwork FN(SM.Mask);
2522 if (FN.run(FC)) {
2523 SDValue Ctl = getVectorConstant(FC, dl);
2524 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2525 return OpRef::res(Results.top());
2526 }
2527
2528 // Try reverse delta.
2529 ReverseDeltaNetwork RN(SM.Mask);
2530 if (RN.run(RC)) {
2531 SDValue Ctl = getVectorConstant(RC, dl);
2532 Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2533 return OpRef::res(Results.top());
2534 }
2535
2536 // Do Benes.
2537 BenesNetwork BN(SM.Mask);
2538 if (BN.run(FC, RC)) {
2539 SDValue CtlF = getVectorConstant(FC, dl);
2540 SDValue CtlR = getVectorConstant(RC, dl);
2541 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2542 Results.push(Hexagon::V6_vrdelta, ResTy,
2543 {OpRef::res(-1), OpRef(CtlR)});
2544 return OpRef::res(Results.top());
2545 }
2546
2547 return OpRef::fail();
2548}
2549
2550SDValue HvxSelector::getConst32(int Val, const SDLoc &dl) {
2551 return DAG.getTargetConstant(Val, dl, MVT::i32);
2552}
2553
2554SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
2555 const SDLoc &dl) {
2557 for (uint8_t C : Data)
2558 Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
2559 MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
2560 SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2561 SDValue LV = Lower.LowerOperation(BV, DAG);
2563 return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
2564}
2565
2567 SDValue Inp = N->getOperand(0);
2568 MVT ResTy = N->getValueType(0).getSimpleVT();
2569 unsigned Idx = N->getConstantOperandVal(1);
2570
2571 [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
2572 [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
2574 assert(2 * ResLen == InpTy.getVectorNumElements());
2575 assert(Idx == 0 || Idx == ResLen);
2576
2577 unsigned SubReg = Idx == 0 ? Hexagon::vsub_lo : Hexagon::vsub_hi;
2578 SDValue Ext = DAG.getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
2579
2580 ISel.ReplaceNode(N, Ext.getNode());
2581}
2582
2584 DEBUG_WITH_TYPE("isel", {
2585 dbgs() << "Starting " << __func__ << " on node:\n";
2586 N->dump(&DAG);
2587 });
2588 MVT ResTy = N->getValueType(0).getSimpleVT();
2589 // Assume that vector shuffles operate on vectors of bytes.
2590 assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
2591
2592 auto *SN = cast<ShuffleVectorSDNode>(N);
2593 std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2594 // This shouldn't really be necessary. Is it?
2595 for (int &Idx : Mask)
2596 if (Idx != -1 && Idx < 0)
2597 Idx = -1;
2598
2599 unsigned VecLen = Mask.size();
2600 bool HavePairs = (2*HwLen == VecLen);
2601 assert(ResTy.getSizeInBits() / 8 == VecLen);
2602
2603 // Vd = vector_shuffle Va, Vb, Mask
2604 //
2605
2606 bool UseLeft = false, UseRight = false;
2607 for (unsigned I = 0; I != VecLen; ++I) {
2608 if (Mask[I] == -1)
2609 continue;
2610 unsigned Idx = Mask[I];
2611 assert(Idx < 2*VecLen);
2612 if (Idx < VecLen)
2613 UseLeft = true;
2614 else
2615 UseRight = true;
2616 }
2617
2618 DEBUG_WITH_TYPE("isel", {
2619 dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
2620 << UseLeft << " UseRight=" << UseRight << " HavePairs="
2621 << HavePairs << '\n';
2622 });
2623 // If the mask is all -1's, generate "undef".
2624 if (!UseLeft && !UseRight) {
2625 ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
2626 return;
2627 }
2628
2629 SDValue Vec0 = N->getOperand(0);
2630 SDValue Vec1 = N->getOperand(1);
2631 assert(Vec0.getValueType() == ResTy && Vec1.getValueType() == ResTy);
2632
2633 ResultStack Results(SN);
2634 OpRef Va = OpRef::undef(ResTy);
2635 OpRef Vb = OpRef::undef(ResTy);
2636
2637 if (!Vec0.isUndef()) {
2638 Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2639 Va = OpRef::OpRef::res(Results.top());
2640 }
2641 if (!Vec1.isUndef()) {
2642 Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2643 Vb = OpRef::res(Results.top());
2644 }
2645
2646 OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2647 : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2648
2649 bool Done = Res.isValid();
2650 if (Done) {
2651 // Make sure that Res is on the stack before materializing.
2652 Results.push(TargetOpcode::COPY, ResTy, {Res});
2653 materialize(Results);
2654 } else {
2655 Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
2656 }
2657
2658 if (!Done) {
2659#ifndef NDEBUG
2660 dbgs() << "Unhandled shuffle:\n";
2661 SN->dumpr(&DAG);
2662#endif
2663 llvm_unreachable("Failed to select vector shuffle");
2664 }
2665}
2666
2668 // If this is a rotation by less than 8, use V6_valignbi.
2669 MVT Ty = N->getValueType(0).getSimpleVT();
2670 const SDLoc &dl(N);
2671 SDValue VecV = N->getOperand(0);
2672 SDValue RotV = N->getOperand(1);
2673 SDNode *NewN = nullptr;
2674
2675 if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
2676 unsigned S = CN->getZExtValue() % HST.getVectorLength();
2677 if (S == 0) {
2678 NewN = VecV.getNode();
2679 } else if (isUInt<3>(S)) {
2680 NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2681 {VecV, VecV, getConst32(S, dl)});
2682 }
2683 }
2684
2685 if (!NewN)
2686 NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2687
2688 ISel.ReplaceNode(N, NewN);
2689}
2690
2692 SDValue Vv = N->getOperand(0);
2693 SDValue Vu = N->getOperand(1);
2694 SDValue Rt = N->getOperand(2);
2695 SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
2696 N->getValueType(0), {Vv, Vu, Rt});
2697 ISel.ReplaceNode(N, NewN);
2699}
2700
2701void HexagonDAGToDAGISel::PreprocessHvxISelDAG() {
2702 auto getNodes = [this]() -> std::vector<SDNode *> {
2703 std::vector<SDNode *> T;
2704 T.reserve(CurDAG->allnodes_size());
2705 for (SDNode &N : CurDAG->allnodes())
2706 T.push_back(&N);
2707 return T;
2708 };
2709
2710 ppHvxShuffleOfShuffle(getNodes());
2711}
2712
2713template <> struct std::hash<SDValue> {
2714 std::size_t operator()(SDValue V) const {
2715 return std::hash<const void *>()(V.getNode()) +
2716 std::hash<unsigned>()(V.getResNo());
2717 };
2718};
2719
2720void HexagonDAGToDAGISel::ppHvxShuffleOfShuffle(std::vector<SDNode *> &&Nodes) {
2721 // Motivating case:
2722 // t10: v64i32 = ...
2723 // t46: v128i8 = vector_shuffle<...> t44, t45
2724 // t48: v128i8 = vector_shuffle<...> t44, t45
2725 // t42: v128i8 = vector_shuffle<...> t46, t48
2726 // t12: v32i32 = extract_subvector t10, Constant:i32<0>
2727 // t44: v128i8 = bitcast t12
2728 // t15: v32i32 = extract_subvector t10, Constant:i32<32>
2729 // t45: v128i8 = bitcast t15
2730 SelectionDAG &DAG = *CurDAG;
2731 unsigned HwLen = HST->getVectorLength();
2732
2733 struct SubVectorInfo {
2734 SubVectorInfo(SDValue S, unsigned H) : Src(S), HalfIdx(H) {}
2735 SDValue Src;
2736 unsigned HalfIdx;
2737 };
2738
2739 using MapType = std::unordered_map<SDValue, unsigned>;
2740
2741 auto getMaskElt = [&](unsigned Idx, ShuffleVectorSDNode *Shuff0,
2742 ShuffleVectorSDNode *Shuff1,
2743 const MapType &OpMap) -> int {
2744 // Treat Shuff0 and Shuff1 as operands to another vector shuffle, and
2745 // Idx as a (non-undef) element of the top level shuffle's mask, that
2746 // is, index into concat(Shuff0, Shuff1).
2747 // Assuming that Shuff0 and Shuff1 both operate on subvectors of the
2748 // same source vector (as described by OpMap), return the index of
2749 // that source vector corresponding to Idx.
2750 ShuffleVectorSDNode *OpShuff = Idx < HwLen ? Shuff0 : Shuff1;
2751 if (Idx >= HwLen)
2752 Idx -= HwLen;
2753
2754 // Get the mask index that M points at in the corresponding operand.
2755 int MaybeN = OpShuff->getMaskElt(Idx);
2756 if (MaybeN < 0)
2757 return -1;
2758
2759 auto N = static_cast<unsigned>(MaybeN);
2760 unsigned SrcBase = N < HwLen ? OpMap.at(OpShuff->getOperand(0))
2761 : OpMap.at(OpShuff->getOperand(1));
2762 if (N >= HwLen)
2763 N -= HwLen;
2764
2765 return N + SrcBase;
2766 };
2767
2768 auto fold3 = [&](SDValue TopShuff, SDValue Inp, MapType &&OpMap) -> SDValue {
2769 // Fold all 3 shuffles into a single one.
2770 auto *This = cast<ShuffleVectorSDNode>(TopShuff);
2771 auto *S0 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(0));
2772 auto *S1 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(1));
2773 ArrayRef<int> TopMask = This->getMask();
2774 // This should be guaranteed by type checks in the caller, and the fact
2775 // that all shuffles should have been promoted to operate on MVT::i8.
2776 assert(TopMask.size() == S0->getMask().size() &&
2777 TopMask.size() == S1->getMask().size());
2778 assert(TopMask.size() == HwLen);
2779
2780 SmallVector<int, 256> FoldedMask(2 * HwLen);
2781 for (unsigned I = 0; I != HwLen; ++I) {
2782 int MaybeM = TopMask[I];
2783 if (MaybeM >= 0) {
2784 FoldedMask[I] =
2785 getMaskElt(static_cast<unsigned>(MaybeM), S0, S1, OpMap);
2786 } else {
2787 FoldedMask[I] = -1;
2788 }
2789 }
2790 // The second half of the result will be all-undef.
2791 std::fill(FoldedMask.begin() + HwLen, FoldedMask.end(), -1);
2792
2793 // Return
2794 // FoldedShuffle = (Shuffle Inp, undef, FoldedMask)
2795 // (LoHalf FoldedShuffle)
2796 const SDLoc &dl(TopShuff);
2797 MVT SingleTy = MVT::getVectorVT(MVT::i8, HwLen);
2798 MVT PairTy = MVT::getVectorVT(MVT::i8, 2 * HwLen);
2799 SDValue FoldedShuff =
2800 DAG.getVectorShuffle(PairTy, dl, DAG.getBitcast(PairTy, Inp),
2801 DAG.getUNDEF(PairTy), FoldedMask);
2802 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SingleTy, FoldedShuff,
2803 DAG.getConstant(0, dl, MVT::i32));
2804 };
2805
2806 auto getSourceInfo = [](SDValue V) -> std::optional<SubVectorInfo> {
2807 while (V.getOpcode() == ISD::BITCAST)
2808 V = V.getOperand(0);
2809 if (V.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2810 return std::nullopt;
2811 return SubVectorInfo(V.getOperand(0),
2812 !cast<ConstantSDNode>(V.getOperand(1))->isZero());
2813 };
2814
2815 for (SDNode *N : Nodes) {
2816 if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2817 continue;
2818 EVT ResTy = N->getValueType(0);
2819 if (ResTy.getVectorElementType() != MVT::i8)
2820 continue;
2821 if (ResTy.getVectorNumElements() != HwLen)
2822 continue;
2823
2824 SDValue V0 = N->getOperand(0);
2825 SDValue V1 = N->getOperand(1);
2826 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
2827 continue;
2828 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
2829 continue;
2830 if (V0.getValueType() != ResTy || V1.getValueType() != ResTy)
2831 continue;
2832
2833 // Check if all operands of the two operand shuffles are extract_subvectors
2834 // from the same vector pair.
2835 auto V0A = getSourceInfo(V0.getOperand(0));
2836 if (!V0A.has_value())
2837 continue;
2838 auto V0B = getSourceInfo(V0.getOperand(1));
2839 if (!V0B.has_value() || V0B->Src != V0A->Src)
2840 continue;
2841 auto V1A = getSourceInfo(V1.getOperand(0));
2842 if (!V1A.has_value() || V1A->Src != V0A->Src)
2843 continue;
2844 auto V1B = getSourceInfo(V1.getOperand(1));
2845 if (!V1B.has_value() || V1B->Src != V0A->Src)
2846 continue;
2847
2848 // The source must be a pair. This should be guaranteed here,
2849 // but check just in case.
2850 assert(V0A->Src.getValueType().getSizeInBits() == 16 * HwLen);
2851
2852 MapType OpMap = {
2853 {V0.getOperand(0), V0A->HalfIdx * HwLen},
2854 {V0.getOperand(1), V0B->HalfIdx * HwLen},
2855 {V1.getOperand(0), V1A->HalfIdx * HwLen},
2856 {V1.getOperand(1), V1B->HalfIdx * HwLen},
2857 };
2858 SDValue NewS = fold3(SDValue(N, 0), V0A->Src, std::move(OpMap));
2859 ReplaceNode(N, NewS.getNode());
2860 }
2861}
2862
2863void HexagonDAGToDAGISel::SelectHvxExtractSubvector(SDNode *N) {
2864 HvxSelector(*this, *CurDAG).selectExtractSubvector(N);
2865}
2866
2867void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
2868 HvxSelector(*this, *CurDAG).selectShuffle(N);
2869}
2870
2871void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
2872 HvxSelector(*this, *CurDAG).selectRor(N);
2873}
2874
2875void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
2876 HvxSelector(*this, *CurDAG).selectVAlign(N);
2877}
2878
2880 const SDLoc &dl(N);
2881 SDValue Chain = N->getOperand(0);
2882 SDValue Address = N->getOperand(2);
2883 SDValue Predicate = N->getOperand(3);
2884 SDValue Base = N->getOperand(4);
2885 SDValue Modifier = N->getOperand(5);
2886 SDValue Offset = N->getOperand(6);
2887 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2888
2889 unsigned Opcode;
2890 unsigned IntNo = N->getConstantOperandVal(1);
2891 switch (IntNo) {
2892 default:
2893 llvm_unreachable("Unexpected HVX gather intrinsic.");
2894 case Intrinsic::hexagon_V6_vgathermhq:
2895 case Intrinsic::hexagon_V6_vgathermhq_128B:
2896 Opcode = Hexagon::V6_vgathermhq_pseudo;
2897 break;
2898 case Intrinsic::hexagon_V6_vgathermwq:
2899 case Intrinsic::hexagon_V6_vgathermwq_128B:
2900 Opcode = Hexagon::V6_vgathermwq_pseudo;
2901 break;
2902 case Intrinsic::hexagon_V6_vgathermhwq:
2903 case Intrinsic::hexagon_V6_vgathermhwq_128B:
2904 Opcode = Hexagon::V6_vgathermhwq_pseudo;
2905 break;
2906 }
2907
2908 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2909 SDValue Ops[] = { Address, ImmOperand,
2910 Predicate, Base, Modifier, Offset, Chain };
2911 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2912
2913 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2914 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2915
2916 ReplaceNode(N, Result);
2917}
2918
2920 const SDLoc &dl(N);
2921 SDValue Chain = N->getOperand(0);
2922 SDValue Address = N->getOperand(2);
2923 SDValue Base = N->getOperand(3);
2924 SDValue Modifier = N->getOperand(4);
2925 SDValue Offset = N->getOperand(5);
2926 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2927
2928 unsigned Opcode;
2929 unsigned IntNo = N->getConstantOperandVal(1);
2930 switch (IntNo) {
2931 default:
2932 llvm_unreachable("Unexpected HVX gather intrinsic.");
2933 case Intrinsic::hexagon_V6_vgathermh:
2934 case Intrinsic::hexagon_V6_vgathermh_128B:
2935 Opcode = Hexagon::V6_vgathermh_pseudo;
2936 break;
2937 case Intrinsic::hexagon_V6_vgathermw:
2938 case Intrinsic::hexagon_V6_vgathermw_128B:
2939 Opcode = Hexagon::V6_vgathermw_pseudo;
2940 break;
2941 case Intrinsic::hexagon_V6_vgathermhw:
2942 case Intrinsic::hexagon_V6_vgathermhw_128B:
2943 Opcode = Hexagon::V6_vgathermhw_pseudo;
2944 break;
2945 }
2946
2947 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2948 SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain };
2949 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2950
2951 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2952 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2953
2954 ReplaceNode(N, Result);
2955}
2956
2958 unsigned IID = N->getConstantOperandVal(0);
2959 SDNode *Result;
2960 switch (IID) {
2961 case Intrinsic::hexagon_V6_vaddcarry: {
2962 std::array<SDValue, 3> Ops = {
2963 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2964 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2965 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2966 break;
2967 }
2968 case Intrinsic::hexagon_V6_vaddcarry_128B: {
2969 std::array<SDValue, 3> Ops = {
2970 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2971 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2972 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2973 break;
2974 }
2975 case Intrinsic::hexagon_V6_vsubcarry: {
2976 std::array<SDValue, 3> Ops = {
2977 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2978 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2979 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2980 break;
2981 }
2982 case Intrinsic::hexagon_V6_vsubcarry_128B: {
2983 std::array<SDValue, 3> Ops = {
2984 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2985 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2986 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2987 break;
2988 }
2989 default:
2990 llvm_unreachable("Unexpected HVX dual output intrinsic.");
2991 }
2992 ReplaceUses(N, Result);
2993 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
2994 ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
2996}
unsigned SubReg
static const LLT S1
ReachingDefAnalysis InstSet InstSet & Ignore
Function Alias Analysis Results
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
This file implements the BitVector class.
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:282
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static bool isIdentity(ArrayRef< int > Mask)
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
static bool isUndef(ArrayRef< int > Mask)
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
static void packSegmentMask(ArrayRef< int > Mask, ArrayRef< unsigned > OutSegMap, unsigned SegLen, MutableArrayRef< int > PackedMask)
static SmallVector< unsigned, 4 > getInputSegmentList(ShuffleMask SM, unsigned SegLen)
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
static SmallVector< unsigned, 4 > getOutputSegmentMap(ShuffleMask SM, unsigned SegLen)
static bool isLowHalfOnly(ArrayRef< int > Mask)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
nvptx lower args
#define P(N)
const NodeList & List
Definition: RDFGraph.cpp:200
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
raw_pwrite_stream & OS
static LLVM_ATTRIBUTE_ALWAYS_INLINE MVT::SimpleValueType getSimpleVT(const unsigned char *MatcherTable, unsigned &MatcherIndex)
getSimpleVT - Decode a value in MatcherTable, if it's a VBR encoded value, use GetVBR to decode it.
This file implements a set that has insertion order iteration characteristics.
Stack Slot Coloring
xray Insert XRay ops
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:231
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:207
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:238
const T * data() const
Definition: ArrayRef.h:165
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
Tagged union holding either a T or a Error.
Definition: Error.h:481
bool empty() const
Definition: Function.h:859
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4890
size_t size() const
Definition: Function.h:858
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
unsigned getVectorLength() const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Machine Value Type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
A description of a memory reference used in the backend.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:422
iterator begin() const
Definition: ArrayRef.h:359
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:415
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
iterator begin()
bool insert(SUnit *SU)
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
void dumpr() const
Dump (recursively) this node and its use-def subgraph.
const SDValue & getOperand(unsigned Num) const
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:567
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:563
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
Vector takeVector()
Clear the SetVector and return the underlying vector.
Definition: SetVector.h:87
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetInstrInfo - Interface to description of machine instruction set.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ Undef
Value of the register doesn't matter.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
@ Done
Definition: Threading.h:61
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2107
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
@ None
Definition: CodeGenData.h:106
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1866
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:563
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
MaskT vshuffvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
MaskT vpack(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > hi(ArrayRef< int > Vuu)
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdealb4w(ArrayRef< int > Vu, ArrayRef< int > Vv)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > lo(ArrayRef< int > Vuu)
MaskT vdealvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Extended Value Type.
Definition: ValueTypes.h:35
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
MVT getSingleVT(MVT ElemTy) const
static SmallVector< uint32_t, 8 > completeToPerfect(ArrayRef< uint32_t > Completions, unsigned Width)
HexagonDAGToDAGISel & ISel
const HexagonTargetLowering & Lower
void selectExtractSubvector(SDNode *N)
static SmallVector< uint32_t, 8 > getPerfectCompletions(ShuffleMask SM, unsigned Width)
MVT getPairVT(MVT ElemTy) const
const HexagonSubtarget & HST
static std::optional< int > rotationDistance(ShuffleMask SM, unsigned WrapAt)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
std::size_t operator()(SDValue V) const