LLVM  15.0.0git
HexagonISelDAGToDAGHVX.cpp
Go to the documentation of this file.
1 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Hexagon.h"
10 #include "HexagonISelDAGToDAG.h"
11 #include "HexagonISelLowering.h"
12 #include "HexagonTargetMachine.h"
13 #include "llvm/ADT/SetVector.h"
16 #include "llvm/IR/Intrinsics.h"
17 #include "llvm/IR/IntrinsicsHexagon.h"
19 #include "llvm/Support/Debug.h"
20 
21 #include <deque>
22 #include <map>
23 #include <set>
24 #include <utility>
25 #include <vector>
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 
29 using namespace llvm;
30 
31 namespace {
32 
33 // --------------------------------------------------------------------
34 // Implementation of permutation networks.
35 
36 // Implementation of the node routing through butterfly networks:
37 // - Forward delta.
38 // - Reverse delta.
39 // - Benes.
40 //
41 //
42 // Forward delta network consists of log(N) steps, where N is the number
43 // of inputs. In each step, an input can stay in place, or it can get
44 // routed to another position[1]. The step after that consists of two
45 // networks, each half in size in terms of the number of nodes. In those
46 // terms, in the given step, an input can go to either the upper or the
47 // lower network in the next step.
48 //
49 // [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
50 // positions as long as there is no conflict.
51 
52 // Here's a delta network for 8 inputs, only the switching routes are
53 // shown:
54 //
55 // Steps:
56 // |- 1 ---------------|- 2 -----|- 3 -|
57 //
58 // Inp[0] *** *** *** *** Out[0]
59 // \ / \ / \ /
60 // \ / \ / X
61 // \ / \ / / \
62 // Inp[1] *** \ / *** X *** *** Out[1]
63 // \ \ / / \ / \ /
64 // \ \ / / X X
65 // \ \ / / / \ / \
66 // Inp[2] *** \ \ / / *** X *** *** Out[2]
67 // \ \ X / / / \ \ /
68 // \ \ / \ / / / \ X
69 // \ X X / / \ / \
70 // Inp[3] *** \ / \ / \ / *** *** *** Out[3]
71 // \ X X X /
72 // \ / \ / \ / \ /
73 // X X X X
74 // / \ / \ / \ / \
75 // / X X X \
76 // Inp[4] *** / \ / \ / \ *** *** *** Out[4]
77 // / X X \ \ / \ /
78 // / / \ / \ \ \ / X
79 // / / X \ \ \ / / \
80 // Inp[5] *** / / \ \ *** X *** *** Out[5]
81 // / / \ \ \ / \ /
82 // / / \ \ X X
83 // / / \ \ / \ / \
84 // Inp[6] *** / \ *** X *** *** Out[6]
85 // / \ / \ \ /
86 // / \ / \ X
87 // / \ / \ / \
88 // Inp[7] *** *** *** *** Out[7]
89 //
90 //
91 // Reverse delta network is same as delta network, with the steps in
92 // the opposite order.
93 //
94 //
95 // Benes network is a forward delta network immediately followed by
96 // a reverse delta network.
97 
98 enum class ColorKind { None, Red, Black };
99 
100 // Graph coloring utility used to partition nodes into two groups:
101 // they will correspond to nodes routed to the upper and lower networks.
102 struct Coloring {
103  using Node = int;
104  using MapType = std::map<Node, ColorKind>;
105  static constexpr Node Ignore = Node(-1);
106 
107  Coloring(ArrayRef<Node> Ord) : Order(Ord) {
108  build();
109  if (!color())
110  Colors.clear();
111  }
112 
113  const MapType &colors() const {
114  return Colors;
115  }
116 
117  ColorKind other(ColorKind Color) {
118  if (Color == ColorKind::None)
119  return ColorKind::Red;
120  return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
121  }
122 
123  LLVM_DUMP_METHOD void dump() const;
124 
125 private:
126  ArrayRef<Node> Order;
127  MapType Colors;
128  std::set<Node> Needed;
129 
130  using NodeSet = std::set<Node>;
131  std::map<Node,NodeSet> Edges;
132 
133  Node conj(Node Pos) {
134  Node Num = Order.size();
135  return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
136  }
137 
138  ColorKind getColor(Node N) {
139  auto F = Colors.find(N);
140  return F != Colors.end() ? F->second : ColorKind::None;
141  }
142 
143  std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
144 
145  void build();
146  bool color();
147 };
148 } // namespace
149 
150 std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
151  auto Color = ColorKind::None;
152  for (Node N : Nodes) {
153  ColorKind ColorN = getColor(N);
154  if (ColorN == ColorKind::None)
155  continue;
156  if (Color == ColorKind::None)
157  Color = ColorN;
158  else if (Color != ColorKind::None && Color != ColorN)
159  return { false, ColorKind::None };
160  }
161  return { true, Color };
162 }
163 
164 void Coloring::build() {
165  // Add Order[P] and Order[conj(P)] to Edges.
166  for (unsigned P = 0; P != Order.size(); ++P) {
167  Node I = Order[P];
168  if (I != Ignore) {
169  Needed.insert(I);
170  Node PC = Order[conj(P)];
171  if (PC != Ignore && PC != I)
172  Edges[I].insert(PC);
173  }
174  }
175  // Add I and conj(I) to Edges.
176  for (unsigned I = 0; I != Order.size(); ++I) {
177  if (!Needed.count(I))
178  continue;
179  Node C = conj(I);
180  // This will create an entry in the edge table, even if I is not
181  // connected to any other node. This is necessary, because it still
182  // needs to be colored.
183  NodeSet &Is = Edges[I];
184  if (Needed.count(C))
185  Is.insert(C);
186  }
187 }
188 
189 bool Coloring::color() {
190  SetVector<Node> FirstQ;
191  auto Enqueue = [this,&FirstQ] (Node N) {
192  SetVector<Node> Q;
193  Q.insert(N);
194  for (unsigned I = 0; I != Q.size(); ++I) {
195  NodeSet &Ns = Edges[Q[I]];
196  Q.insert(Ns.begin(), Ns.end());
197  }
198  FirstQ.insert(Q.begin(), Q.end());
199  };
200  for (Node N : Needed)
201  Enqueue(N);
202 
203  for (Node N : FirstQ) {
204  if (Colors.count(N))
205  continue;
206  NodeSet &Ns = Edges[N];
207  auto P = getUniqueColor(Ns);
208  if (!P.first)
209  return false;
210  Colors[N] = other(P.second);
211  }
212 
213  // First, color nodes that don't have any dups.
214  for (auto E : Edges) {
215  Node N = E.first;
216  if (!Needed.count(conj(N)) || Colors.count(N))
217  continue;
218  auto P = getUniqueColor(E.second);
219  if (!P.first)
220  return false;
221  Colors[N] = other(P.second);
222  }
223 
224  // Now, nodes that are still uncolored. Since the graph can be modified
225  // in this step, create a work queue.
226  std::vector<Node> WorkQ;
227  for (auto E : Edges) {
228  Node N = E.first;
229  if (!Colors.count(N))
230  WorkQ.push_back(N);
231  }
232 
233  for (Node N : WorkQ) {
234  NodeSet &Ns = Edges[N];
235  auto P = getUniqueColor(Ns);
236  if (P.first) {
237  Colors[N] = other(P.second);
238  continue;
239  }
240 
241  // Coloring failed. Split this node.
242  Node C = conj(N);
243  ColorKind ColorN = other(ColorKind::None);
244  ColorKind ColorC = other(ColorN);
245  NodeSet &Cs = Edges[C];
246  NodeSet CopyNs = Ns;
247  for (Node M : CopyNs) {
248  ColorKind ColorM = getColor(M);
249  if (ColorM == ColorC) {
250  // Connect M with C, disconnect M from N.
251  Cs.insert(M);
252  Edges[M].insert(C);
253  Ns.erase(M);
254  Edges[M].erase(N);
255  }
256  }
257  Colors[N] = ColorN;
258  Colors[C] = ColorC;
259  }
260 
261  // Explicitly assign "None" to all uncolored nodes.
262  for (unsigned I = 0; I != Order.size(); ++I)
263  if (Colors.count(I) == 0)
264  Colors[I] = ColorKind::None;
265 
266  return true;
267 }
268 
269 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
270 void Coloring::dump() const {
271  dbgs() << "{ Order: {";
272  for (Node P : Order) {
273  if (P != Ignore)
274  dbgs() << ' ' << P;
275  else
276  dbgs() << " -";
277  }
278  dbgs() << " }\n";
279  dbgs() << " Needed: {";
280  for (Node N : Needed)
281  dbgs() << ' ' << N;
282  dbgs() << " }\n";
283 
284  dbgs() << " Edges: {\n";
285  for (auto E : Edges) {
286  dbgs() << " " << E.first << " -> {";
287  for (auto N : E.second)
288  dbgs() << ' ' << N;
289  dbgs() << " }\n";
290  }
291  dbgs() << " }\n";
292 
293  auto ColorKindToName = [](ColorKind C) {
294  switch (C) {
295  case ColorKind::None:
296  return "None";
297  case ColorKind::Red:
298  return "Red";
299  case ColorKind::Black:
300  return "Black";
301  }
302  llvm_unreachable("all ColorKinds should be handled by the switch above");
303  };
304 
305  dbgs() << " Colors: {\n";
306  for (auto C : Colors)
307  dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
308  dbgs() << " }\n}\n";
309 }
310 #endif
311 
312 namespace {
313 // Base class of for reordering networks. They don't strictly need to be
314 // permutations, as outputs with repeated occurrences of an input element
315 // are allowed.
316 struct PermNetwork {
317  using Controls = std::vector<uint8_t>;
318  using ElemType = int;
319  static constexpr ElemType Ignore = ElemType(-1);
320 
321  enum : uint8_t {
322  None,
323  Pass,
324  Switch
325  };
326  enum : uint8_t {
327  Forward,
328  Reverse
329  };
330 
331  PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
332  Order.assign(Ord.data(), Ord.data()+Ord.size());
333  Log = 0;
334 
335  unsigned S = Order.size();
336  while (S >>= 1)
337  ++Log;
338 
339  Table.resize(Order.size());
340  for (RowType &Row : Table)
341  Row.resize(Mult*Log, None);
342  }
343 
344  void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
345  unsigned Size = Order.size();
346  V.resize(Size);
347  for (unsigned I = 0; I != Size; ++I) {
348  unsigned W = 0;
349  for (unsigned L = 0; L != Log; ++L) {
350  unsigned C = ctl(I, StartAt+L) == Switch;
351  if (Dir == Forward)
352  W |= C << (Log-1-L);
353  else
354  W |= C << L;
355  }
356  assert(isUInt<8>(W));
357  V[I] = uint8_t(W);
358  }
359  }
360 
361  uint8_t ctl(ElemType Pos, unsigned Step) const {
362  return Table[Pos][Step];
363  }
364  unsigned size() const {
365  return Order.size();
366  }
367  unsigned steps() const {
368  return Log;
369  }
370 
371 protected:
372  unsigned Log;
373  std::vector<ElemType> Order;
374  using RowType = std::vector<uint8_t>;
375  std::vector<RowType> Table;
376 };
377 
378 struct ForwardDeltaNetwork : public PermNetwork {
379  ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
380 
381  bool run(Controls &V) {
382  if (!route(Order.data(), Table.data(), size(), 0))
383  return false;
384  getControls(V, 0, Forward);
385  return true;
386  }
387 
388 private:
389  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
390 };
391 
392 struct ReverseDeltaNetwork : public PermNetwork {
393  ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
394 
395  bool run(Controls &V) {
396  if (!route(Order.data(), Table.data(), size(), 0))
397  return false;
398  getControls(V, 0, Reverse);
399  return true;
400  }
401 
402 private:
403  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
404 };
405 
406 struct BenesNetwork : public PermNetwork {
407  BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
408 
409  bool run(Controls &F, Controls &R) {
410  if (!route(Order.data(), Table.data(), size(), 0))
411  return false;
412 
413  getControls(F, 0, Forward);
414  getControls(R, Log, Reverse);
415  return true;
416  }
417 
418 private:
419  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
420 };
421 } // namespace
422 
423 bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
424  unsigned Step) {
425  bool UseUp = false, UseDown = false;
426  ElemType Num = Size;
427 
428  // Cannot use coloring here, because coloring is used to determine
429  // the "big" switch, i.e. the one that changes halves, and in a forward
430  // network, a color can be simultaneously routed to both halves in the
431  // step we're working on.
432  for (ElemType J = 0; J != Num; ++J) {
433  ElemType I = P[J];
434  // I is the position in the input,
435  // J is the position in the output.
436  if (I == Ignore)
437  continue;
438  uint8_t S;
439  if (I < Num/2)
440  S = (J < Num/2) ? Pass : Switch;
441  else
442  S = (J < Num/2) ? Switch : Pass;
443 
444  // U is the element in the table that needs to be updated.
445  ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
446  if (U < Num/2)
447  UseUp = true;
448  else
449  UseDown = true;
450  if (T[U][Step] != S && T[U][Step] != None)
451  return false;
452  T[U][Step] = S;
453  }
454 
455  for (ElemType J = 0; J != Num; ++J)
456  if (P[J] != Ignore && P[J] >= Num/2)
457  P[J] -= Num/2;
458 
459  if (Step+1 < Log) {
460  if (UseUp && !route(P, T, Size/2, Step+1))
461  return false;
462  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
463  return false;
464  }
465  return true;
466 }
467 
468 bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
469  unsigned Step) {
470  unsigned Pets = Log-1 - Step;
471  bool UseUp = false, UseDown = false;
472  ElemType Num = Size;
473 
474  // In this step half-switching occurs, so coloring can be used.
475  Coloring G({P,Size});
476  const Coloring::MapType &M = G.colors();
477  if (M.empty())
478  return false;
479 
480  ColorKind ColorUp = ColorKind::None;
481  for (ElemType J = 0; J != Num; ++J) {
482  ElemType I = P[J];
483  // I is the position in the input,
484  // J is the position in the output.
485  if (I == Ignore)
486  continue;
487  ColorKind C = M.at(I);
488  if (C == ColorKind::None)
489  continue;
490  // During "Step", inputs cannot switch halves, so if the "up" color
491  // is still unknown, make sure that it is selected in such a way that
492  // "I" will stay in the same half.
493  bool InpUp = I < Num/2;
494  if (ColorUp == ColorKind::None)
495  ColorUp = InpUp ? C : G.other(C);
496  if ((C == ColorUp) != InpUp) {
497  // If I should go to a different half than where is it now, give up.
498  return false;
499  }
500 
501  uint8_t S;
502  if (InpUp) {
503  S = (J < Num/2) ? Pass : Switch;
504  UseUp = true;
505  } else {
506  S = (J < Num/2) ? Switch : Pass;
507  UseDown = true;
508  }
509  T[J][Pets] = S;
510  }
511 
512  // Reorder the working permutation according to the computed switch table
513  // for the last step (i.e. Pets).
514  for (ElemType J = 0, E = Size / 2; J != E; ++J) {
515  ElemType PJ = P[J]; // Current values of P[J]
516  ElemType PC = P[J+Size/2]; // and P[conj(J)]
517  ElemType QJ = PJ; // New values of P[J]
518  ElemType QC = PC; // and P[conj(J)]
519  if (T[J][Pets] == Switch)
520  QC = PJ;
521  if (T[J+Size/2][Pets] == Switch)
522  QJ = PC;
523  P[J] = QJ;
524  P[J+Size/2] = QC;
525  }
526 
527  for (ElemType J = 0; J != Num; ++J)
528  if (P[J] != Ignore && P[J] >= Num/2)
529  P[J] -= Num/2;
530 
531  if (Step+1 < Log) {
532  if (UseUp && !route(P, T, Size/2, Step+1))
533  return false;
534  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
535  return false;
536  }
537  return true;
538 }
539 
540 bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
541  unsigned Step) {
542  Coloring G({P,Size});
543  const Coloring::MapType &M = G.colors();
544  if (M.empty())
545  return false;
546  ElemType Num = Size;
547 
548  unsigned Pets = 2*Log-1 - Step;
549  bool UseUp = false, UseDown = false;
550 
551  // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
552  // result in different controls. Let's pick the one where the first
553  // control will be "Pass".
554  ColorKind ColorUp = ColorKind::None;
555  for (ElemType J = 0; J != Num; ++J) {
556  ElemType I = P[J];
557  if (I == Ignore)
558  continue;
559  ColorKind C = M.at(I);
560  if (C == ColorKind::None)
561  continue;
562  if (ColorUp == ColorKind::None) {
563  ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
564  }
565  unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
566  if (C == ColorUp) {
567  if (I < Num/2)
568  T[I][Step] = Pass;
569  else
570  T[CI][Step] = Switch;
571  T[J][Pets] = (J < Num/2) ? Pass : Switch;
572  UseUp = true;
573  } else { // Down
574  if (I < Num/2)
575  T[CI][Step] = Switch;
576  else
577  T[I][Step] = Pass;
578  T[J][Pets] = (J < Num/2) ? Switch : Pass;
579  UseDown = true;
580  }
581  }
582 
583  // Reorder the working permutation according to the computed switch table
584  // for the last step (i.e. Pets).
585  for (ElemType J = 0; J != Num/2; ++J) {
586  ElemType PJ = P[J]; // Current values of P[J]
587  ElemType PC = P[J+Num/2]; // and P[conj(J)]
588  ElemType QJ = PJ; // New values of P[J]
589  ElemType QC = PC; // and P[conj(J)]
590  if (T[J][Pets] == Switch)
591  QC = PJ;
592  if (T[J+Num/2][Pets] == Switch)
593  QJ = PC;
594  P[J] = QJ;
595  P[J+Num/2] = QC;
596  }
597 
598  for (ElemType J = 0; J != Num; ++J)
599  if (P[J] != Ignore && P[J] >= Num/2)
600  P[J] -= Num/2;
601 
602  if (Step+1 < Log) {
603  if (UseUp && !route(P, T, Size/2, Step+1))
604  return false;
605  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
606  return false;
607  }
608  return true;
609 }
610 
611 // --------------------------------------------------------------------
612 // Support for building selection results (output instructions that are
613 // parts of the final selection).
614 
615 namespace {
616 struct OpRef {
617  OpRef(SDValue V) : OpV(V) {}
618  bool isValue() const { return OpV.getNode() != nullptr; }
619  bool isValid() const { return isValue() || !(OpN & Invalid); }
620  static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
621  static OpRef fail() { return OpRef(Invalid); }
622 
623  static OpRef lo(const OpRef &R) {
624  assert(!R.isValue());
625  return OpRef(R.OpN & (Undef | Index | LoHalf));
626  }
627  static OpRef hi(const OpRef &R) {
628  assert(!R.isValue());
629  return OpRef(R.OpN & (Undef | Index | HiHalf));
630  }
631  static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
632 
633  // Direct value.
634  SDValue OpV = SDValue();
635 
636  // Reference to the operand of the input node:
637  // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
638  // operand index:
639  // If bit 30 is set, it's the high half of the operand.
640  // If bit 29 is set, it's the low half of the operand.
641  unsigned OpN = 0;
642 
643  enum : unsigned {
644  Invalid = 0x10000000,
645  LoHalf = 0x20000000,
646  HiHalf = 0x40000000,
647  Whole = LoHalf | HiHalf,
648  Undef = 0x80000000,
649  Index = 0x0FFFFFFF, // Mask of the index value.
650  IndexBits = 28,
651  };
652 
654  void print(raw_ostream &OS, const SelectionDAG &G) const;
655 
656 private:
657  OpRef(unsigned N) : OpN(N) {}
658 };
659 
660 struct NodeTemplate {
661  NodeTemplate() = default;
662  unsigned Opc = 0;
663  MVT Ty = MVT::Other;
664  std::vector<OpRef> Ops;
665 
666  LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
667 };
668 
669 struct ResultStack {
670  ResultStack(SDNode *Inp)
671  : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
672  SDNode *InpNode;
673  MVT InpTy;
674  unsigned push(const NodeTemplate &Res) {
675  List.push_back(Res);
676  return List.size()-1;
677  }
678  unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
679  NodeTemplate Res;
680  Res.Opc = Opc;
681  Res.Ty = Ty;
682  Res.Ops = Ops;
683  return push(Res);
684  }
685  bool empty() const { return List.empty(); }
686  unsigned size() const { return List.size(); }
687  unsigned top() const { return size()-1; }
688  const NodeTemplate &operator[](unsigned I) const { return List[I]; }
689  unsigned reset(unsigned NewTop) {
690  List.resize(NewTop+1);
691  return NewTop;
692  }
693 
694  using BaseType = std::vector<NodeTemplate>;
695  BaseType::iterator begin() { return List.begin(); }
696  BaseType::iterator end() { return List.end(); }
697  BaseType::const_iterator begin() const { return List.begin(); }
698  BaseType::const_iterator end() const { return List.end(); }
699 
700  BaseType List;
701 
703  void print(raw_ostream &OS, const SelectionDAG &G) const;
704 };
705 } // namespace
706 
707 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
708 void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
709  if (isValue()) {
710  OpV.getNode()->print(OS, &G);
711  return;
712  }
713  if (OpN & Invalid) {
714  OS << "invalid";
715  return;
716  }
717  if (OpN & Undef) {
718  OS << "undef";
719  return;
720  }
721  if ((OpN & Whole) != Whole) {
722  assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
723  if (OpN & LoHalf)
724  OS << "lo ";
725  else
726  OS << "hi ";
727  }
728  OS << '#' << SignExtend32(OpN & Index, IndexBits);
729 }
730 
731 void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
732  const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
733  OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
734  << TII.getName(Opc);
735  bool Comma = false;
736  for (const auto &R : Ops) {
737  if (Comma)
738  OS << ',';
739  Comma = true;
740  OS << ' ';
741  R.print(OS, G);
742  }
743 }
744 
745 void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
746  OS << "Input node:\n";
747 #ifndef NDEBUG
748  InpNode->dumpr(&G);
749 #endif
750  OS << "Result templates:\n";
751  for (unsigned I = 0, E = List.size(); I != E; ++I) {
752  OS << '[' << I << "] ";
753  List[I].print(OS, G);
754  OS << '\n';
755  }
756 }
757 #endif
758 
759 namespace {
760 struct ShuffleMask {
761  ShuffleMask(ArrayRef<int> M) : Mask(M) {
762  for (int M : Mask) {
763  if (M == -1)
764  continue;
765  MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
766  MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
767  }
768  }
769 
771  int MinSrc = -1, MaxSrc = -1;
772 
773  ShuffleMask lo() const {
774  size_t H = Mask.size()/2;
775  return ShuffleMask(Mask.take_front(H));
776  }
777  ShuffleMask hi() const {
778  size_t H = Mask.size()/2;
779  return ShuffleMask(Mask.take_back(H));
780  }
781 
782  void print(raw_ostream &OS) const {
783  OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
784  for (int M : Mask)
785  OS << ' ' << M;
786  OS << " }";
787  }
788 };
789 
791 raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
792  SM.print(OS);
793  return OS;
794 }
795 } // namespace
796 
797 // --------------------------------------------------------------------
798 // The HvxSelector class.
799 
801  return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
802 }
804  return G.getSubtarget<HexagonSubtarget>();
805 }
806 
807 namespace llvm {
808  struct HvxSelector {
813  const unsigned HwLen;
814 
817  HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
818 
819  MVT getSingleVT(MVT ElemTy) const {
820  assert(ElemTy != MVT::i1 && "Use getBoolVT for predicates");
821  unsigned NumElems = HwLen / (ElemTy.getSizeInBits()/8);
822  return MVT::getVectorVT(ElemTy, NumElems);
823  }
824 
825  MVT getPairVT(MVT ElemTy) const {
826  assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs.
827  unsigned NumElems = (2*HwLen) / (ElemTy.getSizeInBits()/8);
828  return MVT::getVectorVT(ElemTy, NumElems);
829  }
830 
831  MVT getBoolVT() const {
832  // Return HwLen x i1.
833  return MVT::getVectorVT(MVT::i1, HwLen);
834  }
835 
836  void selectShuffle(SDNode *N);
837  void selectRor(SDNode *N);
838  void selectVAlign(SDNode *N);
839 
840  private:
841  void select(SDNode *ISelN);
842  void materialize(const ResultStack &Results);
843 
844  SDValue getConst32(int Val, const SDLoc &dl);
845  SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
846 
847  enum : unsigned {
848  None,
849  PackMux,
850  };
851  OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
852  OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
853  MutableArrayRef<int> NewMask, unsigned Options = None);
854  OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
855  MutableArrayRef<int> NewMask);
856  OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
857  ResultStack &Results);
858  OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
859  ResultStack &Results);
860 
861  OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
862  OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
863  OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
864  OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
865 
866  OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
867  OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
868  OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
869  OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
870 
871  bool selectVectorConstants(SDNode *N);
872  bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
873  SDValue Va, SDValue Vb, SDNode *N);
874 
875  };
876 }
877 
879  MutableArrayRef<int> MaskR) {
880  unsigned VecLen = Mask.size();
881  assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
882  for (unsigned I = 0; I != VecLen; ++I) {
883  int M = Mask[I];
884  if (M < 0) {
885  MaskL[I] = MaskR[I] = -1;
886  } else if (unsigned(M) < VecLen) {
887  MaskL[I] = M;
888  MaskR[I] = -1;
889  } else {
890  MaskL[I] = -1;
891  MaskR[I] = M-VecLen;
892  }
893  }
894 }
895 
896 static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
897  unsigned MaxLen) {
898  assert(A.size() > 0 && A.size() >= MaxLen);
899  int F = A[0];
900  int E = F;
901  for (unsigned I = 1; I != MaxLen; ++I) {
902  if (A[I] - E != Inc)
903  return { F, I };
904  E = A[I];
905  }
906  return { F, MaxLen };
907 }
908 
909 static bool isUndef(ArrayRef<int> Mask) {
910  for (int Idx : Mask)
911  if (Idx != -1)
912  return false;
913  return true;
914 }
915 
917  for (int I = 0, E = Mask.size(); I != E; ++I) {
918  int M = Mask[I];
919  if (M >= 0 && M != I)
920  return false;
921  }
922  return true;
923 }
924 
926  unsigned SegLen) {
927  assert(isPowerOf2_32(SegLen));
928  SmallVector<unsigned, 4> SegList;
929  if (SM.MaxSrc == -1)
930  return SegList;
931 
932  unsigned Shift = Log2_32(SegLen);
933  BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
934 
935  for (int M : SM.Mask) {
936  if (M >= 0)
937  Segs.set(M >> Shift);
938  }
939 
940  for (unsigned B : Segs.set_bits())
941  SegList.push_back(B);
942  return SegList;
943 }
944 
946  unsigned SegLen) {
947  // Calculate the layout of the output segments in terms of the input
948  // segments.
949  // For example [1,3,1,0] means that the output consists of 4 output
950  // segments, where the first output segment has only elements of the
951  // input segment at index 1. The next output segment only has elements
952  // of the input segment 3, etc.
953  // If an output segment only has undef elements, the value will be ~0u.
954  // If an output segment has elements from more than one input segment,
955  // the corresponding value will be ~1u.
956  unsigned MaskLen = SM.Mask.size();
957  assert(MaskLen % SegLen == 0);
958  SmallVector<unsigned, 4> Map(MaskLen / SegLen);
959 
960  for (int S = 0, E = Map.size(); S != E; ++S) {
961  unsigned Idx = ~0u;
962  for (int I = 0; I != static_cast<int>(SegLen); ++I) {
963  int M = SM.Mask[S*SegLen + I];
964  if (M < 0)
965  continue;
966  unsigned G = M / SegLen; // Input segment of this element.
967  if (Idx == ~0u) {
968  Idx = G;
969  } else if (Idx != G) {
970  Idx = ~1u;
971  break;
972  }
973  }
974  Map[S] = Idx;
975  }
976 
977  return Map;
978 }
979 
981  unsigned SegLen, MutableArrayRef<int> PackedMask) {
983  for (int I = OutSegMap.size() - 1; I >= 0; --I) {
984  unsigned S = OutSegMap[I];
985  assert(S != ~0u && "Unexpected undef");
986  assert(S != ~1u && "Unexpected multi");
987  if (InvMap.size() <= S)
988  InvMap.resize(S+1);
989  InvMap[S] = I;
990  }
991 
992  unsigned Shift = Log2_32(SegLen);
993  for (int I = 0, E = Mask.size(); I != E; ++I) {
994  int M = Mask[I];
995  if (M >= 0) {
996  int OutIdx = InvMap[M >> Shift];
997  M = (M & (SegLen-1)) + SegLen*OutIdx;
998  }
999  PackedMask[I] = M;
1000  }
1001 }
1002 
1004  // Check by adding all numbers only works if there is no overflow.
1005  assert(Mask.size() < 0x00007FFF && "Overflow failure");
1006  int Sum = 0;
1007  for (int Idx : Mask) {
1008  if (Idx == -1)
1009  return false;
1010  Sum += Idx;
1011  }
1012  int N = Mask.size();
1013  return 2*Sum == N*(N-1);
1014 }
1015 
1016 bool HvxSelector::selectVectorConstants(SDNode *N) {
1017  // Constant vectors are generated as loads from constant pools or as
1018  // splats of a constant value. Since they are generated during the
1019  // selection process, the main selection algorithm is not aware of them.
1020  // Select them directly here.
1021  SmallVector<SDNode*,4> Nodes;
1022  SetVector<SDNode*> WorkQ;
1023 
1024  // The DAG can change (due to CSE) during selection, so cache all the
1025  // unselected nodes first to avoid traversing a mutating DAG.
1026  WorkQ.insert(N);
1027  for (unsigned i = 0; i != WorkQ.size(); ++i) {
1028  SDNode *W = WorkQ[i];
1029  if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
1030  Nodes.push_back(W);
1031  for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
1032  WorkQ.insert(W->getOperand(j).getNode());
1033  }
1034 
1035  for (SDNode *L : Nodes)
1036  select(L);
1037 
1038  return !Nodes.empty();
1039 }
1040 
1041 void HvxSelector::materialize(const ResultStack &Results) {
1042  DEBUG_WITH_TYPE("isel", {
1043  dbgs() << "Materializing\n";
1044  Results.print(dbgs(), DAG);
1045  });
1046  if (Results.empty())
1047  return;
1048  const SDLoc &dl(Results.InpNode);
1049  std::vector<SDValue> Output;
1050 
1051  for (unsigned I = 0, E = Results.size(); I != E; ++I) {
1052  const NodeTemplate &Node = Results[I];
1053  std::vector<SDValue> Ops;
1054  for (const OpRef &R : Node.Ops) {
1055  assert(R.isValid());
1056  if (R.isValue()) {
1057  Ops.push_back(R.OpV);
1058  continue;
1059  }
1060  if (R.OpN & OpRef::Undef) {
1062  Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
1063  continue;
1064  }
1065  // R is an index of a result.
1066  unsigned Part = R.OpN & OpRef::Whole;
1067  int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
1068  if (Idx < 0)
1069  Idx += I;
1070  assert(Idx >= 0 && unsigned(Idx) < Output.size());
1071  SDValue Op = Output[Idx];
1072  MVT OpTy = Op.getValueType().getSimpleVT();
1073  if (Part != OpRef::Whole) {
1074  assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1075  MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(),
1076  OpTy.getVectorNumElements()/2);
1077  unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1078  : Hexagon::vsub_hi;
1079  Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
1080  }
1081  Ops.push_back(Op);
1082  } // for (Node : Results)
1083 
1084  assert(Node.Ty != MVT::Other);
1085  SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1086  ? Ops.front().getNode()
1087  : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
1088  Output.push_back(SDValue(ResN, 0));
1089  }
1090 
1091  SDNode *OutN = Output.back().getNode();
1092  SDNode *InpN = Results.InpNode;
1093  DEBUG_WITH_TYPE("isel", {
1094  dbgs() << "Generated node:\n";
1095  OutN->dumpr(&DAG);
1096  });
1097 
1098  ISel.ReplaceNode(InpN, OutN);
1099  selectVectorConstants(OutN);
1100  DAG.RemoveDeadNodes();
1101 }
1102 
1103 OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
1104  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1105  const SDLoc &dl(Results.InpNode);
1106  Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1107  getConst32(Hexagon::HvxWRRegClassID, dl),
1108  Lo, getConst32(Hexagon::vsub_lo, dl),
1109  Hi, getConst32(Hexagon::vsub_hi, dl),
1110  });
1111  return OpRef::res(Results.top());
1112 }
1113 
1114 // Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1115 // pack these halves into a single vector, and remap SM into NewMask to use
1116 // the new vector instead.
1117 OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1118  ResultStack &Results, MutableArrayRef<int> NewMask,
1119  unsigned Options) {
1120  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1121  if (!Va.isValid() || !Vb.isValid())
1122  return OpRef::fail();
1123 
1124  MVT Ty = getSingleVT(MVT::i8);
1126  OpRef Inp[2] = {Va, Vb};
1127  unsigned VecLen = SM.Mask.size();
1128 
1129  auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
1130  ResultStack &Results) {
1131  if (Amt == 0)
1132  return Lo;
1133  const SDLoc &dl(Results.InpNode);
1134  if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1135  bool IsRight = isUInt<3>(Amt); // Right align.
1136  SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1137  unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1138  Results.push(Opc, Ty, {Hi, Lo, S});
1139  return OpRef::res(Results.top());
1140  }
1141  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1142  OpRef A = OpRef::res(Results.top());
1143  Results.push(Hexagon::V6_valignb, Ty, {Hi, Lo, A});
1144  return OpRef::res(Results.top());
1145  };
1146 
1147  // Segment is a vector half.
1148  unsigned SegLen = HwLen / 2;
1149 
1150  // Check if we can shuffle vector halves around to get the used elements
1151  // into a single vector.
1152  SmallVector<int,128> MaskH(SM.Mask.begin(), SM.Mask.end());
1153  SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
1154  unsigned SegCount = SegList.size();
1155  SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
1156 
1157  if (SegList.empty())
1158  return OpRef::undef(Ty);
1159 
1160  // NOTE:
1161  // In the following part of the function, where the segments are rearranged,
1162  // the shuffle mask SM can be of any length that is a multiple of a vector
1163  // (i.e. a multiple of 2*SegLen), and non-zero.
1164  // The output segment map is computed, and it may have any even number of
1165  // entries, but the rearrangement of input segments will be done based only
1166  // on the first two (non-undef) entries in the segment map.
1167  // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1168  // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1169  // a single vector V = 3:1. The output mask will then be updated to use
1170  // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1171  //
1172  // Picking the segments based on the output map is an optimization. For
1173  // correctness it is only necessary that Seg0 and Seg1 are the two input
1174  // segments that are used in the output.
1175 
1176  unsigned Seg0 = ~0u, Seg1 = ~0u;
1177  for (int I = 0, E = SegMap.size(); I != E; ++I) {
1178  unsigned X = SegMap[I];
1179  if (X == ~0u)
1180  continue;
1181  if (Seg0 == ~0u)
1182  Seg0 = X;
1183  else if (Seg1 != ~0u)
1184  break;
1185  if (X == ~1u || X != Seg0)
1186  Seg1 = X;
1187  }
1188 
1189  if (SegCount == 1) {
1190  unsigned SrcOp = SegList[0] / 2;
1191  for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1192  int M = SM.Mask[I];
1193  if (M >= 0) {
1194  M -= SrcOp * HwLen;
1195  assert(M >= 0);
1196  }
1197  NewMask[I] = M;
1198  }
1199  return Inp[SrcOp];
1200  }
1201 
1202  if (SegCount == 2) {
1203  // Seg0 should not be undef here: this would imply a SegList
1204  // with <= 1 elements, which was checked earlier.
1205  assert(Seg0 != ~0u);
1206 
1207  // If Seg0 or Seg1 are "multi-defined", pick them from the input
1208  // segment list instead.
1209  if (Seg0 == ~1u || Seg1 == ~1u) {
1210  if (Seg0 == Seg1) {
1211  Seg0 = SegList[0];
1212  Seg1 = SegList[1];
1213  } else if (Seg0 == ~1u) {
1214  Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1215  } else {
1216  assert(Seg1 == ~1u);
1217  Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1218  }
1219  }
1220  assert(Seg0 != ~1u && Seg1 != ~1u);
1221 
1222  assert(Seg0 != Seg1 && "Expecting different segments");
1223  const SDLoc &dl(Results.InpNode);
1224  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1225  OpRef HL = OpRef::res(Results.top());
1226 
1227  // Va = AB, Vb = CD
1228 
1229  if (Seg0 / 2 == Seg1 / 2) {
1230  // Same input vector.
1231  Va = Inp[Seg0 / 2];
1232  if (Seg0 > Seg1) {
1233  // Swap halves.
1234  Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1235  Va = OpRef::res(Results.top());
1236  }
1237  packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1238  } else if (Seg0 % 2 == Seg1 % 2) {
1239  // Picking AC, BD, CA, or DB.
1240  // vshuff(CD,AB,HL) -> BD:AC
1241  // vshuff(AB,CD,HL) -> DB:CA
1242  auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
1243  : std::make_pair(Va, Vb); // CA or DB
1244  Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
1245  OpRef P = OpRef::res(Results.top());
1246  Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
1247  packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1248  } else {
1249  // Picking AD, BC, CB, or DA.
1250  if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1251  // AD or BC: this can be done using vmux.
1252  // Q = V6_pred_scalar2 SegLen
1253  // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1254  Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1255  OpRef Qt = OpRef::res(Results.top());
1256  auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
1257  : std::make_pair(Vb, Va); // CB
1258  Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1259  Va = OpRef::res(Results.top());
1260  packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1261  } else {
1262  // BC or DA: this could be done via valign by SegLen.
1263  // Do nothing here, because valign (if possible) will be generated
1264  // later on (make sure the Seg0 values are as expected).
1265  assert(Seg0 == 1 || Seg0 == 3);
1266  }
1267  }
1268  }
1269 
1270  // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1271 
1272  ShuffleMask SMH(MaskH);
1273  assert(SMH.Mask.size() == VecLen);
1274  SmallVector<int,128> MaskA(SMH.Mask.begin(), SMH.Mask.end());
1275 
1276  if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1277  // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1278  SmallVector<int,128> Swapped(SMH.Mask.begin(), SMH.Mask.end());
1280  ShuffleMask SW(Swapped);
1281  if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1282  MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1283  std::swap(Va, Vb);
1284  }
1285  }
1286  ShuffleMask SMA(MaskA);
1287  assert(SMA.Mask.size() == VecLen);
1288 
1289  if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1290  int ShiftR = SMA.MinSrc;
1291  if (ShiftR >= static_cast<int>(HwLen)) {
1292  Va = Vb;
1293  Vb = OpRef::undef(Ty);
1294  ShiftR -= HwLen;
1295  }
1296  OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
1297 
1298  for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1299  int M = SMA.Mask[I];
1300  if (M != -1)
1301  M -= SMA.MinSrc;
1302  NewMask[I] = M;
1303  }
1304  return RetVal;
1305  }
1306 
1307  // By here, packing by segment (half-vector) shuffling, and vector alignment
1308  // failed. Try vmux.
1309  // Note: since this is using the original mask, Va and Vb must not have been
1310  // modified.
1311 
1312  if (Options & PackMux) {
1313  // If elements picked from Va and Vb have all different (source) indexes
1314  // (relative to the start of the argument), do a mux, and update the mask.
1315  BitVector Picked(HwLen);
1316  SmallVector<uint8_t,128> MuxBytes(HwLen);
1317  bool CanMux = true;
1318  for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1319  int M = SM.Mask[I];
1320  if (M == -1)
1321  continue;
1322  if (M >= static_cast<int>(HwLen))
1323  M -= HwLen;
1324  else
1325  MuxBytes[M] = 0xFF;
1326  if (Picked[M]) {
1327  CanMux = false;
1328  break;
1329  }
1330  NewMask[I] = M;
1331  }
1332  if (CanMux)
1333  return vmuxs(MuxBytes, Va, Vb, Results);
1334  }
1335  return OpRef::fail();
1336 }
1337 
1338 // Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1339 // pack these vectors into a pair, and remap SM into NewMask to use the
1340 // new pair instead.
1341 OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1342  ResultStack &Results, MutableArrayRef<int> NewMask) {
1343  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1345  if (SegList.empty())
1346  return OpRef::undef(getPairVT(MVT::i8));
1347 
1348  // If more than two halves are used, bail.
1349  // TODO: be more aggressive here?
1350  unsigned SegCount = SegList.size();
1351  if (SegCount > 2)
1352  return OpRef::fail();
1353 
1354  MVT HalfTy = getSingleVT(MVT::i8);
1355 
1356  OpRef Inp[2] = { Va, Vb };
1357  OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1358 
1359  // Really make sure we have at most 2 vectors used in the mask.
1360  assert(SegCount <= 2);
1361 
1362  for (int I = 0, E = SegList.size(); I != E; ++I) {
1363  unsigned S = SegList[I];
1364  OpRef Op = Inp[S / 2];
1365  Out[I] = (S & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1366  }
1367 
1368  // NOTE: Using SegList as the packing map here (not SegMap). This works,
1369  // because we're not concerned here about the order of the segments (i.e.
1370  // single vectors) in the output pair. Changing the order of vectors is
1371  // free (as opposed to changing the order of vector halves as in packs),
1372  // and so there is no extra cost added in case the order needs to be
1373  // changed later.
1374  packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
1375  return concats(Out[0], Out[1], Results);
1376 }
1377 
1378 OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1379  ResultStack &Results) {
1380  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1381  MVT ByteTy = getSingleVT(MVT::i8);
1382  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1383  const SDLoc &dl(Results.InpNode);
1384  SDValue B = getVectorConstant(Bytes, dl);
1385  Results.push(Hexagon::V6_vd0, ByteTy, {});
1386  Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1387  Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1388  return OpRef::res(Results.top());
1389 }
1390 
1391 OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1392  ResultStack &Results) {
1393  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1394  size_t S = Bytes.size() / 2;
1395  OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
1396  OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1397  return concats(L, H, Results);
1398 }
1399 
1400 OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1401  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1402  unsigned VecLen = SM.Mask.size();
1403  assert(HwLen == VecLen);
1404  (void)VecLen;
1405  assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1406 
1407  if (isIdentity(SM.Mask))
1408  return Va;
1409  if (isUndef(SM.Mask))
1410  return OpRef::undef(getSingleVT(MVT::i8));
1411 
1412  unsigned HalfLen = HwLen / 2;
1413  assert(isPowerOf2_32(HalfLen));
1414 
1415  // Handle special case where the output is the same half of the input
1416  // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1417  std::pair<int, unsigned> Strip1 = findStrip(SM.Mask, 1, HalfLen);
1418  if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1419  std::pair<int, unsigned> Strip2 =
1420  findStrip(SM.Mask.drop_front(HalfLen), 1, HalfLen);
1421  if (Strip1 == Strip2) {
1422  const SDLoc &dl(Results.InpNode);
1423  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1424  Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1425  {Va, Va, OpRef::res(Results.top())});
1426  OpRef S = OpRef::res(Results.top());
1427  return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::hi(S);
1428  }
1429  }
1430 
1431  OpRef P = perfect(SM, Va, Results);
1432  if (P.isValid())
1433  return P;
1434  return butterfly(SM, Va, Results);
1435 }
1436 
1437 OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1438  ResultStack &Results) {
1439  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1440  if (isUndef(SM.Mask))
1441  return OpRef::undef(getSingleVT(MVT::i8));
1442 
1443  OpRef C = contracting(SM, Va, Vb, Results);
1444  if (C.isValid())
1445  return C;
1446 
1447  int VecLen = SM.Mask.size();
1448  SmallVector<int,128> PackedMask(VecLen);
1449  OpRef P = packs(SM, Va, Vb, Results, PackedMask);
1450  if (P.isValid())
1451  return shuffs1(ShuffleMask(PackedMask), P, Results);
1452 
1453  // TODO: Before we split the mask, try perfect shuffle on concatenated
1454  // operands. This won't work now, because the perfect code does not
1455  // tolerate undefs in the mask.
1456 
1457  SmallVector<int,128> MaskL(VecLen), MaskR(VecLen);
1458  splitMask(SM.Mask, MaskL, MaskR);
1459 
1460  OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1461  OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1462  if (!L.isValid() || !R.isValid())
1463  return OpRef::fail();
1464 
1465  SmallVector<uint8_t,128> Bytes(VecLen);
1466  for (int I = 0; I != VecLen; ++I) {
1467  if (MaskL[I] != -1)
1468  Bytes[I] = 0xFF;
1469  }
1470  return vmuxs(Bytes, L, R, Results);
1471 }
1472 
1473 OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1474  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1475  int VecLen = SM.Mask.size();
1476 
1477  if (isIdentity(SM.Mask))
1478  return Va;
1479  if (isUndef(SM.Mask))
1480  return OpRef::undef(getPairVT(MVT::i8));
1481 
1482  SmallVector<int,128> PackedMask(VecLen);
1483  OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1484  if (P.isValid()) {
1485  ShuffleMask PM(PackedMask);
1486  OpRef E = expanding(PM, P, Results);
1487  if (E.isValid())
1488  return E;
1489 
1490  OpRef L = shuffs1(PM.lo(), P, Results);
1491  OpRef H = shuffs1(PM.hi(), P, Results);
1492  if (L.isValid() && H.isValid())
1493  return concats(L, H, Results);
1494  }
1495 
1496  OpRef R = perfect(SM, Va, Results);
1497  if (R.isValid())
1498  return R;
1499  // TODO commute the mask and try the opposite order of the halves.
1500 
1501  OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
1502  OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
1503  if (L.isValid() && H.isValid())
1504  return concats(L, H, Results);
1505 
1506  return OpRef::fail();
1507 }
1508 
1509 OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1510  ResultStack &Results) {
1511  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1512  if (isUndef(SM.Mask))
1513  return OpRef::undef(getPairVT(MVT::i8));
1514 
1515  int VecLen = SM.Mask.size();
1516  SmallVector<int,256> PackedMask(VecLen);
1517  OpRef P = packp(SM, Va, Vb, Results, PackedMask);
1518  if (P.isValid())
1519  return shuffp1(ShuffleMask(PackedMask), P, Results);
1520 
1521  SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
1522  splitMask(SM.Mask, MaskL, MaskR);
1523 
1524  OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1525  OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1526  if (!L.isValid() || !R.isValid())
1527  return OpRef::fail();
1528 
1529  // Mux the results.
1530  SmallVector<uint8_t,256> Bytes(VecLen);
1531  for (int I = 0; I != VecLen; ++I) {
1532  if (MaskL[I] != -1)
1533  Bytes[I] = 0xFF;
1534  }
1535  return vmuxp(Bytes, L, R, Results);
1536 }
1537 
1538 namespace {
1539  struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
1540  template <typename T>
1541  Deleter(SelectionDAG &D, T &C)
1542  : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
1543  C.erase(N);
1544  }) {}
1545  };
1546 
1547  template <typename T>
1548  struct NullifyingVector : public T {
1550  NullifyingVector(T &&V) : T(V) {
1551  for (unsigned i = 0, e = T::size(); i != e; ++i) {
1552  SDNode *&N = T::operator[](i);
1553  Refs[N] = &N;
1554  }
1555  }
1556  void erase(SDNode *N) {
1557  auto F = Refs.find(N);
1558  if (F != Refs.end())
1559  *F->second = nullptr;
1560  }
1561  };
1562 }
1563 
1564 void HvxSelector::select(SDNode *ISelN) {
1565  // What's important here is to select the right set of nodes. The main
1566  // selection algorithm loops over nodes in a topological order, i.e. users
1567  // are visited before their operands.
1568  //
1569  // It is an error to have an unselected node with a selected operand, and
1570  // there is an assertion in the main selector code to enforce that.
1571  //
1572  // Such a situation could occur if we selected a node, which is both a
1573  // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1574  // node in the DAG.
1575  assert(ISelN->getOpcode() == HexagonISD::ISEL);
1576  SDNode *N0 = ISelN->getOperand(0).getNode();
1577  if (N0->isMachineOpcode()) {
1578  ISel.ReplaceNode(ISelN, N0);
1579  return;
1580  }
1581 
1582  // There could have been nodes created (i.e. inserted into the DAG)
1583  // that are now dead. Remove them, in case they use any of the nodes
1584  // to select (and make them look shared).
1585  DAG.RemoveDeadNodes();
1586 
1587  SetVector<SDNode*> SubNodes, TmpQ;
1588  std::map<SDNode*,unsigned> NumOps;
1589 
1590  // Don't want to select N0 if it's shared with another node, except if
1591  // it's shared with other ISELs.
1592  auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1593  if (llvm::all_of(N0->uses(), IsISelN))
1594  SubNodes.insert(N0);
1595 
1596  auto InSubNodes = [&SubNodes](SDNode *T) { return SubNodes.count(T); };
1597  for (unsigned I = 0; I != SubNodes.size(); ++I) {
1598  SDNode *S = SubNodes[I];
1599  unsigned OpN = 0;
1600  // Only add subnodes that are only reachable from N0.
1601  for (SDValue Op : S->ops()) {
1602  SDNode *O = Op.getNode();
1603  if (llvm::all_of(O->uses(), InSubNodes)) {
1604  SubNodes.insert(O);
1605  ++OpN;
1606  }
1607  }
1608  NumOps.insert({S, OpN});
1609  if (OpN == 0)
1610  TmpQ.insert(S);
1611  }
1612 
1613  for (unsigned I = 0; I != TmpQ.size(); ++I) {
1614  SDNode *S = TmpQ[I];
1615  for (SDNode *U : S->uses()) {
1616  if (U == ISelN)
1617  continue;
1618  auto F = NumOps.find(U);
1619  assert(F != NumOps.end());
1620  if (F->second > 0 && !--F->second)
1621  TmpQ.insert(F->first);
1622  }
1623  }
1624 
1625  // Remove the marker.
1626  ISel.ReplaceNode(ISelN, N0);
1627 
1628  assert(SubNodes.size() == TmpQ.size());
1629  NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1630 
1631  Deleter DUQ(DAG, Queue);
1632  for (SDNode *S : reverse(Queue)) {
1633  if (S == nullptr)
1634  continue;
1635  DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1636  ISel.Select(S);
1637  }
1638 }
1639 
1640 bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
1641  MVT ResTy, SDValue Va, SDValue Vb,
1642  SDNode *N) {
1643  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1644  MVT ElemTy = ResTy.getVectorElementType();
1645  assert(ElemTy == MVT::i8);
1646  unsigned VecLen = Mask.size();
1647  bool HavePairs = (2*HwLen == VecLen);
1648  MVT SingleTy = getSingleVT(MVT::i8);
1649 
1650  // The prior attempts to handle this shuffle may have left a bunch of
1651  // dead nodes in the DAG (such as constants). These nodes will be added
1652  // at the end of DAG's node list, which at that point had already been
1653  // sorted topologically. In the main selection loop, the node list is
1654  // traversed backwards from the root node, which means that any new
1655  // nodes (from the end of the list) will not be visited.
1656  // Scalarization will replace the shuffle node with the scalarized
1657  // expression, and if that expression reused any if the leftoever (dead)
1658  // nodes, these nodes would not be selected (since the "local" selection
1659  // only visits nodes that are not in AllNodes).
1660  // To avoid this issue, remove all dead nodes from the DAG now.
1661 // DAG.RemoveDeadNodes();
1662 
1664  LLVMContext &Ctx = *DAG.getContext();
1665  MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1666  for (int I : Mask) {
1667  if (I < 0) {
1668  Ops.push_back(ISel.selectUndef(dl, LegalTy));
1669  continue;
1670  }
1671  SDValue Vec;
1672  unsigned M = I;
1673  if (M < VecLen) {
1674  Vec = Va;
1675  } else {
1676  Vec = Vb;
1677  M -= VecLen;
1678  }
1679  if (HavePairs) {
1680  if (M < HwLen) {
1681  Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1682  } else {
1683  Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1684  M -= HwLen;
1685  }
1686  }
1687  SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1688  SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
1689  SDValue L = Lower.LowerOperation(Ex, DAG);
1690  assert(L.getNode());
1691  Ops.push_back(L);
1692  }
1693 
1694  SDValue LV;
1695  if (2*HwLen == VecLen) {
1696  SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
1697  SDValue L0 = Lower.LowerOperation(B0, DAG);
1698  SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
1700  // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1701  // functions may expect to be called only for illegal operations, so
1702  // make sure that they are not called for legal ones. Develop a better
1703  // mechanism for dealing with this.
1704  LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
1705  } else {
1706  SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
1707  LV = Lower.LowerOperation(BV, DAG);
1708  }
1709 
1710  assert(!N->use_empty());
1711  SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1712  ISel.ReplaceNode(N, IS.getNode());
1713  select(IS.getNode());
1714  DAG.RemoveDeadNodes();
1715  return true;
1716 }
1717 
1718 OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
1719  ResultStack &Results) {
1720  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1721  if (!Va.isValid() || !Vb.isValid())
1722  return OpRef::fail();
1723 
1724  // Contracting shuffles, i.e. instructions that always discard some bytes
1725  // from the operand vectors.
1726  //
1727  // V6_vshuff{e,o}b
1728  // V6_vdealb4w
1729  // V6_vpack{e,o}{b,h}
1730 
1731  int VecLen = SM.Mask.size();
1732  std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
1733  MVT ResTy = getSingleVT(MVT::i8);
1734 
1735  // The following shuffles only work for bytes and halfwords. This requires
1736  // the strip length to be 1 or 2.
1737  if (Strip.second != 1 && Strip.second != 2)
1738  return OpRef::fail();
1739 
1740  // The patterns for the shuffles, in terms of the starting offsets of the
1741  // consecutive strips (L = length of the strip, N = VecLen):
1742  //
1743  // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2
1744  // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2
1745  //
1746  // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2
1747  // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2
1748  //
1749  // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ...
1750 
1751  // The value of the element in the mask following the strip will decide
1752  // what kind of a shuffle this can be.
1753  int NextInMask = SM.Mask[Strip.second];
1754 
1755  // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask
1756  // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would
1757  // satisfy this.
1758  if (NextInMask < VecLen) {
1759  // vpack{e,o} or vdealb4w
1760  if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) {
1761  int N = VecLen;
1762  // Check if this is vdealb4w (L=1).
1763  for (int I = 0; I != N/4; ++I)
1764  if (SM.Mask[I] != 4*I)
1765  return OpRef::fail();
1766  for (int I = 0; I != N/4; ++I)
1767  if (SM.Mask[I+N/4] != 2 + 4*I)
1768  return OpRef::fail();
1769  for (int I = 0; I != N/4; ++I)
1770  if (SM.Mask[I+N/2] != N + 4*I)
1771  return OpRef::fail();
1772  for (int I = 0; I != N/4; ++I)
1773  if (SM.Mask[I+3*N/4] != N+2 + 4*I)
1774  return OpRef::fail();
1775  // Matched mask for vdealb4w.
1776  Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va});
1777  return OpRef::res(Results.top());
1778  }
1779 
1780  // Check if this is vpack{e,o}.
1781  int N = VecLen;
1782  int L = Strip.second;
1783  // Check if the first strip starts at 0 or at L.
1784  if (Strip.first != 0 && Strip.first != L)
1785  return OpRef::fail();
1786  // Examine the rest of the mask.
1787  for (int I = L; I < N; I += L) {
1788  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1789  // Check whether the mask element at the beginning of each strip
1790  // increases by 2L each time.
1791  if (S.first - Strip.first != 2*I)
1792  return OpRef::fail();
1793  // Check whether each strip is of the same length.
1794  if (S.second != unsigned(L))
1795  return OpRef::fail();
1796  }
1797 
1798  // Strip.first == 0 => vpacke
1799  // Strip.first == L => vpacko
1800  assert(Strip.first == 0 || Strip.first == L);
1801  using namespace Hexagon;
1802  NodeTemplate Res;
1803  Res.Opc = Strip.second == 1 // Number of bytes.
1804  ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob)
1805  : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh);
1806  Res.Ty = ResTy;
1807  Res.Ops = { Vb, Va };
1808  Results.push(Res);
1809  return OpRef::res(Results.top());
1810  }
1811 
1812  // Check if this is vshuff{e,o}.
1813  int N = VecLen;
1814  int L = Strip.second;
1815  std::pair<int,unsigned> PrevS = Strip;
1816  bool Flip = false;
1817  for (int I = L; I < N; I += L) {
1818  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1819  if (S.second != PrevS.second)
1820  return OpRef::fail();
1821  int Diff = Flip ? PrevS.first - S.first + 2*L
1822  : S.first - PrevS.first;
1823  if (Diff != N)
1824  return OpRef::fail();
1825  Flip ^= true;
1826  PrevS = S;
1827  }
1828  // Strip.first == 0 => vshuffe
1829  // Strip.first == L => vshuffo
1830  assert(Strip.first == 0 || Strip.first == L);
1831  using namespace Hexagon;
1832  NodeTemplate Res;
1833  Res.Opc = Strip.second == 1 // Number of bytes.
1834  ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob)
1835  : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh);
1836  Res.Ty = ResTy;
1837  Res.Ops = { Vb, Va };
1838  Results.push(Res);
1839  return OpRef::res(Results.top());
1840 }
1841 
1842 OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1843  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1844  // Expanding shuffles (using all elements and inserting into larger vector):
1845  //
1846  // V6_vunpacku{b,h} [*]
1847  //
1848  // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
1849  //
1850  // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
1851  // they are not shuffles.
1852  //
1853  // The argument is a single vector.
1854 
1855  int VecLen = SM.Mask.size();
1856  assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
1857 
1858  std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
1859 
1860  // The patterns for the unpacks, in terms of the starting offsets of the
1861  // consecutive strips (L = length of the strip, N = VecLen):
1862  //
1863  // vunpacku: 0, -1, L, -1, 2L, -1 ...
1864 
1865  if (Strip.first != 0)
1866  return OpRef::fail();
1867 
1868  // The vunpackus only handle byte and half-word.
1869  if (Strip.second != 1 && Strip.second != 2)
1870  return OpRef::fail();
1871 
1872  int N = VecLen;
1873  int L = Strip.second;
1874 
1875  // First, check the non-ignored strips.
1876  for (int I = 2*L; I < N; I += 2*L) {
1877  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1878  if (S.second != unsigned(L))
1879  return OpRef::fail();
1880  if (2*S.first != I)
1881  return OpRef::fail();
1882  }
1883  // Check the -1s.
1884  for (int I = L; I < N; I += 2*L) {
1885  auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
1886  if (S.first != -1 || S.second != unsigned(L))
1887  return OpRef::fail();
1888  }
1889 
1890  unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
1891  : Hexagon::V6_vunpackuh;
1892  Results.push(Opc, getPairVT(MVT::i8), {Va});
1893  return OpRef::res(Results.top());
1894 }
1895 
1896 OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1897  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1898  // V6_vdeal{b,h}
1899  // V6_vshuff{b,h}
1900 
1901  // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
1902  // V6_vshuffvdd (V6_vshuff)
1903  // V6_dealvdd (V6_vdeal)
1904 
1905  int VecLen = SM.Mask.size();
1906  assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
1907  unsigned LogLen = Log2_32(VecLen);
1908  unsigned HwLog = Log2_32(HwLen);
1909  // The result length must be the same as the length of a single vector,
1910  // or a vector pair.
1911  assert(LogLen == HwLog || LogLen == HwLog+1);
1912  bool HavePairs = LogLen == HwLog+1;
1913 
1914  if (!isPermutation(SM.Mask))
1915  return OpRef::fail();
1916 
1917  SmallVector<unsigned,8> Perm(LogLen);
1918 
1919  // Check if this could be a perfect shuffle, or a combination of perfect
1920  // shuffles.
1921  //
1922  // Consider this permutation (using hex digits to make the ASCII diagrams
1923  // easier to read):
1924  // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
1925  // This is a "deal" operation: divide the input into two halves, and
1926  // create the output by picking elements by alternating between these two
1927  // halves:
1928  // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
1929  // 8 9 A B C D E F
1930  //
1931  // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
1932  // a somwehat different mechanism that could be used to perform shuffle/
1933  // deal operations: a 2x2 transpose.
1934  // Consider the halves of inputs again, they can be interpreted as a 2x8
1935  // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
1936  // together. Now, when considering 2 elements at a time, it will be a 2x4
1937  // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
1938  // 01 23 45 67
1939  // 89 AB CD EF
1940  // With groups of 4, this will become a single 2x2 matrix, and so on.
1941  //
1942  // The 2x2 transpose instruction works by transposing each of the 2x2
1943  // matrices (or "sub-matrices"), given a specific group size. For example,
1944  // if the group size is 1 (i.e. each element is its own group), there
1945  // will be four transposes of the four 2x2 matrices that form the 2x8.
1946  // For example, with the inputs as above, the result will be:
1947  // 0 8 2 A 4 C 6 E
1948  // 1 9 3 B 5 D 7 F
1949  // Now, this result can be tranposed again, but with the group size of 2:
1950  // 08 19 4C 5D
1951  // 2A 3B 6E 7F
1952  // If we then transpose that result, but with the group size of 4, we get:
1953  // 0819 2A3B
1954  // 4C5D 6E7F
1955  // If we concatenate these two rows, it will be
1956  // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
1957  // which is the same as the "deal" [*] above.
1958  //
1959  // In general, a "deal" of individual elements is a series of 2x2 transposes,
1960  // with changing group size. HVX has two instructions:
1961  // Vdd = V6_vdealvdd Vu, Vv, Rt
1962  // Vdd = V6_shufvdd Vu, Vv, Rt
1963  // that perform exactly that. The register Rt controls which transposes are
1964  // going to happen: a bit at position n (counting from 0) indicates that a
1965  // transpose with a group size of 2^n will take place. If multiple bits are
1966  // set, multiple transposes will happen: vdealvdd will perform them starting
1967  // with the largest group size, vshuffvdd will do them in the reverse order.
1968  //
1969  // The main observation is that each 2x2 transpose corresponds to swapping
1970  // columns of bits in the binary representation of the values.
1971  //
1972  // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
1973  // in a given column. The * denote the columns that will be swapped.
1974  // The transpose with the group size 2^n corresponds to swapping columns
1975  // 3 (the highest log) and log2(n):
1976  //
1977  // 3 2 1 0 0 2 1 3 0 2 3 1
1978  // * * * * * *
1979  // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1980  // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
1981  // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
1982  // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
1983  // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
1984  // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
1985  // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
1986  // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
1987  // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
1988  // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
1989  // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
1990  // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
1991  // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
1992  // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
1993  // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
1994  // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
1995 
1996  // There is one special case that is not a perfect shuffle, but
1997  // can be turned into one easily: when the shuffle operates on
1998  // a vector pair, but the two vectors in the pair are swapped.
1999  // The code below that identifies perfect shuffles will reject
2000  // it, unless the order is reversed.
2001  SmallVector<int,128> MaskStorage(SM.Mask.begin(), SM.Mask.end());
2002  bool InvertedPair = false;
2003  if (HavePairs && SM.Mask[0] >= int(HwLen)) {
2004  for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
2005  int M = SM.Mask[i];
2006  MaskStorage[i] = M >= int(HwLen) ? M-HwLen : M+HwLen;
2007  }
2008  InvertedPair = true;
2009  }
2010  ArrayRef<int> LocalMask(MaskStorage);
2011 
2012  auto XorPow2 = [] (ArrayRef<int> Mask, unsigned Num) {
2013  unsigned X = Mask[0] ^ Mask[Num/2];
2014  // Check that the first half has the X's bits clear.
2015  if ((Mask[0] & X) != 0)
2016  return 0u;
2017  for (unsigned I = 1; I != Num/2; ++I) {
2018  if (unsigned(Mask[I] ^ Mask[I+Num/2]) != X)
2019  return 0u;
2020  if ((Mask[I] & X) != 0)
2021  return 0u;
2022  }
2023  return X;
2024  };
2025 
2026  // Create a vector of log2's for each column: Perm[i] corresponds to
2027  // the i-th bit (lsb is 0).
2028  assert(VecLen > 2);
2029  for (unsigned I = VecLen; I >= 2; I >>= 1) {
2030  // Examine the initial segment of Mask of size I.
2031  unsigned X = XorPow2(LocalMask, I);
2032  if (!isPowerOf2_32(X))
2033  return OpRef::fail();
2034  // Check the other segments of Mask.
2035  for (int J = I; J < VecLen; J += I) {
2036  if (XorPow2(LocalMask.slice(J, I), I) != X)
2037  return OpRef::fail();
2038  }
2039  Perm[Log2_32(X)] = Log2_32(I)-1;
2040  }
2041 
2042  // Once we have Perm, represent it as cycles. Denote the maximum log2
2043  // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2044  // written as (M a1 a2 a3 ... an). That cycle can be broken up into
2045  // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
2046  // order being from left to right. Any (contiguous) segment where the
2047  // values ai, ai+1...aj are either all increasing or all decreasing,
2048  // can be implemented via a single vshuffvdd/vdealvdd respectively.
2049  //
2050  // If there is a cycle (a1 a2 ... an) that does not involve M, it can
2051  // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
2052  // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
2053  // can be used to generate a sequence of vshuffvdd/vdealvdd.
2054  //
2055  // Example:
2056  // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
2057  // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
2058  // (4 0 1)(4 0)(4 2 3)(4 2).
2059  // It can then be expanded into swaps as
2060  // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
2061  // and broken up into "increasing" segments as
2062  // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
2063  // This is equivalent to
2064  // (4 0 1)(4 0 2 3)(4 2),
2065  // which can be implemented as 3 vshufvdd instructions.
2066 
2067  using CycleType = SmallVector<unsigned,8>;
2068  std::set<CycleType> Cycles;
2069  std::set<unsigned> All;
2070 
2071  for (unsigned I : Perm)
2072  All.insert(I);
2073 
2074  // If the cycle contains LogLen-1, move it to the front of the cycle.
2075  // Otherwise, return the cycle unchanged.
2076  auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
2077  unsigned LogPos, N = C.size();
2078  for (LogPos = 0; LogPos != N; ++LogPos)
2079  if (C[LogPos] == LogLen-1)
2080  break;
2081  if (LogPos == N)
2082  return C;
2083 
2084  CycleType NewC(C.begin()+LogPos, C.end());
2085  NewC.append(C.begin(), C.begin()+LogPos);
2086  return NewC;
2087  };
2088 
2089  auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
2090  // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
2091  // for bytes zero is included, for halfwords is not.
2092  if (Cs.size() != 1)
2093  return 0u;
2094  const CycleType &C = *Cs.begin();
2095  if (C[0] != Len-1)
2096  return 0u;
2097  int D = Len - C.size();
2098  if (D != 0 && D != 1)
2099  return 0u;
2100 
2101  bool IsDeal = true, IsShuff = true;
2102  for (unsigned I = 1; I != Len-D; ++I) {
2103  if (C[I] != Len-1-I)
2104  IsDeal = false;
2105  if (C[I] != I-(1-D)) // I-1, I
2106  IsShuff = false;
2107  }
2108  // At most one, IsDeal or IsShuff, can be non-zero.
2109  assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
2110  static unsigned Deals[] = { Hexagon::V6_vdealb, Hexagon::V6_vdealh };
2111  static unsigned Shufs[] = { Hexagon::V6_vshuffb, Hexagon::V6_vshuffh };
2112  return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
2113  };
2114 
2115  while (!All.empty()) {
2116  unsigned A = *All.begin();
2117  All.erase(A);
2118  CycleType C;
2119  C.push_back(A);
2120  for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
2121  C.push_back(B);
2122  All.erase(B);
2123  }
2124  if (C.size() <= 1)
2125  continue;
2126  Cycles.insert(canonicalize(C));
2127  }
2128 
2129  MVT SingleTy = getSingleVT(MVT::i8);
2131 
2132  // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
2133  if (unsigned(VecLen) == HwLen) {
2134  if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
2135  Results.push(SingleOpc, SingleTy, {Va});
2136  return OpRef::res(Results.top());
2137  }
2138  }
2139 
2140  // From the cycles, construct the sequence of values that will
2141  // then form the control values for vdealvdd/vshuffvdd, i.e.
2142  // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2143  // This essentially strips the M value from the cycles where
2144  // it's present, and performs the insertion of M (then stripping)
2145  // for cycles without M (as described in an earlier comment).
2146  SmallVector<unsigned,8> SwapElems;
2147  // When the input is extended (i.e. single vector becomes a pair),
2148  // this is done by using an "undef" vector as the second input.
2149  // However, then we get
2150  // input 1: GOODBITS
2151  // input 2: ........
2152  // but we need
2153  // input 1: ....BITS
2154  // input 2: ....GOOD
2155  // Then at the end, this needs to be undone. To accomplish this,
2156  // artificially add "LogLen-1" at both ends of the sequence.
2157  if (!HavePairs)
2158  SwapElems.push_back(LogLen-1);
2159  for (const CycleType &C : Cycles) {
2160  // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2161  unsigned First = (C[0] == LogLen-1) ? 1 : 0;
2162  SwapElems.append(C.begin()+First, C.end());
2163  if (First == 0)
2164  SwapElems.push_back(C[0]);
2165  }
2166  if (!HavePairs)
2167  SwapElems.push_back(LogLen-1);
2168 
2169  const SDLoc &dl(Results.InpNode);
2170  OpRef Arg = HavePairs ? Va
2171  : concats(Va, OpRef::undef(SingleTy), Results);
2172  if (InvertedPair)
2173  Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg), Results);
2174 
2175  for (unsigned I = 0, E = SwapElems.size(); I != E; ) {
2176  bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1];
2177  unsigned S = (1u << SwapElems[I]);
2178  if (I < E-1) {
2179  while (++I < E-1 && IsInc == (SwapElems[I] < SwapElems[I+1]))
2180  S |= 1u << SwapElems[I];
2181  // The above loop will not add a bit for the final SwapElems[I+1],
2182  // so add it here.
2183  S |= 1u << SwapElems[I];
2184  }
2185  ++I;
2186 
2187  NodeTemplate Res;
2188  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
2189  Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
2190  Res.Ty = PairTy;
2191  Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) };
2192  Results.push(Res);
2193  Arg = OpRef::res(Results.top());
2194  }
2195 
2196  return HavePairs ? Arg : OpRef::lo(Arg);
2197 }
2198 
2199 OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2200  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2201  // Butterfly shuffles.
2202  //
2203  // V6_vdelta
2204  // V6_vrdelta
2205  // V6_vror
2206 
2207  // The assumption here is that all elements picked by Mask are in the
2208  // first operand to the vector_shuffle. This assumption is enforced
2209  // by the caller.
2210 
2211  MVT ResTy = getSingleVT(MVT::i8);
2212  PermNetwork::Controls FC, RC;
2213  const SDLoc &dl(Results.InpNode);
2214  int VecLen = SM.Mask.size();
2215 
2216  for (int M : SM.Mask) {
2217  if (M != -1 && M >= VecLen)
2218  return OpRef::fail();
2219  }
2220 
2221  // Try the deltas/benes for both single vectors and vector pairs.
2222  ForwardDeltaNetwork FN(SM.Mask);
2223  if (FN.run(FC)) {
2224  SDValue Ctl = getVectorConstant(FC, dl);
2225  Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2226  return OpRef::res(Results.top());
2227  }
2228 
2229  // Try reverse delta.
2230  ReverseDeltaNetwork RN(SM.Mask);
2231  if (RN.run(RC)) {
2232  SDValue Ctl = getVectorConstant(RC, dl);
2233  Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2234  return OpRef::res(Results.top());
2235  }
2236 
2237  // Do Benes.
2238  BenesNetwork BN(SM.Mask);
2239  if (BN.run(FC, RC)) {
2240  SDValue CtlF = getVectorConstant(FC, dl);
2241  SDValue CtlR = getVectorConstant(RC, dl);
2242  Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2243  Results.push(Hexagon::V6_vrdelta, ResTy,
2244  {OpRef::res(-1), OpRef(CtlR)});
2245  return OpRef::res(Results.top());
2246  }
2247 
2248  return OpRef::fail();
2249 }
2250 
2251 SDValue HvxSelector::getConst32(int Val, const SDLoc &dl) {
2252  return DAG.getTargetConstant(Val, dl, MVT::i32);
2253 }
2254 
2255 SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
2256  const SDLoc &dl) {
2258  for (uint8_t C : Data)
2259  Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
2260  MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
2261  SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2262  SDValue LV = Lower.LowerOperation(BV, DAG);
2263  DAG.RemoveDeadNode(BV.getNode());
2264  return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
2265 }
2266 
2268  DEBUG_WITH_TYPE("isel", {
2269  dbgs() << "Starting " << __func__ << " on node:\n";
2270  N->dump(&DAG);
2271  });
2272  MVT ResTy = N->getValueType(0).getSimpleVT();
2273  // Assume that vector shuffles operate on vectors of bytes.
2274  assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
2275 
2276  auto *SN = cast<ShuffleVectorSDNode>(N);
2277  std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2278  // This shouldn't really be necessary. Is it?
2279  for (int &Idx : Mask)
2280  if (Idx != -1 && Idx < 0)
2281  Idx = -1;
2282 
2283  unsigned VecLen = Mask.size();
2284  bool HavePairs = (2*HwLen == VecLen);
2285  assert(ResTy.getSizeInBits() / 8 == VecLen);
2286 
2287  // Vd = vector_shuffle Va, Vb, Mask
2288  //
2289 
2290  bool UseLeft = false, UseRight = false;
2291  for (unsigned I = 0; I != VecLen; ++I) {
2292  if (Mask[I] == -1)
2293  continue;
2294  unsigned Idx = Mask[I];
2295  assert(Idx < 2*VecLen);
2296  if (Idx < VecLen)
2297  UseLeft = true;
2298  else
2299  UseRight = true;
2300  }
2301 
2302  DEBUG_WITH_TYPE("isel", {
2303  dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
2304  << UseLeft << " UseRight=" << UseRight << " HavePairs="
2305  << HavePairs << '\n';
2306  });
2307  // If the mask is all -1's, generate "undef".
2308  if (!UseLeft && !UseRight) {
2309  ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
2310  return;
2311  }
2312 
2313  SDValue Vec0 = N->getOperand(0);
2314  SDValue Vec1 = N->getOperand(1);
2315  ResultStack Results(SN);
2316  Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2317  Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2318  OpRef Va = OpRef::res(Results.top()-1);
2319  OpRef Vb = OpRef::res(Results.top());
2320 
2321  OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2322  : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2323 
2324  bool Done = Res.isValid();
2325  if (Done) {
2326  // Make sure that Res is on the stack before materializing.
2327  Results.push(TargetOpcode::COPY, ResTy, {Res});
2328  materialize(Results);
2329  } else {
2330  Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
2331  }
2332 
2333  if (!Done) {
2334 #ifndef NDEBUG
2335  dbgs() << "Unhandled shuffle:\n";
2336  SN->dumpr(&DAG);
2337 #endif
2338  llvm_unreachable("Failed to select vector shuffle");
2339  }
2340 }
2341 
2343  // If this is a rotation by less than 8, use V6_valignbi.
2344  MVT Ty = N->getValueType(0).getSimpleVT();
2345  const SDLoc &dl(N);
2346  SDValue VecV = N->getOperand(0);
2347  SDValue RotV = N->getOperand(1);
2348  SDNode *NewN = nullptr;
2349 
2350  if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
2351  unsigned S = CN->getZExtValue() % HST.getVectorLength();
2352  if (S == 0) {
2353  NewN = VecV.getNode();
2354  } else if (isUInt<3>(S)) {
2355  NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2356  {VecV, VecV, getConst32(S, dl)});
2357  }
2358  }
2359 
2360  if (!NewN)
2361  NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2362 
2363  ISel.ReplaceNode(N, NewN);
2364 }
2365 
2367  SDValue Vv = N->getOperand(0);
2368  SDValue Vu = N->getOperand(1);
2369  SDValue Rt = N->getOperand(2);
2370  SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
2371  N->getValueType(0), {Vv, Vu, Rt});
2372  ISel.ReplaceNode(N, NewN);
2373  DAG.RemoveDeadNode(N);
2374 }
2375 
2376 void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
2377  HvxSelector(*this, *CurDAG).selectShuffle(N);
2378 }
2379 
2380 void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
2381  HvxSelector(*this, *CurDAG).selectRor(N);
2382 }
2383 
2384 void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
2385  HvxSelector(*this, *CurDAG).selectVAlign(N);
2386 }
2387 
2389  const SDLoc &dl(N);
2390  SDValue Chain = N->getOperand(0);
2391  SDValue Address = N->getOperand(2);
2392  SDValue Predicate = N->getOperand(3);
2393  SDValue Base = N->getOperand(4);
2394  SDValue Modifier = N->getOperand(5);
2395  SDValue Offset = N->getOperand(6);
2396  SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2397 
2398  unsigned Opcode;
2399  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2400  switch (IntNo) {
2401  default:
2402  llvm_unreachable("Unexpected HVX gather intrinsic.");
2403  case Intrinsic::hexagon_V6_vgathermhq:
2404  case Intrinsic::hexagon_V6_vgathermhq_128B:
2405  Opcode = Hexagon::V6_vgathermhq_pseudo;
2406  break;
2407  case Intrinsic::hexagon_V6_vgathermwq:
2408  case Intrinsic::hexagon_V6_vgathermwq_128B:
2409  Opcode = Hexagon::V6_vgathermwq_pseudo;
2410  break;
2411  case Intrinsic::hexagon_V6_vgathermhwq:
2412  case Intrinsic::hexagon_V6_vgathermhwq_128B:
2413  Opcode = Hexagon::V6_vgathermhwq_pseudo;
2414  break;
2415  }
2416 
2417  SDVTList VTs = CurDAG->getVTList(MVT::Other);
2418  SDValue Ops[] = { Address, ImmOperand,
2419  Predicate, Base, Modifier, Offset, Chain };
2420  SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2421 
2422  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2423  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2424 
2425  ReplaceNode(N, Result);
2426 }
2427 
2429  const SDLoc &dl(N);
2430  SDValue Chain = N->getOperand(0);
2431  SDValue Address = N->getOperand(2);
2432  SDValue Base = N->getOperand(3);
2433  SDValue Modifier = N->getOperand(4);
2434  SDValue Offset = N->getOperand(5);
2435  SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2436 
2437  unsigned Opcode;
2438  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2439  switch (IntNo) {
2440  default:
2441  llvm_unreachable("Unexpected HVX gather intrinsic.");
2442  case Intrinsic::hexagon_V6_vgathermh:
2443  case Intrinsic::hexagon_V6_vgathermh_128B:
2444  Opcode = Hexagon::V6_vgathermh_pseudo;
2445  break;
2446  case Intrinsic::hexagon_V6_vgathermw:
2447  case Intrinsic::hexagon_V6_vgathermw_128B:
2448  Opcode = Hexagon::V6_vgathermw_pseudo;
2449  break;
2450  case Intrinsic::hexagon_V6_vgathermhw:
2451  case Intrinsic::hexagon_V6_vgathermhw_128B:
2452  Opcode = Hexagon::V6_vgathermhw_pseudo;
2453  break;
2454  }
2455 
2456  SDVTList VTs = CurDAG->getVTList(MVT::Other);
2457  SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain };
2458  SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2459 
2460  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2461  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2462 
2463  ReplaceNode(N, Result);
2464 }
2465 
2467  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2468  SDNode *Result;
2469  switch (IID) {
2470  case Intrinsic::hexagon_V6_vaddcarry: {
2471  std::array<SDValue, 3> Ops = {
2472  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2473  SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2474  Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2475  break;
2476  }
2477  case Intrinsic::hexagon_V6_vaddcarry_128B: {
2478  std::array<SDValue, 3> Ops = {
2479  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2480  SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2481  Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2482  break;
2483  }
2484  case Intrinsic::hexagon_V6_vsubcarry: {
2485  std::array<SDValue, 3> Ops = {
2486  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2487  SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2488  Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2489  break;
2490  }
2491  case Intrinsic::hexagon_V6_vsubcarry_128B: {
2492  std::array<SDValue, 3> Ops = {
2493  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2494  SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2495  Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2496  break;
2497  }
2498  default:
2499  llvm_unreachable("Unexpected HVX dual output intrinsic.");
2500  }
2501  ReplaceUses(N, Result);
2502  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
2503  ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
2504  CurDAG->RemoveDeadNode(N);
2505 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
llvm::HexagonSubtarget::getVectorLength
unsigned getVectorLength() const
Definition: HexagonSubtarget.h:295
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:528
B1
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:494
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::MVT::v128i1
@ v128i1
Definition: MachineValueType.h:73
Pass
print lazy value Lazy Value Info Printer Pass
Definition: LazyValueInfo.cpp:1979
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1090
llvm::HvxSelector::Lower
const HexagonTargetLowering & Lower
Definition: HexagonISelDAGToDAGHVX.cpp:809
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::NodeSet::end
iterator end()
Definition: MachinePipeliner.h:431
llvm::Function::empty
bool empty() const
Definition: Function.h:732
T
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:151
llvm::Function::print
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4485
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:542
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
isIdentity
static bool isIdentity(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:916
getHexagonLowering
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:800
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::HvxSelector::HwLen
const unsigned HwLen
Definition: HexagonISelDAGToDAGHVX.cpp:813
Ignore
ReachingDefAnalysis InstSet InstSet & Ignore
Definition: ARMLowOverheadLoops.cpp:542
llvm::SmallVector< unsigned, 4 >
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:242
llvm::HexagonTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: HexagonISelLowering.cpp:3178
llvm::HvxSelector::selectRor
void selectRor(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2342
PairTy
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Definition: MachineModuleInfoImpls.cpp:30
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::HvxSelector::selectShuffle
void selectShuffle(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2267
llvm::BitVector::set_bits
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:133
R600_InstFlag::FC
@ FC
Definition: R600Defines.h:32
push
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
Definition: BitstreamRemarkSerializer.cpp:24
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:454
llvm::MachO::Invalid
@ Invalid
Invalid file type.
Definition: InterfaceFile.h:55
llvm::MemOp
Definition: TargetLowering.h:111
llvm::coro::ABI::Switch
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
Shift
bool Shift
Definition: README.txt:468
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:848
llvm::HexagonDAGToDAGISel
Definition: HexagonISelDAGToDAG.h:29
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::SelectionDAG::RemoveDeadNodes
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
Definition: SelectionDAG.cpp:900
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
HexagonTargetMachine.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
getHexagonSubtarget
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:803
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:182
BaseType
Coloring
Stack Slot Coloring
Definition: StackSlotColoring.cpp:139
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:462
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
llvm::ArrayRef::data
const T * data() const
Definition: ArrayRef.h:161
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:241
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:909
HexagonISelDAGToDAG.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
getInputSegmentList
static SmallVector< unsigned, 4 > getInputSegmentList(ShuffleMask SM, unsigned SegLen)
Definition: HexagonISelDAGToDAGHVX.cpp:925
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1617
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:632
DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:306
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:220
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:667
llvm::HexagonISD::ISEL
@ ISEL
Definition: HexagonISelLowering.h:95
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1449
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:329
llvm::MVT::v64i1
@ v64i1
Definition: MachineValueType.h:72
llvm::AArch64CC::HS
@ HS
Definition: AArch64BaseInfo.h:257
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:229
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:623
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition: SelectionDAGNodes.h:793
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::SelectionDAG::DAGNodeDeletedListener
Definition: SelectionDAG.h:326
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:619
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::SPIRV::StorageClass::Output
@ Output
llvm::BitVector
Definition: BitVector.h:75
llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:320
llvm::None
const NoneType None
Definition: None.h:24
llvm::TargetLoweringBase::getTypeToTransformTo
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:981
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon.h
llvm::HvxSelector
Definition: HexagonISelDAGToDAGHVX.cpp:808
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
llvm::NodeSet::begin
iterator begin()
Definition: MachinePipeliner.h:430
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:240
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::SignExtend32
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:795
getOutputSegmentMap
static SmallVector< unsigned, 4 > getOutputSegmentMap(ShuffleMask SM, unsigned SegLen)
Definition: HexagonISelDAGToDAGHVX.cpp:945
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition: SelectionDAG.cpp:954
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:203
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
llvm::HexagonDAGToDAGISel::SelectV65GatherPred
void SelectV65GatherPred(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2388
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:716
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
fail
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
Definition: BPFISelLowering.cpp:38
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:908
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8851
size
i< reg-> size
Definition: README.txt:166
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:869
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::HvxSelector::HST
const HexagonSubtarget & HST
Definition: HexagonISelDAGToDAGHVX.cpp:812
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:883
llvm::isUInt< 8 >
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:405
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9559
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::SDNode::dumpr
void dumpr() const
Dump (recursively) this node and its use-def subgraph.
Definition: SelectionDAGDumper.cpp:1004
HexagonISelLowering.h
packSegmentMask
static void packSegmentMask(ArrayRef< int > Mask, ArrayRef< unsigned > OutSegMap, unsigned SegLen, MutableArrayRef< int > PackedMask)
Definition: HexagonISelDAGToDAGHVX.cpp:980
llvm::SDNode::isMachineOpcode
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
Definition: SelectionDAGNodes.h:687
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition: MachineValueType.h:1216
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1598
SelectionDAGISel.h
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::HexagonTargetLowering
Definition: HexagonISelLowering.h:105
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::HvxSelector::ISel
HexagonDAGToDAGISel & ISel
Definition: HexagonISelDAGToDAGHVX.cpp:810
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:78
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:115
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:805
llvm::object::Comma
@ Comma
Definition: COFFModuleDefinition.cpp:35
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:114
llvm::MachO::All
@ All
Definition: InterfaceFile.h:69
j
return j(j<< 16)
llvm::MipsISD::Mult
@ Mult
Definition: MipsISelLowering.h:135
findStrip
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
Definition: HexagonISelDAGToDAGHVX.cpp:896
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:268
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::HvxSelector::getPairVT
MVT getPairVT(MVT ElemTy) const
Definition: HexagonISelDAGToDAGHVX.cpp:825
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::ArrayRef::take_front
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:227
llvm::HexagonDAGToDAGISel::SelectV65Gather
void SelectV65Gather(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2428
llvm::c_str
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Definition: WindowsSupport.h:193
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:9677
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:137
isPermutation
static bool isPermutation(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:1003
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:187
llvm::HvxSelector::selectVAlign
void selectVAlign(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2366
MachineInstrBuilder.h
llvm::HvxSelector::DAG
SelectionDAG & DAG
Definition: HexagonISelDAGToDAGHVX.cpp:811
List
const NodeList & List
Definition: RDFGraph.cpp:199
N
#define N
splitMask
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
Definition: HexagonISelDAGToDAGHVX.cpp:878
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::HvxSelector::HvxSelector
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:815
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
llvm::HexagonDAGToDAGISel::Select
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition: HexagonISelDAGToDAG.cpp:883
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:652
llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:354
RN
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s src2 rnd ← FPSCR RN
Definition: README_P9.txt:262
llvm::HvxSelector::getSingleVT
MVT getSingleVT(MVT ElemTy) const
Definition: HexagonISelDAGToDAGHVX.cpp:819
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::ShuffleVectorSDNode::commuteMask
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
Definition: SelectionDAGNodes.h:1546
Debug.h
llvm::SrcOp
Definition: MachineIRBuilder.h:126
SetVector.h
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::HexagonDAGToDAGISel::SelectHVXDualOutput
void SelectHVXDualOutput(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2466
llvm::HvxSelector::getBoolVT
MVT getBoolVT() const
Definition: HexagonISelDAGToDAGHVX.cpp:831
llvm::Function::size
size_t size() const
Definition: Function.h:731
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::irsymtab::build
Error build(ArrayRef< Module * > Mods, SmallVector< char, 0 > &Symtab, StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc)
Fills in Symtab and StrtabBuilder with a valid symbol and string table for Mods.
Definition: IRSymtab.cpp:365
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52