17 #include "llvm/IR/IntrinsicsHexagon.h"
27 #define DEBUG_TYPE "hexagon-isel"
98 enum class ColorKind {
None, Red, Black };
104 using MapType = std::map<Node, ColorKind>;
105 static constexpr Node
Ignore = Node(-1);
113 const MapType &colors()
const {
117 ColorKind other(ColorKind Color) {
119 return ColorKind::Red;
120 return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
128 std::set<Node> Needed;
130 using NodeSet = std::set<Node>;
131 std::map<Node,NodeSet> Edges;
133 Node conj(Node Pos) {
134 Node Num = Order.
size();
135 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
138 ColorKind getColor(Node
N) {
139 auto F = Colors.find(
N);
143 std::pair<bool, ColorKind> getUniqueColor(
const NodeSet &Nodes);
150 std::pair<bool, ColorKind> Coloring::getUniqueColor(
const NodeSet &Nodes) {
152 for (Node
N : Nodes) {
153 ColorKind ColorN = getColor(
N);
161 return {
true, Color };
166 for (
unsigned P = 0;
P != Order.size(); ++
P) {
170 Node PC = Order[conj(
P)];
176 for (
unsigned I = 0;
I != Order.size(); ++
I) {
177 if (!Needed.count(
I))
189 bool Coloring::color() {
191 auto Enqueue = [
this,&FirstQ] (Node
N) {
194 for (
unsigned I = 0;
I != Q.
size(); ++
I) {
198 FirstQ.
insert(Q.begin(), Q.end());
200 for (Node
N : Needed)
203 for (Node
N : FirstQ) {
207 auto P = getUniqueColor(Ns);
210 Colors[
N] = other(
P.second);
214 for (
auto E : Edges) {
216 if (!Needed.count(conj(
N)) || Colors.count(
N))
218 auto P = getUniqueColor(
E.second);
221 Colors[
N] = other(
P.second);
226 std::vector<Node> WorkQ;
227 for (
auto E : Edges) {
229 if (!Colors.count(
N))
233 for (
unsigned I = 0;
I < WorkQ.size(); ++
I) {
236 auto P = getUniqueColor(Ns);
238 Colors[
N] = other(
P.second);
245 ColorKind ColorC = other(ColorN);
248 for (Node M : CopyNs) {
249 ColorKind ColorM = getColor(M);
250 if (ColorM == ColorC) {
263 for (
unsigned I = 0;
I != Order.size(); ++
I)
264 if (Colors.count(
I) == 0)
270 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
272 dbgs() <<
"{ Order: {";
273 for (
unsigned I = 0;
I != Order.size(); ++
I) {
281 dbgs() <<
" Needed: {";
282 for (Node
N : Needed)
286 dbgs() <<
" Edges: {\n";
287 for (
auto E : Edges) {
288 dbgs() <<
" " <<
E.first <<
" -> {";
289 for (
auto N :
E.second)
295 auto ColorKindToName = [](ColorKind
C) {
301 case ColorKind::Black:
307 dbgs() <<
" Colors: {\n";
308 for (
auto C : Colors)
309 dbgs() <<
" " <<
C.first <<
" -> " << ColorKindToName(
C.second) <<
"\n";
319 using Controls = std::vector<uint8_t>;
320 using ElemType =
int;
321 static constexpr ElemType
Ignore = ElemType(-1);
337 unsigned S = Order.size();
341 Table.resize(Order.size());
342 for (RowType &Row : Table)
346 void getControls(Controls &V,
unsigned StartAt, uint8_t Dir)
const {
347 unsigned Size = Order.size();
349 for (
unsigned I = 0;
I !=
Size; ++
I) {
351 for (
unsigned L = 0; L != Log; ++L) {
352 unsigned C = ctl(
I, StartAt+L) ==
Switch;
363 uint8_t ctl(ElemType Pos,
unsigned Step)
const {
364 return Table[Pos][Step];
366 unsigned size()
const {
369 unsigned steps()
const {
375 std::vector<ElemType> Order;
376 using RowType = std::vector<uint8_t>;
377 std::vector<RowType> Table;
380 struct ForwardDeltaNetwork :
public PermNetwork {
383 bool run(Controls &V) {
384 if (!route(Order.data(), Table.data(),
size(), 0))
386 getControls(V, 0, Forward);
391 bool route(ElemType *
P, RowType *T,
unsigned Size,
unsigned Step);
394 struct ReverseDeltaNetwork :
public PermNetwork {
397 bool run(Controls &V) {
398 if (!route(Order.data(), Table.data(),
size(), 0))
400 getControls(V, 0, Reverse);
405 bool route(ElemType *
P, RowType *T,
unsigned Size,
unsigned Step);
408 struct BenesNetwork :
public PermNetwork {
411 bool run(Controls &
F, Controls &R) {
412 if (!route(Order.data(), Table.data(),
size(), 0))
415 getControls(
F, 0, Forward);
416 getControls(R, Log, Reverse);
421 bool route(ElemType *
P, RowType *T,
unsigned Size,
unsigned Step);
425 bool ForwardDeltaNetwork::route(ElemType *
P, RowType *T,
unsigned Size,
427 bool UseUp =
false, UseDown =
false;
434 for (ElemType J = 0; J != Num; ++J) {
442 S = (J < Num/2) ?
Pass : Switch;
444 S = (J < Num/2) ? Switch :
Pass;
447 ElemType U = (
S ==
Pass) ?
I : (
I < Num/2 ?
I+Num/2 :
I-Num/2);
452 if (T[U][Step] !=
S && T[U][Step] !=
None)
457 for (ElemType J = 0; J != Num; ++J)
458 if (
P[J] !=
Ignore &&
P[J] >= Num/2)
462 if (UseUp && !route(
P, T,
Size/2, Step+1))
470 bool ReverseDeltaNetwork::route(ElemType *
P, RowType *T,
unsigned Size,
472 unsigned Pets = Log-1 - Step;
473 bool UseUp =
false, UseDown =
false;
478 const Coloring::MapType &
M =
G.colors();
483 for (ElemType J = 0; J != Num; ++J) {
489 ColorKind
C =
M.at(
I);
495 bool InpUp =
I < Num/2;
497 ColorUp = InpUp ?
C :
G.other(
C);
498 if ((
C == ColorUp) != InpUp) {
505 S = (J < Num/2) ?
Pass : Switch;
508 S = (J < Num/2) ? Switch :
Pass;
516 for (ElemType J = 0,
E =
Size / 2; J !=
E; ++J) {
518 ElemType PC =
P[J+
Size/2];
521 if (T[J][Pets] == Switch)
523 if (T[J+
Size/2][Pets] == Switch)
529 for (ElemType J = 0; J != Num; ++J)
530 if (
P[J] !=
Ignore &&
P[J] >= Num/2)
534 if (UseUp && !route(
P, T,
Size/2, Step+1))
542 bool BenesNetwork::route(ElemType *
P, RowType *T,
unsigned Size,
545 const Coloring::MapType &
M =
G.colors();
550 unsigned Pets = 2*Log-1 - Step;
551 bool UseUp =
false, UseDown =
false;
557 for (ElemType J = 0; J != Num; ++J) {
561 ColorKind
C =
M.at(
I);
565 ColorUp = (
I < Num / 2) ? ColorKind::Red : ColorKind::Black;
567 unsigned CI = (
I < Num/2) ?
I+Num/2 :
I-Num/2;
573 T[J][Pets] = (J < Num/2) ?
Pass : Switch;
580 T[J][Pets] = (J < Num/2) ? Switch :
Pass;
587 for (ElemType J = 0; J != Num/2; ++J) {
589 ElemType PC =
P[J+Num/2];
592 if (T[J][Pets] == Switch)
594 if (T[J+Num/2][Pets] == Switch)
600 for (ElemType J = 0; J != Num; ++J)
601 if (
P[J] !=
Ignore &&
P[J] >= Num/2)
605 if (UseUp && !route(
P, T,
Size/2, Step+1))
620 bool isValue()
const {
return OpV.getNode() !=
nullptr; }
621 bool isValid()
const {
return isValue() || !(OpN &
Invalid); }
622 static OpRef res(
int N) {
return OpRef(Whole | (
N &
Index)); }
623 static OpRef
fail() {
return OpRef(Invalid); }
625 static OpRef lo(
const OpRef &R) {
629 static OpRef hi(
const OpRef &R) {
649 Whole = LoHalf | HiHalf,
659 OpRef(
unsigned N) : OpN(
N) {}
662 struct NodeTemplate {
663 NodeTemplate() =
default;
666 std::vector<OpRef> Ops;
673 : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
676 unsigned push(
const NodeTemplate &Res) {
678 return List.size()-1;
680 unsigned push(
unsigned Opc,
MVT Ty, std::vector<OpRef> &&Ops) {
687 bool empty()
const {
return List.empty(); }
688 unsigned size()
const {
return List.size(); }
689 unsigned top()
const {
return size()-1; }
690 const NodeTemplate &operator[](
unsigned I)
const {
return List[
I]; }
691 unsigned reset(
unsigned NewTop) {
692 List.resize(NewTop+1);
696 using BaseType = std::vector<NodeTemplate>;
697 BaseType::iterator
begin() {
return List.begin(); }
698 BaseType::iterator
end() {
return List.end(); }
699 BaseType::const_iterator
begin()
const {
return List.begin(); }
700 BaseType::const_iterator
end()
const {
return List.end(); }
709 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
712 OpV.getNode()->print(OS, &
G);
723 if ((OpN & Whole) != Whole) {
724 assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
738 for (
const auto &R : Ops) {
748 OS <<
"Input node:\n";
752 OS <<
"Result templates:\n";
753 for (
unsigned I = 0,
E =
List.size();
I !=
E; ++
I) {
754 OS <<
'[' <<
I <<
"] ";
764 for (
unsigned I = 0,
E =
Mask.size();
I !=
E; ++
I) {
768 MinSrc = (MinSrc == -1) ? M :
std::min(MinSrc, M);
769 MaxSrc = (MaxSrc == -1) ? M :
std::max(MaxSrc, M);
774 int MinSrc = -1, MaxSrc = -1;
776 ShuffleMask lo()
const {
777 size_t H =
Mask.size()/2;
778 return ShuffleMask(
Mask.take_front(
H));
780 ShuffleMask hi()
const {
781 size_t H =
Mask.size()/2;
782 return ShuffleMask(
Mask.take_back(
H));
786 OS <<
"MinSrc:" << MinSrc <<
", MaxSrc:" << MaxSrc <<
" {";
837 void select(
SDNode *ISelN);
838 void materialize(
const ResultStack &
Results);
846 OpRef concat(OpRef Va, OpRef Vb, ResultStack &
Results);
847 OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results,
849 OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results,
856 OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &
Results);
857 OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results);
858 OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &
Results);
859 OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results);
861 OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &
Results);
862 OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results);
863 OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &
Results);
864 OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &
Results);
866 bool selectVectorConstants(
SDNode *
N);
875 unsigned VecLen =
Mask.size();
877 for (
unsigned I = 0;
I != VecLen; ++
I) {
880 MaskL[
I] = MaskR[
I] = -1;
881 }
else if (
unsigned(
M) < VecLen) {
893 assert(A.size() > 0 && A.size() >= MaxLen);
896 for (
unsigned I = 1;
I != MaxLen; ++
I) {
901 return {
F, MaxLen };
912 for (
int I = 0,
E =
Mask.size();
I !=
E; ++
I) {
914 if (
M >= 0 &&
M !=
I)
922 assert(
Mask.size() < 0x00007FFF &&
"Sanity failure");
924 for (
int Idx :
Mask) {
930 return 2*Sum ==
N*(
N-1);
933 bool HvxSelector::selectVectorConstants(
SDNode *
N) {
944 for (
unsigned i = 0;
i != WorkQ.
size(); ++
i) {
948 for (
unsigned j = 0,
f =
W->getNumOperands();
j !=
f; ++
j)
949 WorkQ.
insert(
W->getOperand(
j).getNode());
955 return !Nodes.empty();
958 void HvxSelector::materialize(
const ResultStack &
Results) {
960 dbgs() <<
"Materializing\n";
966 std::vector<SDValue> Output;
969 const NodeTemplate &Node =
Results[
I];
970 std::vector<SDValue> Ops;
971 for (
const OpRef &R : Node.Ops) {
974 Ops.push_back(
R.OpV);
979 Ops.push_back(
ISel.selectUndef(dl,
MVT(SVT)));
983 unsigned Part =
R.OpN & OpRef::Whole;
987 assert(Idx >= 0 &&
unsigned(Idx) < Output.size());
989 MVT OpTy =
Op.getValueType().getSimpleVT();
990 if (Part != OpRef::Whole) {
991 assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
994 unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1002 SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1003 ? Ops.front().getNode()
1005 Output.push_back(
SDValue(ResN, 0));
1008 SDNode *OutN = Output.back().getNode();
1011 dbgs() <<
"Generated node:\n";
1016 selectVectorConstants(OutN);
1020 OpRef HvxSelector::concat(OpRef
Lo, OpRef
Hi, ResultStack &
Results) {
1028 return OpRef::res(
Results.top());
1032 OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1036 if (!Va.isValid() || !Vb.isValid())
1039 int VecLen = SM.Mask.size();
1042 auto IsExtSubvector = [] (ShuffleMask
M) {
1043 assert(
M.MinSrc >= 0 &&
M.MaxSrc >= 0);
1044 for (
int I = 0,
E =
M.Mask.size();
I !=
E; ++
I) {
1045 if (
M.Mask[
I] >= 0 &&
M.Mask[
I]-
I !=
M.MinSrc)
1051 if (SM.MaxSrc - SM.MinSrc <
int(
HwLen)) {
1052 if (SM.MinSrc == 0 || SM.MinSrc ==
int(
HwLen) || !IsExtSubvector(SM)) {
1058 if (SM.MaxSrc <
int(
HwLen)) {
1059 memcpy(NewMask.
data(), SM.Mask.data(),
sizeof(
int)*VecLen);
1062 if (SM.MinSrc >=
int(
HwLen)) {
1063 for (
int I = 0;
I != VecLen; ++
I) {
1072 int MinSrc = SM.MinSrc;
1073 if (SM.MaxSrc <
int(
HwLen)) {
1075 }
else if (SM.MinSrc >
int(
HwLen)) {
1077 MinSrc = SM.MinSrc -
HwLen;
1080 if (isUInt<3>(MinSrc) || isUInt<3>(
HwLen-MinSrc)) {
1081 bool IsRight = isUInt<3>(MinSrc);
1084 unsigned Opc = IsRight ? Hexagon::V6_valignbi
1085 : Hexagon::V6_vlalignbi;
1086 Results.push(Opc, Ty, {Vb, Va,
S});
1091 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)});
1093 for (
int I = 0;
I != VecLen; ++
I) {
1099 return OpRef::res(
Results.top());
1102 if (Options & PackMux) {
1108 for (
int I = 0;
I != VecLen; ++
I) {
1112 if (M >=
int(
HwLen))
1123 return vmuxs(MuxBytes, Va, Vb,
Results);
1129 OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1132 unsigned HalfMask = 0;
1134 for (
int M : SM.Mask) {
1137 HalfMask |= (1u << (
M >> LogHw));
1150 OpRef Inp[2] = { Va, Vb };
1151 OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1153 uint8_t HalfIdx[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
1155 for (
unsigned I = 0;
I != 4; ++
I) {
1156 if ((HalfMask & (1u <<
I)) == 0)
1159 OpRef
Op = Inp[
I/2];
1160 Out[Idx] = (
I & 1) ? OpRef::hi(
Op) : OpRef::lo(
Op);
1164 int VecLen = SM.Mask.size();
1165 for (
int I = 0;
I != VecLen; ++
I) {
1168 uint8_t Idx = HalfIdx[
M >> LogHw];
1169 assert(Idx == 0 || Idx == 1);
1175 return concat(Out[0], Out[1],
Results);
1184 SDValue B = getVectorConstant(Bytes, dl);
1185 Results.push(Hexagon::V6_vd0, ByteTy, {});
1186 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(
B), OpRef::res(-1)});
1187 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1188 return OpRef::res(
Results.top());
1194 size_t S = Bytes.
size() / 2;
1200 OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
1202 unsigned VecLen = SM.Mask.size();
1205 assert(
all_of(SM.Mask, [
this](
int M) { return M == -1 || M < int(HwLen); }));
1212 OpRef
P = perfect(SM, Va,
Results);
1215 return butterfly(SM, Va,
Results);
1218 OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1224 OpRef
C = contracting(SM, Va, Vb,
Results);
1228 int VecLen = SM.Mask.size();
1230 OpRef
P = packs(SM, Va, Vb,
Results, NewMask);
1232 return shuffs1(ShuffleMask(NewMask),
P,
Results);
1237 OpRef L = shuffs1(ShuffleMask(MaskL), Va,
Results);
1238 OpRef
R = shuffs1(ShuffleMask(MaskR), Vb,
Results);
1239 if (!L.isValid() || !
R.isValid())
1243 for (
int I = 0;
I != VecLen; ++
I) {
1247 return vmuxs(Bytes, L, R,
Results);
1250 OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
1252 int VecLen = SM.Mask.size();
1260 OpRef
P = packs(SM, OpRef::lo(Va), OpRef::hi(Va),
Results, PackedMask);
1262 ShuffleMask PM(PackedMask);
1267 OpRef L = shuffs1(PM.lo(),
P,
Results);
1269 if (L.isValid() &&
H.isValid())
1273 OpRef
R = perfect(SM, Va,
Results);
1278 OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va),
Results);
1279 OpRef
H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va),
Results);
1280 if (L.isValid() &&
H.isValid())
1286 OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1292 int VecLen = SM.Mask.size();
1294 OpRef
P = packp(SM, Va, Vb,
Results, PackedMask);
1296 return shuffp1(ShuffleMask(PackedMask),
P,
Results);
1301 OpRef L = shuffp1(ShuffleMask(MaskL), Va,
Results);
1302 OpRef
R = shuffp1(ShuffleMask(MaskR), Vb,
Results);
1303 if (!L.isValid() || !
R.isValid())
1308 for (
int I = 0;
I != VecLen; ++
I) {
1312 return vmuxp(Bytes, L, R,
Results);
1317 template <
typename T>
1324 template <
typename T>
1325 struct NullifyingVector :
public T {
1327 NullifyingVector(T &&V) :
T(V) {
1335 if (
F != Refs.
end())
1336 *
F->second =
nullptr;
1341 void HvxSelector::select(
SDNode *ISelN) {
1365 std::map<SDNode*,unsigned> NumOps;
1373 auto InSubNodes = [&SubNodes](
SDNode *
T) {
return SubNodes.
count(T); };
1374 for (
unsigned I = 0;
I != SubNodes.
size(); ++
I) {
1385 NumOps.insert({
S, OpN});
1390 for (
unsigned I = 0;
I != TmpQ.
size(); ++
I) {
1395 auto F = NumOps.find(U);
1397 if (
F->second > 0 && !--
F->second)
1406 NullifyingVector<decltype(TmpQ)::vector_type>
Queue(TmpQ.takeVector());
1408 Deleter DUQ(
DAG, Queue);
1423 unsigned VecLen =
Mask.size();
1424 bool HavePairs = (2*
HwLen == VecLen);
1443 for (
int I :
Mask) {
1445 Ops.push_back(
ISel.selectUndef(dl, LegalTy));
1472 if (2*
HwLen == VecLen) {
1495 OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
1498 if (!Va.isValid() || !Vb.isValid())
1508 int VecLen = SM.Mask.size();
1509 std::pair<int,unsigned> Strip =
findStrip(SM.Mask, 1, VecLen);
1514 if (Strip.second != 1 && Strip.second != 2)
1530 int NextInMask = SM.Mask[Strip.second];
1535 if (NextInMask < VecLen) {
1537 if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) {
1540 for (
int I = 0;
I !=
N/4; ++
I)
1541 if (SM.Mask[
I] != 4*
I)
1543 for (
int I = 0;
I !=
N/4; ++
I)
1544 if (SM.Mask[
I+
N/4] != 2 + 4*
I)
1546 for (
int I = 0;
I !=
N/4; ++
I)
1547 if (SM.Mask[
I+
N/2] !=
N + 4*
I)
1549 for (
int I = 0;
I !=
N/4; ++
I)
1550 if (SM.Mask[
I+3*
N/4] !=
N+2 + 4*
I)
1553 Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va});
1554 return OpRef::res(
Results.top());
1559 int L = Strip.second;
1561 if (Strip.first != 0 && Strip.first != L)
1564 for (
int I = L;
I <
N;
I += L) {
1568 if (
S.first - Strip.first != 2*
I)
1571 if (
S.second !=
unsigned(L))
1577 assert(Strip.first == 0 || Strip.first == L);
1578 using namespace Hexagon;
1580 Res.Opc = Strip.second == 1
1581 ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob)
1582 : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh);
1584 Res.Ops = { Vb, Va };
1586 return OpRef::res(
Results.top());
1591 int L = Strip.second;
1592 std::pair<int,unsigned> PrevS = Strip;
1594 for (
int I = L;
I <
N;
I += L) {
1596 if (
S.second != PrevS.second)
1598 int Diff = Flip ? PrevS.first -
S.first + 2*L
1599 :
S.first - PrevS.first;
1607 assert(Strip.first == 0 || Strip.first == L);
1608 using namespace Hexagon;
1610 Res.Opc = Strip.second == 1
1611 ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob)
1612 : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh);
1614 Res.Ops = { Vb, Va };
1616 return OpRef::res(
Results.top());
1619 OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
1632 int VecLen = SM.Mask.size();
1633 assert(2*
HwLen ==
unsigned(VecLen) &&
"Expecting vector-pair type");
1635 std::pair<int,unsigned> Strip =
findStrip(SM.Mask, 1, VecLen);
1642 if (Strip.first != 0)
1646 if (Strip.second != 1 && Strip.second != 2)
1650 int L = Strip.second;
1653 for (
int I = 2*L;
I < 2*
N;
I += 2*L) {
1655 if (
S.second !=
unsigned(L))
1661 for (
int I = L;
I < 2*
N;
I += 2*L) {
1663 if (
S.first != -1 ||
S.second !=
unsigned(L))
1667 unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
1668 : Hexagon::V6_vunpackuh;
1670 return OpRef::res(
Results.top());
1673 OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
1682 int VecLen = SM.Mask.size();
1684 unsigned LogLen =
Log2_32(VecLen);
1688 assert(LogLen == HwLog || LogLen == HwLog+1);
1689 bool HavePairs = LogLen == HwLog+1;
1779 bool InvertedPair =
false;
1780 if (HavePairs && SM.Mask[0] >=
int(
HwLen)) {
1781 for (
int i = 0,
e = SM.Mask.size();
i !=
e; ++
i) {
1785 InvertedPair =
true;
1792 if ((
Mask[0] &
X) != 0)
1794 for (
unsigned I = 1;
I != Num/2; ++
I) {
1806 for (
unsigned I = VecLen;
I >= 2;
I >>= 1) {
1808 unsigned X = XorPow2(LocalMask,
I);
1812 for (
int J =
I; J < VecLen; J +=
I) {
1813 if (XorPow2(LocalMask.slice(J,
I),
I) !=
X)
1845 std::set<CycleType> Cycles;
1846 std::set<unsigned>
All;
1848 for (
unsigned I : Perm)
1853 auto canonicalize = [LogLen](
const CycleType &
C) -> CycleType {
1854 unsigned LogPos,
N =
C.size();
1855 for (LogPos = 0; LogPos !=
N; ++LogPos)
1856 if (
C[LogPos] == LogLen-1)
1861 CycleType NewC(
C.begin()+LogPos,
C.end());
1862 NewC.append(
C.begin(),
C.begin()+LogPos);
1866 auto pfs = [](
const std::set<CycleType> &Cs,
unsigned Len) {
1871 const CycleType &
C = *Cs.begin();
1874 int D = Len -
C.size();
1875 if (
D != 0 &&
D != 1)
1878 bool IsDeal =
true, IsShuff =
true;
1879 for (
unsigned I = 1;
I != Len-
D; ++
I) {
1880 if (
C[
I] != Len-1-
I)
1882 if (
C[
I] !=
I-(1-
D))
1886 assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
1887 static unsigned Deals[] = { Hexagon::V6_vdealb, Hexagon::V6_vdealh };
1888 static unsigned Shufs[] = { Hexagon::V6_vshuffb, Hexagon::V6_vshuffh };
1889 return IsDeal ? Deals[
D] : (IsShuff ? Shufs[
D] : 0);
1892 while (!
All.empty()) {
1893 unsigned A = *
All.begin();
1897 for (
unsigned B = Perm[A];
B !=
A;
B = Perm[
B]) {
1903 Cycles.insert(canonicalize(
C));
1910 if (
unsigned(VecLen) ==
HwLen) {
1911 if (
unsigned SingleOpc = pfs(Cycles, LogLen)) {
1912 Results.push(SingleOpc, SingleTy, {Va});
1913 return OpRef::res(
Results.top());
1935 SwapElems.push_back(LogLen-1);
1936 for (
const CycleType &
C : Cycles) {
1938 unsigned First = (
C[0] == LogLen-1) ? 1 : 0;
1941 SwapElems.push_back(
C[0]);
1944 SwapElems.push_back(LogLen-1);
1947 OpRef
Arg = HavePairs ? Va
1948 : concat(Va, OpRef::undef(SingleTy),
Results);
1952 for (
unsigned I = 0,
E = SwapElems.size();
I !=
E; ) {
1953 bool IsInc =
I ==
E-1 || SwapElems[
I] < SwapElems[
I+1];
1954 unsigned S = (1u << SwapElems[
I]);
1956 while (++
I <
E-1 && IsInc == (SwapElems[
I] < SwapElems[
I+1]))
1957 S |= 1u << SwapElems[
I];
1960 S |= 1u << SwapElems[
I];
1967 Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
1969 Res.Ops = { OpRef::hi(
Arg), OpRef::lo(
Arg), OpRef::res(-1) };
1974 return HavePairs ?
Arg : OpRef::lo(
Arg);
1977 OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
1990 PermNetwork::Controls
FC, RC;
1992 int VecLen = SM.Mask.size();
1994 for (
int M : SM.Mask) {
1995 if (M != -1 && M >= VecLen)
2000 ForwardDeltaNetwork FN(SM.Mask);
2002 SDValue Ctl = getVectorConstant(
FC, dl);
2003 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2004 return OpRef::res(
Results.top());
2008 ReverseDeltaNetwork
RN(SM.Mask);
2010 SDValue Ctl = getVectorConstant(RC, dl);
2011 Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2012 return OpRef::res(
Results.top());
2016 BenesNetwork BN(SM.Mask);
2017 if (BN.run(
FC, RC)) {
2018 SDValue CtlF = getVectorConstant(
FC, dl);
2019 SDValue CtlR = getVectorConstant(RC, dl);
2020 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2021 Results.push(Hexagon::V6_vrdelta, ResTy,
2022 {OpRef::res(-1), OpRef(CtlR)});
2023 return OpRef::res(
Results.top());
2032 for (uint8_t
C :
Data)
2043 dbgs() <<
"Starting " << __func__ <<
" on node:\n";
2046 MVT ResTy =
N->getValueType(0).getSimpleVT();
2048 assert(ResTy.isVector() && ResTy.getVectorElementType() ==
MVT::i8);
2050 auto *SN = cast<ShuffleVectorSDNode>(
N);
2051 std::vector<int>
Mask(SN->getMask().begin(), SN->getMask().end());
2053 for (
int &Idx :
Mask)
2054 if (Idx != -1 && Idx < 0)
2057 unsigned VecLen =
Mask.size();
2058 bool HavePairs = (2*
HwLen == VecLen);
2059 assert(ResTy.getSizeInBits() / 8 == VecLen);
2064 bool UseLeft =
false, UseRight =
false;
2065 for (
unsigned I = 0;
I != VecLen; ++
I) {
2068 unsigned Idx =
Mask[
I];
2077 dbgs() <<
"VecLen=" << VecLen <<
" HwLen=" <<
HwLen <<
" UseLeft="
2078 << UseLeft <<
" UseRight=" << UseRight <<
" HavePairs="
2079 << HavePairs <<
'\n';
2082 if (!UseLeft && !UseRight) {
2090 Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2091 Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2092 OpRef Va = OpRef::res(
Results.top()-1);
2093 OpRef Vb = OpRef::res(
Results.top());
2095 OpRef Res = !HavePairs ? shuffs2(ShuffleMask(
Mask), Va, Vb,
Results)
2098 bool Done = Res.isValid();
2101 Results.push(TargetOpcode::COPY, ResTy, {Res});
2104 Done = scalarizeShuffle(
Mask,
SDLoc(
N), ResTy, Vec0, Vec1,
N);
2109 dbgs() <<
"Unhandled shuffle:\n";
2118 MVT Ty =
N->getValueType(0).getSimpleVT();
2124 if (
auto *CN = dyn_cast<ConstantSDNode>(RotV.
getNode())) {
2128 }
else if (isUInt<3>(
S)) {
2146 N->getValueType(0), {Vv, Vu, Rt});
2151 void HexagonDAGToDAGISel::SelectHvxShuffle(
SDNode *
N) {
2155 void HexagonDAGToDAGISel::SelectHvxRor(
SDNode *
N) {
2159 void HexagonDAGToDAGISel::SelectHvxVAlign(
SDNode *
N) {
2169 SDValue Modifier =
N->getOperand(5);
2173 unsigned IntNo = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
2177 case Intrinsic::hexagon_V6_vgathermhq:
2178 case Intrinsic::hexagon_V6_vgathermhq_128B:
2179 Opcode = Hexagon::V6_vgathermhq_pseudo;
2181 case Intrinsic::hexagon_V6_vgathermwq:
2182 case Intrinsic::hexagon_V6_vgathermwq_128B:
2183 Opcode = Hexagon::V6_vgathermwq_pseudo;
2185 case Intrinsic::hexagon_V6_vgathermhwq:
2186 case Intrinsic::hexagon_V6_vgathermhwq_128B:
2187 Opcode = Hexagon::V6_vgathermhwq_pseudo;
2193 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2196 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {
MemOp});
2198 ReplaceNode(
N, Result);
2206 SDValue Modifier =
N->getOperand(4);
2210 unsigned IntNo = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
2214 case Intrinsic::hexagon_V6_vgathermh:
2215 case Intrinsic::hexagon_V6_vgathermh_128B:
2216 Opcode = Hexagon::V6_vgathermh_pseudo;
2218 case Intrinsic::hexagon_V6_vgathermw:
2219 case Intrinsic::hexagon_V6_vgathermw_128B:
2220 Opcode = Hexagon::V6_vgathermw_pseudo;
2222 case Intrinsic::hexagon_V6_vgathermhw:
2223 case Intrinsic::hexagon_V6_vgathermhw_128B:
2224 Opcode = Hexagon::V6_vgathermhw_pseudo;
2230 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2233 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {
MemOp});
2235 ReplaceNode(
N, Result);
2239 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
2242 case Intrinsic::hexagon_V6_vaddcarry: {
2243 std::array<SDValue, 3> Ops = {
2244 {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3)}};
2246 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry,
SDLoc(
N), VTs, Ops);
2249 case Intrinsic::hexagon_V6_vaddcarry_128B: {
2250 std::array<SDValue, 3> Ops = {
2251 {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3)}};
2253 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry,
SDLoc(
N), VTs, Ops);
2256 case Intrinsic::hexagon_V6_vsubcarry: {
2257 std::array<SDValue, 3> Ops = {
2258 {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3)}};
2260 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry,
SDLoc(
N), VTs, Ops);
2263 case Intrinsic::hexagon_V6_vsubcarry_128B: {
2264 std::array<SDValue, 3> Ops = {
2265 {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3)}};
2267 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry,
SDLoc(
N), VTs, Ops);
2273 ReplaceUses(
N, Result);
2276 CurDAG->RemoveDeadNode(
N);