LLVM  15.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1 //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HexagonISelLowering.h"
10 #include "HexagonRegisterInfo.h"
11 #include "HexagonSubtarget.h"
13 #include "llvm/IR/IntrinsicsHexagon.h"
15 
16 using namespace llvm;
17 
// Hidden command-line option "hexagon-hvx-widen" (default 16). Its value is
// interpreted in bytes; getPreferredHvxVectorAction only consults it when the
// option was given explicitly (getNumOccurrences() > 0).
18 static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
19  cl::Hidden, cl::init(16),
20  cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
21 
26 
// Set up HVX lowering: register the HVX register classes for the vector
// types of the active mode (64-byte or 128-byte), then configure operation
// actions for the legal single-vector, vector-pair and boolean vector types.
// NOTE(review): this listing omits many statements of this function (most
// loop and branch bodies appear empty here); only what is visible is
// described by the comments below.
27 void
28 HexagonTargetLowering::initializeHVXLowering() {
29  if (Subtarget.useHVX64BOps()) {
30  addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
31  addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
32  addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
33  addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
34  addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
35  addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
36  // These "short" boolean vector types should be legal because
37  // they will appear as results of vector compares. If they were
38  // not legal, type legalization would try to make them legal
39  // and that would require using operations that do not use or
40  // produce such types. That, in turn, would imply using custom
41  // nodes, which would be unoptimizable by the DAG combiner.
42  // The idea is to rely on target-independent operations as much
43  // as possible.
44  addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
45  addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
46  addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
47  } else if (Subtarget.useHVX128BOps()) {
48  addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
49  addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
50  addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
51  addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
52  addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
53  addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
54  addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
55  addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
56  addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
  // Floating-point vector types are only registered in 128-byte mode, and
  // only when both HVX v68 ops and HVX floating point are enabled.
57  if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
58  addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
59  addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
60  addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
61  addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
62  }
63  }
64 
65  // Set up operation actions.
66 
67  bool Use64b = Subtarget.useHVX64BOps();
68  ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
69  ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
  // Byte-element types of a single vector (ByteV) and of a vector pair (ByteW).
70  MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
71  MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
72 
  // Helper: mark Opc on FromTy as Promote and record ToTy as the promoted type.
73  auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
74  setOperationAction(Opc, FromTy, Promote);
75  AddPromotedToType(Opc, FromTy, ToTy);
76  };
77 
78  // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
79  // Note: v16i1 -> i16 is handled in type legalization instead of op
80  // legalization.
90 
91  if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
92  Subtarget.useHVXFloatingPoint()) {
93 
94  static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
95  static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
96 
97  for (MVT T : FloatV) {
103 
106 
109 
112  // Custom-lower BUILD_VECTOR. The standard (target-independent)
113  // handling of it would convert it to a load, which is not always
114  // the optimal choice.
116  }
117 
118 
119  // BUILD_VECTOR with f16 operands cannot be promoted without
120  // promoting the result, so lower the node to vsplat or constant pool
124 
125  // Vector shuffles are always promoted to byte vectors (ByteV for single
126  // vectors, ByteW for pairs); a bitcast back to the FP type is generated.
127  setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
128  setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
129  setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
130  setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
131 
132  for (MVT P : FloatW) {
141 
142  // Custom-lower BUILD_VECTOR. The standard (target-independent)
143  // handling of it would convert it to a load, which is not always
144  // the optimal choice.
146  // Make concat-vectors custom to handle concats of more than 2 vectors.
148 
151  }
152 
153  if (Subtarget.useHVXQFloatOps()) {
156  } else if (Subtarget.useHVXIEEEFPOps()) {
159  }
160  }
161 
162  for (MVT T : LegalV) {
165 
176  if (T != ByteV) {
180  }
181 
184  if (T.getScalarType() != MVT::i32) {
187  }
188 
196  // Make concat-vectors custom to handle concats of more than 2 vectors.
205  if (T != ByteV) {
207  // HVX only has shifts of words and halfwords.
211 
212  // Promote all shuffles to operate on vectors of bytes.
213  setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
214  }
215 
216  if (Subtarget.useHVXQFloatOps()) {
221  } else if (Subtarget.useHVXIEEEFPOps()) {
226  }
227 
235  }
236 
237  for (MVT T : LegalW) {
238  // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
239  // independent) handling of it would convert it to a load, which is
240  // not always the optimal choice.
242  // Make concat-vectors custom to handle concats of more than 2 vectors.
244 
245  // Custom-lower these operations for pairs. Expand them into a concat
246  // of the corresponding operations on individual vectors.
255 
263 
274  if (T != ByteW) {
278 
279  // Promote all shuffles to operate on vectors of bytes.
280  setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
281  }
282 
285  if (T.getScalarType() != MVT::i32) {
288  }
289 
294  }
295 
308 
321 
322  // Boolean vectors.
323 
324  for (MVT T : LegalW) {
325  // Boolean types for vector pairs will overlap with the boolean
326  // types for single vectors, e.g.
327  // v64i8 -> v64i1 (single)
328  // v64i16 -> v64i1 (pair)
329  // Set these actions first, and allow the single actions to overwrite
330  // any duplicates.
331  MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
336  // Masked load/store takes a mask that may need splitting.
339  }
340 
341  for (MVT T : LegalV) {
342  MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
353  }
354 
355  if (Use64b) {
358  } else {
361  }
362 
363  // Handle store widening for short vectors.
364  unsigned HwLen = Subtarget.getVectorLength();
365  for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
366  if (ElemTy == MVT::i1)
367  continue;
368  int ElemWidth = ElemTy.getFixedSizeInBits();
  // Number of ElemTy elements that fit in one full HVX vector.
369  int MaxElems = (8*HwLen) / ElemWidth;
  // Visit every power-of-two element count below a full vector.
370  for (int N = 2; N < MaxElems; N *= 2) {
371  MVT VecTy = MVT::getVectorVT(ElemTy, N);
372  auto Action = getPreferredVectorAction(VecTy);
373  if (Action == TargetLoweringBase::TypeWidenVector) {
381 
382  MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
383  if (!isTypeLegal(BoolTy))
385  }
386  }
387  }
388 
390 }
391 
// Return the preferred type-legalization action for the vector type VecTy,
// or ~0u to defer to the default choice.
// NOTE(review): the statements controlled by three of the `if`s below are
// not visible in this listing, so the values returned on those paths are
// not documented here.
392 unsigned
393 HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
394  MVT ElemTy = VecTy.getVectorElementType();
395  unsigned VecLen = VecTy.getVectorNumElements();
396  unsigned HwLen = Subtarget.getVectorLength();
397 
398  // Split vectors of i1 that exceed byte vector length.
399  if (ElemTy == MVT::i1 && VecLen > HwLen)
401 
402  ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
403  // For shorter vectors of i1, widen them if any of the corresponding
404  // vectors of integers needs to be widened.
405  if (ElemTy == MVT::i1) {
  // Query each HVX element type with the same element count; the first
  // non-default answer (anything other than ~0u) is used for the i1 vector.
406  for (MVT T : Tys) {
407  assert(T != MVT::i1);
408  auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
409  if (A != ~0u)
410  return A;
411  }
412  return ~0u;
413  }
414 
415  // If the size of VecTy is at least half of the vector length,
416  // widen the vector. Note: the threshold was not selected in
417  // any scientific way.
418  if (llvm::is_contained(Tys, ElemTy)) {
419  unsigned VecWidth = VecTy.getSizeInBits();
  // The command-line threshold (in bytes, hence the 8*) is only honored
  // when the option was given explicitly.
420  bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
421  if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
423  unsigned HwWidth = 8*HwLen;
424  if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
426  }
427 
428  // Defer to default.
429  return ~0u;
430 }
431 
432 SDValue
433 HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
434  const SDLoc &dl, SelectionDAG &DAG) const {
435  SmallVector<SDValue,4> IntOps;
436  IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
437  append_range(IntOps, Ops);
438  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
439 }
440 
441 MVT
442 HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
443  assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
444 
445  MVT ElemTy = Tys.first.getVectorElementType();
446  return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
447  Tys.second.getVectorNumElements());
448 }
449 
450 HexagonTargetLowering::TypePair
451 HexagonTargetLowering::typeSplit(MVT VecTy) const {
452  assert(VecTy.isVector());
453  unsigned NumElem = VecTy.getVectorNumElements();
454  assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
455  MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
456  return { HalfTy, HalfTy };
457 }
458 
459 MVT
460 HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
461  MVT ElemTy = VecTy.getVectorElementType();
462  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
463  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
464 }
465 
466 MVT
467 HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
468  MVT ElemTy = VecTy.getVectorElementType();
469  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
470  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
471 }
472 
473 SDValue
474 HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
475  SelectionDAG &DAG) const {
476  if (ty(Vec).getVectorElementType() == ElemTy)
477  return Vec;
478  MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
479  return DAG.getBitcast(CastTy, Vec);
480 }
481 
482 SDValue
483 HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
484  SelectionDAG &DAG) const {
485  return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
486  Ops.second, Ops.first);
487 }
488 
489 HexagonTargetLowering::VectorPair
490 HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
491  SelectionDAG &DAG) const {
492  TypePair Tys = typeSplit(ty(Vec));
493  if (Vec.getOpcode() == HexagonISD::QCAT)
494  return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
495  return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
496 }
497 
498 bool
499 HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
500  return Subtarget.isHVXVectorType(Ty) &&
501  Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
502 }
503 
504 bool
505 HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
506  return Subtarget.isHVXVectorType(Ty) &&
507  Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
508 }
509 
// Predicate on Ty; the visible part requires Ty to be an HVX vector type
// with boolean vector types included in the check (second argument true).
// NOTE(review): the second operand of the `&&` is not visible in this
// listing -- confirm the remaining condition against the full source.
510 bool
511 HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
512  return Subtarget.isHVXVectorType(Ty, true) &&
514 }
515 
516 bool HexagonTargetLowering::allowsHvxMemoryAccess(
517  MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
518  // Bool vectors are excluded by default, but make it explicit to
519  // emphasize that bool vectors cannot be loaded or stored.
520  // Also, disallow double vector stores (to prevent unnecessary
521  // store widening in DAG combiner).
522  if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
523  return false;
524  if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
525  return false;
526  if (Fast)
527  *Fast = true;
528  return true;
529 }
530 
531 bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
532  MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
533  if (!Subtarget.isHVXVectorType(VecTy))
534  return false;
535  // XXX Should this be false? vmemu are a bit slower than vmem.
536  if (Fast)
537  *Fast = true;
538  return true;
539 }
540 
541 SDValue
542 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
543  SelectionDAG &DAG) const {
544  if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
545  ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
546 
547  unsigned ElemWidth = ElemTy.getSizeInBits();
548  if (ElemWidth == 8)
549  return ElemIdx;
550 
551  unsigned L = Log2_32(ElemWidth/8);
552  const SDLoc &dl(ElemIdx);
553  return DAG.getNode(ISD::SHL, dl, MVT::i32,
554  {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
555 }
556 
557 SDValue
558 HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
559  SelectionDAG &DAG) const {
560  unsigned ElemWidth = ElemTy.getSizeInBits();
561  assert(ElemWidth >= 8 && ElemWidth <= 32);
562  if (ElemWidth == 32)
563  return Idx;
564 
565  if (ty(Idx) != MVT::i32)
566  Idx = DAG.getBitcast(MVT::i32, Idx);
567  const SDLoc &dl(Idx);
568  SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
569  SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
570  return SubIdx;
571 }
572 
// Emit a vector shuffle of Op0 and Op1 according to Mask, expressed on
// byte vectors. Both operands must have the same type. If the element type
// is already i8, the shuffle is emitted directly on the operand type.
// Otherwise the mask is expanded to byte granularity and the operands are
// cast to i8 vectors before shuffling.
// NOTE(review): one line of the parameter list (declaring Op1 and Mask) is
// not visible in this listing.
573 SDValue
574 HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
576  SelectionDAG &DAG) const {
577  MVT OpTy = ty(Op0);
578  assert(OpTy == ty(Op1));
579 
580  MVT ElemTy = OpTy.getVectorElementType();
581  if (ElemTy == MVT::i8)
582  return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
583  assert(ElemTy.getSizeInBits() >= 8);
584 
585  MVT ResTy = tyVector(OpTy, MVT::i8);
  // Element size in bytes.
586  unsigned ElemSize = ElemTy.getSizeInBits() / 8;
587 
  // Each element index M becomes ElemSize consecutive byte indices starting
  // at M*ElemSize; a negative (undef) entry becomes ElemSize entries of -1.
588  SmallVector<int,128> ByteMask;
589  for (int M : Mask) {
590  if (M < 0) {
591  for (unsigned I = 0; I != ElemSize; ++I)
592  ByteMask.push_back(-1);
593  } else {
594  int NewM = M*ElemSize;
595  for (unsigned I = 0; I != ElemSize; ++I)
596  ByteMask.push_back(NewM+I);
597  }
598  }
599  assert(ResTy.getVectorNumElements() == ByteMask.size());
600  return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
601  opCastElem(Op1, MVT::i8, DAG), ByteMask);
602 }
603 
// Build a single HVX vector of type VecTy from the scalar elements in
// Values. The elements are first packed into 32-bit words; the visible
// strategies, tried in order, are:
//   1. all words equal (ignoring undefs): UNDEF, zero vector, or a splat;
//   2. all elements constant integers: load from the constant pool;
//   3. all elements extracted from one vector of the same or double
//      length: a shuffle of that vector;
//   4. otherwise: build two halves with VROR/VINSERTW0 and OR them.
// NOTE(review): the declarations of `Words`, `MF` and `Mask` are not
// visible in this listing.
604 SDValue
605 HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
606  const SDLoc &dl, MVT VecTy,
607  SelectionDAG &DAG) const {
608  unsigned VecLen = Values.size();
610  MVT ElemTy = VecTy.getVectorElementType();
611  unsigned ElemWidth = ElemTy.getSizeInBits();
612  unsigned HwLen = Subtarget.getVectorLength();
613 
614  unsigned ElemSize = ElemWidth / 8;
  // The values must fill exactly one hardware vector.
615  assert(ElemSize*VecLen == HwLen);
617 
  // Pack the elements into i32 words. Elements narrower than a word are
  // grouped (4 bytes or 2 halfwords per word) via buildVector32; i32 (and
  // f32 when HVX floating point is enabled) elements are bitcast directly.
618  if (VecTy.getVectorElementType() != MVT::i32 &&
619  !(Subtarget.useHVXFloatingPoint() &&
620  VecTy.getVectorElementType() == MVT::f32)) {
621  assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
622  unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
623  MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
624  for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
625  SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
626  Words.push_back(DAG.getBitcast(MVT::i32, W));
627  }
628  } else {
629  for (SDValue V : Values)
630  Words.push_back(DAG.getBitcast(MVT::i32, V));
631  }
  // Returns true if all non-undef entries of Values are the same value,
  // which is stored in SplatV. If every entry is undef, SplatV is set to
  // Values[0] (an undef) and true is returned.
632  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
633  unsigned NumValues = Values.size();
634  assert(NumValues > 0);
635  bool IsUndef = true;
636  for (unsigned i = 0; i != NumValues; ++i) {
637  if (Values[i].isUndef())
638  continue;
639  IsUndef = false;
640  if (!SplatV.getNode())
641  SplatV = Values[i];
642  else if (SplatV != Values[i])
643  return false;
644  }
645  if (IsUndef)
646  SplatV = Values[0];
647  return true;
648  };
649 
650  unsigned NumWords = Words.size();
651  SDValue SplatV;
652  bool IsSplat = isSplat(Words, SplatV);
653  if (IsSplat && isUndef(SplatV))
654  return DAG.getUNDEF(VecTy);
655  if (IsSplat) {
656  assert(SplatV.getNode());
657  auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
658  if (IdxN && IdxN->isZero())
659  return getZero(dl, VecTy, DAG);
  // Splat the word across a vector of i32 and bitcast to the result type.
660  MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
661  SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
662  return DAG.getBitcast(VecTy, S);
663  }
664 
665  // Delay recognizing constant vectors until here, so that we can generate
666  // a vsplat.
667  SmallVector<ConstantInt*, 128> Consts(VecLen);
668  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
669  if (AllConst) {
670  ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
671  (Constant**)Consts.end());
672  Constant *CV = ConstantVector::get(Tmp);
  // The constant-pool entry is aligned to the full vector length.
673  Align Alignment(HwLen);
674  SDValue CP =
675  LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
676  return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
677  MachinePointerInfo::getConstantPool(MF), Alignment);
678  }
679 
680  // A special case is a situation where the vector is built entirely from
681  // elements extracted from another vector. This could be done via a shuffle
682  // more efficiently, but typically, the size of the source vector will not
683  // match the size of the vector being built (which precludes the use of a
684  // shuffle directly).
685  // This only handles a single source vector, and the vector being built
686  // should be of a sub-vector type of the source vector type.
687  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
688  SmallVectorImpl<int> &SrcIdx) {
689  SDValue Vec;
690  for (SDValue V : Values) {
691  if (isUndef(V)) {
692  SrcIdx.push_back(-1);
693  continue;
694  }
695  if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
696  return false;
697  // All extracts should come from the same vector.
698  SDValue T = V.getOperand(0);
699  if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
700  return false;
701  Vec = T;
  // Only constant extraction indices are supported.
702  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
703  if (C == nullptr)
704  return false;
705  int I = C->getSExtValue();
706  assert(I >= 0 && "Negative element index");
707  SrcIdx.push_back(I);
708  }
709  SrcVec = Vec;
710  return true;
711  };
712 
713  SmallVector<int,128> ExtIdx;
714  SDValue ExtVec;
715  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
716  MVT ExtTy = ty(ExtVec);
717  unsigned ExtLen = ExtTy.getVectorNumElements();
718  if (ExtLen == VecLen || ExtLen == 2*VecLen) {
719  // Construct a new shuffle mask that will produce a vector with the same
720  // number of elements as the input vector, and such that the vector we
721  // want will be the initial subvector of it.
723  BitVector Used(ExtLen);
724 
725  for (int M : ExtIdx) {
726  Mask.push_back(M);
727  if (M >= 0)
728  Used.set(M);
729  }
730  // Fill the rest of the mask with the unused elements of ExtVec in hopes
731  // that it will result in a permutation of ExtVec's elements. It's still
732  // fine if it doesn't (e.g. if undefs are present, or elements are
733  // repeated), but permutations can always be done efficiently via vdelta
734  // and vrdelta.
735  for (unsigned I = 0; I != ExtLen; ++I) {
736  if (Mask.size() == ExtLen)
737  break;
738  if (!Used.test(I))
739  Mask.push_back(I);
740  }
741 
742  SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
743  DAG.getUNDEF(ExtTy), Mask);
744  if (ExtLen == VecLen)
745  return S;
  // Source was twice as long: take the vsub_lo subregister of the shuffle.
746  return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S);
747  }
748  }
749 
750  // Find most common element to initialize vector with. This is to avoid
751  // unnecessary vinsert/valign for cases where the same value is present
752  // many times. Creates a histogram of the vector's elements to find the
753  // most common element n.
754  assert(4*Words.size() == Subtarget.getVectorLength());
755  int VecHist[32];
756  int n = 0;
757  for (unsigned i = 0; i != NumWords; ++i) {
758  VecHist[i] = 0;
759  if (Words[i].isUndef())
760  continue;
  // Count occurrences of Words[i] at positions i and later.
761  for (unsigned j = i; j != NumWords; ++j)
762  if (Words[i] == Words[j])
763  VecHist[i]++;
764 
765  if (VecHist[i] > VecHist[n])
766  n = i;
767  }
768 
  // Start from a zero vector; if some word occurs more than once, start
  // from a VALIGN of the zero vector and a splat of that word instead.
769  SDValue HalfV = getZero(dl, VecTy, DAG);
770  if (VecHist[n] > 1) {
771  SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
772  HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
773  {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
774  }
775  SDValue HalfV0 = HalfV;
776  SDValue HalfV1 = HalfV;
777 
778  // Construct two halves in parallel, then or them together. Rn and Rm count
779  // number of rotations needed before the next element. One last rotation is
780  // performed post-loop to position the last element.
781  int Rn = 0, Rm = 0;
782  SDValue Sn, Sm;
783  SDValue N = HalfV0;
784  SDValue M = HalfV1;
785  for (unsigned i = 0; i != NumWords/2; ++i) {
786  // Rotate by element count since last insertion.
  // Words equal to the pre-splatted common word are skipped (no insert).
787  if (Words[i] != Words[n] || VecHist[n] <= 1) {
788  Sn = DAG.getConstant(Rn, dl, MVT::i32);
789  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
790  N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
791  {HalfV0, Words[i]});
792  Rn = 0;
793  }
794  if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
795  Sm = DAG.getConstant(Rm, dl, MVT::i32);
796  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
797  M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
798  {HalfV1, Words[i+NumWords/2]});
799  Rm = 0;
800  }
  // Advance both rotation counters by one word (4 bytes).
801  Rn += 4;
802  Rm += 4;
803  }
804  // Perform last rotation.
805  Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
806  Sm = DAG.getConstant(Rm, dl, MVT::i32);
807  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
808  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
809 
  // OR the two halves as i32 vectors, then cast back to the element type.
810  SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
811  SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
812 
813  SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
814 
815  SDValue OutV =
816  DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
817  return OutV;
818 }
819 
820 SDValue
821 HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
822  unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
823  MVT PredTy = ty(PredV);
824  unsigned HwLen = Subtarget.getVectorLength();
825  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
826 
827  if (Subtarget.isHVXVectorType(PredTy, true)) {
828  // Move the vector predicate SubV to a vector register, and scale it
829  // down to match the representation (bytes per type element) that VecV
830  // uses. The scaling down will pick every 2nd or 4th (every Scale-th
831  // in general) element and put them at the front of the resulting
832  // vector. This subvector will then be inserted into the Q2V of VecV.
833  // To avoid having an operation that generates an illegal type (short
834  // vector), generate a full size vector.
835  //
836  SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
837  SmallVector<int,128> Mask(HwLen);
838  // Scale = BitBytes(PredV) / Given BitBytes.
839  unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
840  unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
841 
842  for (unsigned i = 0; i != HwLen; ++i) {
843  unsigned Num = i % Scale;
844  unsigned Off = i / Scale;
845  Mask[BlockLen*Num + Off] = i;
846  }
847  SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
848  if (!ZeroFill)
849  return S;
850  // Fill the bytes beyond BlockLen with 0s.
851  // V6_pred_scalar2 cannot fill the entire predicate, so it only works
852  // when BlockLen < HwLen.
853  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
854  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
855  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
856  {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
857  SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
858  return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
859  }
860 
861  // Make sure that this is a valid scalar predicate.
862  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
863 
864  unsigned Bytes = 8 / PredTy.getVectorNumElements();
865  SmallVector<SDValue,4> Words[2];
866  unsigned IdxW = 0;
867 
868  auto Lo32 = [&DAG, &dl] (SDValue P) {
869  return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P);
870  };
871  auto Hi32 = [&DAG, &dl] (SDValue P) {
872  return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P);
873  };
874 
875  SDValue W0 = isUndef(PredV)
876  ? DAG.getUNDEF(MVT::i64)
877  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
878  Words[IdxW].push_back(Hi32(W0));
879  Words[IdxW].push_back(Lo32(W0));
880 
881  while (Bytes < BitBytes) {
882  IdxW ^= 1;
883  Words[IdxW].clear();
884 
885  if (Bytes < 4) {
886  for (const SDValue &W : Words[IdxW ^ 1]) {
887  SDValue T = expandPredicate(W, dl, DAG);
888  Words[IdxW].push_back(Hi32(T));
889  Words[IdxW].push_back(Lo32(T));
890  }
891  } else {
892  for (const SDValue &W : Words[IdxW ^ 1]) {
893  Words[IdxW].push_back(W);
894  Words[IdxW].push_back(W);
895  }
896  }
897  Bytes *= 2;
898  }
899 
900  assert(Bytes == BitBytes);
901 
902  SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
903  SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
904  for (const SDValue &W : Words[IdxW]) {
905  Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
906  Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
907  }
908 
909  return Vec;
910 }
911 
912 SDValue
913 HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
914  const SDLoc &dl, MVT VecTy,
915  SelectionDAG &DAG) const {
916  // Construct a vector V of bytes, such that a comparison V >u 0 would
917  // produce the required vector predicate.
918  unsigned VecLen = Values.size();
919  unsigned HwLen = Subtarget.getVectorLength();
920  assert(VecLen <= HwLen || VecLen == 8*HwLen);
922  bool AllT = true, AllF = true;
923 
924  auto IsTrue = [] (SDValue V) {
925  if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
926  return !N->isZero();
927  return false;
928  };
929  auto IsFalse = [] (SDValue V) {
930  if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
931  return N->isZero();
932  return false;
933  };
934 
935  if (VecLen <= HwLen) {
936  // In the hardware, each bit of a vector predicate corresponds to a byte
937  // of a vector register. Calculate how many bytes does a bit of VecTy
938  // correspond to.
939  assert(HwLen % VecLen == 0);
940  unsigned BitBytes = HwLen / VecLen;
941  for (SDValue V : Values) {
942  AllT &= IsTrue(V);
943  AllF &= IsFalse(V);
944 
945  SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
946  : DAG.getUNDEF(MVT::i8);
947  for (unsigned B = 0; B != BitBytes; ++B)
948  Bytes.push_back(Ext);
949  }
950  } else {
951  // There are as many i1 values, as there are bits in a vector register.
952  // Divide the values into groups of 8 and check that each group consists
953  // of the same value (ignoring undefs).
954  for (unsigned I = 0; I != VecLen; I += 8) {
955  unsigned B = 0;
956  // Find the first non-undef value in this group.
957  for (; B != 8; ++B) {
958  if (!Values[I+B].isUndef())
959  break;
960  }
961  SDValue F = Values[I+B];
962  AllT &= IsTrue(F);
963  AllF &= IsFalse(F);
964 
965  SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
966  : DAG.getUNDEF(MVT::i8);
967  Bytes.push_back(Ext);
968  // Verify that the rest of values in the group are the same as the
969  // first.
970  for (; B != 8; ++B)
971  assert(Values[I+B].isUndef() || Values[I+B] == F);
972  }
973  }
974 
975  if (AllT)
976  return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
977  if (AllF)
978  return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
979 
980  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
981  SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
982  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
983 }
984 
985 SDValue
986 HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
987  const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
988  MVT ElemTy = ty(VecV).getVectorElementType();
989 
990  unsigned ElemWidth = ElemTy.getSizeInBits();
991  assert(ElemWidth >= 8 && ElemWidth <= 32);
992  (void)ElemWidth;
993 
994  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
995  SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
996  {VecV, ByteIdx});
997  if (ElemTy == MVT::i32)
998  return ExWord;
999 
1000  // Have an extracted word, need to extract the smaller element out of it.
1001  // 1. Extract the bits of (the original) IdxV that correspond to the index
1002  // of the desired element in the 32-bit word.
1003  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1004  // 2. Extract the element from the word.
1005  SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1006  return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1007 }
1008 
1009 SDValue
1010 HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1011  const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1012  // Implement other return types if necessary.
1013  assert(ResTy == MVT::i1);
1014 
1015  unsigned HwLen = Subtarget.getVectorLength();
1016  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1017  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1018 
1019  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1020  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1021  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1022 
1023  SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1024  SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1025  return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1026 }
1027 
1028 SDValue
1029 HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1030  SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1031  MVT ElemTy = ty(VecV).getVectorElementType();
1032 
1033  unsigned ElemWidth = ElemTy.getSizeInBits();
1034  assert(ElemWidth >= 8 && ElemWidth <= 32);
1035  (void)ElemWidth;
1036 
1037  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1038  SDValue ByteIdxV) {
1039  MVT VecTy = ty(VecV);
1040  unsigned HwLen = Subtarget.getVectorLength();
1041  SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
1042  {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)});
1043  SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1044  SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1045  SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1046  {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1047  SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1048  return TorV;
1049  };
1050 
1051  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1052  if (ElemTy == MVT::i32)
1053  return InsertWord(VecV, ValV, ByteIdx);
1054 
1055  // If this is not inserting a 32-bit word, convert it into such a thing.
1056  // 1. Extract the existing word from the target vector.
1057  SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1058  {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1059  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1060  dl, MVT::i32, DAG);
1061 
1062  // 2. Treating the extracted word as a 32-bit vector, insert the given
1063  // value into it.
1064  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1065  MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1066  SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1067  ValV, SubIdx, dl, ElemTy, DAG);
1068 
1069  // 3. Insert the 32-bit word back into the original vector.
1070  return InsertWord(VecV, Ins, ByteIdx);
1071 }
1072 
1073 SDValue
1074 HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1075  SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1076  unsigned HwLen = Subtarget.getVectorLength();
1077  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1078  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1079 
1080  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1081  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1082  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1083  ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1084 
1085  SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1086  return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1087 }
1088 
1089 SDValue
1090 HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV,
1091  const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1092  MVT VecTy = ty(VecV);
1093  unsigned HwLen = Subtarget.getVectorLength();
1094  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
1095  MVT ElemTy = VecTy.getVectorElementType();
1096  unsigned ElemWidth = ElemTy.getSizeInBits();
1097 
1098  // If the source vector is a vector pair, get the single vector containing
1099  // the subvector of interest. The subvector will never overlap two single
1100  // vectors.
1101  if (isHvxPairTy(VecTy)) {
1102  unsigned SubIdx;
1103  if (Idx * ElemWidth >= 8*HwLen) {
1104  SubIdx = Hexagon::vsub_hi;
1105  Idx -= VecTy.getVectorNumElements() / 2;
1106  } else {
1107  SubIdx = Hexagon::vsub_lo;
1108  }
1109  VecTy = typeSplit(VecTy).first;
1110  VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
1111  if (VecTy == ResTy)
1112  return VecV;
1113  }
1114 
1115  // The only meaningful subvectors of a single HVX vector are those that
1116  // fit in a scalar register.
1117  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1118 
1119  MVT WordTy = tyVector(VecTy, MVT::i32);
1120  SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1121  unsigned WordIdx = (Idx*ElemWidth) / 32;
1122 
1123  SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1124  SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1125  if (ResTy.getSizeInBits() == 32)
1126  return DAG.getBitcast(ResTy, W0);
1127 
1128  SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1129  SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1130  SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0});
1131  return DAG.getBitcast(ResTy, WW);
1132 }
1133 
1134 SDValue
1135 HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1136  const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1137  MVT VecTy = ty(VecV);
1138  unsigned HwLen = Subtarget.getVectorLength();
1139  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1140  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1141  // IdxV is required to be a constant.
1142  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
1143 
1144  unsigned ResLen = ResTy.getVectorNumElements();
1145  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1146  unsigned Offset = Idx * BitBytes;
1147  SDValue Undef = DAG.getUNDEF(ByteTy);
1149 
1150  if (Subtarget.isHVXVectorType(ResTy, true)) {
1151  // Converting between two vector predicates. Since the result is shorter
1152  // than the source, it will correspond to a vector predicate with the
1153  // relevant bits replicated. The replication count is the ratio of the
1154  // source and target vector lengths.
1155  unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1156  assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1157  for (unsigned i = 0; i != HwLen/Rep; ++i) {
1158  for (unsigned j = 0; j != Rep; ++j)
1159  Mask.push_back(i + Offset);
1160  }
1161  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1162  return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
1163  }
1164 
1165  // Converting between a vector predicate and a scalar predicate. In the
1166  // vector predicate, a group of BitBytes bits will correspond to a single
1167  // i1 element of the source vector type. Those bits will all have the same
1168  // value. The same will be true for ByteVec, where each byte corresponds
1169  // to a bit in the vector predicate.
1170  // The algorithm is to traverse the ByteVec, going over the i1 values from
1171  // the source vector, and generate the corresponding representation in an
1172  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1173  // elements so that the interesting 8 bytes will be in the low end of the
1174  // vector.
1175  unsigned Rep = 8 / ResLen;
1176  // Make sure the output fill the entire vector register, so repeat the
1177  // 8-byte groups as many times as necessary.
1178  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
1179  // This will generate the indexes of the 8 interesting bytes.
1180  for (unsigned i = 0; i != ResLen; ++i) {
1181  for (unsigned j = 0; j != Rep; ++j)
1182  Mask.push_back(Offset + i*BitBytes);
1183  }
1184  }
1185 
1186  SDValue Zero = getZero(dl, MVT::i32, DAG);
1187  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1188  // Combine the two low words from ShuffV into a v8i8, and byte-compare
1189  // them against 0.
1190  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1192  {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
1193  SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0});
1194  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1195  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
1196 }
1197 
1198 SDValue
1199 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1200  SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1201  MVT VecTy = ty(VecV);
1202  MVT SubTy = ty(SubV);
1203  unsigned HwLen = Subtarget.getVectorLength();
1204  MVT ElemTy = VecTy.getVectorElementType();
1205  unsigned ElemWidth = ElemTy.getSizeInBits();
1206 
1207  bool IsPair = isHvxPairTy(VecTy);
1208  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1209  // The two single vectors that VecV consists of, if it's a pair.
1210  SDValue V0, V1;
1211  SDValue SingleV = VecV;
1212  SDValue PickHi;
1213 
1214  if (IsPair) {
1215  V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV);
1216  V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV);
1217 
1218  SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1219  dl, MVT::i32);
1220  PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1221  if (isHvxSingleTy(SubTy)) {
1222  if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1223  unsigned Idx = CN->getZExtValue();
1224  assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1225  unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1226  return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1227  }
1228  // If IdxV is not a constant, generate the two variants: with the
1229  // SubV as the high and as the low subregister, and select the right
1230  // pair based on the IdxV.
1231  SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1232  SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1233  return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1234  }
1235  // The subvector being inserted must be entirely contained in one of
1236  // the vectors V0 or V1. Set SingleV to the correct one, and update
1237  // IdxV to be the index relative to the beginning of that vector.
1238  SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1239  IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1240  SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1241  }
1242 
1243  // The only meaningful subvectors of a single HVX vector are those that
1244  // fit in a scalar register.
1245  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1246  // Convert IdxV to be index in bytes.
1247  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1248  if (!IdxN || !IdxN->isZero()) {
1249  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1250  DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1251  SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1252  }
1253  // When inserting a single word, the rotation back to the original position
1254  // would be by HwLen-Idx, but if two words are inserted, it will need to be
1255  // by (HwLen-4)-Idx.
1256  unsigned RolBase = HwLen;
1257  if (VecTy.getSizeInBits() == 32) {
1258  SDValue V = DAG.getBitcast(MVT::i32, SubV);
1259  SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V);
1260  } else {
1261  SDValue V = DAG.getBitcast(MVT::i64, SubV);
1262  SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V);
1263  SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V);
1264  SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1265  SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1266  DAG.getConstant(4, dl, MVT::i32));
1267  SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1268  RolBase = HwLen-4;
1269  }
1270  // If the vector wasn't ror'ed, don't ror it back.
1271  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1272  SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1273  DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1274  SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1275  }
1276 
1277  if (IsPair) {
1278  SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1279  SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1280  return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1281  }
1282  return SingleV;
1283 }
1284 
1285 SDValue
1286 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1287  SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1288  MVT VecTy = ty(VecV);
1289  MVT SubTy = ty(SubV);
1290  assert(Subtarget.isHVXVectorType(VecTy, true));
1291  // VecV is an HVX vector predicate. SubV may be either an HVX vector
1292  // predicate as well, or it can be a scalar predicate.
1293 
1294  unsigned VecLen = VecTy.getVectorNumElements();
1295  unsigned HwLen = Subtarget.getVectorLength();
1296  assert(HwLen % VecLen == 0 && "Unexpected vector type");
1297 
1298  unsigned Scale = VecLen / SubTy.getVectorNumElements();
1299  unsigned BitBytes = HwLen / VecLen;
1300  unsigned BlockLen = HwLen / Scale;
1301 
1302  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1303  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1304  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
1305  SDValue ByteIdx;
1306 
1307  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1308  if (!IdxN || !IdxN->isZero()) {
1309  ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1310  DAG.getConstant(BitBytes, dl, MVT::i32));
1311  ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
1312  }
1313 
1314  // ByteVec is the target vector VecV rotated in such a way that the
1315  // subvector should be inserted at index 0. Generate a predicate mask
1316  // and use vmux to do the insertion.
1317  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1318  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1319  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1320  {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1321  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1322  // Rotate ByteVec back, and convert to a vector predicate.
1323  if (!IdxN || !IdxN->isZero()) {
1324  SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1325  SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1326  ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
1327  }
1328  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1329 }
1330 
1331 SDValue
1332 HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1333  MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1334  // Sign- and any-extending of a vector predicate to a vector register is
1335  // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1336  // a vector of 1s (where the 1s are of type matching the vector type).
1337  assert(Subtarget.isHVXVectorType(ResTy));
1338  if (!ZeroExt)
1339  return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1340 
1341  assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1342  SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1343  DAG.getConstant(1, dl, MVT::i32));
1344  SDValue False = getZero(dl, ResTy, DAG);
1345  return DAG.getSelect(dl, ResTy, VecV, True, False);
1346 }
1347 
1348 SDValue
1349 HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1350  MVT ResTy, SelectionDAG &DAG) const {
1351  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1352  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1353  // vector register. The remaining bits of the vector register are
1354  // unspecified.
1355 
1356  MachineFunction &MF = DAG.getMachineFunction();
1357  unsigned HwLen = Subtarget.getVectorLength();
1358  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1359  MVT PredTy = ty(VecQ);
1360  unsigned PredLen = PredTy.getVectorNumElements();
1361  assert(HwLen % PredLen == 0);
1362  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1363 
1364  Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1366  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1367  // These are bytes with the LSB rotated left with respect to their index.
1368  for (unsigned i = 0; i != HwLen/8; ++i) {
1369  for (unsigned j = 0; j != 8; ++j)
1370  Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1371  }
1372  Constant *CV = ConstantVector::get(Tmp);
1373  Align Alignment(HwLen);
1374  SDValue CP =
1375  LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1376  SDValue Bytes =
1377  DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1378  MachinePointerInfo::getConstantPool(MF), Alignment);
1379 
1380  // Select the bytes that correspond to true bits in the vector predicate.
1381  SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1382  getZero(dl, VecTy, DAG));
1383  // Calculate the OR of all bytes in each group of 8. That will compress
1384  // all the individual bits into a single byte.
1385  // First, OR groups of 4, via vrmpy with 0x01010101.
1386  SDValue All1 =
1387  DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1388  SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1389  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1390  SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1391  {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1392  SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1393 
1394  // Pick every 8th byte and coalesce them at the beginning of the output.
1395  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1396  // byte and so on.
1398  for (unsigned i = 0; i != HwLen; ++i)
1399  Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1400  SDValue Collect =
1401  DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1402  return DAG.getBitcast(ResTy, Collect);
1403 }
1404 
1405 SDValue
1406 HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1407  const {
1408  const SDLoc &dl(Op);
1409  MVT VecTy = ty(Op);
1410 
1411  unsigned Size = Op.getNumOperands();
1413  for (unsigned i = 0; i != Size; ++i)
1414  Ops.push_back(Op.getOperand(i));
1415 
1416  // First, split the BUILD_VECTOR for vector pairs. We could generate
1417  // some pairs directly (via splat), but splats should be generated
1418  // by the combiner prior to getting here.
1419  if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
1420  ArrayRef<SDValue> A(Ops);
1421  MVT SingleTy = typeSplit(VecTy).first;
1422  SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
1423  SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
1424  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1425  }
1426 
1427  if (VecTy.getVectorElementType() == MVT::i1)
1428  return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1429 
1430  // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1431  // not a legal type, just bitcast the node to use i16
1432  // types and bitcast the result back to f16
1433  if (VecTy.getVectorElementType() == MVT::f16) {
1434  SmallVector<SDValue,64> NewOps;
1435  for (unsigned i = 0; i != Size; i++)
1436  NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1437 
1438  SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1439  tyVector(VecTy, MVT::i16), NewOps);
1440  return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1441  }
1442 
1443  return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1444 }
1445 
1446 SDValue
1447 HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1448  const {
1449  const SDLoc &dl(Op);
1450  MVT VecTy = ty(Op);
1451  MVT ArgTy = ty(Op.getOperand(0));
1452 
1453  if (ArgTy == MVT::f16) {
1454  MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1455  SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1456  SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1457  SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1458  return DAG.getBitcast(VecTy, Splat);
1459  }
1460 
1461  return SDValue();
1462 }
1463 
1464 SDValue
1465 HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1466  const {
1467  // Vector concatenation of two integer (non-bool) vectors does not need
1468  // special lowering. Custom-lower concats of bool vectors and expand
1469  // concats of more than 2 vectors.
1470  MVT VecTy = ty(Op);
1471  const SDLoc &dl(Op);
1472  unsigned NumOp = Op.getNumOperands();
1473  if (VecTy.getVectorElementType() != MVT::i1) {
1474  if (NumOp == 2)
1475  return Op;
1476  // Expand the other cases into a build-vector.
1477  SmallVector<SDValue,8> Elems;
1478  for (SDValue V : Op.getNode()->ops())
1479  DAG.ExtractVectorElements(V, Elems);
1480  // A vector of i16 will be broken up into a build_vector of i16's.
1481  // This is a problem, since at the time of operation legalization,
1482  // all operations are expected to be type-legalized, and i16 is not
1483  // a legal type. If any of the extracted elements is not of a valid
1484  // type, sign-extend it to a valid one.
1485  for (unsigned i = 0, e = Elems.size(); i != e; ++i) {
1486  SDValue V = Elems[i];
1487  MVT Ty = ty(V);
1488  if (!isTypeLegal(Ty)) {
1489  EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty);
1490  if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1491  Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1492  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1493  V.getOperand(0), V.getOperand(1)),
1494  DAG.getValueType(Ty));
1495  continue;
1496  }
1497  // A few less complicated cases.
1498  switch (V.getOpcode()) {
1499  case ISD::Constant:
1500  Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
1501  break;
1502  case ISD::UNDEF:
1503  Elems[i] = DAG.getUNDEF(NTy);
1504  break;
1505  case ISD::TRUNCATE:
1506  Elems[i] = V.getOperand(0);
1507  break;
1508  default:
1509  llvm_unreachable("Unexpected vector element");
1510  }
1511  }
1512  }
1513  return DAG.getBuildVector(VecTy, dl, Elems);
1514  }
1515 
1516  assert(VecTy.getVectorElementType() == MVT::i1);
1517  unsigned HwLen = Subtarget.getVectorLength();
1518  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1519 
1520  SDValue Op0 = Op.getOperand(0);
1521 
1522  // If the operands are HVX types (i.e. not scalar predicates), then
1523  // defer the concatenation, and create QCAT instead.
1524  if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1525  if (NumOp == 2)
1526  return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1527 
1528  ArrayRef<SDUse> U(Op.getNode()->ops());
1529  SmallVector<SDValue,4> SV(U.begin(), U.end());
1530  ArrayRef<SDValue> Ops(SV);
1531 
1532  MVT HalfTy = typeSplit(VecTy).first;
1533  SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1534  Ops.take_front(NumOp/2));
1535  SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1536  Ops.take_back(NumOp/2));
1537  return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1538  }
1539 
1540  // Count how many bytes (in a vector register) each bit in VecTy
1541  // corresponds to.
1542  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1543 
1544  SmallVector<SDValue,8> Prefixes;
1545  for (SDValue V : Op.getNode()->op_values()) {
1546  SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1547  Prefixes.push_back(P);
1548  }
1549 
1550  unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1551  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1552  SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
1553  SDValue Res = getZero(dl, ByteTy, DAG);
1554  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1555  Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1556  Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1557  }
1558  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1559 }
1560 
1561 SDValue
1562 HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1563  const {
1564  // Change the type of the extracted element to i32.
1565  SDValue VecV = Op.getOperand(0);
1566  MVT ElemTy = ty(VecV).getVectorElementType();
1567  const SDLoc &dl(Op);
1568  SDValue IdxV = Op.getOperand(1);
1569  if (ElemTy == MVT::i1)
1570  return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1571 
1572  return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1573 }
1574 
1575 SDValue
1576 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1577  const {
1578  const SDLoc &dl(Op);
1579  MVT VecTy = ty(Op);
1580  SDValue VecV = Op.getOperand(0);
1581  SDValue ValV = Op.getOperand(1);
1582  SDValue IdxV = Op.getOperand(2);
1583  MVT ElemTy = ty(VecV).getVectorElementType();
1584  if (ElemTy == MVT::i1)
1585  return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1586 
1587  if (ElemTy == MVT::f16) {
1588  SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
1589  tyVector(VecTy, MVT::i16),
1590  DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1591  DAG.getBitcast(MVT::i16, ValV), IdxV);
1592  return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1593  }
1594 
1595  return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1596 }
1597 
1598 SDValue
1599 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1600  const {
1601  SDValue SrcV = Op.getOperand(0);
1602  MVT SrcTy = ty(SrcV);
1603  MVT DstTy = ty(Op);
1604  SDValue IdxV = Op.getOperand(1);
1605  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
1606  assert(Idx % DstTy.getVectorNumElements() == 0);
1607  (void)Idx;
1608  const SDLoc &dl(Op);
1609 
1610  MVT ElemTy = SrcTy.getVectorElementType();
1611  if (ElemTy == MVT::i1)
1612  return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1613 
1614  return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG);
1615 }
1616 
1617 SDValue
1618 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1619  const {
1620  // Idx does not need to be a constant.
1621  SDValue VecV = Op.getOperand(0);
1622  SDValue ValV = Op.getOperand(1);
1623  SDValue IdxV = Op.getOperand(2);
1624 
1625  const SDLoc &dl(Op);
1626  MVT VecTy = ty(VecV);
1627  MVT ElemTy = VecTy.getVectorElementType();
1628  if (ElemTy == MVT::i1)
1629  return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1630 
1631  return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1632 }
1633 
1634 SDValue
1635 HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1636  // Lower any-extends of boolean vectors to sign-extends, since they
1637  // translate directly to Q2V. Zero-extending could also be done equally
1638  // fast, but Q2V is used/recognized in more places.
1639  // For all other vectors, use zero-extend.
1640  MVT ResTy = ty(Op);
1641  SDValue InpV = Op.getOperand(0);
1642  MVT ElemTy = ty(InpV).getVectorElementType();
1643  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1644  return LowerHvxSignExt(Op, DAG);
1645  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1646 }
1647 
1648 SDValue
1649 HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1650  MVT ResTy = ty(Op);
1651  SDValue InpV = Op.getOperand(0);
1652  MVT ElemTy = ty(InpV).getVectorElementType();
1653  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1654  return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1655  return Op;
1656 }
1657 
1658 SDValue
1659 HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1660  MVT ResTy = ty(Op);
1661  SDValue InpV = Op.getOperand(0);
1662  MVT ElemTy = ty(InpV).getVectorElementType();
1663  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1664  return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1665  return Op;
1666 }
1667 
1668 SDValue
1669 HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1670  // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1671  // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1672  const SDLoc &dl(Op);
1673  MVT ResTy = ty(Op);
1674  SDValue InpV = Op.getOperand(0);
1675  assert(ResTy == ty(InpV));
1676 
1677  // Calculate the vectors of 1 and bitwidth(x).
1678  MVT ElemTy = ty(InpV).getVectorElementType();
1679  unsigned ElemWidth = ElemTy.getSizeInBits();
1680 
1681  SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1682  DAG.getConstant(1, dl, MVT::i32));
1683  SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1684  DAG.getConstant(ElemWidth, dl, MVT::i32));
1685  SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1686  DAG.getConstant(-1, dl, MVT::i32));
1687 
1688  // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1689  // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1690  // it separately in custom combine or selection).
1691  SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1692  {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1693  DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1694  return DAG.getNode(ISD::SUB, dl, ResTy,
1695  {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1696 }
1697 
1698 SDValue
1699 HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1700  MVT ResTy = ty(Op);
1701  assert(ResTy.isVector());
1702  const SDLoc &dl(Op);
1703  SmallVector<int,256> ShuffMask;
1704 
1705  MVT ElemTy = ResTy.getVectorElementType();
1706  unsigned VecLen = ResTy.getVectorNumElements();
1707  SDValue Vs = Op.getOperand(0);
1708  SDValue Vt = Op.getOperand(1);
1709  bool IsSigned = Op.getOpcode() == ISD::MULHS;
1710 
1711  if (ElemTy == MVT::i8 || ElemTy == MVT::i16) {
1712  // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
1713  // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
1714  // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
1715  // For i16, use V6_vmpyhv, which behaves in an analogous way to
1716  // V6_vmpybv: results Lo and Hi are products of even/odd elements
1717  // respectively.
1718  MVT ExtTy = typeExtElem(ResTy, 2);
1719  unsigned MpyOpc = ElemTy == MVT::i8
1720  ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv)
1721  : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv);
1722  SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);
1723 
1724  // Discard low halves of the resulting values, collect the high halves.
1725  for (unsigned I = 0; I < VecLen; I += 2) {
1726  ShuffMask.push_back(I+1); // Pick even element.
1727  ShuffMask.push_back(I+VecLen+1); // Pick odd element.
1728  }
1729  VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
1730  SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
1731  return DAG.getBitcast(ResTy, BS);
1732  }
1733 
1734  assert(ElemTy == MVT::i32);
1735  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
1736 
1737  auto MulHS_V60 = [&](SDValue Vs, SDValue Vt) {
1738  // mulhs(Vs,Vt) =
1739  // = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
1740  // = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
1741  // + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
1742  // = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
1743  // + Lo(Vs) *us Vt] >> 32
1744  // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
1745  // anything, so it cannot produce any carry over to higher bits),
1746  // so everything in [] can be shifted by 16 without loss of precision.
1747  // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
1748  // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16
1749  // Denote Hi(Vs) = Vs':
1750  // = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
1751  // = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
1752  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG);
1753  // Get Vs':
1754  SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG);
1755  SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
1756  {T0, S0, Vt}, DAG);
1757  // Shift by 16:
1758  SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG);
1759  // Get Vs'*Hi(Vt):
1760  SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG);
1761  // Add:
1762  SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2});
1763  return T3;
1764  };
1765 
1766  auto MulHS_V62 = [&](SDValue Vs, SDValue Vt) {
1767  MVT PairTy = typeJoin({ResTy, ResTy});
1768  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {Vs, Vt}, DAG);
1769  SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
1770  {T0, Vs, Vt}, DAG);
1771  return opSplit(T1, dl, DAG).second;
1772  };
1773 
1774  if (IsSigned) {
1775  if (Subtarget.useHVXV62Ops())
1776  return MulHS_V62(Vs, Vt);
1777  return MulHS_V60(Vs, Vt);
1778  }
1779 
1780  // Unsigned mulhw. (Would expansion using signed mulhw be better?)
1781 
1782  auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) {
1783  return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair);
1784  };
1785  auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) {
1786  return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair);
1787  };
1788 
1789  MVT PairTy = typeJoin({ResTy, ResTy});
1790  SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy,
1791  {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
1792  // Multiply-unsigned halfwords:
1793  // LoVec = Vs.uh[2i] * Vt.uh[2i],
1794  // HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
1795  SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG);
1796  // The low halves in the LoVec of the pair can be discarded. They are
1797  // not added to anything (in the full-precision product), so they cannot
1798  // produce a carry into the higher bits.
1799  SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG);
1800  // Swap low and high halves in Vt, and do the halfword multiplication
1801  // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
1802  SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG);
1803  SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG);
1804  // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
1805  // These products are words, but cannot be added directly because the
1806  // sums could overflow. Add these products, by halfwords, where each sum
1807  // of a pair of halfwords gives a word.
1808  SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
1809  {LoVec(T2), HiVec(T2)}, DAG);
1810  // Add the high halfwords from the products of the low halfwords.
1811  SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)});
1812  SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG);
1813  SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)});
1814  SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6});
1815  return T7;
1816 }
1817 
1818 SDValue
1819 HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1820  SDValue Val = Op.getOperand(0);
1821  MVT ResTy = ty(Op);
1822  MVT ValTy = ty(Val);
1823  const SDLoc &dl(Op);
1824 
1825  if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
1826  unsigned HwLen = Subtarget.getVectorLength();
1827  MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
1828  SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
1829  unsigned BitWidth = ResTy.getSizeInBits();
1830 
1831  if (BitWidth < 64) {
1832  SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
1833  dl, MVT::i32, DAG);
1834  if (BitWidth == 32)
1835  return W0;
1836  assert(BitWidth < 32u);
1837  return DAG.getZExtOrTrunc(W0, dl, ResTy);
1838  }
1839 
1840  // The result is >= 64 bits. The only options are 64 or 128.
1841  assert(BitWidth == 64 || BitWidth == 128);
1842  SmallVector<SDValue,4> Words;
1843  for (unsigned i = 0; i != BitWidth/32; ++i) {
1844  SDValue W = extractHvxElementReg(
1845  VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
1846  Words.push_back(W);
1847  }
1848  SmallVector<SDValue,2> Combines;
1849  assert(Words.size() % 2 == 0);
1850  for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
1851  SDValue C = DAG.getNode(
1852  HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]});
1853  Combines.push_back(C);
1854  }
1855 
1856  if (BitWidth == 64)
1857  return Combines[0];
1858 
1859  return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
1860  }
1861  if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
1862  // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
1863  unsigned BitWidth = ValTy.getSizeInBits();
1864  unsigned HwLen = Subtarget.getVectorLength();
1865  assert(BitWidth == HwLen);
1866 
1867  MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
1868  SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
1869  // Splat each byte of Val 8 times.
1870  // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
1871  // where b0, b1,..., b15 are least to most significant bytes of I.
1873  // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
1874  // These are bytes with the LSB rotated left with respect to their index.
1876  for (unsigned I = 0; I != HwLen / 8; ++I) {
1877  SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
1878  SDValue Byte =
1879  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
1880  for (unsigned J = 0; J != 8; ++J) {
1881  Bytes.push_back(Byte);
1882  Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
1883  }
1884  }
1885 
1886  MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
1887  SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
1888  SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
1889 
1890  // Each Byte in the I2V will be set iff corresponding bit is set in Val.
1891  I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
1892  return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
1893  }
1894 
1895  return Op;
1896 }
1897 
1898 SDValue
1899 HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
1900  // Sign- and zero-extends are legal.
1901  assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
1902  return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
1903  Op.getOperand(0));
1904 }
1905 
1906 SDValue
1907 HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
1908  MVT ResTy = ty(Op);
1909  if (ResTy.getVectorElementType() != MVT::i1)
1910  return Op;
1911 
1912  const SDLoc &dl(Op);
1913  unsigned HwLen = Subtarget.getVectorLength();
1914  unsigned VecLen = ResTy.getVectorNumElements();
1915  assert(HwLen % VecLen == 0);
1916  unsigned ElemSize = HwLen / VecLen;
1917 
1918  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
1919  SDValue S =
1920  DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
1921  DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
1922  DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
1923  return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
1924 }
1925 
1926 SDValue
1927 HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
1928  if (SDValue S = getVectorShiftByInt(Op, DAG))
1929  return S;
1930  return Op;
1931 }
1932 
1933 SDValue
1934 HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
1935  const SDLoc &dl(Op);
1936  MVT ResTy = ty(Op);
1937 
1938  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1939  bool Use64b = Subtarget.useHVX64BOps();
1940  unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast
1941  : Intrinsic::hexagon_V6_pred_typecast_128B;
1942  if (IntNo == IntPredCast) {
1943  SDValue Vs = Op.getOperand(1);
1944  MVT OpTy = ty(Vs);
1945  if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) {
1946  if (ResTy == OpTy)
1947  return Vs;
1948  return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs);
1949  }
1950  }
1951 
1952  return Op;
1953 }
1954 
1955 SDValue
1956 HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
1957  const SDLoc &dl(Op);
1958  unsigned HwLen = Subtarget.getVectorLength();
1959  MachineFunction &MF = DAG.getMachineFunction();
1960  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
1961  SDValue Mask = MaskN->getMask();
1962  SDValue Chain = MaskN->getChain();
1963  SDValue Base = MaskN->getBasePtr();
1964  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
1965 
1966  unsigned Opc = Op->getOpcode();
1967  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);
1968 
1969  if (Opc == ISD::MLOAD) {
1970  MVT ValTy = ty(Op);
1971  SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
1972  SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
1973  if (isUndef(Thru))
1974  return Load;
1975  SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
1976  return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
1977  }
1978 
1979  // MSTORE
1980  // HVX only has aligned masked stores.
1981 
1982  // TODO: Fold negations of the mask into the store.
1983  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
1984  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
1985  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
1986 
1987  if (MaskN->getAlign().value() % HwLen == 0) {
1988  SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
1989  {Mask, Base, Offset0, Value, Chain}, DAG);
1990  DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
1991  return Store;
1992  }
1993 
1994  // Unaligned case.
1995  auto StoreAlign = [&](SDValue V, SDValue A) {
1996  SDValue Z = getZero(dl, ty(V), DAG);
1997  // TODO: use funnel shifts?
1998  // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
1999  // upper half.
2000  SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2001  SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2002  return std::make_pair(LoV, HiV);
2003  };
2004 
2005  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2006  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2007  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2008  VectorPair Tmp = StoreAlign(MaskV, Base);
2009  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2010  DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2011  VectorPair ValueU = StoreAlign(Value, Base);
2012 
2013  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2014  SDValue StoreLo =
2015  getInstr(StoreOpc, dl, MVT::Other,
2016  {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2017  SDValue StoreHi =
2018  getInstr(StoreOpc, dl, MVT::Other,
2019  {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2020  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2021  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2022  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2023 }
2024 
2025 SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2026  SelectionDAG &DAG) const {
2027  // This conversion only applies to QFloat.
2028  assert(Subtarget.useHVXQFloatOps());
2029 
2030  assert(Op->getOpcode() == ISD::FP_EXTEND);
2031 
2032  MVT VecTy = ty(Op);
2033  MVT ArgTy = ty(Op.getOperand(0));
2034  const SDLoc &dl(Op);
2035  assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2036 
2037  SDValue F16Vec = Op.getOperand(0);
2038 
2039  APFloat FloatVal = APFloat(1.0f);
2040  bool Ignored;
2042  SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2043  SDValue VmpyVec =
2044  getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2045 
2046  MVT HalfTy = typeSplit(VecTy).first;
2047  VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2048  SDValue LoVec =
2049  getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2050  SDValue HiVec =
2051  getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2052 
2053  SDValue ShuffVec =
2054  getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2055  {HiVec, LoVec, DAG.getConstant(-4, dl, MVT::i32)}, DAG);
2056 
2057  return ShuffVec;
2058 }
2059 
2060 SDValue
2061 HexagonTargetLowering::LowerHvxConvertFpInt(SDValue Op, SelectionDAG &DAG)
2062  const {
2063  // This conversion only applies to IEEE.
2064  assert(Subtarget.useHVXIEEEFPOps());
2065 
2066  unsigned Opc = Op.getOpcode();
2067  // Catch invalid conversion ops (just in case).
2068  assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2069  Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2070  MVT ResTy = ty(Op);
2071 
2072  if (Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT) {
2073  MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2074  // There are only conversions of f16.
2075  if (FpTy != MVT::f16)
2076  return SDValue();
2077 
2078  MVT IntTy = ResTy.getVectorElementType();
2079  // Other int types aren't legal in HVX, so we shouldn't see them here.
2080  assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2081  // Conversions to i8 and i16 are legal.
2082  if (IntTy == MVT::i8 || IntTy == MVT::i16)
2083  return Op;
2084  } else {
2085  // Converting int -> fp.
2086  if (ResTy.getVectorElementType() != MVT::f16)
2087  return SDValue();
2088  MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2089  // Other int types aren't legal in HVX, so we shouldn't see them here.
2090  assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2091  // i8, i16 -> f16 is legal.
2092  if (IntTy == MVT::i8 || IntTy == MVT::i16)
2093  return Op;
2094  }
2095 
2096  return SDValue();
2097 }
2098 
2099 SDValue
2100 HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
2101  assert(!Op.isMachineOpcode());
2102  SmallVector<SDValue,2> OpsL, OpsH;
2103  const SDLoc &dl(Op);
2104 
2105  auto SplitVTNode = [&DAG,this] (const VTSDNode *N) {
2106  MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
2107  SDValue TV = DAG.getValueType(Ty);
2108  return std::make_pair(TV, TV);
2109  };
2110 
2111  for (SDValue A : Op.getNode()->ops()) {
2112  VectorPair P = Subtarget.isHVXVectorType(ty(A), true)
2113  ? opSplit(A, dl, DAG)
2114  : std::make_pair(A, A);
2115  // Special case for type operand.
2116  if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2117  if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
2118  P = SplitVTNode(N);
2119  }
2120  OpsL.push_back(P.first);
2121  OpsH.push_back(P.second);
2122  }
2123 
2124  MVT ResTy = ty(Op);
2125  MVT HalfTy = typeSplit(ResTy).first;
2126  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
2127  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
2128  SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H);
2129  return S;
2130 }
2131 
2132 SDValue
2133 HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
2134  auto *MemN = cast<MemSDNode>(Op.getNode());
2135 
2136  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
2137  if (!isHvxPairTy(MemTy))
2138  return Op;
2139 
2140  const SDLoc &dl(Op);
2141  unsigned HwLen = Subtarget.getVectorLength();
2142  MVT SingleTy = typeSplit(MemTy).first;
2143  SDValue Chain = MemN->getChain();
2144  SDValue Base0 = MemN->getBasePtr();
2145  SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
2146  unsigned MemOpc = MemN->getOpcode();
2147 
2148  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
2149  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
2150  MachineFunction &MF = DAG.getMachineFunction();
2151  uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
2153  : HwLen;
2154  MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
2155  MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
2156  }
2157 
2158  if (MemOpc == ISD::LOAD) {
2159  assert(cast<LoadSDNode>(Op)->isUnindexed());
2160  SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
2161  SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
2162  return DAG.getMergeValues(
2163  { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
2165  Load0.getValue(1), Load1.getValue(1)) }, dl);
2166  }
2167  if (MemOpc == ISD::STORE) {
2168  assert(cast<StoreSDNode>(Op)->isUnindexed());
2169  VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
2170  SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
2171  SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
2172  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
2173  }
2174 
2175  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
2176 
2177  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
2178  assert(MaskN->isUnindexed());
2179  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
2181 
2182  if (MemOpc == ISD::MLOAD) {
2183  VectorPair Thru =
2184  opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
2185  SDValue MLoad0 =
2186  DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
2187  Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
2188  ISD::NON_EXTLOAD, false);
2189  SDValue MLoad1 =
2190  DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
2191  Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
2192  ISD::NON_EXTLOAD, false);
2193  return DAG.getMergeValues(
2194  { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
2196  MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
2197  }
2198  if (MemOpc == ISD::MSTORE) {
2199  VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
2200  SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
2201  Masks.first, SingleTy, MOp0,
2202  ISD::UNINDEXED, false, false);
2203  SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
2204  Masks.second, SingleTy, MOp1,
2205  ISD::UNINDEXED, false, false);
2206  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
2207  }
2208 
2209  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
2210  llvm_unreachable(Name.c_str());
2211 }
2212 
2213 SDValue
2214 HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
2215  const SDLoc &dl(Op);
2216  auto *LoadN = cast<LoadSDNode>(Op.getNode());
2217  assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
2218  assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
2219  "Not widening loads of i1 yet");
2220 
2221  SDValue Chain = LoadN->getChain();
2222  SDValue Base = LoadN->getBasePtr();
2224 
2225  MVT ResTy = ty(Op);
2226  unsigned HwLen = Subtarget.getVectorLength();
2227  unsigned ResLen = ResTy.getStoreSize();
2228  assert(ResLen < HwLen && "vsetq(v1) prerequisite");
2229 
2230  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2231  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
2232  {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
2233 
2234  MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
2235  MachineFunction &MF = DAG.getMachineFunction();
2236  auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
2237 
2238  SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
2239  DAG.getUNDEF(LoadTy), LoadTy, MemOp,
2241  SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
2242  return DAG.getMergeValues({Value, Chain}, dl);
2243 }
2244 
2245 SDValue
2246 HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
2247  const SDLoc &dl(Op);
2248  auto *StoreN = cast<StoreSDNode>(Op.getNode());
2249  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
2250  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
2251  "Not widening stores of i1 yet");
2252 
2253  SDValue Chain = StoreN->getChain();
2254  SDValue Base = StoreN->getBasePtr();
2256 
2257  SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
2258  MVT ValueTy = ty(Value);
2259  unsigned ValueLen = ValueTy.getVectorNumElements();
2260  unsigned HwLen = Subtarget.getVectorLength();
2261  assert(isPowerOf2_32(ValueLen));
2262 
2263  for (unsigned Len = ValueLen; Len < HwLen; ) {
2264  Value = opJoin({DAG.getUNDEF(ty(Value)), Value}, dl, DAG);
2265  Len = ty(Value).getVectorNumElements(); // This is Len *= 2
2266  }
2267  assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
2268 
2269  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
2270  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2271  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
2272  {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
2273  MachineFunction &MF = DAG.getMachineFunction();
2274  auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
2275  return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
2276  MemOp, ISD::UNINDEXED, false, false);
2277 }
2278 
2279 SDValue
2280 HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
2281  const SDLoc &dl(Op);
2282  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2283  MVT ElemTy = ty(Op0).getVectorElementType();
2284  unsigned HwLen = Subtarget.getVectorLength();
2285 
2286  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
2287  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
2288  MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
2289  if (!Subtarget.isHVXVectorType(WideOpTy, true))
2290  return SDValue();
2291 
2292  SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
2293  SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
2294  EVT ResTy =
2295  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
2296  SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
2297  {WideOp0, WideOp1, Op.getOperand(2)});
2298 
2299  EVT RetTy = getTypeToTransformTo(*DAG.getContext(), ty(Op));
2300  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
2301  {SetCC, getZero(dl, MVT::i32, DAG)});
2302 }
2303 
2304 SDValue
2305 HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2306  const SDLoc &dl(Op);
2307  unsigned HwWidth = 8*Subtarget.getVectorLength();
2308 
2309  SDValue Op0 = Op.getOperand(0);
2310  MVT ResTy = ty(Op);
2311  MVT OpTy = ty(Op0);
2312  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
2313  return SDValue();
2314 
2315  // .-res, op-> ScalarVec Illegal HVX
2316  // Scalar ok - -
2317  // Illegal widen(insert) widen -
2318  // HVX - widen ok
2319 
2320  auto getFactor = [HwWidth](MVT Ty) {
2321  unsigned Width = Ty.getSizeInBits();
2322  return HwWidth > Width ? HwWidth / Width : 1;
2323  };
2324 
2325  auto getWideTy = [getFactor](MVT Ty) {
2326  unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
2327  return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
2328  };
2329 
2330  unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? HexagonISD::VUNPACK
2332  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
2333  SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp);
2334  return WideRes;
2335 }
2336 
2337 SDValue
2338 HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
2339  const SDLoc &dl(Op);
2340  unsigned HwWidth = 8*Subtarget.getVectorLength();
2341 
2342  SDValue Op0 = Op.getOperand(0);
2343  MVT ResTy = ty(Op);
2344  MVT OpTy = ty(Op0);
2345  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
2346  return SDValue();
2347 
2348  // .-res, op-> ScalarVec Illegal HVX
2349  // Scalar ok extract(widen) -
2350  // Illegal - widen widen
2351  // HVX - - ok
2352 
2353  auto getFactor = [HwWidth](MVT Ty) {
2354  unsigned Width = Ty.getSizeInBits();
2355  assert(HwWidth % Width == 0);
2356  return HwWidth / Width;
2357  };
2358 
2359  auto getWideTy = [getFactor](MVT Ty) {
2360  unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
2361  return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
2362  };
2363 
2364  if (Subtarget.isHVXVectorType(OpTy))
2365  return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0);
2366 
2367  assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");
2368 
2369  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
2370  SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
2371  WideOp);
2372  // If the original result wasn't legal and was supposed to be widened,
2373  // we're done.
2374  if (shouldWidenToHvx(ResTy, DAG))
2375  return WideRes;
2376 
2377  // The original result type wasn't meant to be widened to HVX, so
2378  // leave it as it is. Standard legalization should be able to deal
2379  // with it (since now it's a result of a target-idendependent ISD
2380  // node).
2381  assert(ResTy.isVector());
2382  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
2383  {WideRes, getZero(dl, MVT::i32, DAG)});
2384 }
2385 
2386 SDValue
2387 HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
2388  unsigned Opc = Op.getOpcode();
2389  bool IsPairOp = isHvxPairTy(ty(Op)) ||
2390  llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
2391  return isHvxPairTy(ty(V));
2392  });
2393 
2394  if (IsPairOp) {
2395  switch (Opc) {
2396  default:
2397  break;
2398  case ISD::LOAD:
2399  case ISD::STORE:
2400  case ISD::MLOAD:
2401  case ISD::MSTORE:
2402  return SplitHvxMemOp(Op, DAG);
2403  case ISD::SINT_TO_FP:
2404  case ISD::UINT_TO_FP:
2405  case ISD::FP_TO_SINT:
2406  case ISD::FP_TO_UINT:
2407  if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
2408  return SplitHvxPairOp(Op, DAG);
2409  break;
2410  case ISD::CTPOP:
2411  case ISD::CTLZ:
2412  case ISD::CTTZ:
2413  case ISD::MUL:
2414  case ISD::FADD:
2415  case ISD::FSUB:
2416  case ISD::FMUL:
2417  case ISD::FMINNUM:
2418  case ISD::FMAXNUM:
2419  case ISD::MULHS:
2420  case ISD::MULHU:
2421  case ISD::AND:
2422  case ISD::OR:
2423  case ISD::XOR:
2424  case ISD::SRA:
2425  case ISD::SHL:
2426  case ISD::SRL:
2427  case ISD::SMIN:
2428  case ISD::SMAX:
2429  case ISD::UMIN:
2430  case ISD::UMAX:
2431  case ISD::SETCC:
2432  case ISD::VSELECT:
2433  case ISD::SIGN_EXTEND:
2434  case ISD::ZERO_EXTEND:
2436  case ISD::SPLAT_VECTOR:
2437  return SplitHvxPairOp(Op, DAG);
2438  }
2439  }
2440 
2441  switch (Opc) {
2442  default:
2443  break;
2444  case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
2445  case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
2446  case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
2447  case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
2448  case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
2449  case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
2450  case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
2451  case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
2452  case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
2453  case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
2454  case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
2455  case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
2456  case ISD::SELECT: return LowerHvxSelect(Op, DAG);
2457  case ISD::SRA:
2458  case ISD::SHL:
2459  case ISD::SRL: return LowerHvxShift(Op, DAG);
2460  case ISD::MULHS:
2461  case ISD::MULHU: return LowerHvxMulh(Op, DAG);
2462  case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
2463  case ISD::SETCC:
2464  case ISD::INTRINSIC_VOID: return Op;
2465  case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
2466  case ISD::MLOAD:
2467  case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
2468  // Unaligned loads will be handled by the default lowering.
2469  case ISD::LOAD: return SDValue();
2470  case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
2471  case ISD::FP_TO_SINT:
2472  case ISD::FP_TO_UINT:
2473  case ISD::SINT_TO_FP:
2474  case ISD::UINT_TO_FP: return LowerHvxConvertFpInt(Op, DAG);
2475  }
2476 #ifndef NDEBUG
2477  Op.dumpr(&DAG);
2478 #endif
2479  llvm_unreachable("Unhandled HVX operation");
2480 }
2481 
2482 void
2483 HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
2485  unsigned Opc = N->getOpcode();
2486  SDValue Op(N, 0);
2487 
2488  switch (Opc) {
2489  case ISD::ANY_EXTEND:
2490  case ISD::SIGN_EXTEND:
2491  case ISD::ZERO_EXTEND:
2492  if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
2493  if (SDValue T = WidenHvxExtend(Op, DAG))
2494  Results.push_back(T);
2495  }
2496  break;
2497  case ISD::SETCC:
2498  if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
2499  if (SDValue T = WidenHvxSetCC(Op, DAG))
2500  Results.push_back(T);
2501  }
2502  break;
2503  case ISD::TRUNCATE:
2504  if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
2505  if (SDValue T = WidenHvxTruncate(Op, DAG))
2506  Results.push_back(T);
2507  }
2508  break;
2509  case ISD::STORE: {
2510  if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
2511  SDValue Store = WidenHvxStore(Op, DAG);
2512  Results.push_back(Store);
2513  }
2514  break;
2515  }
2516  case ISD::MLOAD:
2517  if (isHvxPairTy(ty(Op))) {
2518  SDValue S = SplitHvxMemOp(Op, DAG);
2519  assert(S->getOpcode() == ISD::MERGE_VALUES);
2520  Results.push_back(S.getOperand(0));
2521  Results.push_back(S.getOperand(1));
2522  }
2523  break;
2524  case ISD::MSTORE:
2525  if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
2526  SDValue S = SplitHvxMemOp(Op, DAG);
2527  Results.push_back(S);
2528  }
2529  break;
2530  default:
2531  break;
2532  }
2533 }
2534 
2535 void
2536 HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
2538  unsigned Opc = N->getOpcode();
2539  SDValue Op(N, 0);
2540  switch (Opc) {
2541  case ISD::ANY_EXTEND:
2542  case ISD::SIGN_EXTEND:
2543  case ISD::ZERO_EXTEND:
2544  if (shouldWidenToHvx(ty(Op), DAG)) {
2545  if (SDValue T = WidenHvxExtend(Op, DAG))
2546  Results.push_back(T);
2547  }
2548  break;
2549  case ISD::SETCC:
2550  if (shouldWidenToHvx(ty(Op), DAG)) {
2551  if (SDValue T = WidenHvxSetCC(Op, DAG))
2552  Results.push_back(T);
2553  }
2554  break;
2555  case ISD::TRUNCATE:
2556  if (shouldWidenToHvx(ty(Op), DAG)) {
2557  if (SDValue T = WidenHvxTruncate(Op, DAG))
2558  Results.push_back(T);
2559  }
2560  break;
2561  case ISD::LOAD: {
2562  if (shouldWidenToHvx(ty(Op), DAG)) {
2563  SDValue Load = WidenHvxLoad(Op, DAG);
2564  assert(Load->getOpcode() == ISD::MERGE_VALUES);
2565  Results.push_back(Load.getOperand(0));
2566  Results.push_back(Load.getOperand(1));
2567  }
2568  break;
2569  }
2570  case ISD::BITCAST:
2571  if (isHvxBoolTy(ty(N->getOperand(0)))) {
2572  SDValue Op(N, 0);
2573  SDValue C = LowerHvxBitcast(Op, DAG);
2574  Results.push_back(C);
2575  }
2576  break;
2577  default:
2578  break;
2579  }
2580 }
2581 
2582 SDValue
2583 HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
2584  const {
2585  const SDLoc &dl(N);
2586  SelectionDAG &DAG = DCI.DAG;
2587  SDValue Op(N, 0);
2588  unsigned Opc = Op.getOpcode();
2589  if (DCI.isBeforeLegalizeOps())
2590  return SDValue();
2591 
2592  SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end());
2593 
2594  switch (Opc) {
2595  case ISD::VSELECT: {
2596  // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
2597  SDValue Cond = Ops[0];
2598  if (Cond->getOpcode() == ISD::XOR) {
2599  SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
2600  if (C1->getOpcode() == HexagonISD::QTRUE)
2601  return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
2602  }
2603  break;
2604  }
2605  case HexagonISD::V2Q:
2606  if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
2607  if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
2608  return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
2609  : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
2610  }
2611  break;
2612  case HexagonISD::Q2V:
2613  if (Ops[0].getOpcode() == HexagonISD::QTRUE)
2614  return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
2615  DAG.getConstant(-1, dl, MVT::i32));
2616  if (Ops[0].getOpcode() == HexagonISD::QFALSE)
2617  return getZero(dl, ty(Op), DAG);
2618  break;
2619  case HexagonISD::VINSERTW0:
2620  if (isUndef(Ops[1]))
2621  return Ops[0];;
2622  break;
2623  case HexagonISD::VROR: {
2624  if (Ops[0].getOpcode() == HexagonISD::VROR) {
2625  SDValue Vec = Ops[0].getOperand(0);
2626  SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
2627  SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
2628  return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
2629  }
2630  break;
2631  }
2632  }
2633 
2634  return SDValue();
2635 }
2636 
2637 bool
2638 HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
2639  auto Action = getPreferredHvxVectorAction(Ty);
2640  if (Action == TargetLoweringBase::TypeWidenVector) {
2641  EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2642  assert(WideTy.isSimple());
2643  return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true);
2644  }
2645  return false;
2646 }
2647 
2648 bool
2649 HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
2650  if (!Subtarget.useHVXOps())
2651  return false;
2652  // If the type of any result, or any operand type are HVX vector types,
2653  // this is an HVX operation.
2654  auto IsHvxTy = [this](EVT Ty) {
2655  return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
2656  };
2657  auto IsHvxOp = [this](SDValue Op) {
2658  return Op.getValueType().isSimple() &&
2659  Subtarget.isHVXVectorType(ty(Op), true);
2660  };
2661  if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
2662  return true;
2663 
2664  // Check if this could be an HVX operation after type widening.
2665  auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
2666  if (!Op.getValueType().isSimple())
2667  return false;
2668  MVT ValTy = ty(Op);
2669  return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
2670  };
2671 
2672  for (int i = 0, e = N->getNumValues(); i != e; ++i) {
2673  if (IsWidenedToHvx(SDValue(N, i)))
2674  return true;
2675  }
2676  return llvm::any_of(N->ops(), IsWidenedToHvx);
2677 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:76
llvm::codeview::SimpleTypeKind::Byte
@ Byte
llvm::HexagonSubtarget::useHVX64BOps
bool useHVX64BOps() const
Definition: HexagonSubtarget.h:247
i
i
Definition: README.txt:29
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1417
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:425
llvm::HexagonSubtarget::getVectorLength
unsigned getVectorLength() const
Definition: HexagonSubtarget.h:295
llvm::HexagonISD::QFALSE
@ QFALSE
Definition: HexagonISelLowering.h:81
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1564
llvm::ISD::VECTOR_SHUFFLE
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:586
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:519
llvm::TargetLoweringBase::AddPromotedToType
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
Definition: TargetLowering.h:2378
llvm::HexagonTargetLowering::LowerConstantPool
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
Definition: HexagonISelLowering.cpp:1112
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1428
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1076
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::MVT::v128i1
@ v128i1
Definition: MachineValueType.h:71
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1090
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:196
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1429
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1384
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:936
HvxWidenThreshold
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:151
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:542
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:700
llvm::MVT::i128
@ i128
Definition: MachineValueType.h:48
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1798
llvm::MachinePointerInfo::getConstantPool
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition: MachineOperand.cpp:1000
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::ISD::MLOAD
@ MLOAD
Definition: ISDOpcodes.h:1198
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:241
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:366
llvm::HexagonISD::QCAT
@ QCAT
Definition: HexagonISelLowering.h:79
llvm::MVT::v64f32
@ v64f32
Definition: MachineValueType.h:164
llvm::HexagonSubtarget::useHVXFloatingPoint
bool useHVXFloatingPoint() const
Definition: HexagonSubtarget.h:221
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:456
llvm::MVT::v2i1
@ v2i1
Definition: MachineValueType.h:65
PairTy
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Definition: MachineModuleInfoImpls.cpp:30
llvm::HexagonSubtarget::useHVXQFloatOps
bool useHVXQFloatOps() const
Definition: HexagonSubtarget.h:218
HexagonSubtarget.h
llvm::HexagonISD::COMBINE
@ COMBINE
Definition: HexagonISelLowering.h:55
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:454
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:819
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::MemOp
Definition: TargetLowering.h:111
llvm::SelectionDAG::getMemBasePlusOffset
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
Definition: SelectionDAG.cpp:6587
llvm::MVT::v64f16
@ v64f16
Definition: MachineValueType.h:140
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1419
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Used
@ Used
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:126
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:7752
llvm::SelectionDAG::ExtractVectorElements
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
Definition: SelectionDAG.cpp:11205
T1
#define T1
Definition: Mips16ISelLowering.cpp:340
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:736
llvm::SelectionDAG::getSplatBuildVector
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:819
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::HexagonISD::VUNPACKU
@ VUNPACKU
Definition: HexagonISelLowering.h:94
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:848
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::ISD::SIGN_EXTEND_VECTOR_INREG
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:803
llvm::SelectionDAG::getZExtOrTrunc
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition: SelectionDAG.cpp:1373
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::HexagonTargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Definition: HexagonISelLowering.h:262
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::ISD::SMAX
@ SMAX
Definition: ISDOpcodes.h:661
llvm::TargetLoweringBase::setIndexedStoreAction
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
Definition: TargetLowering.h:2320
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:459
llvm::HexagonISD::V2Q
@ V2Q
Definition: HexagonISelLowering.h:74
llvm::HexagonISD::VINSERTW0
@ VINSERTW0
Definition: HexagonISelLowering.h:64
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::TargetLoweringBase::setIndexedLoadAction
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Definition: TargetLowering.h:2303
llvm::HexagonSubtarget::getHVXElementTypes
ArrayRef< MVT > getHVXElementTypes() const
Definition: HexagonSubtarget.h:304
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2392
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
llvm::HexagonTargetLowering::getPreferredVectorAction
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Definition: HexagonISelLowering.cpp:2151
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:7702
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:632
llvm::SelectionDAG::getMaskedStore
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
Definition: SelectionDAG.cpp:8539
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1125
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:702
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:220
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:713
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(ArrayRef< unsigned > Ops, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2256
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1426
llvm::APFloatBase::IEEEhalf
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:164
llvm::HexagonISD::Q2V
@ Q2V
Definition: HexagonISelLowering.h:75
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:968
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:781
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
llvm::ISD::ANY_EXTEND_VECTOR_INREG
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:792
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1449
isSplat
static bool isSplat(ArrayRef< Value * > VL)
Definition: SLPVectorizer.cpp:260
llvm::HexagonISD::QTRUE
@ QTRUE
Definition: HexagonISelLowering.h:80
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:190
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
llvm::MVT::v64i1
@ v64i1
Definition: MachineValueType.h:70
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::MVT::v64i32
@ v64i32
Definition: MachineValueType.h:110
llvm::MVT::v16i1
@ v16i1
Definition: MachineValueType.h:68
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2239
llvm::TypeSize::Fixed
static TypeSize Fixed(ScalarTy MinVal)
Definition: TypeSize.h:427
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:623
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MVT::v8i1
@ v8i1
Definition: MachineValueType.h:67
llvm::HexagonSubtarget::useHVXOps
bool useHVXOps() const
Definition: HexagonSubtarget.h:222
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:773
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
llvm::HexagonISD::VEXTRACTW
@ VEXTRACTW
Definition: HexagonISelLowering.h:63
llvm::BitVector
Definition: BitVector.h:75
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1410
llvm::HexagonISD::TYPECAST
@ TYPECAST
Definition: HexagonISelLowering.h:82
llvm::ISD::SPLAT_VECTOR
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:613
llvm::TargetLoweringBase::TypeWidenVector
@ TypeWidenVector
Definition: TargetLowering.h:213
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1353
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:194
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:78
llvm::TargetLoweringBase::getTypeToTransformTo
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:979
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1411
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1416
llvm::HexagonSubtarget::isHVXVectorType
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
Definition: HexagonSubtarget.cpp:200
llvm::ISD::POST_INC
@ POST_INC
Definition: ISDOpcodes.h:1353
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1420
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:94
LegalW128
static const MVT LegalW128[]
Definition: HexagonISelLoweringHVX.cpp:25
llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:660
llvm::SPIRV::Decoration::Alignment
@ Alignment
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::APFloat
Definition: APFloat.h:700
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
llvm::SelectionDAG::SplitVector
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provides VTs and return the low/high part.
Definition: SelectionDAG.cpp:11158
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:405
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:93
uint64_t
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:820
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:932
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:959
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:197
llvm::MVT::v128i8
@ v128i8
Definition: MachineValueType.h:83
llvm::SelectionDAG::getConstantPool
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offs=0, bool isT=false, unsigned TargetFlags=0)
Definition: SelectionDAG.cpp:1713
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
MemoryLocation.h
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::MVT::v32i1
@ v32i1
Definition: MachineValueType.h:69
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8786
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1670
llvm::SelectionDAG::getSExtOrTrunc
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Definition: SelectionDAG.cpp:1367
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:129
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:850
llvm::ISD::MSTORE
@ MSTORE
Definition: ISDOpcodes.h:1199
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:171
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
HexagonRegisterInfo.h
llvm::MVT::v128i16
@ v128i16
Definition: MachineValueType.h:96
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SelectionDAG::getVectorShuffle
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition: SelectionDAG.cpp:1878
llvm::SelectionDAG::getMaskedLoad
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
Definition: SelectionDAG.cpp:8493
llvm::ISD::MULHS
@ MULHS
Definition: ISDOpcodes.h:638
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:864
llvm::HexagonISD::VUNPACK
@ VUNPACK
Definition: HexagonISelLowering.h:93
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1418
llvm::SelectionDAG::getBitcast
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition: SelectionDAG.cpp:2164
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
HexagonISelLowering.h
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:9257
llvm::HexagonISD::VROR
@ VROR
Definition: HexagonISelLowering.h:65
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
LegalV64
static const MVT LegalV64[]
Definition: HexagonISelLoweringHVX.cpp:22
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition: MachineValueType.h:1187
llvm::HexagonSubtarget::useHVXV68Ops
bool useHVXV68Ops() const
Definition: HexagonSubtarget.h:240
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::ISD::UMAX
@ UMAX
Definition: ISDOpcodes.h:663
llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:1104
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1612
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:178
llvm::SelectionDAG::getTargetInsertSubreg
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
Definition: SelectionDAG.cpp:9617
llvm::HexagonSubtarget::isHVXElementType
bool isHVXElementType(MVT Ty, bool IncludeBool=false) const
Definition: HexagonSubtarget.cpp:189
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1811
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1133
llvm::VTSDNode
This class is used to represent EVT's, which are used to parameterize some operations.
Definition: SelectionDAGNodes.h:2263
llvm::MVT::v64i8
@ v64i8
Definition: MachineValueType.h:82
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
llvm::MVT::v64i16
@ v64i16
Definition: MachineValueType.h:95
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1389
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:109
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:802
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1628
llvm::HexagonISD::VALIGN
@ VALIGN
Definition: HexagonISelLowering.h:84
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::ISD::XOR
@ XOR
Definition: ISDOpcodes.h:668
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:108
llvm::ISD::INSERT_SUBVECTOR
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:558
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
j
return j(j<< 16)
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1427
llvm::HexagonISD::VPACKL
@ VPACKL
Definition: HexagonISelLowering.h:88
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:937
LegalW64
static const MVT LegalW64[]
Definition: HexagonISelLoweringHVX.cpp:23
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
std
Definition: BitVector.h:851
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:572
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:337
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:528
llvm::MVT::v256i8
@ v256i8
Definition: MachineValueType.h:84
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:452
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:200
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:9607
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:137
llvm::HexagonISD::P2D
@ P2D
Definition: HexagonISelLowering.h:73
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:960
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:428
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1409
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:871
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:189
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:215
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4836
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
getVectorElementType
static llvm::Type * getVectorElementType(llvm::Type *Ty)
Definition: VETargetTransformInfo.h:24
llvm::ISD::MUL
@ MUL
Definition: ISDOpcodes.h:241
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:198
llvm::ISD::ZERO_EXTEND_VECTOR_INREG
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:814
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:54
N
#define N
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::MVT::v32f32
@ v32f32
Definition: MachineValueType.h:163
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2354
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:701
llvm::MVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
Definition: MachineValueType.h:360
llvm::HexagonSubtarget::useHVXV62Ops
bool useHVXV62Ops() const
Definition: HexagonSubtarget.h:228
llvm::ISD::UMIN
@ UMIN
Definition: ISDOpcodes.h:662
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
llvm::SmallVectorImpl< int >
llvm::HexagonSubtarget::useHVX128BOps
bool useHVX128BOps() const
Definition: HexagonSubtarget.h:246
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:637
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1121
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:649
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:81
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1412
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:449
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::cl::desc
Definition: CommandLine.h:405
llvm::TargetLoweringBase::TypeSplitVector
@ TypeSplitVector
Definition: TargetLowering.h:212
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:79
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::HexagonSubtarget::useHVXIEEEFPOps
bool useHVXIEEEFPOps() const
Definition: HexagonSubtarget.h:217
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:523
llvm::MVT::v128f16
@ v128f16
Definition: MachineValueType.h:141
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
LegalV128
static const MVT LegalV128[]
Definition: HexagonISelLoweringHVX.cpp:24
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:703
llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:1075
llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7466
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:66
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition: MachineValueType.h:1168
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:722