LLVM 17.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(5, 15, 10);
45 case MVT::f32:
46 return std::make_tuple(8, 127, 23);
47 case MVT::f64:
48 return std::make_tuple(11, 1023, 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
54
// Register the HVX vector register classes and configure operation actions
// for the HVX mode reported by Subtarget (64-byte or 128-byte vectors).
// NOTE(review): this listing omits many statements of the original function
// (most loop and if bodies below appear empty); only what is visible here is
// described.
55void
56HexagonTargetLowering::initializeHVXLowering() {
// 64-byte mode: HvxVR holds 512-bit types, HvxWR (pairs) holds 1024-bit types.
57 if (Subtarget.useHVX64BOps()) {
58 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
59 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
60 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
61 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
62 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
63 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
71 // as possible.
72 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
73 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
74 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
// 128-byte mode: the same register classes hold types twice as wide.
75 } else if (Subtarget.useHVX128BOps()) {
76 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
77 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
78 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
79 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
80 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
81 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
82 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
// Floating-point vector types are registered only in 128-byte mode, and only
// when both the V68 ops and HVX floating point are enabled.
85 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90 }
91 }
92
93 // Set up operation actions.
94
// Pick the type lists and the byte/word vector types matching the HVX mode.
95 bool Use64b = Subtarget.useHVX64BOps();
96 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
99 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101
// Helper: mark Opc on FromTy as Promote and record ToTy as the promoted type.
102 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
103 setOperationAction(Opc, FromTy, Promote);
104 AddPromotedToType(Opc, FromTy, ToTy);
105 };
106
107 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108 // Note: v16i1 -> i16 is handled in type legalization instead of op
109 // legalization.
119
120 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
121 Subtarget.useHVXFloatingPoint()) {
122
123 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
124 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
125
126 for (MVT T : FloatV) {
132
135
138
141 // Custom-lower BUILD_VECTOR. The standard (target-independent)
142 // handling of it would convert it to a load, which is not always
143 // the optimal choice.
145 }
146
147
148 // BUILD_VECTOR with f16 operands cannot be promoted without
149 // promoting the result, so lower the node to vsplat or constant pool
153
154 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
155 // generated.
156 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
157 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
158 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
159 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
160
161 for (MVT P : FloatW) {
170
171 // Custom-lower BUILD_VECTOR. The standard (target-independent)
172 // handling of it would convert it to a load, which is not always
173 // the optimal choice.
175 // Make concat-vectors custom to handle concats of more than 2 vectors.
177
180 }
181
182 if (Subtarget.useHVXQFloatOps()) {
185 } else if (Subtarget.useHVXIEEEFPOps()) {
188 }
189 }
190
// Actions for the single-vector types.
191 for (MVT T : LegalV) {
194
206 if (T != ByteV) {
210 }
211
214 if (T.getScalarType() != MVT::i32) {
217 }
218
223 if (T.getScalarType() != MVT::i32) {
226 }
227
229 // Make concat-vectors custom to handle concats of more than 2 vectors.
240 if (T != ByteV) {
242 // HVX only has shifts of words and halfwords.
246
247 // Promote all shuffles to operate on vectors of bytes.
248 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
249 }
250
251 if (Subtarget.useHVXFloatingPoint()) {
252 // Same action for both QFloat and IEEE.
257 }
258
266 }
267
// Actions for the vector-pair types.
268 for (MVT T : LegalW) {
269 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
270 // independent) handling of it would convert it to a load, which is
271 // not always the optimal choice.
273 // Make concat-vectors custom to handle concats of more than 2 vectors.
275
276 // Custom-lower these operations for pairs. Expand them into a concat
277 // of the corresponding operations on individual vectors.
286
295
306 if (T != ByteW) {
310
311 // Promote all shuffles to operate on vectors of bytes.
312 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
313 }
316
319 if (T.getScalarType() != MVT::i32) {
322 }
323
324 if (Subtarget.useHVXFloatingPoint()) {
325 // Same action for both QFloat and IEEE.
330 }
331 }
332
333 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
334 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
335 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
338
351
364
365 // Boolean vectors.
366
367 for (MVT T : LegalW) {
368 // Boolean types for vector pairs will overlap with the boolean
369 // types for single vectors, e.g.
370 // v64i8 -> v64i1 (single)
371 // v64i16 -> v64i1 (pair)
372 // Set these actions first, and allow the single actions to overwrite
373 // any duplicates.
374 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
379 // Masked load/store takes a mask that may need splitting.
382 }
383
384 for (MVT T : LegalV) {
385 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
396 }
397
398 if (Use64b) {
401 } else {
404 }
405
406 // Handle store widening for short vectors.
// Walks every power-of-two element count N (2 <= N < MaxElems) for each
// non-i1 HVX element type, i.e. vector types shorter than one HVX register.
407 unsigned HwLen = Subtarget.getVectorLength();
408 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
409 if (ElemTy == MVT::i1)
410 continue;
411 int ElemWidth = ElemTy.getFixedSizeInBits();
412 int MaxElems = (8*HwLen) / ElemWidth;
413 for (int N = 2; N < MaxElems; N *= 2) {
414 MVT VecTy = MVT::getVectorVT(ElemTy, N);
415 auto Action = getPreferredVectorAction(VecTy);
424 if (Subtarget.useHVXFloatingPoint()) {
429 }
430
431 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
432 if (!isTypeLegal(BoolTy))
434 }
435 }
436 }
437
439}
440
// Choose a preferred legalization action for the vector type VecTy on HVX.
// Returns ~0u when this function has no preference ("defer to default").
// NOTE(review): the statements that follow several of the conditions below
// are not present in this listing, so the specific action returned in those
// cases cannot be read off here.
441unsigned
442HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
443 MVT ElemTy = VecTy.getVectorElementType();
444 unsigned VecLen = VecTy.getVectorNumElements();
445 unsigned HwLen = Subtarget.getVectorLength();
446
447 // Split vectors of i1 that exceed byte vector length.
448 if (ElemTy == MVT::i1 && VecLen > HwLen)
450
451 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
452 // For shorter vectors of i1, widen them if any of the corresponding
453 // vectors of integers needs to be widened.
454 if (ElemTy == MVT::i1) {
// Ask the same question for each non-i1 HVX element type with the same
// element count, and adopt the first answer that is not "defer" (~0u).
455 for (MVT T : Tys) {
456 assert(T != MVT::i1);
457 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
458 if (A != ~0u)
459 return A;
460 }
461 return ~0u;
462 }
463
464 // If the size of VecTy is at least half of the vector length,
465 // widen the vector. Note: the threshold was not selected in
466 // any scientific way.
467 if (llvm::is_contained(Tys, ElemTy)) {
468 unsigned VecWidth = VecTy.getSizeInBits();
469 unsigned HwWidth = 8*HwLen;
// Wider than a vector pair.
470 if (VecWidth > 2*HwWidth)
472
// The user-supplied threshold only participates when the
// -hexagon-hvx-widen option was actually given on the command line.
473 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
474 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
// Default rule: at least half of, but less than, a full vector.
476 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
478 }
479
480 // Defer to default.
481 return ~0u;
482}
483
// Return an operation action for the node Op, selected by its opcode.
// NOTE(review): the switch cases and the final return statement are not
// present in this listing.
484unsigned
485HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
486 unsigned Opc = Op.getOpcode();
487 switch (Opc) {
492 }
494}
495
// Build an ISD::INTRINSIC_WO_CHAIN node of type ResTy for the intrinsic
// IntId. The operand list is the intrinsic id (as an i32 constant) followed
// by all of Ops.
// NOTE(review): the return-type line and the declaration of IntOps are not
// present in this listing.
497HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
498 const SDLoc &dl, SelectionDAG &DAG) const {
500 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
501 append_range(IntOps, Ops);
502 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
503}
504
505MVT
506HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
507 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
508
509 MVT ElemTy = Tys.first.getVectorElementType();
510 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
511 Tys.second.getVectorNumElements());
512}
513
514HexagonTargetLowering::TypePair
515HexagonTargetLowering::typeSplit(MVT VecTy) const {
516 assert(VecTy.isVector());
517 unsigned NumElem = VecTy.getVectorNumElements();
518 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
519 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
520 return { HalfTy, HalfTy };
521}
522
523MVT
524HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
525 MVT ElemTy = VecTy.getVectorElementType();
526 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
527 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
528}
529
530MVT
531HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
532 MVT ElemTy = VecTy.getVectorElementType();
533 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
534 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
535}
536
// Reinterpret Vec as a vector with element type ElemTy. Returns Vec itself
// when its element type already is ElemTy; otherwise returns a bitcast of
// Vec to the type computed by tyVector from Vec's type and ElemTy.
// NOTE(review): the return-type line is not present in this listing.
538HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
539 SelectionDAG &DAG) const {
540 if (ty(Vec).getVectorElementType() == ElemTy)
541 return Vec;
542 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
543 return DAG.getBitcast(CastTy, Vec);
544}
545
// Concatenate the two vectors in Ops with ISD::CONCAT_VECTORS. The result
// type is typeJoin applied to the types of the two operands.
// NOTE(review): the return-type line is not present in this listing.
547HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
548 SelectionDAG &DAG) const {
549 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
550 Ops.first, Ops.second);
551}
552
553HexagonTargetLowering::VectorPair
554HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
555 SelectionDAG &DAG) const {
556 TypePair Tys = typeSplit(ty(Vec));
557 if (Vec.getOpcode() == HexagonISD::QCAT)
558 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
559 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
560}
561
562bool
563HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
564 return Subtarget.isHVXVectorType(Ty) &&
565 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
566}
567
568bool
569HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
570 return Subtarget.isHVXVectorType(Ty) &&
571 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
572}
573
// Predicate on Ty. The visible part requires
// Subtarget.isHVXVectorType(Ty, true) to hold.
// NOTE(review): the right-hand operand of the && (original line 577) is not
// present in this listing; presumably it checks for an i1 element type --
// confirm against the full source.
574bool
575HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
576 return Subtarget.isHVXVectorType(Ty, true) &&
578}
579
580bool HexagonTargetLowering::allowsHvxMemoryAccess(
581 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
582 // Bool vectors are excluded by default, but make it explicit to
583 // emphasize that bool vectors cannot be loaded or stored.
584 // Also, disallow double vector stores (to prevent unnecessary
585 // store widening in DAG combiner).
586 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
587 return false;
588 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
589 return false;
590 if (Fast)
591 *Fast = 1;
592 return true;
593}
594
595bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
596 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
597 if (!Subtarget.isHVXVectorType(VecTy))
598 return false;
599 // XXX Should this be false? vmemu are a bit slower than vmem.
600 if (Fast)
601 *Fast = 1;
602 return true;
603}
604
// Expand the PS_vsplat* pseudo instructions after instruction selection.
// Byte and halfword splats use V6_lvsplatb / V6_lvsplath when the subtarget
// has the HVX V62 ops; otherwise the value is first replicated into a 32-bit
// scalar register and splatted with V6_lvsplatw. For those four pseudos the
// replacement instructions are inserted before MI and MI is erased. Word
// splats are rewritten in place by changing MI's descriptor.
// Opcodes not listed in the switch are left untouched.
// NOTE(review): the declaration of MRI (original line 611) is not present in
// this listing.
605void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
606 MachineInstr &MI, SDNode *Node) const {
607 unsigned Opc = MI.getOpcode();
608 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
609 MachineBasicBlock &MB = *MI.getParent();
610 MachineFunction &MF = *MB.getParent();
612 DebugLoc DL = MI.getDebugLoc();
613 auto At = MI.getIterator();
614
615 switch (Opc) {
// Splat of an immediate byte.
616 case Hexagon::PS_vsplatib:
617 if (Subtarget.useHVXV62Ops()) {
618 // SplatV = A2_tfrsi #imm
619 // OutV = V6_lvsplatb SplatV
620 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
621 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
622 .add(MI.getOperand(1));
623 Register OutV = MI.getOperand(0).getReg();
624 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
625 .addReg(SplatV);
626 } else {
627 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
628 // OutV = V6_lvsplatw SplatV
629 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
630 const MachineOperand &InpOp = MI.getOperand(1);
631 assert(InpOp.isImm());
// Keep only the low byte and replicate it into all four bytes of the word.
632 uint32_t V = InpOp.getImm() & 0xFF;
633 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
634 .addImm(V << 24 | V << 16 | V << 8 | V);
635 Register OutV = MI.getOperand(0).getReg();
636 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
637 }
638 MB.erase(At);
639 break;
// Splat of a byte held in a register.
640 case Hexagon::PS_vsplatrb:
641 if (Subtarget.useHVXV62Ops()) {
642 // OutV = V6_lvsplatb Inp
643 Register OutV = MI.getOperand(0).getReg();
644 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
645 .add(MI.getOperand(1));
646 } else {
// SplatV = S2_vsplatrb Inp
// OutV = V6_lvsplatw SplatV
647 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
648 const MachineOperand &InpOp = MI.getOperand(1);
649 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
650 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
651 Register OutV = MI.getOperand(0).getReg();
652 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
653 .addReg(SplatV);
654 }
655 MB.erase(At);
656 break;
// Splat of an immediate halfword.
657 case Hexagon::PS_vsplatih:
658 if (Subtarget.useHVXV62Ops()) {
659 // SplatV = A2_tfrsi #imm
660 // OutV = V6_lvsplath SplatV
661 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
662 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
663 .add(MI.getOperand(1));
664 Register OutV = MI.getOperand(0).getReg();
665 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
666 .addReg(SplatV);
667 } else {
668 // SplatV = A2_tfrsi #imm:#imm
669 // OutV = V6_lvsplatw SplatV
670 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
671 const MachineOperand &InpOp = MI.getOperand(1);
672 assert(InpOp.isImm());
// Keep only the low halfword and replicate it into both halves of the word.
673 uint32_t V = InpOp.getImm() & 0xFFFF;
674 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
675 .addImm(V << 16 | V);
676 Register OutV = MI.getOperand(0).getReg();
677 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
678 }
679 MB.erase(At);
680 break;
// Splat of a halfword held in a register.
681 case Hexagon::PS_vsplatrh:
682 if (Subtarget.useHVXV62Ops()) {
683 // OutV = V6_lvsplath Inp
684 Register OutV = MI.getOperand(0).getReg();
685 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
686 .add(MI.getOperand(1));
687 } else {
688 // SplatV = A2_combine_ll Inp, Inp
689 // OutV = V6_lvsplatw SplatV
690 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
691 const MachineOperand &InpOp = MI.getOperand(1);
692 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
693 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
694 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
695 Register OutV = MI.getOperand(0).getReg();
696 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
697 }
698 MB.erase(At);
699 break;
// Word splats: MI itself is kept and turned into V6_lvsplatw. For the
// immediate form the immediate is first moved into a new scalar register,
// which then replaces operand 1.
700 case Hexagon::PS_vsplatiw:
701 case Hexagon::PS_vsplatrw:
702 if (Opc == Hexagon::PS_vsplatiw) {
703 // SplatV = A2_tfrsi #imm
704 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
705 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
706 .add(MI.getOperand(1));
707 MI.getOperand(1).ChangeToRegister(SplatV, false);
708 }
709 // OutV = V6_lvsplatw SplatV/Inp
710 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
711 break;
712 }
713}
714
// Convert an element index into a byte index for elements of type ElemTy.
// The index is first bitcast to i32 if it has another type. For 8-bit
// elements the index is returned as is; otherwise it is shifted left by
// log2(element size in bytes).
// NOTE(review): the return-type line is not present in this listing.
716HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
717 SelectionDAG &DAG) const {
718 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
719 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
720
721 unsigned ElemWidth = ElemTy.getSizeInBits();
722 if (ElemWidth == 8)
723 return ElemIdx;
724
// L is the shift amount: log2 of the element size in bytes.
725 unsigned L = Log2_32(ElemWidth/8);
726 const SDLoc &dl(ElemIdx);
727 return DAG.getNode(ISD::SHL, dl, MVT::i32,
728 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
729}
730
// Reduce the element index Idx to the position of the element inside its
// 32-bit word. For 32-bit elements Idx is returned unchanged. Otherwise Idx
// is bitcast to i32 if needed and masked with (32/ElemWidth - 1), which keeps
// only the bits that select one of the elements packed in a word.
// ElemTy must be between 8 and 32 bits wide.
// NOTE(review): the return-type line is not present in this listing.
732HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
733 SelectionDAG &DAG) const {
734 unsigned ElemWidth = ElemTy.getSizeInBits();
735 assert(ElemWidth >= 8 && ElemWidth <= 32);
736 if (ElemWidth == 32)
737 return Idx;
738
739 if (ty(Idx) != MVT::i32)
740 Idx = DAG.getBitcast(MVT::i32, Idx);
741 const SDLoc &dl(Idx);
742 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
743 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
744 return SubIdx;
745}
746
// Build a vector shuffle of Op0 and Op1 expressed on bytes. Op0 and Op1 must
// have the same type. If the elements already are i8, the shuffle is created
// directly with Mask. Otherwise each entry of Mask is expanded into
// ElemSize consecutive byte entries, both operands are cast to byte vectors
// with opCastElem, and the shuffle is created on the byte vector type.
// NOTE(review): the return-type line is not present in this listing.
748HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
749 SDValue Op1, ArrayRef<int> Mask,
750 SelectionDAG &DAG) const {
751 MVT OpTy = ty(Op0);
752 assert(OpTy == ty(Op1));
753
754 MVT ElemTy = OpTy.getVectorElementType();
755 if (ElemTy == MVT::i8)
756 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
757 assert(ElemTy.getSizeInBits() >= 8);
758
759 MVT ResTy = tyVector(OpTy, MVT::i8);
// Element size in bytes.
760 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
761
762 SmallVector<int,128> ByteMask;
763 for (int M : Mask) {
// A negative (undefined) entry becomes ElemSize undefined byte entries.
764 if (M < 0) {
765 for (unsigned I = 0; I != ElemSize; ++I)
766 ByteMask.push_back(-1);
767 } else {
// Element M covers bytes [M*ElemSize, M*ElemSize + ElemSize).
768 int NewM = M*ElemSize;
769 for (unsigned I = 0; I != ElemSize; ++I)
770 ByteMask.push_back(NewM+I);
771 }
772 }
773 assert(ResTy.getVectorNumElements() == ByteMask.size());
774 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
775 opCastElem(Op1, MVT::i8, DAG), ByteMask);
776}
777
// Build a single HVX vector register of type VecTy from the scalar Values.
// The values are first packed into 32-bit words. The strategies are then
// tried in this order:
//  1. all words equal or undef -> undef, zero vector, or SPLAT_VECTOR;
//  2. all values constant      -> load from the constant pool;
//  3. all values extracted from one vector with constant indices, and that
//     vector has VecLen or 2*VecLen elements -> vector shuffle;
//  4. otherwise -> build two halves with VROR/VINSERTW0 and OR them.
// The element size times the number of values must equal the hardware
// vector length in bytes.
// NOTE(review): the return-type line and the declarations of Words, ExtIdx
// and Mask, as well as the tail of the getLoad call, are not present in this
// listing.
779HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
780 const SDLoc &dl, MVT VecTy,
781 SelectionDAG &DAG) const {
782 unsigned VecLen = Values.size();
784 MVT ElemTy = VecTy.getVectorElementType();
785 unsigned ElemWidth = ElemTy.getSizeInBits();
786 unsigned HwLen = Subtarget.getVectorLength();
787
788 unsigned ElemSize = ElemWidth / 8;
789 assert(ElemSize*VecLen == HwLen);
791
// Pack the input into 32-bit words. Elements narrower than a word are
// grouped (4 bytes or 2 halfwords) through buildVector32; i32 (and f32 when
// HVX floating point is enabled) values are bitcast one-to-one.
792 if (VecTy.getVectorElementType() != MVT::i32 &&
793 !(Subtarget.useHVXFloatingPoint() &&
794 VecTy.getVectorElementType() == MVT::f32)) {
795 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
796 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
797 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
798 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
799 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
800 Words.push_back(DAG.getBitcast(MVT::i32, W));
801 }
802 } else {
803 for (SDValue V : Values)
804 Words.push_back(DAG.getBitcast(MVT::i32, V));
805 }
// isSplat: true when all non-undef entries of Values are the same value.
// SplatV receives that value, or Values[0] when every entry is undef.
806 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
807 unsigned NumValues = Values.size();
808 assert(NumValues > 0);
809 bool IsUndef = true;
810 for (unsigned i = 0; i != NumValues; ++i) {
811 if (Values[i].isUndef())
812 continue;
813 IsUndef = false;
814 if (!SplatV.getNode())
815 SplatV = Values[i];
816 else if (SplatV != Values[i])
817 return false;
818 }
819 if (IsUndef)
820 SplatV = Values[0];
821 return true;
822 };
823
824 unsigned NumWords = Words.size();
825 SDValue SplatV;
826 bool IsSplat = isSplat(Words, SplatV);
827 if (IsSplat && isUndef(SplatV))
828 return DAG.getUNDEF(VecTy);
829 if (IsSplat) {
830 assert(SplatV.getNode());
// A constant-zero splat gets the dedicated zero vector; anything else is
// splatted as words and bitcast to the requested type.
831 auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
832 if (IdxN && IdxN->isZero())
833 return getZero(dl, VecTy, DAG);
834 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
835 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
836 return DAG.getBitcast(VecTy, S);
837 }
838
839 // Delay recognizing constant vectors until here, so that we can generate
840 // a vsplat.
841 SmallVector<ConstantInt*, 128> Consts(VecLen);
842 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
843 if (AllConst) {
844 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
845 (Constant**)Consts.end());
846 Constant *CV = ConstantVector::get(Tmp);
// The constant-pool entry is aligned to the hardware vector length.
847 Align Alignment(HwLen);
848 SDValue CP =
849 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
850 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
852 }
853
854 // A special case is a situation where the vector is built entirely from
855 // elements extracted from another vector. This could be done via a shuffle
856 // more efficiently, but typically, the size of the source vector will not
857 // match the size of the vector being built (which precludes the use of a
858 // shuffle directly).
859 // This only handles a single source vector, and the vector being built
860 // should be of a sub-vector type of the source vector type.
// On success SrcVec is the common source vector and SrcIdx holds, per value,
// the constant extract index (-1 for undef values).
861 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
862 SmallVectorImpl<int> &SrcIdx) {
863 SDValue Vec;
864 for (SDValue V : Values) {
865 if (isUndef(V)) {
866 SrcIdx.push_back(-1);
867 continue;
868 }
869 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
870 return false;
871 // All extracts should come from the same vector.
872 SDValue T = V.getOperand(0);
873 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
874 return false;
875 Vec = T;
// Only constant extract indices are supported.
876 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
877 if (C == nullptr)
878 return false;
879 int I = C->getSExtValue();
880 assert(I >= 0 && "Negative element index");
881 SrcIdx.push_back(I);
882 }
883 SrcVec = Vec;
884 return true;
885 };
886
888 SDValue ExtVec;
889 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
890 MVT ExtTy = ty(ExtVec);
891 unsigned ExtLen = ExtTy.getVectorNumElements();
892 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
893 // Construct a new shuffle mask that will produce a vector with the same
894 // number of elements as the input vector, and such that the vector we
895 // want will be the initial subvector of it.
897 BitVector Used(ExtLen);
898
899 for (int M : ExtIdx) {
900 Mask.push_back(M);
901 if (M >= 0)
902 Used.set(M);
903 }
904 // Fill the rest of the mask with the unused elements of ExtVec in hopes
905 // that it will result in a permutation of ExtVec's elements. It's still
906 // fine if it doesn't (e.g. if undefs are present, or elements are
907 // repeated), but permutations can always be done efficiently via vdelta
908 // and vrdelta.
909 for (unsigned I = 0; I != ExtLen; ++I) {
910 if (Mask.size() == ExtLen)
911 break;
912 if (!Used.test(I))
913 Mask.push_back(I);
914 }
915
916 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
917 DAG.getUNDEF(ExtTy), Mask);
// When the source is twice as long, the wanted vector is the low half.
918 return ExtLen == VecLen ? S : LoHalf(S, DAG);
919 }
920 }
921
922 // Find most common element to initialize vector with. This is to avoid
923 // unnecessary vinsert/valign for cases where the same value is present
924 // many times. Creates a histogram of the vector's elements to find the
925 // most common element n.
926 assert(4*Words.size() == Subtarget.getVectorLength());
// VecHist[i] counts how often Words[i] occurs at positions >= i; undef words
// get a count of 0. n tracks the index with the largest count so far.
927 int VecHist[32];
928 int n = 0;
929 for (unsigned i = 0; i != NumWords; ++i) {
930 VecHist[i] = 0;
931 if (Words[i].isUndef())
932 continue;
933 for (unsigned j = i; j != NumWords; ++j)
934 if (Words[i] == Words[j])
935 VecHist[i]++;
936
937 if (VecHist[i] > VecHist[n])
938 n = i;
939 }
940
// Start from a zero vector; if some word occurs more than once, start from a
// VALIGN of the zero vector and a splat of that word (shift HwLen/2 bytes).
941 SDValue HalfV = getZero(dl, VecTy, DAG);
942 if (VecHist[n] > 1) {
943 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
944 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
945 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
946 }
947 SDValue HalfV0 = HalfV;
948 SDValue HalfV1 = HalfV;
949
950 // Construct two halves in parallel, then or them together. Rn and Rm count
951 // number of rotations needed before the next element. One last rotation is
952 // performed post-loop to position the last element.
953 int Rn = 0, Rm = 0;
954 SDValue Sn, Sm;
955 SDValue N = HalfV0;
956 SDValue M = HalfV1;
957 for (unsigned i = 0; i != NumWords/2; ++i) {
958 // Rotate by element count since last insertion.
// Words equal to the pre-splatted most common word are skipped (unless no
// word was pre-splatted, i.e. VecHist[n] <= 1); the pending rotation keeps
// accumulating in Rn/Rm.
959 if (Words[i] != Words[n] || VecHist[n] <= 1) {
960 Sn = DAG.getConstant(Rn, dl, MVT::i32);
961 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
962 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
963 {HalfV0, Words[i]});
964 Rn = 0;
965 }
966 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
967 Sm = DAG.getConstant(Rm, dl, MVT::i32);
968 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
969 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
970 {HalfV1, Words[i+NumWords/2]});
971 Rm = 0;
972 }
// One word (4 bytes) further per iteration.
973 Rn += 4;
974 Rm += 4;
975 }
976 // Perform last rotation.
// The first half gets an additional HwLen/2 bytes of rotation compared to
// the second half.
977 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
978 Sm = DAG.getConstant(Rm, dl, MVT::i32);
979 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
980 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
981
// The OR is performed on i32 vectors, then cast back to the element type.
982 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
983 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
984
985 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
986
987 SDValue OutV =
988 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
989 return OutV;
990}
991
// Produce a byte vector (HwLen x i8) whose leading bytes represent the
// predicate PredV, using BitBytes bytes per predicate element. With ZeroFill
// the bytes past that prefix are zero; without it they are unspecified.
// Two kinds of input are handled: HVX vector predicates, and the scalar
// predicate types v2i1, v4i1 and v8i1.
// NOTE(review): the return-type line and the declaration of Mask (original
// line 1009) are not present in this listing.
993HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
994 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
995 MVT PredTy = ty(PredV);
996 unsigned HwLen = Subtarget.getVectorLength();
997 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
998
999 if (Subtarget.isHVXVectorType(PredTy, true)) {
1000 // Move the vector predicate SubV to a vector register, and scale it
1001 // down to match the representation (bytes per type element) that VecV
1002 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1003 // in general) element and put them at the front of the resulting
1004 // vector. This subvector will then be inserted into the Q2V of VecV.
1005 // To avoid having an operation that generates an illegal type (short
1006 // vector), generate a full size vector.
1007 //
1008 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1010 // Scale = BitBytes(PredV) / Given BitBytes.
1011 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
// BlockLen is the number of bytes the scaled-down predicate occupies.
1012 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1013
// Source byte i goes to block (i % Scale) at offset (i / Scale), so block 0
// collects every Scale-th byte starting at byte 0.
1014 for (unsigned i = 0; i != HwLen; ++i) {
1015 unsigned Num = i % Scale;
1016 unsigned Off = i / Scale;
1017 Mask[BlockLen*Num + Off] = i;
1018 }
1019 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1020 if (!ZeroFill)
1021 return S;
1022 // Fill the bytes beyond BlockLen with 0s.
1023 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1024 // when BlockLen < HwLen.
1025 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1026 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1027 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1028 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1029 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
1030 return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1031 }
1032
1033 // Make sure that this is a valid scalar predicate.
1034 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1035
// Bytes is the current number of bytes per predicate element: P2D yields
// 64 bits (8 bytes) for the whole predicate.
1036 unsigned Bytes = 8 / PredTy.getVectorNumElements();
// Two word lists used alternately: Words[IdxW] is the current one, and
// Words[IdxW ^ 1] is the previous one.
1037 SmallVector<SDValue,4> Words[2];
1038 unsigned IdxW = 0;
1039
1040 SDValue W0 = isUndef(PredV)
1041 ? DAG.getUNDEF(MVT::i64)
1042 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1043 Words[IdxW].push_back(HiHalf(W0, DAG));
1044 Words[IdxW].push_back(LoHalf(W0, DAG));
1045
// Double the bytes-per-element until it reaches BitBytes. Below 4 bytes each
// word is widened with expandPredicate and split in two; from 4 bytes on,
// each word is simply duplicated.
1046 while (Bytes < BitBytes) {
1047 IdxW ^= 1;
1048 Words[IdxW].clear();
1049
1050 if (Bytes < 4) {
1051 for (const SDValue &W : Words[IdxW ^ 1]) {
1052 SDValue T = expandPredicate(W, dl, DAG);
1053 Words[IdxW].push_back(HiHalf(T, DAG));
1054 Words[IdxW].push_back(LoHalf(T, DAG));
1055 }
1056 } else {
1057 for (const SDValue &W : Words[IdxW ^ 1]) {
1058 Words[IdxW].push_back(W);
1059 Words[IdxW].push_back(W);
1060 }
1061 }
1062 Bytes *= 2;
1063 }
1064
1065 assert(Bytes == BitBytes);
1066
// Insert the words one at a time: rotate by HwLen-4 bytes, then insert the
// next word at position 0.
1067 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1068 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1069 for (const SDValue &W : Words[IdxW]) {
1070 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1071 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1072 }
1073
1074 return Vec;
1075}
1076
// Build an HVX vector predicate of type VecTy from the i1 Values.
// If every value is a non-zero constant the result is QTRUE, if every value
// is a zero constant it is QFALSE. Otherwise a byte vector is assembled with
// buildHvxVectorReg and converted to a predicate with V2Q.
// The number of values must be at most HwLen, or exactly 8*HwLen.
// NOTE(review): the declaration of Bytes (original line 1086) is not present
// in this listing.
1077SDValue
1078HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1079 const SDLoc &dl, MVT VecTy,
1080 SelectionDAG &DAG) const {
1081 // Construct a vector V of bytes, such that a comparison V >u 0 would
1082 // produce the required vector predicate.
1083 unsigned VecLen = Values.size();
1084 unsigned HwLen = Subtarget.getVectorLength();
1085 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1087 bool AllT = true, AllF = true;
1088
// Both helpers return false for anything that is not a constant node, so an
// undef value counts as neither true nor false.
1089 auto IsTrue = [] (SDValue V) {
1090 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1091 return !N->isZero();
1092 return false;
1093 };
1094 auto IsFalse = [] (SDValue V) {
1095 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1096 return N->isZero();
1097 return false;
1098 };
1099
1100 if (VecLen <= HwLen) {
1101 // In the hardware, each bit of a vector predicate corresponds to a byte
1102 // of a vector register. Calculate how many bytes does a bit of VecTy
1103 // correspond to.
1104 assert(HwLen % VecLen == 0);
1105 unsigned BitBytes = HwLen / VecLen;
1106 for (SDValue V : Values) {
1107 AllT &= IsTrue(V);
1108 AllF &= IsFalse(V);
1109
// Each value is converted to i8 and repeated BitBytes times.
1110 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1111 : DAG.getUNDEF(MVT::i8);
1112 for (unsigned B = 0; B != BitBytes; ++B)
1113 Bytes.push_back(Ext);
1114 }
1115 } else {
1116 // There are as many i1 values, as there are bits in a vector register.
1117 // Divide the values into groups of 8 and check that each group consists
1118 // of the same value (ignoring undefs).
1119 for (unsigned I = 0; I != VecLen; I += 8) {
1120 unsigned B = 0;
1121 // Find the first non-undef value in this group.
1122 for (; B != 8; ++B) {
1123 if (!Values[I+B].isUndef())
1124 break;
1125 }
// NOTE(review): if all eight values of the group are undef, B is 8 here and
// Values[I+B] reads the first value of the next group -- or one past the end
// of Values for the last group. F is then also fed into AllT/AllF although
// it does not belong to this group. Confirm whether an all-undef group can
// reach this point.
1126 SDValue F = Values[I+B];
1127 AllT &= IsTrue(F);
1128 AllF &= IsFalse(F);
1129
1130 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1131 : DAG.getUNDEF(MVT::i8);
1132 Bytes.push_back(Ext);
1133 // Verify that the rest of values in the group are the same as the
1134 // first.
1135 for (; B != 8; ++B)
1136 assert(Values[I+B].isUndef() || Values[I+B] == F);
1137 }
1138 }
1139
1140 if (AllT)
1141 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1142 if (AllF)
1143 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1144
1145 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1146 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1147 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1148}
1149
1150SDValue
1151HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1152 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1153 MVT ElemTy = ty(VecV).getVectorElementType();
1154
1155 unsigned ElemWidth = ElemTy.getSizeInBits();
1156 assert(ElemWidth >= 8 && ElemWidth <= 32);
1157 (void)ElemWidth;
1158
1159 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1161 {VecV, ByteIdx});
1162 if (ElemTy == MVT::i32)
1163 return ExWord;
1164
1165 // Have an extracted word, need to extract the smaller element out of it.
1166 // 1. Extract the bits of (the original) IdxV that correspond to the index
1167 // of the desired element in the 32-bit word.
1168 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1169 // 2. Extract the element from the word.
1170 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1171 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1172}
1173
1174SDValue
1175HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1176 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1177 // Implement other return types if necessary.
1178 assert(ResTy == MVT::i1);
1179
1180 unsigned HwLen = Subtarget.getVectorLength();
1181 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1182 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1183
1184 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1185 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1186 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1187
1188 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1189 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1190 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1191}
1192
1193SDValue
1194HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1195 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1196 MVT ElemTy = ty(VecV).getVectorElementType();
1197
1198 unsigned ElemWidth = ElemTy.getSizeInBits();
1199 assert(ElemWidth >= 8 && ElemWidth <= 32);
1200 (void)ElemWidth;
1201
1202 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1203 SDValue ByteIdxV) {
1204 MVT VecTy = ty(VecV);
1205 unsigned HwLen = Subtarget.getVectorLength();
1206 SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
1207 {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)});
1208 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1209 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1210 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1211 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1212 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1213 return TorV;
1214 };
1215
1216 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1217 if (ElemTy == MVT::i32)
1218 return InsertWord(VecV, ValV, ByteIdx);
1219
1220 // If this is not inserting a 32-bit word, convert it into such a thing.
1221 // 1. Extract the existing word from the target vector.
1222 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1223 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1224 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1225 dl, MVT::i32, DAG);
1226
1227 // 2. Treating the extracted word as a 32-bit vector, insert the given
1228 // value into it.
1229 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1230 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1231 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1232 ValV, SubIdx, dl, ElemTy, DAG);
1233
1234 // 3. Insert the 32-bit word back into the original vector.
1235 return InsertWord(VecV, Ins, ByteIdx);
1236}
1237
1238SDValue
1239HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1240 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1241 unsigned HwLen = Subtarget.getVectorLength();
1242 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1243 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1244
1245 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1246 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1247 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1248 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1249
1250 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1251 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1252}
1253
1254SDValue
1255HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1256 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1257 MVT VecTy = ty(VecV);
1258 unsigned HwLen = Subtarget.getVectorLength();
1259 unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
1260 MVT ElemTy = VecTy.getVectorElementType();
1261 unsigned ElemWidth = ElemTy.getSizeInBits();
1262
1263 // If the source vector is a vector pair, get the single vector containing
1264 // the subvector of interest. The subvector will never overlap two single
1265 // vectors.
1266 if (isHvxPairTy(VecTy)) {
1267 if (Idx * ElemWidth >= 8*HwLen)
1268 Idx -= VecTy.getVectorNumElements() / 2;
1269
1270 VecV = OrigOp;
1271 if (typeSplit(VecTy).first == ResTy)
1272 return VecV;
1273 }
1274
1275 // The only meaningful subvectors of a single HVX vector are those that
1276 // fit in a scalar register.
1277 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1278
1279 MVT WordTy = tyVector(VecTy, MVT::i32);
1280 SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1281 unsigned WordIdx = (Idx*ElemWidth) / 32;
1282
1283 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1284 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1285 if (ResTy.getSizeInBits() == 32)
1286 return DAG.getBitcast(ResTy, W0);
1287
1288 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1289 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1290 SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1291 return DAG.getBitcast(ResTy, WW);
1292}
1293
1294SDValue
1295HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1296 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1297 MVT VecTy = ty(VecV);
1298 unsigned HwLen = Subtarget.getVectorLength();
1299 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1300 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1301 // IdxV is required to be a constant.
1302 unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
1303
1304 unsigned ResLen = ResTy.getVectorNumElements();
1305 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1306 unsigned Offset = Idx * BitBytes;
1307 SDValue Undef = DAG.getUNDEF(ByteTy);
1309
1310 if (Subtarget.isHVXVectorType(ResTy, true)) {
1311 // Converting between two vector predicates. Since the result is shorter
1312 // than the source, it will correspond to a vector predicate with the
1313 // relevant bits replicated. The replication count is the ratio of the
1314 // source and target vector lengths.
1315 unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1316 assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1317 for (unsigned i = 0; i != HwLen/Rep; ++i) {
1318 for (unsigned j = 0; j != Rep; ++j)
1319 Mask.push_back(i + Offset);
1320 }
1321 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1322 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
1323 }
1324
1325 // Converting between a vector predicate and a scalar predicate. In the
1326 // vector predicate, a group of BitBytes bits will correspond to a single
1327 // i1 element of the source vector type. Those bits will all have the same
1328 // value. The same will be true for ByteVec, where each byte corresponds
1329 // to a bit in the vector predicate.
1330 // The algorithm is to traverse the ByteVec, going over the i1 values from
1331 // the source vector, and generate the corresponding representation in an
1332 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1333 // elements so that the interesting 8 bytes will be in the low end of the
1334 // vector.
1335 unsigned Rep = 8 / ResLen;
1336 // Make sure the output fill the entire vector register, so repeat the
1337 // 8-byte groups as many times as necessary.
1338 for (unsigned r = 0; r != HwLen/ResLen; ++r) {
1339 // This will generate the indexes of the 8 interesting bytes.
1340 for (unsigned i = 0; i != ResLen; ++i) {
1341 for (unsigned j = 0; j != Rep; ++j)
1342 Mask.push_back(Offset + i*BitBytes);
1343 }
1344 }
1345
1346 SDValue Zero = getZero(dl, MVT::i32, DAG);
1347 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1348 // Combine the two low words from ShuffV into a v8i8, and byte-compare
1349 // them against 0.
1350 SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1352 {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
1353 SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
1354 return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1355 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
1356}
1357
1358SDValue
1359HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1360 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1361 MVT VecTy = ty(VecV);
1362 MVT SubTy = ty(SubV);
1363 unsigned HwLen = Subtarget.getVectorLength();
1364 MVT ElemTy = VecTy.getVectorElementType();
1365 unsigned ElemWidth = ElemTy.getSizeInBits();
1366
1367 bool IsPair = isHvxPairTy(VecTy);
1368 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1369 // The two single vectors that VecV consists of, if it's a pair.
1370 SDValue V0, V1;
1371 SDValue SingleV = VecV;
1372 SDValue PickHi;
1373
1374 if (IsPair) {
1375 V0 = LoHalf(VecV, DAG);
1376 V1 = HiHalf(VecV, DAG);
1377
1378 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1379 dl, MVT::i32);
1380 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1381 if (isHvxSingleTy(SubTy)) {
1382 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1383 unsigned Idx = CN->getZExtValue();
1384 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1385 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1386 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1387 }
1388 // If IdxV is not a constant, generate the two variants: with the
1389 // SubV as the high and as the low subregister, and select the right
1390 // pair based on the IdxV.
1391 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1392 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1393 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1394 }
1395 // The subvector being inserted must be entirely contained in one of
1396 // the vectors V0 or V1. Set SingleV to the correct one, and update
1397 // IdxV to be the index relative to the beginning of that vector.
1398 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1399 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1400 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1401 }
1402
1403 // The only meaningful subvectors of a single HVX vector are those that
1404 // fit in a scalar register.
1405 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1406 // Convert IdxV to be index in bytes.
1407 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1408 if (!IdxN || !IdxN->isZero()) {
1409 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1410 DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1411 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1412 }
1413 // When inserting a single word, the rotation back to the original position
1414 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1415 // by (HwLen-4)-Idx.
1416 unsigned RolBase = HwLen;
1417 if (SubTy.getSizeInBits() == 32) {
1418 SDValue V = DAG.getBitcast(MVT::i32, SubV);
1419 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
1420 } else {
1421 SDValue V = DAG.getBitcast(MVT::i64, SubV);
1422 SDValue R0 = LoHalf(V, DAG);
1423 SDValue R1 = HiHalf(V, DAG);
1424 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1425 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1426 DAG.getConstant(4, dl, MVT::i32));
1427 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1428 RolBase = HwLen-4;
1429 }
1430 // If the vector wasn't ror'ed, don't ror it back.
1431 if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1432 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1433 DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1434 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1435 }
1436
1437 if (IsPair) {
1438 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1439 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1440 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1441 }
1442 return SingleV;
1443}
1444
1445SDValue
1446HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1447 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1448 MVT VecTy = ty(VecV);
1449 MVT SubTy = ty(SubV);
1450 assert(Subtarget.isHVXVectorType(VecTy, true));
1451 // VecV is an HVX vector predicate. SubV may be either an HVX vector
1452 // predicate as well, or it can be a scalar predicate.
1453
1454 unsigned VecLen = VecTy.getVectorNumElements();
1455 unsigned HwLen = Subtarget.getVectorLength();
1456 assert(HwLen % VecLen == 0 && "Unexpected vector type");
1457
1458 unsigned Scale = VecLen / SubTy.getVectorNumElements();
1459 unsigned BitBytes = HwLen / VecLen;
1460 unsigned BlockLen = HwLen / Scale;
1461
1462 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1463 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1464 SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
1465 SDValue ByteIdx;
1466
1467 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1468 if (!IdxN || !IdxN->isZero()) {
1469 ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1470 DAG.getConstant(BitBytes, dl, MVT::i32));
1471 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
1472 }
1473
1474 // ByteVec is the target vector VecV rotated in such a way that the
1475 // subvector should be inserted at index 0. Generate a predicate mask
1476 // and use vmux to do the insertion.
1477 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1478 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1479 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1480 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1481 ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1482 // Rotate ByteVec back, and convert to a vector predicate.
1483 if (!IdxN || !IdxN->isZero()) {
1484 SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1485 SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1486 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
1487 }
1488 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1489}
1490
1491SDValue
1492HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1493 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1494 // Sign- and any-extending of a vector predicate to a vector register is
1495 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1496 // a vector of 1s (where the 1s are of type matching the vector type).
1497 assert(Subtarget.isHVXVectorType(ResTy));
1498 if (!ZeroExt)
1499 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1500
1501 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1502 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1503 DAG.getConstant(1, dl, MVT::i32));
1504 SDValue False = getZero(dl, ResTy, DAG);
1505 return DAG.getSelect(dl, ResTy, VecV, True, False);
1506}
1507
1508SDValue
1509HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1510 MVT ResTy, SelectionDAG &DAG) const {
1511 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1512 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1513 // vector register. The remaining bits of the vector register are
1514 // unspecified.
1515
1517 unsigned HwLen = Subtarget.getVectorLength();
1518 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1519 MVT PredTy = ty(VecQ);
1520 unsigned PredLen = PredTy.getVectorNumElements();
1521 assert(HwLen % PredLen == 0);
1522 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1523
1524 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1526 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1527 // These are bytes with the LSB rotated left with respect to their index.
1528 for (unsigned i = 0; i != HwLen/8; ++i) {
1529 for (unsigned j = 0; j != 8; ++j)
1530 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1531 }
1532 Constant *CV = ConstantVector::get(Tmp);
1533 Align Alignment(HwLen);
1534 SDValue CP =
1535 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1536 SDValue Bytes =
1537 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1539
1540 // Select the bytes that correspond to true bits in the vector predicate.
1541 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1542 getZero(dl, VecTy, DAG));
1543 // Calculate the OR of all bytes in each group of 8. That will compress
1544 // all the individual bits into a single byte.
1545 // First, OR groups of 4, via vrmpy with 0x01010101.
1546 SDValue All1 =
1547 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1548 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1549 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1550 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1551 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1552 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1553
1554 // Pick every 8th byte and coalesce them at the beginning of the output.
1555 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1556 // byte and so on.
1558 for (unsigned i = 0; i != HwLen; ++i)
1559 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1560 SDValue Collect =
1561 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1562 return DAG.getBitcast(ResTy, Collect);
1563}
1564
1565SDValue
1566HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1567 const SDLoc &dl, SelectionDAG &DAG) const {
1568 // Take a vector and resize the element type to match the given type.
1569 MVT InpTy = ty(VecV);
1570 if (InpTy == ResTy)
1571 return VecV;
1572
1573 unsigned InpWidth = InpTy.getSizeInBits();
1574 unsigned ResWidth = ResTy.getSizeInBits();
1575
1576 if (InpTy.isFloatingPoint()) {
1577 return InpWidth < ResWidth ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1578 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1579 getZero(dl, MVT::i32, DAG));
1580 }
1581
1582 assert(InpTy.isInteger());
1583
1584 if (InpWidth < ResWidth) {
1585 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1586 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1587 } else {
1588 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1589 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1590 }
1591}
1592
1593SDValue
1594HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1595 SelectionDAG &DAG) const {
1596 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1597
1598 const SDLoc &dl(Vec);
1599 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1600 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1601 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1602}
1603
1604SDValue
1605HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1606 const {
1607 const SDLoc &dl(Op);
1608 MVT VecTy = ty(Op);
1609
1610 unsigned Size = Op.getNumOperands();
1612 for (unsigned i = 0; i != Size; ++i)
1613 Ops.push_back(Op.getOperand(i));
1614
1615 // First, split the BUILD_VECTOR for vector pairs. We could generate
1616 // some pairs directly (via splat), but splats should be generated
1617 // by the combiner prior to getting here.
1618 if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
1619 ArrayRef<SDValue> A(Ops);
1620 MVT SingleTy = typeSplit(VecTy).first;
1621 SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
1622 SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
1623 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1624 }
1625
1626 if (VecTy.getVectorElementType() == MVT::i1)
1627 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1628
1629 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1630 // not a legal type, just bitcast the node to use i16
1631 // types and bitcast the result back to f16
1632 if (VecTy.getVectorElementType() == MVT::f16) {
1634 for (unsigned i = 0; i != Size; i++)
1635 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1636
1637 SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1638 tyVector(VecTy, MVT::i16), NewOps);
1639 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1640 }
1641
1642 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1643}
1644
1645SDValue
1646HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1647 const {
1648 const SDLoc &dl(Op);
1649 MVT VecTy = ty(Op);
1650 MVT ArgTy = ty(Op.getOperand(0));
1651
1652 if (ArgTy == MVT::f16) {
1654 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1655 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1656 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1657 return DAG.getBitcast(VecTy, Splat);
1658 }
1659
1660 return SDValue();
1661}
1662
1663SDValue
1664HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1665 const {
1666 // Vector concatenation of two integer (non-bool) vectors does not need
1667 // special lowering. Custom-lower concats of bool vectors and expand
1668 // concats of more than 2 vectors.
1669 MVT VecTy = ty(Op);
1670 const SDLoc &dl(Op);
1671 unsigned NumOp = Op.getNumOperands();
1672 if (VecTy.getVectorElementType() != MVT::i1) {
1673 if (NumOp == 2)
1674 return Op;
1675 // Expand the other cases into a build-vector.
1677 for (SDValue V : Op.getNode()->ops())
1678 DAG.ExtractVectorElements(V, Elems);
1679 // A vector of i16 will be broken up into a build_vector of i16's.
1680 // This is a problem, since at the time of operation legalization,
1681 // all operations are expected to be type-legalized, and i16 is not
1682 // a legal type. If any of the extracted elements is not of a valid
1683 // type, sign-extend it to a valid one.
1684 for (unsigned i = 0, e = Elems.size(); i != e; ++i) {
1685 SDValue V = Elems[i];
1686 MVT Ty = ty(V);
1687 if (!isTypeLegal(Ty)) {
1688 MVT NTy = typeLegalize(Ty, DAG);
1689 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1690 Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1691 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1692 V.getOperand(0), V.getOperand(1)),
1693 DAG.getValueType(Ty));
1694 continue;
1695 }
1696 // A few less complicated cases.
1697 switch (V.getOpcode()) {
1698 case ISD::Constant:
1699 Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
1700 break;
1701 case ISD::UNDEF:
1702 Elems[i] = DAG.getUNDEF(NTy);
1703 break;
1704 case ISD::TRUNCATE:
1705 Elems[i] = V.getOperand(0);
1706 break;
1707 default:
1708 llvm_unreachable("Unexpected vector element");
1709 }
1710 }
1711 }
1712 return DAG.getBuildVector(VecTy, dl, Elems);
1713 }
1714
1716 unsigned HwLen = Subtarget.getVectorLength();
1717 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1718
1719 SDValue Op0 = Op.getOperand(0);
1720
1721 // If the operands are HVX types (i.e. not scalar predicates), then
1722 // defer the concatenation, and create QCAT instead.
1723 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1724 if (NumOp == 2)
1725 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1726
1727 ArrayRef<SDUse> U(Op.getNode()->ops());
1728 SmallVector<SDValue,4> SV(U.begin(), U.end());
1729 ArrayRef<SDValue> Ops(SV);
1730
1731 MVT HalfTy = typeSplit(VecTy).first;
1732 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1733 Ops.take_front(NumOp/2));
1734 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1735 Ops.take_back(NumOp/2));
1736 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1737 }
1738
1739 // Count how many bytes (in a vector register) each bit in VecTy
1740 // corresponds to.
1741 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1742
1743 SmallVector<SDValue,8> Prefixes;
1744 for (SDValue V : Op.getNode()->op_values()) {
1745 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1746 Prefixes.push_back(P);
1747 }
1748
1749 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1750 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1751 SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
1752 SDValue Res = getZero(dl, ByteTy, DAG);
1753 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1754 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1755 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1756 }
1757 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1758}
1759
1760SDValue
1761HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1762 const {
1763 // Change the type of the extracted element to i32.
1764 SDValue VecV = Op.getOperand(0);
1765 MVT ElemTy = ty(VecV).getVectorElementType();
1766 const SDLoc &dl(Op);
1767 SDValue IdxV = Op.getOperand(1);
1768 if (ElemTy == MVT::i1)
1769 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1770
1771 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1772}
1773
1774SDValue
1775HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1776 const {
1777 const SDLoc &dl(Op);
1778 MVT VecTy = ty(Op);
1779 SDValue VecV = Op.getOperand(0);
1780 SDValue ValV = Op.getOperand(1);
1781 SDValue IdxV = Op.getOperand(2);
1782 MVT ElemTy = ty(VecV).getVectorElementType();
1783 if (ElemTy == MVT::i1)
1784 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1785
1786 if (ElemTy == MVT::f16) {
1788 tyVector(VecTy, MVT::i16),
1789 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1790 DAG.getBitcast(MVT::i16, ValV), IdxV);
1791 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1792 }
1793
1794 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1795}
1796
1797SDValue
1798HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1799 const {
1800 SDValue SrcV = Op.getOperand(0);
1801 MVT SrcTy = ty(SrcV);
1802 MVT DstTy = ty(Op);
1803 SDValue IdxV = Op.getOperand(1);
1804 unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
1805 assert(Idx % DstTy.getVectorNumElements() == 0);
1806 (void)Idx;
1807 const SDLoc &dl(Op);
1808
1809 MVT ElemTy = SrcTy.getVectorElementType();
1810 if (ElemTy == MVT::i1)
1811 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1812
1813 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1814}
1815
1816SDValue
1817HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1818 const {
1819 // Idx does not need to be a constant.
1820 SDValue VecV = Op.getOperand(0);
1821 SDValue ValV = Op.getOperand(1);
1822 SDValue IdxV = Op.getOperand(2);
1823
1824 const SDLoc &dl(Op);
1825 MVT VecTy = ty(VecV);
1826 MVT ElemTy = VecTy.getVectorElementType();
1827 if (ElemTy == MVT::i1)
1828 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1829
1830 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1831}
1832
1833SDValue
1834HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1835 // Lower any-extends of boolean vectors to sign-extends, since they
1836 // translate directly to Q2V. Zero-extending could also be done equally
1837 // fast, but Q2V is used/recognized in more places.
1838 // For all other vectors, use zero-extend.
1839 MVT ResTy = ty(Op);
1840 SDValue InpV = Op.getOperand(0);
1841 MVT ElemTy = ty(InpV).getVectorElementType();
1842 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1843 return LowerHvxSignExt(Op, DAG);
1844 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1845}
1846
1847SDValue
1848HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1849 MVT ResTy = ty(Op);
1850 SDValue InpV = Op.getOperand(0);
1851 MVT ElemTy = ty(InpV).getVectorElementType();
1852 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1853 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1854 return Op;
1855}
1856
1857SDValue
1858HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1859 MVT ResTy = ty(Op);
1860 SDValue InpV = Op.getOperand(0);
1861 MVT ElemTy = ty(InpV).getVectorElementType();
1862 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1863 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1864 return Op;
1865}
1866
1867SDValue
1868HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1869 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1870 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1871 const SDLoc &dl(Op);
1872 MVT ResTy = ty(Op);
1873 SDValue InpV = Op.getOperand(0);
1874 assert(ResTy == ty(InpV));
1875
1876 // Calculate the vectors of 1 and bitwidth(x).
1877 MVT ElemTy = ty(InpV).getVectorElementType();
1878 unsigned ElemWidth = ElemTy.getSizeInBits();
1879
1880 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1881 DAG.getConstant(1, dl, MVT::i32));
1882 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1883 DAG.getConstant(ElemWidth, dl, MVT::i32));
1884 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1885 DAG.getConstant(-1, dl, MVT::i32));
1886
1887 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1888 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1889 // it separately in custom combine or selection).
1890 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1891 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1892 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1893 return DAG.getNode(ISD::SUB, dl, ResTy,
1894 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1895}
1896
1897SDValue
1898HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1899 const SDLoc &dl(Op);
1900 MVT ResTy = ty(Op);
1902
1903 SDValue Vs = Op.getOperand(0);
1904 SDValue Vt = Op.getOperand(1);
1905
1906 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1907 unsigned Opc = Op.getOpcode();
1908
1909 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1910 if (Opc == ISD::MULHU)
1911 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1912 if (Opc == ISD::MULHS)
1913 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1914
1915#ifndef NDEBUG
1916 Op.dump(&DAG);
1917#endif
1918 llvm_unreachable("Unexpected mulh operation");
1919}
1920
// Lowers a two-result *MUL_LOHI node (result 0 is treated as the LO part,
// result 1 as the HI part of the product of operands 0 and 1).
// Strategy, in order:
//   1. HI unused            -> plain ISD::MUL for LO, UNDEF for HI.
//   2. Subtarget has v62 ops -> emitHvxMulLoHiV62.
//   3. SMUL_LOHI, LO unused  -> emitHvxMulHsV60 for HI, UNDEF for LO.
//   4. otherwise             -> emitHvxMulLoHiV60.
// Every path returns a value that still carries two results.
1921SDValue
1922HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
1923 const SDLoc &dl(Op);
1924 unsigned Opc = Op.getOpcode();
1925 SDValue Vu = Op.getOperand(0);
1926 SDValue Vv = Op.getOperand(1);
1927
1928 // If the HI part is not used, convert it to a regular MUL.
1929 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
1930 // Need to preserve the types and the number of values.
1931 SDValue Hi = DAG.getUNDEF(ty(HiVal));
1932 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
1933 return DAG.getMergeValues({Lo, Hi}, dl);
1934 }
1935
// Signedness of each operand as implied by the opcode:
//   SMUL_LOHI  : Vu signed,   Vv signed
//   USMUL_LOHI : Vu unsigned, Vv signed
//   anything else reaching here: both unsigned
1936 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
1937 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
1938
1939 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
1940 // valued nodes.
1941 if (Subtarget.useHVXV62Ops())
1942 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1943
1944 if (Opc == HexagonISD::SMUL_LOHI) {
1945 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
1946 // for other signedness LOHI is cheaper.
1947 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
1948 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
1949 SDValue Lo = DAG.getUNDEF(ty(LoVal));
1950 return DAG.getMergeValues({Lo, Hi}, dl);
1951 }
1952 }
1953
1954 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1955}
1956
// Lowers ISD::BITCAST in the two cases that involve an HVX boolean vector:
//   * HVX bool vector -> scalar integer (first `if`),
//   * scalar integer  -> HVX bool vector (second `if`).
// Any other bitcast is returned unchanged.
// NOTE(review): this listing omits three lines of the function; the
// containers `Words`, `Bytes` and `Tmp` used below are declared on lines
// that are not visible here.
1957SDValue
1958HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1959 SDValue Val = Op.getOperand(0);
1960 MVT ResTy = ty(Op);
1961 MVT ValTy = ty(Val);
1962 const SDLoc &dl(Op);
1963
1964 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
1965 unsigned HwLen = Subtarget.getVectorLength();
1966 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
// VQ is a vector of i32 words; the result is assembled from its leading
// words (presumably the predicate bits packed by compressHvxPred -- confirm
// against that helper).
1967 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
1968 unsigned BitWidth = ResTy.getSizeInBits();
1969
1970 if (BitWidth < 64) {
// Results narrower than 64 bits only need word 0 of VQ.
1971 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
1972 dl, MVT::i32, DAG);
1973 if (BitWidth == 32)
1974 return W0;
1975 assert(BitWidth < 32u);
1976 return DAG.getZExtOrTrunc(W0, dl, ResTy);
1977 }
1978
1979 // The result is >= 64 bits. The only options are 64 or 128.
1980 assert(BitWidth == 64 || BitWidth == 128);
// Extract the first BitWidth/32 words of VQ.
1982 for (unsigned i = 0; i != BitWidth/32; ++i) {
1983 SDValue W = extractHvxElementReg(
1984 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
1985 Words.push_back(W);
1986 }
// Pair adjacent words into i64 values. The odd-indexed word is passed first
// (presumably as the high half -- confirm against getCombine).
1987 SmallVector<SDValue,2> Combines;
1988 assert(Words.size() % 2 == 0);
1989 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
1990 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
1991 Combines.push_back(C);
1992 }
1993
1994 if (BitWidth == 64)
1995 return Combines[0];
1996
// 128-bit result: join the two i64 halves.
1997 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
1998 }
1999 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2000 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2001 unsigned BitWidth = ValTy.getSizeInBits();
2002 unsigned HwLen = Subtarget.getVectorLength();
2003 assert(BitWidth == HwLen);
2004
2005 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2006 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2007 // Splat each byte of Val 8 times.
2008 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2009 // where b0, b1,..., b15 are least to most significant bytes of Val.
2011 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2012 // These are bytes with the LSB rotated left with respect to their index.
2014 for (unsigned I = 0; I != HwLen / 8; ++I) {
2015 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2016 SDValue Byte =
2017 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
// Emit byte I eight times, each paired with a single-bit mask 1 << J.
2018 for (unsigned J = 0; J != 8; ++J) {
2019 Bytes.push_back(Byte);
2020 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2021 }
2022 }
2023
2024 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2025 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2026 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2027
2028 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2029 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2030 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2031 }
2032
2033 return Op;
2034}
2035
// Lowers ISD::ANY_EXTEND_VECTOR_INREG (the only opcode accepted, enforced by
// the assert) by re-emitting it as ISD::ZERO_EXTEND_VECTOR_INREG with the
// same result type and operand.
2036SDValue
2037HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2038 // Sign- and zero-extends are legal.
2039 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2040 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2041 Op.getOperand(0));
2042}
2043
// Lowers a SELECT whose result is a vector of i1. Results with any other
// element type are returned unchanged. For i1 vectors, both value operands
// are converted to an integer vector with Q2V, the select is performed on
// that integer type, and the result is converted back with V2Q.
2044SDValue
2045HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2046 MVT ResTy = ty(Op);
2047 if (ResTy.getVectorElementType() != MVT::i1)
2048 return Op;
2049
2050 const SDLoc &dl(Op);
2051 unsigned HwLen = Subtarget.getVectorLength();
2052 unsigned VecLen = ResTy.getVectorNumElements();
2053 assert(HwLen % VecLen == 0);
// Element size (in bytes) chosen so that VecLen elements add up to HwLen.
2054 unsigned ElemSize = HwLen / VecLen;
2055
2056 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
// Operand 0 is the select condition; operands 1 and 2 are the two values.
2057 SDValue S =
2058 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2059 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2060 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2061 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2062}
2063
// Lowers a vector shift: returns whatever getVectorShiftByInt produces if it
// yields a non-null value, otherwise returns the node unchanged.
2064SDValue
2065HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2066 if (SDValue S = getVectorShiftByInt(Op, DAG))
2067 return S;
2068 return Op;
2069}
2070
// Lowers ISD::FSHL / ISD::FSHR on HVX vectors. Operands: A (0), B (1) and
// the shift amount S (2).
//   * If S is a splat and UseShifts is set, the funnel shift is expanded to
//     VASL/VLSR/OR on a scalar i32 amount, with a SELECT covering amount 0.
//   * Otherwise an MFSHL/MFSHR node is emitted with S ANDed with `Mask`.
// NOTE(review): this listing omits the line on which the declaration of the
// vector `Mask` (used in the final AND) begins; only its continuation with
// the operands (InpTy, dl, constant ElemWidth - 1) is visible.
2071SDValue
2072HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2073 SelectionDAG &DAG) const {
2074 unsigned Opc = Op.getOpcode();
2075 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2076
2077 // Make sure the shift amount is within the range of the bitwidth
2078 // of the element type.
2079 SDValue A = Op.getOperand(0);
2080 SDValue B = Op.getOperand(1);
2081 SDValue S = Op.getOperand(2);
2082
2083 MVT InpTy = ty(A);
2084 MVT ElemTy = InpTy.getVectorElementType();
2085
2086 const SDLoc &dl(Op);
2087 unsigned ElemWidth = ElemTy.getSizeInBits();
2088 bool IsLeft = Opc == ISD::FSHL;
2089
2090 // The expansion into regular shifts produces worse code for i8 and for
2091 // right shift of i32 on v65+.
// Note: the check below disables the expansion for every i32 funnel shift on
// v65+; it does not look at the shift direction.
2092 bool UseShifts = ElemTy != MVT::i8;
2093 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2094 UseShifts = false;
2095
2096 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2097 // If this is a funnel shift by a scalar, lower it into regular shifts.
// ModS = amount mod ElemWidth (masking with ElemWidth - 1), NegS is the
// complementary amount ElemWidth - ModS. This `Mask` is a scalar i32 local
// to this branch.
2098 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2099 SDValue ModS =
2100 DAG.getNode(ISD::AND, dl, MVT::i32,
2101 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2102 SDValue NegS =
2103 DAG.getNode(ISD::SUB, dl, MVT::i32,
2104 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2105 SDValue IsZero =
2106 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2107 // FSHL A, B => A << | B >>n
2108 // FSHR A, B => A <<n | B >>
// (In the two lines above, the shift marked with "n" uses NegS and the
// other one uses ModS.)
2109 SDValue Part1 =
2110 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2111 SDValue Part2 =
2112 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2113 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2114 // If the shift amount was 0, pick A or B, depending on the direction.
2115 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2116 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2117 }
2118
2120 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2121
// General case: emit the target node with the per-element amount masked.
2122 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2123 return DAG.getNode(MOpc, dl, ty(Op),
2124 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2125}
2126
// Lowers selected HVX intrinsics; operand 0 of the node holds the intrinsic
// ID and the remaining operands are the intrinsic's arguments.
//   * pred_typecast: between two HVX bool types, becomes either the input
//     itself (same type) or a HexagonISD::TYPECAST node.
//   * vmpy{ss,uu,us}_parts: becomes the matching {S,U,US}MUL_LOHI node with
//     its two results swapped.
// Anything not handled is returned unchanged.
2127SDValue
2128HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2129 const SDLoc &dl(Op);
2130 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2131 SmallVector<SDValue> Ops(Op->ops().begin(), Op->ops().end());
2132
// Returns the two results of P in reverse order (result 1 first).
2133 auto Swap = [&](SDValue P) {
2134 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2135 };
2136
2137 switch (IntNo) {
2138 case Intrinsic::hexagon_V6_pred_typecast:
2139 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2140 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2141 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2142 if (ResTy == InpTy)
2143 return Ops[1];
2144 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2145 }
// Not a bool-to-bool cast: fall out of the switch and return Op as is.
2146 break;
2147 }
2148 case Intrinsic::hexagon_V6_vmpyss_parts:
2149 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2150 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2151 {Ops[1], Ops[2]}));
2152 case Intrinsic::hexagon_V6_vmpyuu_parts:
2153 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2154 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2155 {Ops[1], Ops[2]}));
2156 case Intrinsic::hexagon_V6_vmpyus_parts:
2157 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2158 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2159 {Ops[1], Ops[2]}));
2160 }
2161 } // switch
2162
2163 return Op;
2164}
2165
// Lowers ISD::MLOAD / ISD::MSTORE on HVX vectors.
//   * MLOAD: emits an ordinary load; if the pass-through value is not undef,
//     a VSELECT on the mask picks between the loaded and pass-through values,
//     and the result is merged with the load's chain (result 1 of the load).
//   * MSTORE: emits one V6_vS32b_qpred_ai store when the node's alignment is
//     a multiple of HwLen; otherwise emits two such stores (offsets 0 and
//     HwLen) on realigned value/mask halves, joined by a TokenFactor.
// NOTE(review): `MF` is used below but declared on a line this listing
// omits.
2166SDValue
2167HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2168 const SDLoc &dl(Op);
2169 unsigned HwLen = Subtarget.getVectorLength();
2171 auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
2172 SDValue Mask = MaskN->getMask();
2173 SDValue Chain = MaskN->getChain();
2174 SDValue Base = MaskN->getBasePtr();
// Memory operand derived from the original one with arguments (0, HwLen);
// it is attached to every store emitted below and used for the load.
2175 auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
2176
2177 unsigned Opc = Op->getOpcode();
2178 assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);
2179
2180 if (Opc == ISD::MLOAD) {
2181 MVT ValTy = ty(Op);
2182 SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
2183 SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
// With an undef pass-through the mask is not applied to the loaded value.
2184 if (isUndef(Thru))
2185 return Load;
2186 SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
2187 return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
2188 }
2189
2190 // MSTORE
2191 // HVX only has aligned masked stores.
2192
2193 // TODO: Fold negations of the mask into the store.
2194 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2195 SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
2196 SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
2197
// Aligned case: a single predicated store at offset 0.
2198 if (MaskN->getAlign().value() % HwLen == 0) {
2199 SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2200 {Mask, Base, Offset0, Value, Chain}, DAG);
2201 DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
2202 return Store;
2203 }
2204
2205 // Unaligned case.
// StoreAlign(V, A) returns the pair {vlalignb(V, 0, A), vlalignb(0, V, A)},
// i.e. the parts of V destined for the first and second stores.
2206 auto StoreAlign = [&](SDValue V, SDValue A) {
2207 SDValue Z = getZero(dl, ty(V), DAG);
2208 // TODO: use funnel shifts?
2209 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2210 // upper half.
2211 SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2212 SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2213 return std::make_pair(LoV, HiV);
2214 };
2215
// The mask is converted to a byte vector (Q2V), realigned with the same
// amount (Base) as the value, and each part converted back to a predicate.
2216 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2217 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2218 SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2219 VectorPair Tmp = StoreAlign(MaskV, Base);
2220 VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2221 DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2222 VectorPair ValueU = StoreAlign(Value, Base);
2223
// Two predicated stores, at offsets 0 and HwLen from Base, both on the
// incoming chain; the TokenFactor joins their output chains.
2224 SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2225 SDValue StoreLo =
2226 getInstr(StoreOpc, dl, MVT::Other,
2227 {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2228 SDValue StoreHi =
2229 getInstr(StoreOpc, dl, MVT::Other,
2230 {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2231 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2232 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2233 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2234}
2235
// Lowers ISD::FP_EXTEND from v64f16 to v64f32 when QFloat ops are in use
// (all three conditions are asserted). The input is multiplied by a
// constant vector built from FloatVal using V6_vmpy_qf32_hf, each half of
// the product is converted with V6_vconv_sf_qf32, and the halves are
// recombined with V6_vshuffvdd.
// NOTE(review): this listing omits one line between the declaration of
// `Ignored` and its neighbours; presumably it converts FloatVal to half
// precision (which would be the only use of `Ignored`) -- confirm against
// the full file.
2236SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2237 SelectionDAG &DAG) const {
2238 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2239 // is legal (done via a pattern).
2240 assert(Subtarget.useHVXQFloatOps());
2241
2242 assert(Op->getOpcode() == ISD::FP_EXTEND);
2243
2244 MVT VecTy = ty(Op);
2245 MVT ArgTy = ty(Op.getOperand(0));
2246 const SDLoc &dl(Op);
2247 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2248
2249 SDValue F16Vec = Op.getOperand(0);
2250
2251 APFloat FloatVal = APFloat(1.0f);
2252 bool Ignored;
// Constant of the argument type (v64f16) holding FloatVal.
2254 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2255 SDValue VmpyVec =
2256 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2257
// Convert the two halves of the product separately.
2258 MVT HalfTy = typeSplit(VecTy).first;
2259 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2260 SDValue LoVec =
2261 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2262 SDValue HiVec =
2263 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2264
// Recombine the halves into the full-width result (shuffle control -4).
2265 SDValue ShuffVec =
2266 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2267 {HiVec, LoVec, DAG.getConstant(-4, dl, MVT::i32)}, DAG);
2268
2269 return ShuffVec;
2270}
2271
// Lowers ISD::FP_TO_SINT / ISD::FP_TO_UINT on HVX vectors.
//   * With IEEE FP ops, f16 -> i8/i16 is left as is (returned unchanged).
//   * If the integer and FP element widths differ, the conversion is
//     rewritten by EqualizeFpIntConversion.
//   * Otherwise it is expanded by ExpandHvxFpToInt.
2272SDValue
2273HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2274 // Catch invalid conversion ops (just in case).
2275 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2276 Op.getOpcode() == ISD::FP_TO_UINT);
2277
2278 MVT ResTy = ty(Op);
2279 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2280 MVT IntTy = ResTy.getVectorElementType();
2281
2282 if (Subtarget.useHVXIEEEFPOps()) {
2283 // There are only conversions from f16.
2284 if (FpTy == MVT::f16) {
2285 // Other int types aren't legal in HVX, so we shouldn't see them here.
2286 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2287 // Conversions to i8 and i16 are legal.
2288 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2289 return Op;
2290 }
2291 }
2292
// Mismatched element widths: first make the widths equal.
2293 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2294 return EqualizeFpIntConversion(Op, DAG);
2295
2296 return ExpandHvxFpToInt(Op, DAG);
2297}
2298
// Lowers ISD::SINT_TO_FP / ISD::UINT_TO_FP on HVX vectors.
//   * With IEEE FP ops, i8/i16 -> f16 is left as is (returned unchanged).
//   * If the integer and FP element widths differ, the conversion is
//     rewritten by EqualizeFpIntConversion.
//   * Otherwise it is expanded by ExpandHvxIntToFp.
2299SDValue
2300HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2301 // Catch invalid conversion ops (just in case).
2302 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2303 Op.getOpcode() == ISD::UINT_TO_FP);
2304
2305 MVT ResTy = ty(Op);
2306 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2307 MVT FpTy = ResTy.getVectorElementType();
2308
2309 if (Subtarget.useHVXIEEEFPOps()) {
2310 // There are only conversions to f16.
2311 if (FpTy == MVT::f16) {
2312 // Other int types aren't legal in HVX, so we shouldn't see them here.
2313 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2314 // i8, i16 -> f16 is legal.
2315 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2316 return Op;
2317 }
2318 }
2319
// Mismatched element widths: first make the widths equal.
2320 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2321 return EqualizeFpIntConversion(Op, DAG);
2322
2323 return ExpandHvxIntToFp(Op, DAG);
2324}
2325
// Returns the pair {Ty0', Ty1'} in which both types have the element width
// max(width(Ty0), width(Ty1)). Each type keeps its own kind (integer or
// floating point) and, for vectors, its element count. Both inputs must be
// vectors or both scalars; vectors must have the same number of elements
// (asserted).
2326HexagonTargetLowering::TypePair
2327HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2328 // Compare the widths of elements of the two types, and extend the narrower
2329 // type to match the width of the wider type. For vector types, apply this
2330 // to the element type.
2331 assert(Ty0.isVector() == Ty1.isVector());
2332
2333 MVT ElemTy0 = Ty0.getScalarType();
2334 MVT ElemTy1 = Ty1.getScalarType();
2335
2336 unsigned Width0 = ElemTy0.getSizeInBits();
2337 unsigned Width1 = ElemTy1.getSizeInBits();
2338 unsigned MaxWidth = std::max(Width0, Width1);
2339
// Returns a scalar type of the given width and of the same kind (integer or
// floating point) as ScalarTy.
2340 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2341 if (ScalarTy.isInteger())
2342 return MVT::getIntegerVT(Width);
2343 assert(ScalarTy.isFloatingPoint());
2344 return MVT::getFloatingPointVT(Width);
2345 };
2346
2347 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2348 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2349
2350 if (!Ty0.isVector()) {
2351 // Both types are scalars.
2352 return {WideETy0, WideETy1};
2353 }
2354
2355 // Vector types.
2356 unsigned NumElem = Ty0.getVectorNumElements();
2357 assert(NumElem == Ty1.getVectorNumElements());
2358
2359 return {MVT::getVectorVT(WideETy0, NumElem),
2360 MVT::getVectorVT(WideETy1, NumElem)};
2361}
2362
// Returns the pair {Ty0', Ty1'} in which both vector types have
// max(len(Ty0), len(Ty1)) elements; element types are unchanged. If the
// lengths already match, the inputs are returned as they are. Both inputs
// must be vectors (asserted).
2363HexagonTargetLowering::TypePair
2364HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2365 // Compare the numbers of elements of two vector types, and widen the
2366 // narrower one to match the number of elements in the wider one.
2367 assert(Ty0.isVector() && Ty1.isVector());
2368
2369 unsigned Len0 = Ty0.getVectorNumElements();
2370 unsigned Len1 = Ty1.getVectorNumElements();
2371 if (Len0 == Len1)
2372 return {Ty0, Ty1};
2373
2374 unsigned MaxLen = std::max(Len0, Len1);
2375 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2376 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2377}
2378
// Returns the type that getTypeToTransformTo reports for Ty in the DAG's
// context. The result is asserted to be a simple type and is returned as an
// MVT.
2379MVT
2380HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2381 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2382 assert(LegalTy.isSimple());
2383 return LegalTy.getSimpleVT();
2384}
2385
// Returns a vector type with the same element type as Ty whose total size
// equals HwWidth bits (8 * the subtarget's vector length). Ty must not be
// larger than HwWidth (asserted); a type that already has exactly that size
// is returned unchanged.
2386MVT
2387HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2388 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2389 assert(Ty.getSizeInBits() <= HwWidth);
2390 if (Ty.getSizeInBits() == HwWidth)
2391 return Ty;
2392
// Keep the element type and pick the element count that fills HwWidth bits.
2393 MVT ElemTy = Ty.getScalarType();
2394 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2395}
2396
// Emits A+B together with a per-element overflow predicate.
//   A, B    operands; must have the same type (asserted).
//   Signed  selects signed vs. unsigned overflow detection.
// Returns {A+B, overflow predicate}.
// NOTE(review): `PredTy` (the type of the predicate) is declared on a line
// this listing omits.
2397HexagonTargetLowering::VectorPair
2398HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2399 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2400 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2401 // whether an overflow has occurred.
2402 MVT ResTy = ty(A);
2403 assert(ResTy == ty(B));
2405
2406 if (!Signed) {
2407 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2408 // save any instructions.
// Unsigned overflow: the (wrapped) sum compares unsigned-less-than A.
2409 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2410 SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2411 return {Add, Ovf};
2412 }
2413
2414 // Signed overflow has happened, if:
2415 // (A, B have the same sign) and (A+B has a different sign from either)
2416 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2417 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
// ~A is built as A xor -1.
2418 SDValue NotA =
2419 DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getConstant(-1, dl, ResTy)});
2420 SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2421 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2422 SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
// The sign bit of And is tested with a signed "less than zero" compare.
2423 SDValue MSB =
2424 DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2425 return {Add, MSB};
2426}
2427
// Emits a rounding right shift of Val by the constant Amt.
//   Val     input vector; it is bitcast to an integer vector of the same
//           element width before any arithmetic.
//   Amt     shift amount. Amt - 1 is used as a shift count, so this
//           presumably requires Amt >= 1 -- TODO confirm against callers.
//   Signed  selects SRA vs. SRL for the shifts and is forwarded to
//           emitHvxAddWithOverflow.
// Returns {rounded result, overflow predicate of the bias addition}.
// NOTE(review): `PredTy` is declared on a line this listing omits.
2428HexagonTargetLowering::VectorPair
2429HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
2430 bool Signed, SelectionDAG &DAG) const {
2431 // Shift Val right by Amt bits, round the result to the nearest integer,
2432 // tie-break by rounding halves to even integer.
2433
2434 const SDLoc &dl(Val);
2435 MVT ValTy = ty(Val);
2436
2437 // This should also work for signed integers.
2438 //
2439 // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2440 // bool ovf = (inp > tmp0);
2441 // uint rup = (inp & (1 << Amt)) != 0;
2442 //
2443 // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
2444 // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
2445 // uint tmp3 = tmp2 + rup;
2446 // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
2447 unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
2448 MVT ElemTy = MVT::getIntegerVT(ElemWidth);
2449 MVT IntTy = tyVector(ValTy, ElemTy);
2451 unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;
2452
2453 SDValue Inp = DAG.getBitcast(IntTy, Val);
2454 SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);
2455
// Despite its name, AmtP1 is the single-bit mask 1 << Amt. Rup is 1 in the
// elements where that bit of the input is set, and 0 elsewhere.
2456 SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
2457 SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
2458 SDValue Zero = getZero(dl, IntTy, DAG);
2459 SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
2460 SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
2461 auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);
2462
2463 SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
2464 SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
2465 SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
2466 SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);
2467
// Where Tmp1 == Tmp2 the result is Tmp3 >> 1 (Rup included); elsewhere it
// is Tmp2 >> 1.
2468 SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
2469 SDValue One = DAG.getConstant(1, dl, IntTy);
2470 SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
2471 SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
2472 SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
2473 return {Mux, Ovf};
2474}
2475
// Emits the high part of a signed multiply of A and B ("mulhs") as a
// sequence of HVX instructions built from 16-bit partial products; the
// derivation is given in the comment block below. Returns a value of the
// same type as A.
// NOTE(review): this listing omits one line right after the PairTy
// declaration; its content is not visible.
2476SDValue
2477HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
2478 SelectionDAG &DAG) const {
2479 MVT VecTy = ty(A);
2480 MVT PairTy = typeJoin({VecTy, VecTy});
2482
// Scalar shift amount used to move the upper 16 bits of each element down.
2483 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2484
2485 // mulhs(A,B) =
2486 // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
2487 // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
2488 // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
2489 // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
2490 // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
2491 // anything, so it cannot produce any carry over to higher bits),
2492 // so everything in [] can be shifted by 16 without loss of precision.
2493 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
2494 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
2495 // The final additions need to make sure to properly maintain any carry-
2496 // out bits.
2497 //
2498 // Hi(B) Lo(B)
2499 // Hi(A) Lo(A)
2500 // --------------
2501 // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this,
2502 // Hi(B)*Lo(A) | + dropping the low 16 bits
2503 // Hi(A)*Lo(B) | T2
2504 // Hi(B)*Hi(A)
2505
2506 SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
2507 // T1 = get Hi(A) into low halves.
2508 SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
2509 // P0 = interleaved T1.h*B.uh (full precision product)
2510 SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
2511 // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
2512 SDValue T2 = LoHalf(P0, DAG);
2513 // We need to add T0+T2, recording the carry-out, which will be 1<<16
2514 // added to the final sum.
2515 // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2516 SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
2517 // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2518 SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
2519 // T3 = full-precision(T0+T2) >> 16
2520 // The low halves are added-unsigned, the high ones are added-signed.
2521 SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2522 {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
// T4 = Hi(B) moved into the low halves (same operation as T1 for A).
2523 SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
2524 // P3 = interleaved Hi(B)*Hi(A) (full precision),
2525 // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
2526 SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
2527 SDValue T5 = LoHalf(P3, DAG);
2528 // Add:
2529 SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
2530 return T6;
2531}
2532
// Emits the LO and HI parts of the product A*B without relying on the v62
// instructions used by emitHvxMulLoHiV62.
//   A, B              operands (B has the same type as A in the code below).
//   SignedA, SignedB  whether each operand is to be treated as signed.
// An unsigned*unsigned product is computed from halfword products first,
// then HI is corrected for the signed*signed and unsigned*signed cases.
// Returns the merged pair {Lo, Hi}.
// NOTE(review): this listing omits three lines of the function: one after
// the PairTy declaration, and one at the top of each signedness branch where
// `PredTy` (used in the compares) is presumably declared.
2533SDValue
2534HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
2535 bool SignedB, const SDLoc &dl,
2536 SelectionDAG &DAG) const {
2537 MVT VecTy = ty(A);
2538 MVT PairTy = typeJoin({VecTy, VecTy});
2540
2541 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2542
// Normalize the mixed case so that only "A unsigned, B signed" remains.
2543 if (SignedA && !SignedB) {
2544 // Make A:unsigned, B:signed.
2545 std::swap(A, B);
2546 std::swap(SignedA, SignedB);
2547 }
2548
2549 // Do halfword-wise multiplications for unsigned*unsigned product, then
2550 // add corrections for signed and unsigned*signed.
2551
2552 SDValue Lo, Hi;
2553
2554 // P0:lo = (uu) products of low halves of A and B,
2555 // P0:hi = (uu) products of high halves.
2556 SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);
2557
2558 // Swap low/high halves in B
2559 SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
2560 {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
2561 SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
2562 // P1 = products of even/odd halfwords.
2563 // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
2564 // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
2565 SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);
2566
2567 // P2:lo = low halves of P1:lo + P1:hi,
2568 // P2:hi = high halves of P1:lo + P1:hi.
2569 SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
2570 {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
2571 // Still need to add the high halves of P0:lo to P2:lo
2572 SDValue T2 =
2573 getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
2574 SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});
2575
2576 // The high halves of T3 will contribute to the HI part of LOHI.
2577 SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2578 {HiHalf(P2, DAG), T3, S16}, DAG);
2579
2580 // The low halves of P2 need to be added to high halves of the LO part.
2581 Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
2582 {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
2583 Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});
2584
2585 if (SignedA) {
2586 assert(SignedB && "Signed A and unsigned B should have been inverted");
2587
// Signed*signed correction: Hi -= (B where A < 0, else 0) + (A where B < 0).
2589 SDValue Zero = getZero(dl, VecTy, DAG);
2590 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2591 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2592 SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
2593 SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
2594 Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
2595 } else if (SignedB) {
2596 // Same correction as for mulhus:
2597 // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
2599 SDValue Zero = getZero(dl, VecTy, DAG);
2600 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2601 Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
2602 } else {
// Unsigned*unsigned: no correction needed.
2603 assert(!SignedA && !SignedB);
2604 }
2605
2606 return DAG.getMergeValues({Lo, Hi}, dl);
2607}
2608
// Emits the LO and HI parts of the product A*B using V6_vmpyewuh_64 and
// V6_vmpyowh_64_acc.
//   A, B              operands.
//   SignedA, SignedB  whether each operand is to be treated as signed.
// The signed*signed product is computed first; HI is then corrected for the
// unsigned*unsigned and unsigned*signed cases. Returns the merged pair
// {Lo, Hi}.
// NOTE(review): this listing omits three lines of the function: one after
// the PairTy declaration, and one in each correction branch where `PredTy`
// (used in the compares) is presumably declared.
2609SDValue
2610HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
2611 SDValue B, bool SignedB,
2612 const SDLoc &dl,
2613 SelectionDAG &DAG) const {
2614 MVT VecTy = ty(A);
2615 MVT PairTy = typeJoin({VecTy, VecTy});
2617
// Normalize the mixed case so that only "A unsigned, B signed" remains.
2618 if (SignedA && !SignedB) {
2619 // Make A:unsigned, B:signed.
2620 std::swap(A, B);
2621 std::swap(SignedA, SignedB);
2622 }
2623
2624 // Do S*S first, then make corrections for U*S or U*U if needed.
2625 SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
2626 SDValue P1 =
2627 getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
2628 SDValue Lo = LoHalf(P1, DAG);
2629 SDValue Hi = HiHalf(P1, DAG);
2630
2631 if (!SignedB) {
// Both operands unsigned.
2632 assert(!SignedA && "Signed A and unsigned B should have been inverted");
2633 SDValue Zero = getZero(dl, VecTy, DAG);
2635
2636 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2637 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2638 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
2639 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2640 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2641 // $A))>;
2642 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2643 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2644 SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
2645 SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
2646 Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
2647 } else if (!SignedA) {
// A unsigned, B signed.
2648 SDValue Zero = getZero(dl, VecTy, DAG);
2650
2651 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2652 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2653 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2654 // (HiHalf (Muls64O $A, $B)),
2655 // $B)>;
2656 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2657 Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
2658 }
2659
2660 return DAG.getMergeValues({Lo, Hi}, dl);
2661}
2662
// Rewrites an FP<->int conversion node (FP_TO_SINT, FP_TO_UINT, SINT_TO_FP
// or UINT_TO_FP) so that the conversion itself operates on types of equal
// element width; see the comment at the top of the body. If the input and
// result types are already identical the node is returned unchanged.
2663SDValue
2664HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2665 const {
2666 // Rewrite conversion between integer and floating-point in such a way that
2667 // the integer type is extended/narrowed to match the bitwidth of the
2668 // floating-point type, combined with additional integer-integer extensions
2669 // or narrowings to match the original input/result types.
2670 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2671 //
2672 // The input/result types are not required to be legal, but if they are
2673 // legal, this function should not introduce illegal types.
2674
2675 unsigned Opc = Op.getOpcode();
2676 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2677 Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2678
2679 SDValue Inp = Op.getOperand(0);
2680 MVT InpTy = ty(Inp);
2681 MVT ResTy = ty(Op);
2682
2683 if (InpTy == ResTy)
2684 return Op;
2685
2686 const SDLoc &dl(Op);
// The signed opcodes make both resize steps below signed.
2687 bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
2688
// Three steps: resize the input to WInpTy, convert with the original opcode
// to WResTy, then resize the converted value to the original result type.
2689 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2690 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2691 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2692 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2693 return Res;
2694}
2695
2696SDValue
2697HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2698 unsigned Opc = Op.getOpcode();
2699 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);
2700
2701 const SDLoc &dl(Op);
2702 SDValue Op0 = Op.getOperand(0);
2703 MVT InpTy = ty(Op0);
2704 MVT ResTy = ty(Op);
2705 assert(InpTy.changeTypeToInteger() == ResTy);
2706
2707 // int32_t conv_f32_to_i32(uint32_t inp) {
2708 // // s | exp8 | frac23
2709 //
2710 // int neg = (int32_t)inp < 0;
2711 //
2712 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2713 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2714 // // produce a large positive "expm1", which will result in max u/int.
2715 // // In all IEEE formats, bias is the largest positive number that can be
2716 // // represented in bias-width bits (i.e. 011..1).
2717 // int32_t expm1 = (inp << 1) - 0x80000000;
2718 // expm1 >>= 24;
2719 //
2720 // // Always insert the "implicit 1". Subnormal numbers will become 0
2721 // // regardless.
2722 // uint32_t frac = (inp << 8) | 0x80000000;
2723 //
2724 // // "frac" is the fraction part represented as Q1.31. If it was
2725 // // interpreted as uint32_t, it would be the fraction part multiplied
2726 // // by 2^31.
2727 //
2728 // // Calculate the amount of right shift, since shifting further to the
2729 // // left would lose significant bits. Limit it to 32, because we want
2730 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2731 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2732 // // left by 31). "rsh" can be negative.
2733 // int32_t rsh = min(31 - (expm1 + 1), 32);
2734 //
2735 // frac >>= rsh; // rsh == 32 will produce 0
2736 //
2737 // // Everything up to this point is the same for conversion to signed
2738 // // unsigned integer.
2739 //
2740 // if (neg) // Only for signed int
2741 // frac = -frac; //
2742 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
2743 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
2744 // if (rsh <= 0 && !neg) //
2745 // frac = 0x7fffffff; //
2746 //
2747 // if (neg) // Only for unsigned int
2748 // frac = 0; //
2749 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
2750 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
2751 //
2752 // return frac;
2753 // }
2754
2756
2757 // Zero = V6_vd0();
2758 // Neg = V6_vgtw(Zero, Inp);
2759 // One = V6_lvsplatw(1);
2760 // M80 = V6_lvsplatw(0x80000000);
2761 // Exp00 = V6_vaslwv(Inp, One);
2762 // Exp01 = V6_vsubw(Exp00, M80);
2763 // ExpM1 = V6_vasrw(Exp01, 24);
2764 // Frc00 = V6_vaslw(Inp, 8);
2765 // Frc01 = V6_vor(Frc00, M80);
2766 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2767 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2768 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
2769
2770 // if signed int:
2771 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2772 // Pos = V6_vgtw(Rsh01, Zero);
2773 // Frc13 = V6_vsubw(Zero, Frc02);
2774 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
2775 // Int = V6_vmux(Pos, Frc14, Bnd);
2776 //
2777 // if unsigned int:
2778 // Rsn = V6_vgtw(Zero, Rsh01)
2779 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2780 // Int = V6_vmux(Neg, Zero, Frc23)
2781
2782 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
2783 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2784 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
2785
2786 SDValue Inp = DAG.getBitcast(ResTy, Op0);
2787 SDValue Zero = getZero(dl, ResTy, DAG);
2788 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
2789 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
2790 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
2791 SDValue One = DAG.getConstant(1, dl, ResTy);
2792 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
2793 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
2794 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
2795 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
2796
2797 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
2798 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
2799 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
2800
2801 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
2802 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
2803 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
2804 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
2805 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
2806
2807 SDValue Int;
2808
2809 if (Opc == ISD::FP_TO_SINT) {
2810 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
2811 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
2812 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
2813 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
2814 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
2815 } else {
2816 assert(Opc == ISD::FP_TO_UINT);
2817 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
2818 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
2819 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
2820 }
2821
2822 return Int;
2823}
2824
// Expand SINT_TO_FP/UINT_TO_FP into integer DAG operations that assemble the
// floating-point bit pattern by hand. The result type, reinterpreted as an
// integer vector, must be identical to the input type (asserted below).
// The commented C routine below is the 32-bit scalar model of what is built.
// NOTE(review): PredTy is used below, but its declaration is not visible in
// this listing.
2825SDValue
2826HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2827 unsigned Opc = Op.getOpcode();
2828 assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2829
2830 const SDLoc &dl(Op);
2831 SDValue Op0 = Op.getOperand(0);
2832 MVT InpTy = ty(Op0);
2833 MVT ResTy = ty(Op);
2834 assert(ResTy.changeTypeToInteger() == InpTy);
2835
2836 // uint32_t vnoc1_rnd(int32_t w) {
2837 // int32_t iszero = w == 0;
2838 // int32_t isneg = w < 0;
2839 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
2840 //
2841 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
2842 // uint32_t frac0 = (uint64_t)u << norm_left;
2843 //
2844 // // Rounding:
2845 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
2846 // uint32_t renorm = (frac0 > frac1);
2847 // uint32_t rup = (int)(frac0 << 22) < 0;
2848 //
2849 // uint32_t frac2 = frac0 >> 8;
2850 // uint32_t frac3 = frac1 >> 8;
2851 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
2852 //
2853 // int32_t exp = 32 - norm_left + renorm + 127;
2854 // exp <<= 23;
2855 //
2856 // uint32_t sign = 0x80000000 * isneg;
2857 // uint32_t f = sign | exp | frac;
2858 // return iszero ? 0 : f;
2859 // }
2860
2862 bool Signed = Opc == ISD::SINT_TO_FP;
2863
 // Element width is sign bit + exponent bits + fraction bits.
2864 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
2865 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2866
2867 SDValue Zero = getZero(dl, InpTy, DAG);
2868 SDValue One = DAG.getConstant(1, dl, InpTy);
2869 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
 // Only the signed conversion works on the magnitude; unsigned input is used
 // as is.
2870 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
 // Shift left by (leading zeros + 1), i.e. one position past the highest
 // set bit ("norm_left" in the model above).
2871 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
2872 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
2873 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
2874
 // Rounded right shift by ExpWidth + 1. Presumably Ovf corresponds to
 // "renorm" in the model (rounding carried out of the fraction) -- confirm
 // against emitHvxShiftRightRnd.
2875 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
2876 if (Signed) {
 // Set the top bit of the element for negative inputs.
2877 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
2878 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
2879 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
2880 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
2881 }
2882
 // Exponent field = Ovf + (ElemWidth + ExpBias) - NLeft, moved into place
 // above the fraction bits.
2883 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
2884 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
2885 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
2886 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
2887 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
2888 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
2889 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
 // A zero input yields an all-zero bit pattern instead of the computed one.
2890 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
2891 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
2892
2893 return Flt;
2894}
2895
2896SDValue
2897HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2898 unsigned Opc = Op.getOpcode();
2899 unsigned TLOpc;
2900 switch (Opc) {
2901 case ISD::ANY_EXTEND:
2902 case ISD::SIGN_EXTEND:
2903 case ISD::ZERO_EXTEND:
2904 TLOpc = HexagonISD::TL_EXTEND;
2905 break;
2906 case ISD::TRUNCATE:
2908 break;
2909#ifndef NDEBUG
2910 Op.dump(&DAG);
2911#endif
2912 llvm_unreachable("Unepected operator");
2913 }
2914
2915 const SDLoc &dl(Op);
2916 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
2917 DAG.getUNDEF(MVT::i128), // illegal type
2918 DAG.getConstant(Opc, dl, MVT::i32));
2919}
2920
2921SDValue
2922HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2923 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
2924 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
2925 unsigned Opc = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2926 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
2927}
2928
// Split a (non-machine) node into two nodes with the same opcode, each
// producing the first half-type of the original result type as given by
// typeSplit. Returns the pair {low node, high node}.
// Vector operands are split with opSplit; non-vector operands are passed
// unchanged to both halves.
2929HexagonTargetLowering::VectorPair
2930HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
2931 assert(!Op.isMachineOpcode());
2932 SmallVector<SDValue, 2> OpsL, OpsH;
2933 const SDLoc &dl(Op);
2934
 // For a value-type operand, both halves get the same operand: the
 // value-type node of the split type.
2935 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
2936 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
2937 SDValue TV = DAG.getValueType(Ty);
2938 return std::make_pair(TV, TV);
2939 };
2940
2941 for (SDValue A : Op.getNode()->ops()) {
2942 auto [Lo, Hi] =
2943 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
2944 // Special case for type operand.
 // NOTE(review): this listing may be missing a case label in the switch
 // below; only SSAT and USAT are visible.
2945 switch (Op.getOpcode()) {
2947 case HexagonISD::SSAT:
2948 case HexagonISD::USAT:
2949 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
2950 std::tie(Lo, Hi) = SplitVTNode(N);
2951 break;
2952 }
2953 OpsL.push_back(Lo);
2954 OpsH.push_back(Hi);
2955 }
2956
2957 MVT ResTy = ty(Op);
2958 MVT HalfTy = typeSplit(ResTy).first;
2959 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
2960 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
2961 return {L, H};
2962}
2963
// Split a memory operation on an HVX vector pair into two operations on
// single vectors: one at the original base address and one HwLen bytes
// further. Handles LOAD, STORE, MLOAD and MSTORE (all required to be
// unindexed). If the memory type is not an HVX pair type, Op is returned
// unchanged.
// Loads return MERGE_VALUES of the concatenated halves and a second value
// built from both halves' chain results; stores return a TokenFactor of the
// two stores.
// NOTE(review): the declarations of MF and Offset, and a few continuation
// lines, are not visible in this listing.
2964SDValue
2965HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
2966 auto *MemN = cast<MemSDNode>(Op.getNode());
2967
2968 MVT MemTy = MemN->getMemoryVT().getSimpleVT();
2969 if (!isHvxPairTy(MemTy))
2970 return Op;
2971
2972 const SDLoc &dl(Op);
2973 unsigned HwLen = Subtarget.getVectorLength();
2974 MVT SingleTy = typeSplit(MemTy).first;
2975 SDValue Chain = MemN->getChain();
 // Base0 addresses the first half, Base1 the second half (HwLen bytes later).
2976 SDValue Base0 = MemN->getBasePtr();
2977 SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
2978 unsigned MemOpc = MemN->getOpcode();
2979
 // Derive a memory operand for each half from the original one, at offsets
 // 0 and HwLen. The size for masked operations is chosen on a line not
 // visible here; other operations use HwLen.
2980 MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
2981 if (MachineMemOperand *MMO = MemN->getMemOperand()) {
2983 uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
2985 : HwLen;
2986 MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
2987 MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
2988 }
2989
2990 if (MemOpc == ISD::LOAD) {
2991 assert(cast<LoadSDNode>(Op)->isUnindexed());
 // Both loads use the incoming chain.
2992 SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
2993 SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
2994 return DAG.getMergeValues(
2995 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
2997 Load0.getValue(1), Load1.getValue(1)) }, dl);
2998 }
2999 if (MemOpc == ISD::STORE) {
3000 assert(cast<StoreSDNode>(Op)->isUnindexed());
3001 VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
3002 SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
3003 SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
3004 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
3005 }
3006
3007 assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
3008
 // Masked operations: the mask is split the same way as the data.
3009 auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
3010 assert(MaskN->isUnindexed());
3011 VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
3013
3014 if (MemOpc == ISD::MLOAD) {
 // The pass-through value is split too, one half per load.
3015 VectorPair Thru =
3016 opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
3017 SDValue MLoad0 =
3018 DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
3019 Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
3020 ISD::NON_EXTLOAD, false);
3021 SDValue MLoad1 =
3022 DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
3023 Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
3024 ISD::NON_EXTLOAD, false);
3025 return DAG.getMergeValues(
3026 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
3028 MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
3029 }
3030 if (MemOpc == ISD::MSTORE) {
3031 VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
3032 SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
3033 Masks.first, SingleTy, MOp0,
3034 ISD::UNINDEXED, false, false);
3035 SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
3036 Masks.second, SingleTy, MOp1,
3037 ISD::UNINDEXED, false, false);
3038 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
3039 }
3040
 // Every opcode admitted by the assert above has returned by now.
3041 std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
3042 llvm_unreachable(Name.c_str());
3043}
3044
// Replace a load of a vector shorter than an HVX register with a masked load
// of a full HwLen-byte i8 vector, then convert the loaded vector with
// opCastElem to the element type of the original result.
// Returns MERGE_VALUES of {converted value, chain of the masked load}.
// Only unindexed loads of non-i1 elements are supported (asserted).
// NOTE(review): the declarations of Offset and MF, and the tail of the
// getMaskedLoad call, are not visible in this listing.
3045SDValue
3046HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3047 const SDLoc &dl(Op);
3048 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3049 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3050 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3051 "Not widening loads of i1 yet");
3052
3053 SDValue Chain = LoadN->getChain();
3054 SDValue Base = LoadN->getBasePtr();
3056
3057 MVT ResTy = ty(Op);
3058 unsigned HwLen = Subtarget.getVectorLength();
3059 unsigned ResLen = ResTy.getStoreSize();
3060 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3061
 // Byte-granular predicate built from the original store size; presumably
 // it enables only the first ResLen bytes -- confirm against the
 // V6_pred_scalar2 definition.
3062 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3063 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3064 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3065
 // The widened access is described as HwLen bytes at offset 0 of the
 // original memory operand.
3066 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3068 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3069
 // The pass-through value is undef.
3070 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3071 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3073 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3074 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3075}
3076
// Replace a store of a vector shorter than an HVX register with a masked
// store of a full HwLen-element i8 vector. The stored value is converted to
// i8 elements and padded with undef up to HwLen elements; the mask is built
// from the original (unpadded) length.
// Only unindexed stores of non-i1 elements are supported (asserted).
// NOTE(review): the declarations of Offset and MF are not visible in this
// listing.
3077SDValue
3078HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3079 const SDLoc &dl(Op);
3080 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3081 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3082 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3083 "Not widening stores of i1 yet");
3084
3085 SDValue Chain = StoreN->getChain();
3086 SDValue Base = StoreN->getBasePtr();
3088
3089 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3090 MVT ValueTy = ty(Value);
3091 unsigned ValueLen = ValueTy.getVectorNumElements();
3092 unsigned HwLen = Subtarget.getVectorLength();
3093 assert(isPowerOf2_32(ValueLen));
3094
 // Double the vector by joining it with an undef of its own type until it
 // reaches HwLen elements.
3095 for (unsigned Len = ValueLen; Len < HwLen; ) {
3096 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3097 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3098 }
3099 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3100
 // Predicate built from the original element count; presumably it enables
 // only the first ValueLen bytes -- confirm against the V6_pred_scalar2
 // definition.
3101 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3102 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3103 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3104 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3106 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3107 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3108 MemOp, ISD::UNINDEXED, false, false);
3109}
3110
3111SDValue
3112HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3113 const SDLoc &dl(Op);
3114 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3115 MVT ElemTy = ty(Op0).getVectorElementType();
3116 unsigned HwLen = Subtarget.getVectorLength();
3117
3118 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3119 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3120 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3121 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3122 return SDValue();
3123
3124 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3125 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3126 EVT ResTy =
3127 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3128 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3129 {WideOp0, WideOp1, Op.getOperand(2)});
3130
3131 EVT RetTy = typeLegalize(ty(Op), DAG);
3132 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3133 {SetCC, getZero(dl, MVT::i32, DAG)});
3134}
3135
// Dispatcher for custom lowering of HVX operations.
// First, if the result or any operand is an HVX vector pair, the opcodes
// listed in the first switch are split into two single-vector operations
// (memory operations go through SplitHvxMemOp) and the halves are joined.
// Everything else falls through to the second switch, which forwards to the
// opcode-specific lowering routine. An opcode handled by neither switch is
// dumped (in builds with assertions) and triggers llvm_unreachable.
// NOTE(review): a few case labels of both switches are not visible in this
// listing.
3136SDValue
3137HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3138 unsigned Opc = Op.getOpcode();
3139 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3140 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3141 return isHvxPairTy(ty(V));
3142 });
3143
3144 if (IsPairOp) {
3145 switch (Opc) {
3146 default:
3147 break;
3148 case ISD::LOAD:
3149 case ISD::STORE:
3150 case ISD::MLOAD:
3151 case ISD::MSTORE:
3152 return SplitHvxMemOp(Op, DAG);
3153 case ISD::SINT_TO_FP:
3154 case ISD::UINT_TO_FP:
3155 case ISD::FP_TO_SINT:
3156 case ISD::FP_TO_UINT:
 // Int<->fp conversions are split only when input and result have the
 // same total size; otherwise they go to the second switch.
3157 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3158 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3159 break;
3160 case ISD::ABS:
3161 case ISD::CTPOP:
3162 case ISD::CTLZ:
3163 case ISD::CTTZ:
3164 case ISD::MUL:
3165 case ISD::FADD:
3166 case ISD::FSUB:
3167 case ISD::FMUL:
3168 case ISD::FMINNUM:
3169 case ISD::FMAXNUM:
3170 case ISD::MULHS:
3171 case ISD::MULHU:
3172 case ISD::AND:
3173 case ISD::OR:
3174 case ISD::XOR:
3175 case ISD::SRA:
3176 case ISD::SHL:
3177 case ISD::SRL:
3178 case ISD::FSHL:
3179 case ISD::FSHR:
3180 case ISD::SMIN:
3181 case ISD::SMAX:
3182 case ISD::UMIN:
3183 case ISD::UMAX:
3184 case ISD::SETCC:
3185 case ISD::VSELECT:
3187 case ISD::SPLAT_VECTOR:
3188 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3189 case ISD::SIGN_EXTEND:
3190 case ISD::ZERO_EXTEND:
3191 // In general, sign- and zero-extends can't be split and still
3192 // be legal. The only exception is extending bool vectors.
3193 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3194 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3195 break;
3196 }
3197 }
3198
 // Per-opcode lowering; also reached by pair operations not split above.
3199 switch (Opc) {
3200 default:
3201 break;
3202 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3203 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3204 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3205 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3206 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3207 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3208 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3209 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3210 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3211 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3212 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3213 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3214 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3215 case ISD::SRA:
3216 case ISD::SHL:
3217 case ISD::SRL: return LowerHvxShift(Op, DAG);
3218 case ISD::FSHL:
3219 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3220 case ISD::MULHS:
3221 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3222 case ISD::SMUL_LOHI:
3223 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3224 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
 // SETCC and INTRINSIC_VOID are returned as they are.
3225 case ISD::SETCC:
3226 case ISD::INTRINSIC_VOID: return Op;
3227 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3228 case ISD::MLOAD:
3229 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3230 // Unaligned loads will be handled by the default lowering.
3231 case ISD::LOAD: return SDValue();
3232 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3233 case ISD::FP_TO_SINT:
3234 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3235 case ISD::SINT_TO_FP:
3236 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3237
3238 // Special nodes:
3241 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3242 }
3243#ifndef NDEBUG
3244 Op.dumpr(&DAG);
3245#endif
3246 llvm_unreachable("Unhandled HVX operation");
3247}
3248
// Break a resizing operation whose element width changes by more than a
// factor of 2 into a chain of same-opcode operations, each doubling or
// halving the element width. An operation that already changes the width by
// exactly a factor of 2 is returned unchanged.
// NOTE(review): some case labels in both switches are not visible in this
// listing.
3249SDValue
3250HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3251 const {
3252 // Rewrite the extension/truncation/saturation op into steps where each
3253 // step changes the type widths by a factor of 2.
3254 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3255 //
3256 // Some of the vector types in Op may not be legal.
3257
3258 unsigned Opc = Op.getOpcode();
3259 switch (Opc) {
3260 case HexagonISD::SSAT:
3261 case HexagonISD::USAT:
3264 break;
3265 case ISD::ANY_EXTEND:
3266 case ISD::ZERO_EXTEND:
3267 case ISD::SIGN_EXTEND:
3268 case ISD::TRUNCATE:
3269 llvm_unreachable("ISD:: ops will be auto-folded");
3270 break;
 // NOTE(review): no "default:" label is visible before the diagnostic code
 // below; as shown it follows a "break" and cannot be reached -- confirm.
3271#ifndef NDEBUG
3272 Op.dump(&DAG);
3273#endif
3274 llvm_unreachable("Unexpected operation");
3275 }
3276
3277 SDValue Inp = Op.getOperand(0);
3278 MVT InpTy = ty(Inp);
3279 MVT ResTy = ty(Op);
3280
3281 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3282 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3283 assert(InpWidth != ResWidth);
3284
 // A single factor-of-2 step needs no rewriting.
3285 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3286 return Op;
3287
3288 const SDLoc &dl(Op);
3289 unsigned NumElems = InpTy.getVectorNumElements();
3290 assert(NumElems == ResTy.getVectorNumElements());
3291
 // Emit one step: apply Opc to Arg, producing NumElems integer elements of
 // NewWidth bits. SSAT/USAT take the new type as a value-type operand; the
 // other visible form reuses operands 1 and 2 of the original node.
3292 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3293 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3294 switch (Opc) {
3295 case HexagonISD::SSAT:
3296 case HexagonISD::USAT:
3297 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3300 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3301 default:
3302 llvm_unreachable("Unexpected opcode");
3303 }
3304 };
3305
 // InpWidth is updated in place and tracks the element width reached so far.
3306 SDValue S = Inp;
3307 if (InpWidth < ResWidth) {
3308 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3309 while (InpWidth * 2 <= ResWidth)
3310 S = repeatOp(InpWidth *= 2, S);
3311 } else {
3312 // InpWidth > ResWidth
3313 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3314 while (InpWidth / 2 >= ResWidth)
3315 S = repeatOp(InpWidth /= 2, S);
3316 }
3317 return S;
3318}
3319
3320SDValue
3321HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3322 SDValue Inp0 = Op.getOperand(0);
3323 MVT InpTy = ty(Inp0);
3324 MVT ResTy = ty(Op);
3325 unsigned InpWidth = InpTy.getSizeInBits();
3326 unsigned ResWidth = ResTy.getSizeInBits();
3327 unsigned Opc = Op.getOpcode();
3328
3329 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3330 // First, make sure that the narrower type is widened to HVX.
3331 // This may cause the result to be wider than what the legalizer
3332 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3333 // desired type.
3334 auto [WInpTy, WResTy] =
3335 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3336 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3337 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3338 SDValue S;
3339 if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
3340 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3341 Op.getOperand(2));
3342 } else {
3343 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3344 }
3345 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3346 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3347 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3348 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3349 } else {
3350 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3351 return RemoveTLWrapper(Op, DAG);
3352 }
3353 llvm_unreachable("Unexpected situation");
3354}
3355
// Per-opcode hook that appends replacement values for node N to Results.
// When a case's condition does not hold, nothing is appended.
// NOTE(review): part of the signature (the remaining parameters, including
// Results and DAG) and some case labels are not visible in this listing.
3356void
3357HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3359 unsigned Opc = N->getOpcode();
3360 SDValue Op(N, 0);
3361 SDValue Inp0; // Optional first argument.
3362 if (N->getNumOperands() > 0)
3363 Inp0 = Op.getOperand(0);
3364
3365 switch (Opc) {
3366 case ISD::ANY_EXTEND:
3367 case ISD::SIGN_EXTEND:
3368 case ISD::ZERO_EXTEND:
3369 case ISD::TRUNCATE:
 // Extends/truncates between HVX element types are replaced by a TL
 // wrapper node.
3370 if (Subtarget.isHVXElementType(ty(Op)) &&
3371 Subtarget.isHVXElementType(ty(Inp0))) {
3372 Results.push_back(CreateTLWrapper(Op, DAG));
3373 }
3374 break;
3375 case ISD::SETCC:
 // WidenHvxSetCC may return an empty value, in which case nothing is added.
3376 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3377 if (SDValue T = WidenHvxSetCC(Op, DAG))
3378 Results.push_back(T);
3379 }
3380 break;
3381 case ISD::STORE: {
3382 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3383 SDValue Store = WidenHvxStore(Op, DAG);
3384 Results.push_back(Store);
3385 }
3386 break;
3387 }
3388 case ISD::MLOAD:
 // SplitHvxMemOp returns MERGE_VALUES for a masked pair load; its two
 // operands are appended as separate results.
3389 if (isHvxPairTy(ty(Op))) {
3390 SDValue S = SplitHvxMemOp(Op, DAG);
3392 Results.push_back(S.getOperand(0));
3393 Results.push_back(S.getOperand(1));
3394 }
3395 break;
3396 case ISD::MSTORE:
3397 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3398 SDValue S = SplitHvxMemOp(Op, DAG);
3399 Results.push_back(S);
3400 }
3401 break;
3402 case ISD::SINT_TO_FP:
3403 case ISD::UINT_TO_FP:
3404 case ISD::FP_TO_SINT:
3405 case ISD::FP_TO_UINT:
 // Only conversions whose input and result differ in total size are
 // rewritten here.
3406 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3407 SDValue T = EqualizeFpIntConversion(Op, DAG);
3408 Results.push_back(T);
3409 }
3410 break;
3411 case HexagonISD::SSAT:
3412 case HexagonISD::USAT:
3415 Results.push_back(LegalizeHvxResize(Op, DAG));
3416 break;
3417 default:
3418 break;
3419 }
3420}
3421
// Per-opcode hook that appends replacement values for the results of node N
// to Results. When a case's condition does not hold, nothing is appended.
// Unlike LowerHvxOperationWrapper, the widening checks for SETCC and LOAD
// here are made on the result type of the node.
// NOTE(review): part of the signature (the remaining parameters, including
// Results and DAG) and some case labels are not visible in this listing.
3422void
3423HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3425 unsigned Opc = N->getOpcode();
3426 SDValue Op(N, 0);
3427 SDValue Inp0; // Optional first argument.
3428 if (N->getNumOperands() > 0)
3429 Inp0 = Op.getOperand(0);
3430
3431 switch (Opc) {
3432 case ISD::ANY_EXTEND:
3433 case ISD::SIGN_EXTEND:
3434 case ISD::ZERO_EXTEND:
3435 case ISD::TRUNCATE:
 // Extends/truncates between HVX element types are replaced by a TL
 // wrapper node.
3436 if (Subtarget.isHVXElementType(ty(Op)) &&
3437 Subtarget.isHVXElementType(ty(Inp0))) {
3438 Results.push_back(CreateTLWrapper(Op, DAG));
3439 }
3440 break;
3441 case ISD::SETCC:
 // WidenHvxSetCC may return an empty value, in which case nothing is added.
3442 if (shouldWidenToHvx(ty(Op), DAG)) {
3443 if (SDValue T = WidenHvxSetCC(Op, DAG))
3444 Results.push_back(T);
3445 }
3446 break;
3447 case ISD::LOAD: {
 // The widened load is a MERGE_VALUES of {value, chain}; both are appended.
3448 if (shouldWidenToHvx(ty(Op), DAG)) {
3449 SDValue Load = WidenHvxLoad(Op, DAG);
3450 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3451 Results.push_back(Load.getOperand(0));
3452 Results.push_back(Load.getOperand(1));
3453 }
3454 break;
3455 }
3456 case ISD::BITCAST:
 // Only bitcasts from HVX boolean vectors are handled here.
3457 if (isHvxBoolTy(ty(Inp0))) {
3458 SDValue C = LowerHvxBitcast(Op, DAG);
3459 Results.push_back(C);
3460 }
3461 break;
3462 case ISD::FP_TO_SINT:
3463 case ISD::FP_TO_UINT:
 // Only conversions whose input and result differ in total size are
 // rewritten here.
3464 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3465 SDValue T = EqualizeFpIntConversion(Op, DAG);
3466 Results.push_back(T);
3467 }
3468 break;
3469 case HexagonISD::SSAT:
3470 case HexagonISD::USAT:
3473 Results.push_back(LegalizeHvxResize(Op, DAG));
3474 break;
3475 default:
3476 break;
3477 }
3478}
3479
// DAG combine for TRUNCATE (see the pattern described in the body). On a
// match, the source vector is shuffled so that even-indexed elements come
// first and odd-indexed elements second, and the first half of that shuffle
// is returned. Returns an empty SDValue when the pattern does not apply.
// NOTE(review): the declaration of Mask is not visible in this listing.
3480SDValue
3481HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3482 DAGCombinerInfo &DCI) const {
3483 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3484 // to extract-subvector (shuffle V, pick even, pick odd)
3485
3486 assert(Op.getOpcode() == ISD::TRUNCATE);
3487 SelectionDAG &DAG = DCI.DAG;
3488 const SDLoc &dl(Op);
3489
 // NOTE(review): the pattern above has a bitcast feeding the truncate, yet
 // this guard gives up exactly when the operand IS a BITCAST. The condition
 // looks inverted -- confirm.
3490 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3491 return SDValue();
3492 SDValue Cast = Op.getOperand(0);
3493 SDValue Src = Cast.getOperand(0);
3494
3495 EVT TruncTy = Op.getValueType();
3496 EVT CastTy = Cast.getValueType();
3497 EVT SrcTy = Src.getValueType();
 // As written, sources with a simple type are not combined.
3498 if (SrcTy.isSimple())
3499 return SDValue();
 // The source and the truncated result must have the same element type...
3500 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3501 return SDValue();
 // ...and the source must have twice as many elements as the cast result.
3502 unsigned SrcLen = SrcTy.getVectorNumElements();
3503 unsigned CastLen = CastTy.getVectorNumElements();
3504 if (2 * CastLen != SrcLen)
3505 return SDValue();
3506
 // Mask = <0, 2, 4, ..., 1, 3, 5, ...>: evens in the first half, odds in
 // the second.
3508 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3509 Mask[i] = 2 * i;
3510 Mask[i + CastLen] = 2 * i + 1;
3511 }
3512 SDValue Deal =
3513 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3514 return opSplit(Deal, dl, DAG).first;
3515}
3516
3517SDValue
3518HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3519 SDValue Op, DAGCombinerInfo &DCI) const {
3520 // Fold
3521 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3522 // into
3523 // shuffle (concat x, y), undef, m3
3524 if (Op.getNumOperands() != 2)
3525 return SDValue();
3526
3527 SelectionDAG &DAG = DCI.DAG;
3528 const SDLoc &dl(Op);
3529 SDValue V0 = Op.getOperand(0);
3530 SDValue V1 = Op.getOperand(1);
3531
3532 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3533 return SDValue();
3534 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3535 return SDValue();
3536
3537 SetVector<SDValue> Order;
3538 Order.insert(V0.getOperand(0));
3539 Order.insert(V0.getOperand(1));
3540 Order.insert(V1.getOperand(0));
3541 Order.insert(V1.getOperand(1));
3542
3543 if (Order.size() > 2)
3544 return SDValue();
3545
3546 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3547 // result must be the same.
3548 EVT InpTy = V0.getValueType();
3549 assert(InpTy.isVector());
3550 unsigned InpLen = InpTy.getVectorNumElements();
3551
3552 SmallVector<int, 128> LongMask;
3553 auto AppendToMask = [&](SDValue Shuffle) {
3554 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3555 ArrayRef<int> Mask = SV->getMask();
3556 SDValue X = Shuffle.getOperand(0);
3557 SDValue Y = Shuffle.getOperand(1);
3558 for (int M : Mask) {
3559 if (M == -1) {
3560 LongMask.push_back(M);
3561 continue;
3562 }
3563 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3564 if (static_cast<unsigned>(M) >= InpLen)
3565 M -= InpLen;
3566
3567 int OutOffset = Order[0] == Src ? 0 : InpLen;
3568 LongMask.push_back(M + OutOffset);
3569 }
3570 };
3571
3572 AppendToMask(V0);
3573 AppendToMask(V1);
3574
3575 SDValue C0 = Order.front();
3576 SDValue C1 = Order.back(); // Can be same as front
3577 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3578
3579 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3580 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3581}
3582
// HVX-specific DAG combines. TRUNCATE and CONCAT_VECTORS are handed to their
// dedicated combines at any stage; all other combines below are skipped while
// DCI.isBeforeLegalizeOps() is true. Returns an empty SDValue when nothing
// was combined.
3583SDValue
3584HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3585 const {
3586 const SDLoc &dl(N);
3587 SelectionDAG &DAG = DCI.DAG;
3588 SDValue Op(N, 0);
3589 unsigned Opc = Op.getOpcode();
3590
3591 SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end());
3592
3593 if (Opc == ISD::TRUNCATE)
3594 return combineTruncateBeforeLegal(Op, DCI);
3595 if (Opc == ISD::CONCAT_VECTORS)
3596 return combineConcatVectorsBeforeLegal(Op, DCI);
3597
3598 if (DCI.isBeforeLegalizeOps())
3599 return SDValue();
3600
3601 switch (Opc) {
3602 case ISD::VSELECT: {
3603 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3604 SDValue Cond = Ops[0];
3605 if (Cond->getOpcode() == ISD::XOR) {
3606 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3607 if (C1->getOpcode() == HexagonISD::QTRUE)
3608 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3609 }
3610 break;
3611 }
3612 case HexagonISD::V2Q:
 // V2Q of a splat of a constant folds to QFALSE (zero) or QTRUE (nonzero).
3613 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3614 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3615 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3616 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3617 }
3618 break;
3619 case HexagonISD::Q2V:
 // Q2V of QTRUE folds to a splat of -1; Q2V of QFALSE folds to zero.
3620 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3621 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3622 DAG.getConstant(-1, dl, MVT::i32));
3623 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3624 return getZero(dl, ty(Op), DAG);
3625 break;
 // NOTE(review): the case label for the next branch is not visible in this
 // listing. The branch returns operand 0 when operand 1 is undef.
3627 if (isUndef(Ops[1]))
3628 return Ops[0];;
3629 break;
3630 case HexagonISD::VROR: {
 // (vror (vror v, r1), r0) -> (vror v, r0 + r1)
3631 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3632 SDValue Vec = Ops[0].getOperand(0);
3633 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3634 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3635 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3636 }
3637 break;
3638 }
3639 }
3640
3641 return SDValue();
3642}
3643
// Decide whether a vector of type Ty should be split to obtain HVX vectors.
// Types that already are HVX vector types are never split.
// NOTE(review): the condition guarding the first return after Action is not
// visible in this listing; presumably it tests Action for the "split" action
// -- confirm. When it holds, the answer is whether the legalized form of Ty
// is an HVX vector type.
3644bool
3645HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3646 if (Subtarget.isHVXVectorType(Ty, true))
3647 return false;
3648 auto Action = getPreferredHvxVectorAction(Ty);
3650 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3651 return false;
3652}
3653
// Decide whether a vector of type Ty should be widened to obtain an HVX
// vector. Types that already are HVX vector types are never widened.
// NOTE(review): the condition guarding the first return after Action is not
// visible in this listing; presumably it tests Action for the "widen" action
// -- confirm. When it holds, the answer is whether the legalized form of Ty
// is an HVX vector type.
3654bool
3655HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3656 if (Subtarget.isHVXVectorType(Ty, true))
3657 return false;
3658 auto Action = getPreferredHvxVectorAction(Ty);
3660 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3661 return false;
3662}
3663
3664bool
3665HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3666 if (!Subtarget.useHVXOps())
3667 return false;
3668 // If the type of any result, or any operand type are HVX vector types,
3669 // this is an HVX operation.
3670 auto IsHvxTy = [this](EVT Ty) {
3671 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3672 };
3673 auto IsHvxOp = [this](SDValue Op) {
3674 return Op.getValueType().isSimple() &&
3675 Subtarget.isHVXVectorType(ty(Op), true);
3676 };
3677 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3678 return true;
3679
3680 // Check if this could be an HVX operation after type widening.
3681 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3682 if (!Op.getValueType().isSimple())
3683 return false;
3684 MVT ValTy = ty(Op);
3685 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3686 };
3687
3688 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3689 if (IsWidenedToHvx(SDValue(N, i)))
3690 return true;
3691 }
3692 return llvm::any_of(N->ops(), IsWidenedToHvx);
3693}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Function Alias Analysis Results
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
IRTranslator LLVM IR MI
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
This file provides utility analysis objects describing memory locations.
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5450
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1349
This is an important base class in LLVM.
Definition: Constant.h:41
A debug info location.
Definition: DebugLoc.h:33
const HexagonInstrInfo * getInstrInfo() const override
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
ArrayRef< MVT > getHVXElementTypes() const
bool useHVXFloatingPoint() const
unsigned getVectorLength() const
bool isHVXElementType(MVT Ty, bool IncludeBool=false) const
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
A vector that has set insertion semantics.
Definition: SetVector.h:51
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:88
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:152
const value_type & front() const
Return the first element of the SetVector.
Definition: SetVector.h:133
const value_type & back() const
Return the last element of the SetVector.
Definition: SetVector.h:139
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...