// LLVM 23.0.0git
// HexagonISelLoweringHVX.cpp
// Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
// Command-line threshold (in bytes) controlling when short vectors are
// widened to full HVX vectors; only its presence/value is consulted in
// getPreferredHvxVectorAction below.
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
// Opt-in (default false) flag for the fast FP conversion lowering.
34static cl::opt<bool>
35 EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false),
36 cl::desc("Enable FP fast conversion routine."));
37
// Legal single-vector (V) and vector-pair (W) integer types for the
// 64-byte and 128-byte HVX modes, ordered i8/i16/i32.
38static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
39static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
40static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
41static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
42
43static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
44 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
45 MVT ElemTy = Ty.getScalarType();
46 switch (ElemTy.SimpleTy) {
47 case MVT::f16:
48 return std::make_tuple(5, 15, 10);
49 case MVT::f32:
50 return std::make_tuple(8, 127, 23);
51 case MVT::f64:
52 return std::make_tuple(11, 1023, 52);
53 default:
54 break;
55 }
56 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
57}
58
// Register the HVX vector types with their register classes (single
// vectors in HvxVR, pairs in HvxWR, predicates in HvxQR) and configure
// the per-type operation actions (Legal/Custom/Promote/Expand) for the
// current subtarget configuration.
59void
60HexagonTargetLowering::initializeHVXLowering() {
61 if (Subtarget.useHVX64BOps()) {
62 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
63 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
64 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
65 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
66 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
67 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
68 // These "short" boolean vector types should be legal because
69 // they will appear as results of vector compares. If they were
70 // not legal, type legalization would try to make them legal
71 // and that would require using operations that do not use or
72 // produce such types. That, in turn, would imply using custom
73 // nodes, which would be unoptimizable by the DAG combiner.
74 // The idea is to rely on target-independent operations as much
75 // as possible.
76 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
77 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
78 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
79 } else if (Subtarget.useHVX128BOps()) {
80 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
81 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
82 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
83 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
84 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
85 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
86 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
87 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
88 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
// FP vector types require both v68+ and the HVX floating-point feature.
89 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
90 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
91 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
92 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
93 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
94 }
// bf16 vectors only exist from v81 onwards.
95 if (Subtarget.useHVXV81Ops()) {
96 addRegisterClass(MVT::v64bf16, &Hexagon::HvxVRRegClass);
97 addRegisterClass(MVT::v128bf16, &Hexagon::HvxWRRegClass);
98 }
99 }
100
101 // Set up operation actions.
102
103 bool Use64b = Subtarget.useHVX64BOps();
104 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
105 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
106 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
107 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
108 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
109
// Helper: record ToTy as the promotion target for (Opc, FromTy).
110 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
112 AddPromotedToType(Opc, FromTy, ToTy);
113 };
114
115 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
116 // Note: v16i1 -> i16 is handled in type legalization instead of op
117 // legalization.
127
128 if (Subtarget.useHVX128BOps()) {
131 }
132 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
133 Subtarget.useHVXFloatingPoint()) {
134
135 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
136 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
137
138 for (MVT T : FloatV) {
144
147
150
153 // Custom-lower BUILD_VECTOR. The standard (target-independent)
154 // handling of it would convert it to a load, which is not always
155 // the optimal choice.
157 }
158
159
160 // BUILD_VECTOR with f16 operands cannot be promoted without
161 // promoting the result, so lower the node to vsplat or constant pool
165
166 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
167 // generated.
168 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
169 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
170 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
171 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
172
// bf16 arithmetic/compares are promoted to f32 vectors; loads/stores
// (including masked forms) get custom lowering.
173 if (Subtarget.useHVXV81Ops()) {
174 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
175 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
176 setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
177 setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
178 setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
179 setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
180 setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
181 setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);
182
186
187 setOperationAction(ISD::LOAD, MVT::v128bf16, Custom);
188 setOperationAction(ISD::STORE, MVT::v128bf16, Custom);
189
190 setOperationAction(ISD::MLOAD, MVT::v64bf16, Custom);
191 setOperationAction(ISD::MSTORE, MVT::v64bf16, Custom);
194
195 setOperationAction(ISD::MLOAD, MVT::v128bf16, Custom);
196 setOperationAction(ISD::MSTORE, MVT::v128bf16, Custom);
199
203 }
204
205 for (MVT P : FloatW) {
215
216 // Custom-lower BUILD_VECTOR. The standard (target-independent)
217 // handling of it would convert it to a load, which is not always
218 // the optimal choice.
220 // Make concat-vectors custom to handle concats of more than 2 vectors.
222
225 }
226
227 if (Subtarget.useHVXQFloatOps()) {
230 } else if (Subtarget.useHVXIEEEFPOps()) {
233 }
234 }
235
// Actions for the single-vector integer types.
236 for (MVT T : LegalV) {
239
255 if (T != ByteV) {
259 }
260
263 if (T.getScalarType() != MVT::i32) {
266 }
267
272 if (T.getScalarType() != MVT::i32) {
275 }
276
278 // Make concat-vectors custom to handle concats of more than 2 vectors.
289 if (T != ByteV) {
291 // HVX only has shifts of words and halfwords.
295
296 // Promote all shuffles to operate on vectors of bytes.
297 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
298 }
299
300 if (Subtarget.useHVXFloatingPoint()) {
301 // Same action for both QFloat and IEEE.
306 }
307
315 }
316
// Actions for the vector-pair integer types.
317 for (MVT T : LegalW) {
318 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
319 // independent) handling of it would convert it to a load, which is
320 // not always the optimal choice.
322 // Make concat-vectors custom to handle concats of more than 2 vectors.
324
325 // Custom-lower these operations for pairs. Expand them into a concat
326 // of the corresponding operations on individual vectors.
335
344
359 if (T != ByteW) {
363
364 // Promote all shuffles to operate on vectors of bytes.
365 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
366 }
369
372 if (T.getScalarType() != MVT::i32) {
375 }
376
377 if (Subtarget.useHVXFloatingPoint()) {
378 // Same action for both QFloat and IEEE.
383 }
384 }
385
386 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
387 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
388 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
391
// Expand all FP condition codes that have no direct HVX compare; only
// the codes not listed here stay legal.
392 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
393 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
394 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
395 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
396 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
397 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
398 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
399 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
400 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
401 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
402 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
403 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
404 setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
405 setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
406
407 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
408 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
409 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
410 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
411 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
412 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
413 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
414 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
415 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
416 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
417 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
418 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
419 setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
420 setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
421
422 // Boolean vectors.
423
424 for (MVT T : LegalW) {
425 // Boolean types for vector pairs will overlap with the boolean
426 // types for single vectors, e.g.
427 // v64i8 -> v64i1 (single)
428 // v64i16 -> v64i1 (pair)
429 // Set these actions first, and allow the single actions to overwrite
430 // any duplicates.
431 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
436 // Masked load/store takes a mask that may need splitting.
439 }
440
441 for (MVT T : LegalV) {
442 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
453 }
454
455 if (Use64b) {
456 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
458 } else {
459 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
461 }
462
463 // Handle store widening for short vectors.
464 unsigned HwLen = Subtarget.getVectorLength();
465 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
466 if (ElemTy == MVT::i1)
467 continue;
468 int ElemWidth = ElemTy.getFixedSizeInBits();
469 int MaxElems = (8*HwLen) / ElemWidth;
// Walk power-of-two element counts below a full vector.
470 for (int N = 2; N < MaxElems; N *= 2) {
471 MVT VecTy = MVT::getVectorVT(ElemTy, N);
472 auto Action = getPreferredVectorAction(VecTy);
481 if (Subtarget.useHVXFloatingPoint()) {
486 }
487
488 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
489 if (!isTypeLegal(BoolTy))
491 }
492 }
493 }
494
495 // Include cases which are not handled earlier
499
501}
502
// Decide the preferred type-legalization action for VecTy.
// Returns ~0u to mean "no preference — defer to the default action";
// any other value is a TargetLoweringBase legalize-type action.
503unsigned
504HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
505 // Early exit for invalid input types
506 if (!VecTy.isVector())
507 return ~0u;
508
509 MVT ElemTy = VecTy.getVectorElementType();
510 unsigned VecLen = VecTy.getVectorNumElements();
511 unsigned HwLen = Subtarget.getVectorLength();
512
513 // Split vectors of i1 that exceed byte vector length.
514 if (ElemTy == MVT::i1 && VecLen > HwLen)
516
517 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
518 // For shorter vectors of i1, widen them if any of the corresponding
519 // vectors of integers needs to be widened.
520 if (ElemTy == MVT::i1) {
// Recurse with each native element type at the same element count;
// the first type that expresses a preference wins.
521 for (MVT T : Tys) {
522 assert(T != MVT::i1);
523 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
524 if (A != ~0u)
525 return A;
526 }
527 return ~0u;
528 }
529
530 // If the size of VecTy is at least half of the vector length,
531 // widen the vector. Note: the threshold was not selected in
532 // any scientific way.
533 if (llvm::is_contained(Tys, ElemTy)) {
534 unsigned VecWidth = VecTy.getSizeInBits();
535 unsigned HwWidth = 8*HwLen;
536 if (VecWidth > 2*HwWidth)
538
// An explicit -hexagon-hvx-widen on the command line overrides the
// built-in half-vector heuristic below.
539 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
540 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
542 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
544 }
545
546 // Defer to default.
547 return ~0u;
548}
549
// Per-node-kind refinement of the "Custom" operation action for
// Hexagon-specific SDNodes; the *MUL_LOHI nodes are singled out here.
550unsigned
551HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
552 unsigned Opc = Op.getOpcode();
553 switch (Opc) {
554 case HexagonISD::SMUL_LOHI:
555 case HexagonISD::UMUL_LOHI:
556 case HexagonISD::USMUL_LOHI:
558 }
560}
561
// Build an INTRINSIC_WO_CHAIN node for intrinsic IntId of result type
// ResTy: operand 0 is the intrinsic id as an i32 constant, followed by
// the caller-supplied operands Ops.
563HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
564 const SDLoc &dl, SelectionDAG &DAG) const {
566 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
567 append_range(IntOps, Ops);
568 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
569}
570
571MVT
572HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
573 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
574
575 MVT ElemTy = Tys.first.getVectorElementType();
576 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
577 Tys.second.getVectorNumElements());
578}
579
580HexagonTargetLowering::TypePair
581HexagonTargetLowering::typeSplit(MVT VecTy) const {
582 assert(VecTy.isVector());
583 unsigned NumElem = VecTy.getVectorNumElements();
584 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
585 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
586 return { HalfTy, HalfTy };
587}
588
589MVT
590HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
591 MVT ElemTy = VecTy.getVectorElementType();
592 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
593 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
594}
595
596MVT
597HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
598 MVT ElemTy = VecTy.getVectorElementType();
599 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
600 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
601}
602
// Bitcast Vec so that its elements have type ElemTy (same overall bit
// width); returns Vec unchanged if it already has that element type.
604HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
605 SelectionDAG &DAG) const {
606 if (ty(Vec).getVectorElementType() == ElemTy)
607 return Vec;
608 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
609 return DAG.getBitcast(CastTy, Vec);
610}
611
// Concatenate the two vectors in Ops into a single vector whose type is
// the join of the operand types (see typeJoin).
613HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
614 SelectionDAG &DAG) const {
615 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
616 Ops.first, Ops.second);
617}
618
619HexagonTargetLowering::VectorPair
620HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
621 SelectionDAG &DAG) const {
622 TypePair Tys = typeSplit(ty(Vec));
623 if (Vec.getOpcode() == HexagonISD::QCAT)
624 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
625 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
626}
627
628bool
629HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
630 return Subtarget.isHVXVectorType(Ty) &&
631 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
632}
633
634bool
635HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
636 return Subtarget.isHVXVectorType(Ty) &&
637 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
638}
639
640bool
641HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
642 return Subtarget.isHVXVectorType(Ty, true) &&
643 Ty.getVectorElementType() == MVT::i1;
644}
645
646bool HexagonTargetLowering::allowsHvxMemoryAccess(
647 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
648 // Bool vectors are excluded by default, but make it explicit to
649 // emphasize that bool vectors cannot be loaded or stored.
650 // Also, disallow double vector stores (to prevent unnecessary
651 // store widening in DAG combiner).
652 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
653 return false;
654 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
655 return false;
656 if (Fast)
657 *Fast = 1;
658 return true;
659}
660
661bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
662 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
663 if (!Subtarget.isHVXVectorType(VecTy))
664 return false;
665 // XXX Should this be false? vmemu are a bit slower than vmem.
666 if (Fast)
667 *Fast = 1;
668 return true;
669}
670
671void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
672 MachineInstr &MI, SDNode *Node) const {
673 unsigned Opc = MI.getOpcode();
674 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
675 MachineBasicBlock &MB = *MI.getParent();
676 MachineFunction &MF = *MB.getParent();
677 MachineRegisterInfo &MRI = MF.getRegInfo();
678 DebugLoc DL = MI.getDebugLoc();
679 auto At = MI.getIterator();
680
681 switch (Opc) {
682 case Hexagon::PS_vsplatib:
683 if (Subtarget.useHVXV62Ops()) {
684 // SplatV = A2_tfrsi #imm
685 // OutV = V6_lvsplatb SplatV
686 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
687 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
688 .add(MI.getOperand(1));
689 Register OutV = MI.getOperand(0).getReg();
690 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
691 .addReg(SplatV);
692 } else {
693 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
694 // OutV = V6_lvsplatw SplatV
695 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
696 const MachineOperand &InpOp = MI.getOperand(1);
697 assert(InpOp.isImm());
698 uint32_t V = InpOp.getImm() & 0xFF;
699 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
700 .addImm(V << 24 | V << 16 | V << 8 | V);
701 Register OutV = MI.getOperand(0).getReg();
702 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
703 }
704 MB.erase(At);
705 break;
706 case Hexagon::PS_vsplatrb:
707 if (Subtarget.useHVXV62Ops()) {
708 // OutV = V6_lvsplatb Inp
709 Register OutV = MI.getOperand(0).getReg();
710 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
711 .add(MI.getOperand(1));
712 } else {
713 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
714 const MachineOperand &InpOp = MI.getOperand(1);
715 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
716 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
717 Register OutV = MI.getOperand(0).getReg();
718 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
719 .addReg(SplatV);
720 }
721 MB.erase(At);
722 break;
723 case Hexagon::PS_vsplatih:
724 if (Subtarget.useHVXV62Ops()) {
725 // SplatV = A2_tfrsi #imm
726 // OutV = V6_lvsplath SplatV
727 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
728 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
729 .add(MI.getOperand(1));
730 Register OutV = MI.getOperand(0).getReg();
731 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
732 .addReg(SplatV);
733 } else {
734 // SplatV = A2_tfrsi #imm:#imm
735 // OutV = V6_lvsplatw SplatV
736 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
737 const MachineOperand &InpOp = MI.getOperand(1);
738 assert(InpOp.isImm());
739 uint32_t V = InpOp.getImm() & 0xFFFF;
740 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
741 .addImm(V << 16 | V);
742 Register OutV = MI.getOperand(0).getReg();
743 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
744 }
745 MB.erase(At);
746 break;
747 case Hexagon::PS_vsplatrh:
748 if (Subtarget.useHVXV62Ops()) {
749 // OutV = V6_lvsplath Inp
750 Register OutV = MI.getOperand(0).getReg();
751 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
752 .add(MI.getOperand(1));
753 } else {
754 // SplatV = A2_combine_ll Inp, Inp
755 // OutV = V6_lvsplatw SplatV
756 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
757 const MachineOperand &InpOp = MI.getOperand(1);
758 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
759 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
760 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
761 Register OutV = MI.getOperand(0).getReg();
762 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
763 }
764 MB.erase(At);
765 break;
766 case Hexagon::PS_vsplatiw:
767 case Hexagon::PS_vsplatrw:
768 if (Opc == Hexagon::PS_vsplatiw) {
769 // SplatV = A2_tfrsi #imm
770 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
771 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
772 .add(MI.getOperand(1));
773 MI.getOperand(1).ChangeToRegister(SplatV, false);
774 }
775 // OutV = V6_lvsplatw SplatV/Inp
776 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
777 break;
778 }
779}
780
// Convert an element index (for elements of type ElemTy) into a byte
// index: shift left by log2 of the element size. The index is first
// bitcast to i32 if needed; byte elements need no scaling.
782HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
783 SelectionDAG &DAG) const {
784 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
785 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
786
787 unsigned ElemWidth = ElemTy.getSizeInBits();
788 if (ElemWidth == 8)
789 return ElemIdx;
790
791 unsigned L = Log2_32(ElemWidth/8);
792 const SDLoc &dl(ElemIdx);
793 return DAG.getNode(ISD::SHL, dl, MVT::i32,
794 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
795}
796
// Reduce an element index to its position within a 32-bit word by
// masking with (elements-per-word - 1). Elements must be 8..32 bits
// wide; 32-bit elements need no adjustment.
798HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
799 SelectionDAG &DAG) const {
800 unsigned ElemWidth = ElemTy.getSizeInBits();
801 assert(ElemWidth >= 8 && ElemWidth <= 32);
802 if (ElemWidth == 32)
803 return Idx;
804
805 if (ty(Idx) != MVT::i32)
806 Idx = DAG.getBitcast(MVT::i32, Idx);
807 const SDLoc &dl(Idx);
808 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
809 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
810 return SubIdx;
811}
812
// Lower a shuffle of Op0/Op1 with Mask to an equivalent shuffle on
// byte vectors: the operands are bitcast to i8 elements and each mask
// entry is expanded to ElemSize consecutive byte indices (runs of -1
// for undef entries). i8-element shuffles pass through unchanged.
814HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
815 SDValue Op1, ArrayRef<int> Mask,
816 SelectionDAG &DAG) const {
817 MVT OpTy = ty(Op0);
818 assert(OpTy == ty(Op1));
819
820 MVT ElemTy = OpTy.getVectorElementType();
821 if (ElemTy == MVT::i8)
822 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
823 assert(ElemTy.getSizeInBits() >= 8);
824
825 MVT ResTy = tyVector(OpTy, MVT::i8);
826 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
827
828 SmallVector<int,128> ByteMask;
829 for (int M : Mask) {
830 if (M < 0) {
// Undef element: emit ElemSize undef byte positions.
831 for (unsigned I = 0; I != ElemSize; ++I)
832 ByteMask.push_back(-1);
833 } else {
// Element M maps to bytes [M*ElemSize, M*ElemSize+ElemSize).
834 int NewM = M*ElemSize;
835 for (unsigned I = 0; I != ElemSize; ++I)
836 ByteMask.push_back(NewM+I);
837 }
838 }
839 assert(ResTy.getVectorNumElements() == ByteMask.size());
840 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
841 opCastElem(Op1, MVT::i8, DAG), ByteMask);
842}
843
// Build a single HVX vector register from the scalar Values (one per
// element). Strategies, tried in order: splat (undef/zero/general),
// constant-pool load for all-constant inputs, a shuffle when all
// elements are extracts from one source vector, and finally a generic
// construction that inserts words into two halves and ORs them.
845HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
846 const SDLoc &dl, MVT VecTy,
847 SelectionDAG &DAG) const {
848 unsigned VecLen = Values.size();
849 MachineFunction &MF = DAG.getMachineFunction();
850 MVT ElemTy = VecTy.getVectorElementType();
851 unsigned ElemWidth = ElemTy.getSizeInBits();
852 unsigned HwLen = Subtarget.getVectorLength();
853
854 unsigned ElemSize = ElemWidth / 8;
855 assert(ElemSize*VecLen == HwLen);
857
// Normalize the input into Words: a list of 32-bit values covering the
// whole vector. Sub-word elements are packed via buildVector32 first.
858 if (VecTy.getVectorElementType() != MVT::i32 &&
859 !(Subtarget.useHVXFloatingPoint() &&
860 VecTy.getVectorElementType() == MVT::f32)) {
861 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
862 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
863 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
864 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
865 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
866 Words.push_back(DAG.getBitcast(MVT::i32, W));
867 }
868 } else {
869 for (SDValue V : Values)
870 Words.push_back(DAG.getBitcast(MVT::i32, V));
871 }
// True if all non-undef values are the same node; SplatV receives that
// node (or Values[0] when everything is undef).
872 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
873 unsigned NumValues = Values.size();
874 assert(NumValues > 0);
875 bool IsUndef = true;
876 for (unsigned i = 0; i != NumValues; ++i) {
877 if (Values[i].isUndef())
878 continue;
879 IsUndef = false;
880 if (!SplatV.getNode())
881 SplatV = Values[i];
882 else if (SplatV != Values[i])
883 return false;
884 }
885 if (IsUndef)
886 SplatV = Values[0];
887 return true;
888 };
889
890 unsigned NumWords = Words.size();
891 SDValue SplatV;
892 bool IsSplat = isSplat(Words, SplatV);
893 if (IsSplat && isUndef(SplatV))
894 return DAG.getUNDEF(VecTy);
895 if (IsSplat) {
896 assert(SplatV.getNode());
897 if (isNullConstant(SplatV))
898 return getZero(dl, VecTy, DAG);
// Splat as i32 words and bitcast back to the requested type.
899 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
900 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
901 return DAG.getBitcast(VecTy, S);
902 }
903
904 // Delay recognizing constant vectors until here, so that we can generate
905 // a vsplat.
906 SmallVector<ConstantInt*, 128> Consts(VecLen);
907 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
908 if (AllConst) {
909 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
910 (Constant**)Consts.end());
911 Constant *CV = ConstantVector::get(Tmp);
912 Align Alignment(HwLen);
914 DAG.getConstantPool(CV, getPointerTy(DAG.getDataLayout()), Alignment),
915 DAG);
916 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
918 }
919
920 // A special case is a situation where the vector is built entirely from
921 // elements extracted from another vector. This could be done via a shuffle
922 // more efficiently, but typically, the size of the source vector will not
923 // match the size of the vector being built (which precludes the use of a
924 // shuffle directly).
925 // This only handles a single source vector, and the vector being built
926 // should be of a sub-vector type of the source vector type.
927 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
928 SmallVectorImpl<int> &SrcIdx) {
929 SDValue Vec;
930 for (SDValue V : Values) {
931 if (isUndef(V)) {
932 SrcIdx.push_back(-1);
933 continue;
934 }
935 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
936 return false;
937 // All extracts should come from the same vector.
938 SDValue T = V.getOperand(0);
939 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
940 return false;
941 Vec = T;
// The extract index must be a known constant.
942 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
943 if (C == nullptr)
944 return false;
945 int I = C->getSExtValue();
946 assert(I >= 0 && "Negative element index");
947 SrcIdx.push_back(I);
948 }
949 SrcVec = Vec;
950 return true;
951 };
952
953 SmallVector<int,128> ExtIdx;
954 SDValue ExtVec;
955 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
956 MVT ExtTy = ty(ExtVec);
957 unsigned ExtLen = ExtTy.getVectorNumElements();
958 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
959 // Construct a new shuffle mask that will produce a vector with the same
960 // number of elements as the input vector, and such that the vector we
961 // want will be the initial subvector of it.
962 SmallVector<int,128> Mask;
963 BitVector Used(ExtLen);
964
965 for (int M : ExtIdx) {
966 Mask.push_back(M);
967 if (M >= 0)
968 Used.set(M);
969 }
970 // Fill the rest of the mask with the unused elements of ExtVec in hopes
971 // that it will result in a permutation of ExtVec's elements. It's still
972 // fine if it doesn't (e.g. if undefs are present, or elements are
973 // repeated), but permutations can always be done efficiently via vdelta
974 // and vrdelta.
975 for (unsigned I = 0; I != ExtLen; ++I) {
976 if (Mask.size() == ExtLen)
977 break;
978 if (!Used.test(I))
979 Mask.push_back(I);
980 }
981
982 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
983 DAG.getUNDEF(ExtTy), Mask);
// A double-size source yields the result in the low half.
984 return ExtLen == VecLen ? S : LoHalf(S, DAG);
985 }
986 }
987
988 // Find most common element to initialize vector with. This is to avoid
989 // unnecessary vinsert/valign for cases where the same value is present
990 // many times. Creates a histogram of the vector's elements to find the
991 // most common element n.
992 assert(4*Words.size() == Subtarget.getVectorLength());
993 int VecHist[32];
994 int n = 0;
995 for (unsigned i = 0; i != NumWords; ++i) {
996 VecHist[i] = 0;
997 if (Words[i].isUndef())
998 continue;
999 for (unsigned j = i; j != NumWords; ++j)
1000 if (Words[i] == Words[j])
1001 VecHist[i]++;
1002
1003 if (VecHist[i] > VecHist[n])
1004 n = i;
1005 }
1006
// Seed both halves with zero, or with a splat of the most common word
// when it occurs more than once.
1007 SDValue HalfV = getZero(dl, VecTy, DAG);
1008 if (VecHist[n] > 1) {
1009 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
1010 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
1011 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
1012 }
1013 SDValue HalfV0 = HalfV;
1014 SDValue HalfV1 = HalfV;
1015
1016 // Construct two halves in parallel, then or them together. Rn and Rm count
1017 // number of rotations needed before the next element. One last rotation is
1018 // performed post-loop to position the last element.
1019 int Rn = 0, Rm = 0;
1020 SDValue Sn, Sm;
1021 SDValue N = HalfV0;
1022 SDValue M = HalfV1;
1023 for (unsigned i = 0; i != NumWords/2; ++i) {
1024 // Rotate by element count since last insertion.
1025 if (Words[i] != Words[n] || VecHist[n] <= 1) {
1026 Sn = DAG.getConstant(Rn, dl, MVT::i32);
1027 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
1028 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
1029 {HalfV0, Words[i]});
1030 Rn = 0;
1031 }
1032 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
1033 Sm = DAG.getConstant(Rm, dl, MVT::i32);
1034 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1035 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
1036 {HalfV1, Words[i+NumWords/2]});
1037 Rm = 0;
1038 }
1039 Rn += 4;
1040 Rm += 4;
1041 }
1042 // Perform last rotation.
1043 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
1044 Sm = DAG.getConstant(Rm, dl, MVT::i32);
1045 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
1046 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1047
// Combine the two halves with a bitwise OR on i32 words, then bitcast
// back to the requested element type.
1048 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
1049 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
1050
1051 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
1052
1053 SDValue OutV =
1054 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
1055 return OutV;
1056}
1057
// Create a byte vector ("prefix") in which the first
// PredTy-elements * BitBytes bytes encode the predicate PredV, one byte
// per predicate bit (each byte all-ones or all-zeros). If ZeroFill is
// true, the bytes past that prefix are zeroed; otherwise they are
// unspecified.
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  MVT PredTy = ty(PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);

  if (Subtarget.isHVXVectorType(PredTy, true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Build a shuffle mask that gathers every Scale-th byte into the
    // first BlockLen positions; the remaining source bytes are spread
    // over the later BlockLen-sized blocks (their values don't matter).
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
    // Q has its first BlockLen lanes set; AND-ing its byte image with S
    // zeroes everything past the prefix.
    SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                         {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
    SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
    return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Bytes per predicate bit in the 8-byte P2D image of the predicate.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Two word lists used alternately as source/destination per iteration.
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  // P2D turns a scalar predicate into a 64-bit value with one byte
  // (all-ones/all-zeros) per predicate bit.
  SDValue W0 = isUndef(PredV)
                  ? DAG.getUNDEF(MVT::i64)
                  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
  if (Bytes < BitBytes) {
    Words[IdxW].push_back(HiHalf(W0, DAG));
    Words[IdxW].push_back(LoHalf(W0, DAG));
  } else
    Words[IdxW].push_back(W0);

  // Expand: double the per-bit byte count until it reaches BitBytes.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      // Each word still holds multiple predicate bits: widen each byte
      // to two bytes via expandPredicate, splitting the 64-bit result.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(W, dl, DAG);
        Words[IdxW].push_back(HiHalf(T, DAG));
        Words[IdxW].push_back(LoHalf(T, DAG));
      }
    } else {
      // A word already represents a single bit: just duplicate it.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
        Words[IdxW].push_back(W);
      }
    }
    Bytes *= 2;
  }

  // Contract: halve the per-bit byte count until it drops to BitBytes.
  while (Bytes > BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes <= 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = contractPredicate(W, dl, DAG);
        Words[IdxW].push_back(T);
      }
    } else {
      // Still more than a word per bit: dropping every other word
      // loses no information, since adjacent words are duplicates.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
      }
    }
    Bytes /= 2;
  }

  assert(Bytes == BitBytes);
  // NOTE(review): special case for v2i1 with BitBytes == 1 switches the
  // working type to an i16 element vector — verify the intent against
  // the callers (looks related to VINSERTW0 lane accounting).
  if (BitBytes == 1 && PredTy == MVT::v2i1)
    ByteTy = MVT::getVectorVT(MVT::i16, HwLen);

  // Insert the words at the front of the vector, rotating by HwLen-4
  // before each insert so earlier words end up at increasing offsets.
  SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
  SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
    Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
  }

  return Vec;
}
1164
1165SDValue
1166HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1167 const SDLoc &dl, MVT VecTy,
1168 SelectionDAG &DAG) const {
1169 // Construct a vector V of bytes, such that a comparison V >u 0 would
1170 // produce the required vector predicate.
1171 unsigned VecLen = Values.size();
1172 unsigned HwLen = Subtarget.getVectorLength();
1173 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1175 bool AllT = true, AllF = true;
1176
1177 auto IsTrue = [] (SDValue V) {
1178 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1179 return !N->isZero();
1180 return false;
1181 };
1182 auto IsFalse = [] (SDValue V) {
1183 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1184 return N->isZero();
1185 return false;
1186 };
1187
1188 if (VecLen <= HwLen) {
1189 // In the hardware, each bit of a vector predicate corresponds to a byte
1190 // of a vector register. Calculate how many bytes does a bit of VecTy
1191 // correspond to.
1192 assert(HwLen % VecLen == 0);
1193 unsigned BitBytes = HwLen / VecLen;
1194 for (SDValue V : Values) {
1195 AllT &= IsTrue(V);
1196 AllF &= IsFalse(V);
1197
1198 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1199 : DAG.getUNDEF(MVT::i8);
1200 for (unsigned B = 0; B != BitBytes; ++B)
1201 Bytes.push_back(Ext);
1202 }
1203 } else {
1204 // There are as many i1 values, as there are bits in a vector register.
1205 // Divide the values into groups of 8 and check that each group consists
1206 // of the same value (ignoring undefs).
1207 for (unsigned I = 0; I != VecLen; I += 8) {
1208 unsigned B = 0;
1209 // Find the first non-undef value in this group.
1210 for (; B != 8; ++B) {
1211 if (!Values[I+B].isUndef())
1212 break;
1213 }
1214 SDValue F = Values[I+B];
1215 AllT &= IsTrue(F);
1216 AllF &= IsFalse(F);
1217
1218 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1219 : DAG.getUNDEF(MVT::i8);
1220 Bytes.push_back(Ext);
1221 // Verify that the rest of values in the group are the same as the
1222 // first.
1223 for (; B != 8; ++B)
1224 assert(Values[I+B].isUndef() || Values[I+B] == F);
1225 }
1226 }
1227
1228 if (AllT)
1229 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1230 if (AllF)
1231 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1232
1233 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1234 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1235 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1236}
1237
1238SDValue
1239HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1240 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1241 MVT ElemTy = ty(VecV).getVectorElementType();
1242
1243 unsigned ElemWidth = ElemTy.getSizeInBits();
1244 assert(ElemWidth >= 8 && ElemWidth <= 32);
1245 (void)ElemWidth;
1246
1247 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1248 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1249 {VecV, ByteIdx});
1250 if (ElemTy == MVT::i32)
1251 return ExWord;
1252
1253 // Have an extracted word, need to extract the smaller element out of it.
1254 // 1. Extract the bits of (the original) IdxV that correspond to the index
1255 // of the desired element in the 32-bit word.
1256 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1257 // 2. Extract the element from the word.
1258 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1259 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1260}
1261
1262SDValue
1263HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1264 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1265 // Implement other return types if necessary.
1266 assert(ResTy == MVT::i1);
1267
1268 unsigned HwLen = Subtarget.getVectorLength();
1269 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1270 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1271
1272 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1273 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1274 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1275
1276 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1277 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1278 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1279}
1280
// Insert a single (8/16/32-bit) scalar ValV into VecV at element IdxV.
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert a 32-bit word at a given byte index: rotate the vector so the
  // word-aligned target position lands at offset 0, overwrite word 0
  // with VINSERTW0, then rotate back by HwLen - aligned-offset.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Align the byte index down to a multiple of 4 (word boundary).
    SDValue MaskV =
        DAG.getNode(ISD::AND, dl, MVT::i32,
                    {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
    SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
    SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
    SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
                                     dl, MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(ty(Ext), ElemTy);
  SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
                             ValV, SubIdx, dl, ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}
1326
1327SDValue
1328HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1329 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1330 unsigned HwLen = Subtarget.getVectorLength();
1331 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1332 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1333
1334 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1335 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1336 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1337 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1338
1339 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1340 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1341}
1342
// Extract a subvector of type ResTy from an HVX vector (or vector pair)
// at the constant element index IdxV. ResTy is either a full single HVX
// vector (pair case only) or a 32/64-bit short vector.
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
      SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  // IdxV must be a constant here.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
  if (isHvxPairTy(VecTy)) {
    unsigned SubIdx = Hexagon::vsub_lo;
    if (Idx * ElemWidth >= 8 * HwLen) {
      // Subvector lives in the high half; rebase the index onto it.
      SubIdx = Hexagon::vsub_hi;
      Idx -= VecTy.getVectorNumElements() / 2;
    }

    VecTy = typeSplit(VecTy).first;
    VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
    if (VecTy == ResTy)
      return VecV;
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);

  // Reinterpret the vector as i32 words and extract one or two of them.
  MVT WordTy = tyVector(VecTy, MVT::i32);
  SDValue WordVec = DAG.getBitcast(WordTy, VecV);
  unsigned WordIdx = (Idx*ElemWidth) / 32;

  SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
  SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
  if (ResTy.getSizeInBits() == 32)
    return DAG.getBitcast(ResTy, W0);

  // 64-bit result: fetch the next word and combine (W1 high, W0 low).
  SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
  SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
  SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
  return DAG.getBitcast(ResTy, WW);
}
1386
// Extract a subvector from an HVX vector predicate. The result is either
// a shorter HVX vector predicate, or a scalar predicate (v2i1/v4i1/v8i1).
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Number of bytes in ByteVec covered by a single i1 element of VecTy.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
  SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
                  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
}
1450
// Insert SubV (a single HVX vector, or a 32/64-bit short vector) into
// VecV (a single HVX vector or a pair) at element index IdxV. IdxV does
// not need to be a constant.
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(VecTy);
  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(VecV, DAG);
    V1 = HiHalf(VecV, DAG);

    SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
                                    dl, MVT::i32);
    // NOTE(review): SETUGT means IdxV == HalfV picks the low half —
    // presumably callers never pass that boundary index for a non-single
    // SubTy; confirm.
    PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
    if (isHvxSingleTy(SubTy)) {
      // Inserting a full single vector into the pair: with a constant
      // index this is just a subregister insert.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
      SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
      return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
    IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
    SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate so the insertion point comes to byte offset 0.
    IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                       DAG.getConstant(ElemWidth/8, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    SDValue V = DAG.getBitcast(MVT::i32, SubV);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
  } else {
    // 64-bit subvector: insert the low word, rotate by 4 bytes, then
    // insert the high word.
    SDValue V = DAG.getBitcast(MVT::i64, SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
                          DAG.getConstant(4, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
  }

  if (IsPair) {
    // Rebuild the pair with the updated half in the selected position.
    SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
    SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
    return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
  }
  return SingleV;
}
1537
// Insert SubV (an HVX vector predicate or a scalar predicate) into the
// HVX vector predicate VecV at element index IdxV.
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  unsigned BitBytes = HwLen / VecLen;   // Bytes per predicate bit.
  unsigned BlockLen = HwLen / Scale;    // Bytes covered by the subvector.

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // Byte image of SubV with the payload at the front. No zero-fill is
  // needed: the vmux below takes only the first BlockLen bytes from it.
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to byte offset 0.
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1583
1584SDValue
1585HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1586 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1587 // Sign- and any-extending of a vector predicate to a vector register is
1588 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1589 // a vector of 1s (where the 1s are of type matching the vector type).
1590 assert(Subtarget.isHVXVectorType(ResTy));
1591 if (!ZeroExt)
1592 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1593
1594 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1595 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1596 DAG.getConstant(1, dl, MVT::i32));
1597 SDValue False = getZero(dl, ResTy, DAG);
1598 return DAG.getSelect(dl, ResTy, VecV, True, False);
1599}
1600
1601SDValue
1602HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1603 MVT ResTy, SelectionDAG &DAG) const {
1604 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1605 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1606 // vector register. The remaining bits of the vector register are
1607 // unspecified.
1608
1609 MachineFunction &MF = DAG.getMachineFunction();
1610 unsigned HwLen = Subtarget.getVectorLength();
1611 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1612 MVT PredTy = ty(VecQ);
1613 unsigned PredLen = PredTy.getVectorNumElements();
1614 assert(HwLen % PredLen == 0);
1615 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1616
1617 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1619 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1620 // These are bytes with the LSB rotated left with respect to their index.
1621 for (unsigned i = 0; i != HwLen/8; ++i) {
1622 for (unsigned j = 0; j != 8; ++j)
1623 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1624 }
1625 Constant *CV = ConstantVector::get(Tmp);
1626 Align Alignment(HwLen);
1628 DAG.getConstantPool(CV, getPointerTy(DAG.getDataLayout()), Alignment),
1629 DAG);
1630 SDValue Bytes =
1631 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1633
1634 // Select the bytes that correspond to true bits in the vector predicate.
1635 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1636 getZero(dl, VecTy, DAG));
1637 // Calculate the OR of all bytes in each group of 8. That will compress
1638 // all the individual bits into a single byte.
1639 // First, OR groups of 4, via vrmpy with 0x01010101.
1640 SDValue All1 =
1641 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1642 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1643 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1644 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1645 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1646 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1647
1648 // Pick every 8th byte and coalesce them at the beginning of the output.
1649 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1650 // byte and so on.
1651 SmallVector<int,128> Mask;
1652 for (unsigned i = 0; i != HwLen; ++i)
1653 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1654 SDValue Collect =
1655 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1656 return DAG.getBitcast(ResTy, Collect);
1657}
1658
1659SDValue
1660HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1661 const SDLoc &dl, SelectionDAG &DAG) const {
1662 // Take a vector and resize the element type to match the given type.
1663 MVT InpTy = ty(VecV);
1664 if (InpTy == ResTy)
1665 return VecV;
1666
1667 unsigned InpWidth = InpTy.getSizeInBits();
1668 unsigned ResWidth = ResTy.getSizeInBits();
1669
1670 if (InpTy.isFloatingPoint()) {
1671 return InpWidth < ResWidth
1672 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1673 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1674 DAG.getTargetConstant(0, dl, MVT::i32));
1675 }
1676
1677 assert(InpTy.isInteger());
1678
1679 if (InpWidth < ResWidth) {
1680 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1681 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1682 } else {
1683 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1684 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1685 }
1686}
1687
1688SDValue
1689HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1690 SelectionDAG &DAG) const {
1691 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1692
1693 const SDLoc &dl(Vec);
1694 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1695 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1696 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1697}
1698
1699SDValue
1700HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1701 const {
1702 const SDLoc &dl(Op);
1703 MVT VecTy = ty(Op);
1704
1705 unsigned Size = Op.getNumOperands();
1707 for (unsigned i = 0; i != Size; ++i)
1708 Ops.push_back(Op.getOperand(i));
1709
1710 if (VecTy.getVectorElementType() == MVT::i1)
1711 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1712
1713 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1714 // not a legal type, just bitcast the node to use i16
1715 // types and bitcast the result back to f16
1716 if (VecTy.getVectorElementType() == MVT::f16 ||
1717 VecTy.getVectorElementType() == MVT::bf16) {
1719 for (unsigned i = 0; i != Size; i++)
1720 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1721
1722 SDValue T0 =
1723 DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps);
1724 return DAG.getBitcast(tyVector(VecTy, VecTy.getVectorElementType()), T0);
1725 }
1726
1727 // First, split the BUILD_VECTOR for vector pairs. We could generate
1728 // some pairs directly (via splat), but splats should be generated
1729 // by the combiner prior to getting here.
1730 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1732 MVT SingleTy = typeSplit(VecTy).first;
1733 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1734 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1735 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1736 }
1737
1738 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1739}
1740
1741SDValue
1742HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1743 const {
1744 const SDLoc &dl(Op);
1745 MVT VecTy = ty(Op);
1746 MVT ArgTy = ty(Op.getOperand(0));
1747
1748 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1749 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1750 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1751 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1752 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1753 return DAG.getBitcast(VecTy, Splat);
1754 }
1755
1756 return SDValue();
1757}
1758
1759SDValue
1760HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1761 const {
1762 // Vector concatenation of two integer (non-bool) vectors does not need
1763 // special lowering. Custom-lower concats of bool vectors and expand
1764 // concats of more than 2 vectors.
1765 MVT VecTy = ty(Op);
1766 const SDLoc &dl(Op);
1767 unsigned NumOp = Op.getNumOperands();
1768 if (VecTy.getVectorElementType() != MVT::i1) {
1769 if (NumOp == 2)
1770 return Op;
1771 // Expand the other cases into a build-vector.
1773 for (SDValue V : Op.getNode()->ops())
1774 DAG.ExtractVectorElements(V, Elems);
1775 // A vector of i16 will be broken up into a build_vector of i16's.
1776 // This is a problem, since at the time of operation legalization,
1777 // all operations are expected to be type-legalized, and i16 is not
1778 // a legal type. If any of the extracted elements is not of a valid
1779 // type, sign-extend it to a valid one.
1780 for (SDValue &V : Elems) {
1781 MVT Ty = ty(V);
1782 if (!isTypeLegal(Ty)) {
1783 MVT NTy = typeLegalize(Ty, DAG);
1784 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1785 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1786 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1787 V.getOperand(0), V.getOperand(1)),
1788 DAG.getValueType(Ty));
1789 continue;
1790 }
1791 // A few less complicated cases.
1792 switch (V.getOpcode()) {
1793 case ISD::Constant:
1794 V = DAG.getSExtOrTrunc(V, dl, NTy);
1795 break;
1796 case ISD::UNDEF:
1797 V = DAG.getUNDEF(NTy);
1798 break;
1799 case ISD::TRUNCATE:
1800 V = V.getOperand(0);
1801 break;
1802 default:
1803 llvm_unreachable("Unexpected vector element");
1804 }
1805 }
1806 }
1807 return DAG.getBuildVector(VecTy, dl, Elems);
1808 }
1809
1810 assert(VecTy.getVectorElementType() == MVT::i1);
1811 unsigned HwLen = Subtarget.getVectorLength();
1812 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1813
1814 SDValue Op0 = Op.getOperand(0);
1815
1816 // If the operands are HVX types (i.e. not scalar predicates), then
1817 // defer the concatenation, and create QCAT instead.
1818 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1819 if (NumOp == 2)
1820 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1821
1822 ArrayRef<SDUse> U(Op.getNode()->ops());
1825
1826 MVT HalfTy = typeSplit(VecTy).first;
1827 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1828 Ops.take_front(NumOp/2));
1829 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1830 Ops.take_back(NumOp/2));
1831 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1832 }
1833
1834 // Count how many bytes (in a vector register) each bit in VecTy
1835 // corresponds to.
1836 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1837
1838 SmallVector<SDValue,8> Prefixes;
1839 for (SDValue V : Op.getNode()->op_values()) {
1840 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1841 Prefixes.push_back(P);
1842 }
1843
1844 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1845 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1846 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1847 SDValue Res = getZero(dl, ByteTy, DAG);
1848 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1849 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1850 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1851 }
1852 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1853}
1854
1855SDValue
1856HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1857 const {
1858 // Change the type of the extracted element to i32.
1859 SDValue VecV = Op.getOperand(0);
1860 MVT ElemTy = ty(VecV).getVectorElementType();
1861 const SDLoc &dl(Op);
1862 SDValue IdxV = Op.getOperand(1);
1863 if (ElemTy == MVT::i1)
1864 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1865
1866 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1867}
1868
1869SDValue
1870HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1871 const {
1872 const SDLoc &dl(Op);
1873 MVT VecTy = ty(Op);
1874 SDValue VecV = Op.getOperand(0);
1875 SDValue ValV = Op.getOperand(1);
1876 SDValue IdxV = Op.getOperand(2);
1877 MVT ElemTy = ty(VecV).getVectorElementType();
1878 if (ElemTy == MVT::i1)
1879 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1880
1881 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1883 tyVector(VecTy, MVT::i16),
1884 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1885 DAG.getBitcast(MVT::i16, ValV), IdxV);
1886 return DAG.getBitcast(tyVector(VecTy, ElemTy), T0);
1887 }
1888
1889 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1890}
1891
1892SDValue
1893HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1894 const {
1895 SDValue SrcV = Op.getOperand(0);
1896 MVT SrcTy = ty(SrcV);
1897 MVT DstTy = ty(Op);
1898 SDValue IdxV = Op.getOperand(1);
1899 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1900 assert(Idx % DstTy.getVectorNumElements() == 0);
1901 (void)Idx;
1902 const SDLoc &dl(Op);
1903
1904 MVT ElemTy = SrcTy.getVectorElementType();
1905 if (ElemTy == MVT::i1)
1906 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1907
1908 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1909}
1910
1911SDValue
1912HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1913 const {
1914 // Idx does not need to be a constant.
1915 SDValue VecV = Op.getOperand(0);
1916 SDValue ValV = Op.getOperand(1);
1917 SDValue IdxV = Op.getOperand(2);
1918
1919 const SDLoc &dl(Op);
1920 MVT VecTy = ty(VecV);
1921 MVT ElemTy = VecTy.getVectorElementType();
1922 if (ElemTy == MVT::i1)
1923 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1924
1925 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1926}
1927
1928SDValue
1929HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1930 // Lower any-extends of boolean vectors to sign-extends, since they
1931 // translate directly to Q2V. Zero-extending could also be done equally
1932 // fast, but Q2V is used/recognized in more places.
1933 // For all other vectors, use zero-extend.
1934 MVT ResTy = ty(Op);
1935 SDValue InpV = Op.getOperand(0);
1936 MVT ElemTy = ty(InpV).getVectorElementType();
1937 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1938 return LowerHvxSignExt(Op, DAG);
1939 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1940}
1941
1942SDValue
1943HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1944 MVT ResTy = ty(Op);
1945 SDValue InpV = Op.getOperand(0);
1946 MVT ElemTy = ty(InpV).getVectorElementType();
1947 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1948 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1949 return Op;
1950}
1951
1952SDValue
1953HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1954 MVT ResTy = ty(Op);
1955 SDValue InpV = Op.getOperand(0);
1956 MVT ElemTy = ty(InpV).getVectorElementType();
1957 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1958 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1959 return Op;
1960}
1961
1962SDValue
1963HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1964 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1965 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1966 const SDLoc &dl(Op);
1967 MVT ResTy = ty(Op);
1968 SDValue InpV = Op.getOperand(0);
1969 assert(ResTy == ty(InpV));
1970
1971 // Calculate the vectors of 1 and bitwidth(x).
1972 MVT ElemTy = ty(InpV).getVectorElementType();
1973 unsigned ElemWidth = ElemTy.getSizeInBits();
1974
1975 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1976 DAG.getConstant(1, dl, MVT::i32));
1977 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1978 DAG.getConstant(ElemWidth, dl, MVT::i32));
1979 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1980 DAG.getAllOnesConstant(dl, MVT::i32));
1981
1982 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1983 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1984 // it separately in custom combine or selection).
1985 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1986 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1987 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1988 return DAG.getNode(ISD::SUB, dl, ResTy,
1989 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1990}
1991
1992SDValue
1993HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1994 const SDLoc &dl(Op);
1995 MVT ResTy = ty(Op);
1996 assert(ResTy.getVectorElementType() == MVT::i32);
1997
1998 SDValue Vs = Op.getOperand(0);
1999 SDValue Vt = Op.getOperand(1);
2000
2001 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
2002 unsigned Opc = Op.getOpcode();
2003
2004 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
2005 if (Opc == ISD::MULHU)
2006 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
2007 if (Opc == ISD::MULHS)
2008 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
2009
2010#ifndef NDEBUG
2011 Op.dump(&DAG);
2012#endif
2013 llvm_unreachable("Unexpected mulh operation");
2014}
2015
// Lower the two-result wide-multiply nodes (SMUL_LOHI, UMUL_LOHI,
// USMUL_LOHI), which produce the low and high halves of the full product
// as two separate result values.
SDValue
HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Vu = Op.getOperand(0);
  SDValue Vv = Op.getOperand(1);

  // If the HI part is not used, convert it to a regular MUL.
  if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
    // Need to preserve the types and the number of values.
    SDValue Hi = DAG.getUNDEF(ty(HiVal));
    SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
    return DAG.getMergeValues({Lo, Hi}, dl);
  }

  // USMUL_LOHI has an unsigned Vu and a signed Vv.
  bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
  bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;

  // Legal on HVX v62+, but lower it here because patterns can't handle multi-
  // valued nodes.
  if (Subtarget.useHVXV62Ops())
    return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);

  if (Opc == HexagonISD::SMUL_LOHI) {
    // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
    // for other signedness LOHI is cheaper.
    if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
      SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
      SDValue Lo = DAG.getUNDEF(ty(LoVal));
      return DAG.getMergeValues({Lo, Hi}, dl);
    }
  }

  return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
}
2051
2052SDValue
2053HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
2054 SDValue Val = Op.getOperand(0);
2055 MVT ResTy = ty(Op);
2056 MVT ValTy = ty(Val);
2057 const SDLoc &dl(Op);
2058
2059 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
2060 unsigned HwLen = Subtarget.getVectorLength();
2061 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
2062 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2063 unsigned BitWidth = ResTy.getSizeInBits();
2064
2065 if (BitWidth < 64) {
2066 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
2067 dl, MVT::i32, DAG);
2068 if (BitWidth == 32)
2069 return W0;
2070 assert(BitWidth < 32u);
2071 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2072 }
2073
2074 // The result is >= 64 bits. The only options are 64 or 128.
2075 assert(BitWidth == 64 || BitWidth == 128);
2077 for (unsigned i = 0; i != BitWidth/32; ++i) {
2078 SDValue W = extractHvxElementReg(
2079 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2080 Words.push_back(W);
2081 }
2082 SmallVector<SDValue,2> Combines;
2083 assert(Words.size() % 2 == 0);
2084 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2085 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2086 Combines.push_back(C);
2087 }
2088
2089 if (BitWidth == 64)
2090 return Combines[0];
2091
2092 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2093 }
2094
2095 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2096 // Splat the input into a 32-element i32 vector, then AND each element
2097 // with a unique bitmask to isolate individual bits.
2098 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2099 assert(Val32.getValueType().getSizeInBits() == 32 &&
2100 "Input must be 32 bits");
2101 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2102 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2104 for (unsigned i = 0; i < 32; ++i)
2105 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2106
2107 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2108 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2109 return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2110 };
2111 // === Case: v32i1 ===
2112 if (ResTy == MVT::v32i1 &&
2113 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2114 Subtarget.useHVX128BOps()) {
2115 SDValue Val32 = Val;
2116 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2117 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2118 return bitcastI32ToV32I1(Val32);
2119 }
2120 // === Case: v64i1 ===
2121 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2122 // Split i64 into lo/hi 32-bit halves.
2123 SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2124 SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2125 DAG.getConstant(32, dl, MVT::i64));
2126 SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2127
2128 // Reuse the same 32-bit logic twice.
2129 SDValue LoRes = bitcastI32ToV32I1(Lo);
2130 SDValue HiRes = bitcastI32ToV32I1(Hi);
2131
2132 // Concatenate into a v64i1 predicate.
2133 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
2134 }
2135
2136 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2137 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2138 unsigned BitWidth = ValTy.getSizeInBits();
2139 unsigned HwLen = Subtarget.getVectorLength();
2140 assert(BitWidth == HwLen);
2141
2142 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2143 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2144 // Splat each byte of Val 8 times.
2145 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2146 // where b0, b1,..., b15 are least to most significant bytes of I.
2148 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2149 // These are bytes with the LSB rotated left with respect to their index.
2151 for (unsigned I = 0; I != HwLen / 8; ++I) {
2152 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2153 SDValue Byte =
2154 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2155 for (unsigned J = 0; J != 8; ++J) {
2156 Bytes.push_back(Byte);
2157 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2158 }
2159 }
2160
2161 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2162 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2163 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2164
2165 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2166 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2167 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2168 }
2169
2170 return Op;
2171}
2172
2173SDValue
2174HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2175 // Sign- and zero-extends are legal.
2176 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2177 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2178 Op.getOperand(0));
2179}
2180
2181SDValue
2182HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2183 MVT ResTy = ty(Op);
2184 if (ResTy.getVectorElementType() != MVT::i1)
2185 return Op;
2186
2187 const SDLoc &dl(Op);
2188 unsigned HwLen = Subtarget.getVectorLength();
2189 unsigned VecLen = ResTy.getVectorNumElements();
2190 assert(HwLen % VecLen == 0);
2191 unsigned ElemSize = HwLen / VecLen;
2192
2193 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2194 SDValue S =
2195 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2196 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2197 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2198 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2199}
2200
2201SDValue
2202HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2203 if (SDValue S = getVectorShiftByInt(Op, DAG))
2204 return S;
2205 return Op;
2206}
2207
2208SDValue
2209HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2210 SelectionDAG &DAG) const {
2211 unsigned Opc = Op.getOpcode();
2212 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2213
2214 // Make sure the shift amount is within the range of the bitwidth
2215 // of the element type.
2216 SDValue A = Op.getOperand(0);
2217 SDValue B = Op.getOperand(1);
2218 SDValue S = Op.getOperand(2);
2219
2220 MVT InpTy = ty(A);
2221 MVT ElemTy = InpTy.getVectorElementType();
2222
2223 const SDLoc &dl(Op);
2224 unsigned ElemWidth = ElemTy.getSizeInBits();
2225 bool IsLeft = Opc == ISD::FSHL;
2226
2227 // The expansion into regular shifts produces worse code for i8 and for
2228 // right shift of i32 on v65+.
2229 bool UseShifts = ElemTy != MVT::i8;
2230 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2231 UseShifts = false;
2232
2233 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2234 // If this is a funnel shift by a scalar, lower it into regular shifts.
2235 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2236 SDValue ModS =
2237 DAG.getNode(ISD::AND, dl, MVT::i32,
2238 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2239 SDValue NegS =
2240 DAG.getNode(ISD::SUB, dl, MVT::i32,
2241 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2242 SDValue IsZero =
2243 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2244 // FSHL A, B => A << | B >>n
2245 // FSHR A, B => A <<n | B >>
2246 SDValue Part1 =
2247 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2248 SDValue Part2 =
2249 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2250 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2251 // If the shift amount was 0, pick A or B, depending on the direction.
2252 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2253 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2254 }
2255
2257 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2258
2259 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2260 return DAG.getNode(MOpc, dl, ty(Op),
2261 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2262}
2263
// Custom-lower a few HVX intrinsics onto existing HexagonISD nodes.
// Intrinsics not handled here are returned unchanged.
SDValue
HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned IntNo = Op.getConstantOperandVal(0);
  SmallVector<SDValue> Ops(Op->ops());

  // Swap the two result values of a two-result node (the *_parts
  // intrinsics return their results in the opposite order from the
  // *MUL_LOHI nodes).
  auto Swap = [&](SDValue P) {
    return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
  };

  switch (IntNo) {
  case Intrinsic::hexagon_V6_pred_typecast:
  case Intrinsic::hexagon_V6_pred_typecast_128B: {
    // Reinterpret one HVX predicate type as another; a no-op when the
    // types already match.
    MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
    if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
      if (ResTy == InpTy)
        return Ops[1];
      return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
    }
    break;
  }
  case Intrinsic::hexagon_V6_vmpyss_parts:
  case Intrinsic::hexagon_V6_vmpyss_parts_128B:
    return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
                            {Ops[1], Ops[2]}));
  case Intrinsic::hexagon_V6_vmpyuu_parts:
  case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
    return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
                            {Ops[1], Ops[2]}));
  case Intrinsic::hexagon_V6_vmpyus_parts:
  case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
    return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
                            {Ops[1], Ops[2]}));
  }
  } // switch

  return Op;
}
2302
// Custom-lower MLOAD/MSTORE (masked vector load/store) for HVX.
// A masked load becomes a full-vector load followed by a VSELECT against
// the pass-through value. A masked store uses the predicated store
// instruction; since HVX only has aligned masked stores, an unaligned
// store is split into two predicated stores of the aligned parts.
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();

  if (Opc == ISD::MLOAD) {
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    if (isUndef(Thru))
      return Load;
    // Blend the loaded value with the pass-through under the mask, and
    // return both the blended value and the load's chain.
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  if (MaskN->getAlign().value() % HwLen == 0) {
    // Fully aligned: a single predicated store suffices.
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  // Rotate both the mask (as a byte vector) and the value by the store
  // address, producing the two aligned vectors that the unaligned store
  // spans.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  // Join the chains of the two stores.
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
2372
2373SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2374 SelectionDAG &DAG) const {
2375 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2376 // is legal (done via a pattern).
2377 assert(Subtarget.useHVXQFloatOps());
2378
2379 assert(Op->getOpcode() == ISD::FP_EXTEND);
2380
2381 MVT VecTy = ty(Op);
2382 MVT ArgTy = ty(Op.getOperand(0));
2383 const SDLoc &dl(Op);
2384
2385 if (ArgTy == MVT::v64bf16) {
2386 MVT HalfTy = typeSplit(VecTy).first;
2387 SDValue BF16Vec = Op.getOperand(0);
2388 SDValue Zeroes =
2389 getInstr(Hexagon::V6_vxor, dl, HalfTy, {BF16Vec, BF16Vec}, DAG);
2390 // Interleave zero vector with the bf16 vector, with zeroes in the lower
2391 // half of each 32 bit lane, effectively extending the bf16 values to fp32
2392 // values.
2393 SDValue ShuffVec =
2394 getInstr(Hexagon::V6_vshufoeh, dl, VecTy, {BF16Vec, Zeroes}, DAG);
2395 VectorPair VecPair = opSplit(ShuffVec, dl, DAG);
2396 SDValue Result = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2397 {VecPair.second, VecPair.first,
2398 DAG.getSignedConstant(-4, dl, MVT::i32)},
2399 DAG);
2400 return Result;
2401 }
2402
2403 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2404
2405 SDValue F16Vec = Op.getOperand(0);
2406
2407 APFloat FloatVal = APFloat(1.0f);
2408 bool Ignored;
2410 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2411 SDValue VmpyVec =
2412 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2413
2414 MVT HalfTy = typeSplit(VecTy).first;
2415 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2416 SDValue LoVec =
2417 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2418 SDValue HiVec =
2419 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2420
2421 SDValue ShuffVec =
2422 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2423 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2424
2425 return ShuffVec;
2426}
2427
2428SDValue
2429HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2430 // Catch invalid conversion ops (just in case).
2431 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2432 Op.getOpcode() == ISD::FP_TO_UINT);
2433
2434 MVT ResTy = ty(Op);
2435 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2436 MVT IntTy = ResTy.getVectorElementType();
2437
2438 if (Subtarget.useHVXIEEEFPOps()) {
2439 // There are only conversions from f16.
2440 if (FpTy == MVT::f16) {
2441 // Other int types aren't legal in HVX, so we shouldn't see them here.
2442 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2443 // Conversions to i8 and i16 are legal.
2444 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2445 return Op;
2446 }
2447 }
2448
2449 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2450 return EqualizeFpIntConversion(Op, DAG);
2451
2452 return ExpandHvxFpToInt(Op, DAG);
2453}
2454
2455// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
2456// R1 = #1, R2 holds the v32i1 param
2457// V1 = vsplat(R1)
2458// V2 = vsplat(R2)
2459// Q0 = vand(V1,R1)
2460// V0.w=prefixsum(Q0)
2461// V0.w=vsub(V0.w,V1.w)
2462// V2.w = vlsr(V2.w,V0.w)
2463// V2 = vand(V2,V1)
2464// V2.sf = V2.w
// Expand [su]int_to_fp from v32i1 to v32f32 using the machine-node
// sequence shown in the comment above: splat the predicate's 32 bits,
// shift bit i into bit 0 of lane i, mask to 0/1, then convert to sf.
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // R1 = #1; V1 = vsplat(R1)
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Q0 = vand(V1,R1)
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // V0.w = prefixsum(Q0)
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // V2 = vsplat(input predicate viewed as i32)
  SDNode *SplatParam = DAG.getMachineNode(
      Hexagon::V6_lvsplatw, dl, MVT::v32i32,
      DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
  // V0.w = vsub(V0.w, V1.w)
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // V2.w = vlsr(V2.w, V0.w)
  SDNode *IndexShift =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatParam, 0), SDValue(Vsub, 0));
  // V2 = vand(V2, V1): keep only bit 0 of each lane (0 or 1).
  SDNode *MaskOff =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift, 0), SDValue(SplatConst, 0));
  // V2.sf = V2.w
  SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
                                       SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2496
// For vector type v64i1 uint_to_fp to v64f16:
2498// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
2499// R3 = subreg_high (R32)
2500// R2 = subreg_low (R32)
2501// R1 = #1
2502// V1 = vsplat(R1)
2503// V2 = vsplat(R2)
2504// V3 = vsplat(R3)
2505// Q0 = vand(V1,R1)
2506// V0.w=prefixsum(Q0)
2507// V0.w=vsub(V0.w,V1.w)
2508// V2.w = vlsr(V2.w,V0.w)
2509// V3.w = vlsr(V3.w,V0.w)
2510// V2 = vand(V2,V1)
2511// V3 = vand(V3,V1)
2512// V2.h = vpacke(V3.w,V2.w)
2513// V2.hf = V2.h
// Expand uint_to_fp from v64i1 to v64f16 using the machine-node sequence
// shown in the comment above: the same shift-and-mask trick as the v32i1
// case, applied separately to the low and high 32-bit halves of the
// predicate, then packed into halfwords and converted to hf.
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
  // vand between splatted const and const
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Get the prefixsum
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Get the vsub
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Get vlsr for hi and lo
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatHi, 0), SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatLo, 0), SDValue(Vsub, 0));
  // Get vand of hi and lo
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
  // Pack them
  SDNode *Pack =
      DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
                         SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
  SDNode *Convert =
      DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2571
2572SDValue
2573HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2574 // Catch invalid conversion ops (just in case).
2575 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2576 Op.getOpcode() == ISD::UINT_TO_FP);
2577
2578 MVT ResTy = ty(Op);
2579 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2580 MVT FpTy = ResTy.getVectorElementType();
2581
2582 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2583 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2584 return LowerHvxPred32ToFp(Op, DAG);
2585 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2586 return LowerHvxPred64ToFp(Op, DAG);
2587 }
2588
2589 if (Subtarget.useHVXIEEEFPOps()) {
2590 // There are only conversions to f16.
2591 if (FpTy == MVT::f16) {
2592 // Other int types aren't legal in HVX, so we shouldn't see them here.
2593 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2594 // i8, i16 -> f16 is legal.
2595 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2596 return Op;
2597 }
2598 }
2599
2600 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2601 return EqualizeFpIntConversion(Op, DAG);
2602
2603 return ExpandHvxIntToFp(Op, DAG);
2604}
2605
2606HexagonTargetLowering::TypePair
2607HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2608 // Compare the widths of elements of the two types, and extend the narrower
2609 // type to match the with of the wider type. For vector types, apply this
2610 // to the element type.
2611 assert(Ty0.isVector() == Ty1.isVector());
2612
2613 MVT ElemTy0 = Ty0.getScalarType();
2614 MVT ElemTy1 = Ty1.getScalarType();
2615
2616 unsigned Width0 = ElemTy0.getSizeInBits();
2617 unsigned Width1 = ElemTy1.getSizeInBits();
2618 unsigned MaxWidth = std::max(Width0, Width1);
2619
2620 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2621 if (ScalarTy.isInteger())
2622 return MVT::getIntegerVT(Width);
2623 assert(ScalarTy.isFloatingPoint());
2624 return MVT::getFloatingPointVT(Width);
2625 };
2626
2627 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2628 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2629
2630 if (!Ty0.isVector()) {
2631 // Both types are scalars.
2632 return {WideETy0, WideETy1};
2633 }
2634
2635 // Vector types.
2636 unsigned NumElem = Ty0.getVectorNumElements();
2637 assert(NumElem == Ty1.getVectorNumElements());
2638
2639 return {MVT::getVectorVT(WideETy0, NumElem),
2640 MVT::getVectorVT(WideETy1, NumElem)};
2641}
2642
2643HexagonTargetLowering::TypePair
2644HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2645 // Compare the numbers of elements of two vector types, and widen the
2646 // narrower one to match the number of elements in the wider one.
2647 assert(Ty0.isVector() && Ty1.isVector());
2648
2649 unsigned Len0 = Ty0.getVectorNumElements();
2650 unsigned Len1 = Ty1.getVectorNumElements();
2651 if (Len0 == Len1)
2652 return {Ty0, Ty1};
2653
2654 unsigned MaxLen = std::max(Len0, Len1);
2655 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2656 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2657}
2658
2659MVT
2660HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2661 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2662 assert(LegalTy.isSimple());
2663 return LegalTy.getSimpleVT();
2664}
2665
2666MVT
2667HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2668 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2669 assert(Ty.getSizeInBits() <= HwWidth);
2670 if (Ty.getSizeInBits() == HwWidth)
2671 return Ty;
2672
2673 MVT ElemTy = Ty.getScalarType();
2674 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2675}
2676
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
      const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
  // Compute A+B, return {A+B, O}, where O = vector predicate indicating
  // whether an overflow has occurred.
  MVT ResTy = ty(A);
  assert(ResTy == ty(B));
  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());

  if (!Signed) {
    // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
    // save any instructions.
    // Unsigned overflow: the sum wrapped around iff it compares (unsigned)
    // less than one of the addends.
    SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
    SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
    return {Add, Ovf};
  }

  // Signed overflow has happened, if:
  // (A, B have the same sign) and (A+B has a different sign from either)
  // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
  SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
  SDValue NotA =
      DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
  SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
  SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
  // Test the sign bit of And with a signed compare against zero.
  SDValue MSB =
      DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
  return {Add, MSB};
}
2707
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {rounded value, overflow predicate from the rounding add}.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  // bool ovf = (inp > tmp0);
  // uint rup = inp & (1 << (Amt+1));
  //
  // uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  //
  // NOTE(review): the code below tests bit Amt (1ull << Amt), not
  // 1 << (Amt+1) as the pseudo-code above suggests — confirm which is
  // intended.
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(ElemWidth);
  MVT IntTy = tyVector(ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  // Operate on the value reinterpreted as an integer vector.
  SDValue Inp = DAG.getBitcast(IntTy, Val);
  SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);

  // Rup = 1 if bit Amt of Inp is set, else 0 (round-up correction).
  SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
  SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
  SDValue Zero = getZero(dl, IntTy, DAG);
  SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);

  SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
  SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
  SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);

  // Select between the two candidate results depending on whether the
  // low Amt-1 bits were all zero (Tmp1 == Tmp2).
  SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
  SDValue One = DAG.getConstant(1, dl, IntTy);
  SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
  SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2755
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Return the high 32 bits of each lane of the signed product A*B, built
  // from 16x16-bit halfword multiplies (HVX v60 has no direct 32x32-bit
  // multiply producing the high half).
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // mulhs(A,B) =
  // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //                            + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //                Hi(B) Lo(B)
  //                Hi(A) Lo(A)
  //               --------------
  //                Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //         Hi(B)*Lo(A)         | + dropping the low 16 bits
  //         Hi(A)*Lo(B)         | T2
  //  Hi(B)*Hi(A)

  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
  SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
  SDValue T5 = LoHalf(P3, DAG);
  // Add:
  SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
  return T6;
}
2812
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Compute the full 64-bit products of the 32-bit lanes of A and B on
  // HVX v60, returned as merged values {Lo, Hi} (low/high 32 bits of each
  // product). SignedA/SignedB give the signedness of the respective operand.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);

  // Swap low/high halves in B
  SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
                        {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
                {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
  Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Signed*signed correction: Hi -= (B if A < 0) + (A if B < 0).
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
    SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2888
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Compute the full 64-bit products of the 32-bit lanes of A and B as a
  // merged {Lo, Hi} pair, using the V62 64-bit multiply instructions.
  // The signed*signed product is computed directly; corrections are then
  // applied for unsigned operands.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
  SDValue P1 =
      getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
  SDValue Lo = LoHalf(P1, DAG);
  SDValue Hi = HiHalf(P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2942
2943SDValue
2944HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2945 const {
2946 // Rewrite conversion between integer and floating-point in such a way that
2947 // the integer type is extended/narrowed to match the bitwidth of the
2948 // floating-point type, combined with additional integer-integer extensions
2949 // or narrowings to match the original input/result types.
2950 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2951 //
2952 // The input/result types are not required to be legal, but if they are
2953 // legal, this function should not introduce illegal types.
2954
2955 unsigned Opc = Op.getOpcode();
2958
2959 SDValue Inp = Op.getOperand(0);
2960 MVT InpTy = ty(Inp);
2961 MVT ResTy = ty(Op);
2962
2963 if (InpTy == ResTy)
2964 return Op;
2965
2966 const SDLoc &dl(Op);
2968
2969 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2970 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2971 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2972 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2973 return Res;
2974}
2975
2976SDValue
2977HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2978 unsigned Opc = Op.getOpcode();
2980
2981 const SDLoc &dl(Op);
2982 SDValue Op0 = Op.getOperand(0);
2983 MVT InpTy = ty(Op0);
2984 MVT ResTy = ty(Op);
2985 assert(InpTy.changeTypeToInteger() == ResTy);
2986
2987 // At this point this is an experiment under a flag.
2988 // In arch before V81 the rounding mode is towards nearest value.
2989 // The C/C++ standard requires rounding towards zero:
2990 // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
2991 // finite value of real floating type is converted to an integer type, the
2992 // fractional part is discarded (i.e., the value is truncated toward zero)."
2993 // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
2994 // floating-point type can be converted to a prvalue of an integer type. The
2995 // conversion truncates; that is, the fractional part is discarded."
2996 if (InpTy == MVT::v64f16) {
2997 if (Subtarget.useHVXV81Ops()) {
2998 // This is c/c++ compliant
2999 SDValue ConvVec =
3000 getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG);
3001 return ConvVec;
3002 } else if (EnableFpFastConvert) {
3003 // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
3004 SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG);
3005 return ConvVec;
3006 }
3007 } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
3008 // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
3009 SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG);
3010 return ConvVec;
3011 }
3012
3013 // int32_t conv_f32_to_i32(uint32_t inp) {
3014 // // s | exp8 | frac23
3015 //
3016 // int neg = (int32_t)inp < 0;
3017 //
3018 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
3019 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
3020 // // produce a large positive "expm1", which will result in max u/int.
3021 // // In all IEEE formats, bias is the largest positive number that can be
3022 // // represented in bias-width bits (i.e. 011..1).
3023 // int32_t expm1 = (inp << 1) - 0x80000000;
3024 // expm1 >>= 24;
3025 //
3026 // // Always insert the "implicit 1". Subnormal numbers will become 0
3027 // // regardless.
3028 // uint32_t frac = (inp << 8) | 0x80000000;
3029 //
3030 // // "frac" is the fraction part represented as Q1.31. If it was
3031 // // interpreted as uint32_t, it would be the fraction part multiplied
3032 // // by 2^31.
3033 //
3034 // // Calculate the amount of right shift, since shifting further to the
3035 // // left would lose significant bits. Limit it to 32, because we want
3036 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
3037 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
3038 // // left by 31). "rsh" can be negative.
3039 // int32_t rsh = min(31 - (expm1 + 1), 32);
3040 //
3041 // frac >>= rsh; // rsh == 32 will produce 0
3042 //
3043 // // Everything up to this point is the same for conversion to signed
3044 // // unsigned integer.
3045 //
3046 // if (neg) // Only for signed int
3047 // frac = -frac; //
3048 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
3049 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
3050 // if (rsh <= 0 && !neg) //
3051 // frac = 0x7fffffff; //
3052 //
3053 // if (neg) // Only for unsigned int
3054 // frac = 0; //
3055 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
3056 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
3057 //
3058 // return frac;
3059 // }
3060
3061 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
3062
3063 // Zero = V6_vd0();
3064 // Neg = V6_vgtw(Zero, Inp);
3065 // One = V6_lvsplatw(1);
3066 // M80 = V6_lvsplatw(0x80000000);
3067 // Exp00 = V6_vaslwv(Inp, One);
3068 // Exp01 = V6_vsubw(Exp00, M80);
3069 // ExpM1 = V6_vasrw(Exp01, 24);
3070 // Frc00 = V6_vaslw(Inp, 8);
3071 // Frc01 = V6_vor(Frc00, M80);
3072 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
3073 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
3074 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
3075
3076 // if signed int:
3077 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
3078 // Pos = V6_vgtw(Rsh01, Zero);
3079 // Frc13 = V6_vsubw(Zero, Frc02);
3080 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
3081 // Int = V6_vmux(Pos, Frc14, Bnd);
3082 //
3083 // if unsigned int:
3084 // Rsn = V6_vgtw(Zero, Rsh01)
3085 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
3086 // Int = V6_vmux(Neg, Zero, Frc23)
3087
3088 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
3089 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3090 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
3091
3092 SDValue Inp = DAG.getBitcast(ResTy, Op0);
3093 SDValue Zero = getZero(dl, ResTy, DAG);
3094 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
3095 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
3096 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
3097 SDValue One = DAG.getConstant(1, dl, ResTy);
3098 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
3099 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
3100 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
3101 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
3102
3103 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
3104 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
3105 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
3106
3107 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
3108 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
3109 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
3110 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
3111 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
3112
3113 SDValue Int;
3114
3115 if (Opc == ISD::FP_TO_SINT) {
3116 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
3117 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
3118 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
3119 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
3120 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
3121 } else {
3123 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
3124 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
3125 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
3126 }
3127
3128 return Int;
3129}
3130
3131SDValue
3132HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
3133 unsigned Opc = Op.getOpcode();
3135
3136 const SDLoc &dl(Op);
3137 SDValue Op0 = Op.getOperand(0);
3138 MVT InpTy = ty(Op0);
3139 MVT ResTy = ty(Op);
3140 assert(ResTy.changeTypeToInteger() == InpTy);
3141
3142 // uint32_t vnoc1_rnd(int32_t w) {
3143 // int32_t iszero = w == 0;
3144 // int32_t isneg = w < 0;
3145 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
3146 //
3147 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
3148 // uint32_t frac0 = (uint64_t)u << norm_left;
3149 //
3150 // // Rounding:
3151 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
3152 // uint32_t renorm = (frac0 > frac1);
3153 // uint32_t rup = (int)(frac0 << 22) < 0;
3154 //
3155 // uint32_t frac2 = frac0 >> 8;
3156 // uint32_t frac3 = frac1 >> 8;
3157 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
3158 //
3159 // int32_t exp = 32 - norm_left + renorm + 127;
3160 // exp <<= 23;
3161 //
3162 // uint32_t sign = 0x80000000 * isneg;
3163 // uint32_t f = sign | exp | frac;
3164 // return iszero ? 0 : f;
3165 // }
3166
3167 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
3168 bool Signed = Opc == ISD::SINT_TO_FP;
3169
3170 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
3171 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3172
3173 SDValue Zero = getZero(dl, InpTy, DAG);
3174 SDValue One = DAG.getConstant(1, dl, InpTy);
3175 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
3176 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
3177 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
3178 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
3179 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
3180
3181 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
3182 if (Signed) {
3183 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
3184 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
3185 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
3186 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
3187 }
3188
3189 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
3190 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
3191 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
3192 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
3193 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
3194 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
3195 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
3196 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
3197 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
3198
3199 return Flt;
3200}
3201
3202SDValue
3203HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3204 unsigned Opc = Op.getOpcode();
3205 unsigned TLOpc;
3206 switch (Opc) {
3207 case ISD::ANY_EXTEND:
3208 case ISD::SIGN_EXTEND:
3209 case ISD::ZERO_EXTEND:
3210 TLOpc = HexagonISD::TL_EXTEND;
3211 break;
3212 case ISD::TRUNCATE:
3214 break;
3215#ifndef NDEBUG
3216 Op.dump(&DAG);
3217#endif
3218 llvm_unreachable("Unexpected operator");
3219 }
3220
3221 const SDLoc &dl(Op);
3222 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
3223 DAG.getUNDEF(MVT::i128), // illegal type
3224 DAG.getConstant(Opc, dl, MVT::i32));
3225}
3226
3227SDValue
3228HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3229 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3230 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3231 unsigned Opc = Op.getConstantOperandVal(2);
3232 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3233}
3234
3235HexagonTargetLowering::VectorPair
3236HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
3237 assert(!Op.isMachineOpcode());
3238 SmallVector<SDValue, 2> OpsL, OpsH;
3239 const SDLoc &dl(Op);
3240
3241 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
3242 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
3243 SDValue TV = DAG.getValueType(Ty);
3244 return std::make_pair(TV, TV);
3245 };
3246
3247 for (SDValue A : Op.getNode()->ops()) {
3248 auto [Lo, Hi] =
3249 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
3250 // Special case for type operand.
3251 switch (Op.getOpcode()) {
3252 case ISD::SIGN_EXTEND_INREG:
3253 case HexagonISD::SSAT:
3254 case HexagonISD::USAT:
3255 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
3256 std::tie(Lo, Hi) = SplitVTNode(N);
3257 break;
3258 }
3259 OpsL.push_back(Lo);
3260 OpsH.push_back(Hi);
3261 }
3262
3263 MVT ResTy = ty(Op);
3264 MVT HalfTy = typeSplit(ResTy).first;
3265 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
3266 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
3267 return {L, H};
3268}
3269
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a (possibly masked) load/store of an HVX vector pair into two
  // single-vector memory operations: one at the original base address and
  // one at Base + HwLen. Non-pair or non-simple memory ops are returned
  // unchanged.
  auto *MemN = cast<MemSDNode>(Op.getNode());

  if (!MemN->getMemoryVT().isSimple())
    return Op;

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  // Build memory operands for the two halves (offsets 0 and HwLen).
  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    // For masked ops the number of bytes actually accessed depends on the
    // mask, so report an unknown size.
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    // Result: concatenated halves, plus the combined output chain.
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  // Masked load/store: the mask (and pass-through/value) is split as well.
  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}
3354
3355SDValue
3356HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3357 const SDLoc &dl(Op);
3358 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3359 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3360 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3361 "Not widening loads of i1 yet");
3362
3363 SDValue Chain = LoadN->getChain();
3364 SDValue Base = LoadN->getBasePtr();
3365 SDValue Offset = DAG.getUNDEF(MVT::i32);
3366
3367 MVT ResTy = ty(Op);
3368 unsigned HwLen = Subtarget.getVectorLength();
3369 unsigned ResLen = ResTy.getStoreSize();
3370 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3371
3372 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3373 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3374 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3375
3376 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3377 MachineFunction &MF = DAG.getMachineFunction();
3378 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3379
3380 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3381 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3383 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3384 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3385}
3386
3387SDValue
3388HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3389 const SDLoc &dl(Op);
3390 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3391 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3392 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3393 "Not widening stores of i1 yet");
3394
3395 SDValue Chain = StoreN->getChain();
3396 SDValue Base = StoreN->getBasePtr();
3397 SDValue Offset = DAG.getUNDEF(MVT::i32);
3398
3399 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3400 MVT ValueTy = ty(Value);
3401 unsigned ValueLen = ValueTy.getVectorNumElements();
3402 unsigned HwLen = Subtarget.getVectorLength();
3403 assert(isPowerOf2_32(ValueLen));
3404
3405 for (unsigned Len = ValueLen; Len < HwLen; ) {
3406 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3407 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3408 }
3409 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3410
3411 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3412 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3413 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3414 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3415 MachineFunction &MF = DAG.getMachineFunction();
3416 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3417 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3418 MemOp, ISD::UNINDEXED, false, false);
3419}
3420
3421SDValue
3422HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3423 const SDLoc &dl(Op);
3424 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3425 MVT ElemTy = ty(Op0).getVectorElementType();
3426 unsigned HwLen = Subtarget.getVectorLength();
3427
3428 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3429 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3430 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3431 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3432 return SDValue();
3433
3434 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3435 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3436 EVT ResTy =
3437 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3438 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3439 {WideOp0, WideOp1, Op.getOperand(2)});
3440
3441 EVT RetTy = typeLegalize(ty(Op), DAG);
3442 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3443 {SetCC, getZero(dl, MVT::i32, DAG)});
3444}
3445
3446SDValue
3447HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3448 unsigned Opc = Op.getOpcode();
3449 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3450 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3451 return isHvxPairTy(ty(V));
3452 });
3453
3454 if (IsPairOp) {
3455 switch (Opc) {
3456 default:
3457 break;
3458 case ISD::LOAD:
3459 case ISD::STORE:
3460 case ISD::MLOAD:
3461 case ISD::MSTORE:
3462 return SplitHvxMemOp(Op, DAG);
3463 case ISD::SINT_TO_FP:
3464 case ISD::UINT_TO_FP:
3465 case ISD::FP_TO_SINT:
3466 case ISD::FP_TO_UINT:
3467 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3468 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3469 break;
3470 case ISD::ABS:
3471 case ISD::CTPOP:
3472 case ISD::CTLZ:
3473 case ISD::CTTZ:
3474 case ISD::MUL:
3475 case ISD::FADD:
3476 case ISD::FSUB:
3477 case ISD::FMUL:
3478 case ISD::FMINIMUMNUM:
3479 case ISD::FMAXIMUMNUM:
3480 case ISD::MULHS:
3481 case ISD::MULHU:
3482 case ISD::AND:
3483 case ISD::OR:
3484 case ISD::XOR:
3485 case ISD::SRA:
3486 case ISD::SHL:
3487 case ISD::SRL:
3488 case ISD::FSHL:
3489 case ISD::FSHR:
3490 case ISD::SMIN:
3491 case ISD::SMAX:
3492 case ISD::UMIN:
3493 case ISD::UMAX:
3494 case ISD::SETCC:
3495 case ISD::VSELECT:
3497 case ISD::SPLAT_VECTOR:
3498 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3499 case ISD::SIGN_EXTEND:
3500 case ISD::ZERO_EXTEND:
3501 // In general, sign- and zero-extends can't be split and still
3502 // be legal. The only exception is extending bool vectors.
3503 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3504 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3505 break;
3506 }
3507 }
3508
3509 switch (Opc) {
3510 default:
3511 break;
3512 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3513 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3514 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3515 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3516 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3517 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3518 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3519 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3520 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3521 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3522 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3523 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3524 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3525 case ISD::SRA:
3526 case ISD::SHL:
3527 case ISD::SRL: return LowerHvxShift(Op, DAG);
3528 case ISD::FSHL:
3529 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3530 case ISD::MULHS:
3531 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3532 case ISD::SMUL_LOHI:
3533 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3534 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3535 case ISD::SETCC:
3536 case ISD::INTRINSIC_VOID: return Op;
3537 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3538 case ISD::MLOAD:
3539 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3540 // Unaligned loads will be handled by the default lowering.
3541 case ISD::LOAD: return SDValue();
3542 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3543 case ISD::FP_TO_SINT:
3544 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3545 case ISD::SINT_TO_FP:
3546 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3547
3548 // Special nodes:
3549 case HexagonISD::SMUL_LOHI:
3550 case HexagonISD::UMUL_LOHI:
3551 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3552 }
3553#ifndef NDEBUG
3554 Op.dumpr(&DAG);
3555#endif
3556 llvm_unreachable("Unhandled HVX operation");
3557}
3558
3559SDValue
3560HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3561 const {
3562 // Rewrite the extension/truncation/saturation op into steps where each
3563 // step changes the type widths by a factor of 2.
3564 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3565 //
3566 // Some of the vector types in Op may not be legal.
3567
3568 unsigned Opc = Op.getOpcode();
3569 switch (Opc) {
3570 case HexagonISD::SSAT:
3571 case HexagonISD::USAT:
3574 break;
3575 case ISD::ANY_EXTEND:
3576 case ISD::ZERO_EXTEND:
3577 case ISD::SIGN_EXTEND:
3578 case ISD::TRUNCATE:
3579 llvm_unreachable("ISD:: ops will be auto-folded");
3580 break;
3581#ifndef NDEBUG
3582 Op.dump(&DAG);
3583#endif
3584 llvm_unreachable("Unexpected operation");
3585 }
3586
3587 SDValue Inp = Op.getOperand(0);
3588 MVT InpTy = ty(Inp);
3589 MVT ResTy = ty(Op);
3590
3591 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3592 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3593 assert(InpWidth != ResWidth);
3594
3595 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3596 return Op;
3597
3598 const SDLoc &dl(Op);
3599 unsigned NumElems = InpTy.getVectorNumElements();
3600 assert(NumElems == ResTy.getVectorNumElements());
3601
3602 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3603 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3604 switch (Opc) {
3605 case HexagonISD::SSAT:
3606 case HexagonISD::USAT:
3607 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3610 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3611 default:
3612 llvm_unreachable("Unexpected opcode");
3613 }
3614 };
3615
3616 SDValue S = Inp;
3617 if (InpWidth < ResWidth) {
3618 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3619 while (InpWidth * 2 <= ResWidth)
3620 S = repeatOp(InpWidth *= 2, S);
3621 } else {
3622 // InpWidth > ResWidth
3623 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3624 while (InpWidth / 2 >= ResWidth)
3625 S = repeatOp(InpWidth /= 2, S);
3626 }
3627 return S;
3628}
3629
3630SDValue
3631HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3632 SDValue Inp0 = Op.getOperand(0);
3633 MVT InpTy = ty(Inp0);
3634 MVT ResTy = ty(Op);
3635 unsigned InpWidth = InpTy.getSizeInBits();
3636 unsigned ResWidth = ResTy.getSizeInBits();
3637 unsigned Opc = Op.getOpcode();
3638
3639 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3640 // First, make sure that the narrower type is widened to HVX.
3641 // This may cause the result to be wider than what the legalizer
3642 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3643 // desired type.
3644 auto [WInpTy, WResTy] =
3645 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3646 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3647 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3648 SDValue S;
3650 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3651 Op.getOperand(2));
3652 } else {
3653 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3654 }
3655 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3656 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3657 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3658 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3659 } else {
3660 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3661 return RemoveTLWrapper(Op, DAG);
3662 }
3663 llvm_unreachable("Unexpected situation");
3664}
3665
3666void
3667HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3669 unsigned Opc = N->getOpcode();
3670 SDValue Op(N, 0);
3671 SDValue Inp0; // Optional first argument.
3672 if (N->getNumOperands() > 0)
3673 Inp0 = Op.getOperand(0);
3674
3675 switch (Opc) {
3676 case ISD::ANY_EXTEND:
3677 case ISD::SIGN_EXTEND:
3678 case ISD::ZERO_EXTEND:
3679 case ISD::TRUNCATE:
3680 if (Subtarget.isHVXElementType(ty(Op)) &&
3681 Subtarget.isHVXElementType(ty(Inp0))) {
3682 Results.push_back(CreateTLWrapper(Op, DAG));
3683 }
3684 break;
3685 case ISD::SETCC:
3686 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3687 if (SDValue T = WidenHvxSetCC(Op, DAG))
3688 Results.push_back(T);
3689 }
3690 break;
3691 case ISD::STORE: {
3692 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3693 SDValue Store = WidenHvxStore(Op, DAG);
3694 Results.push_back(Store);
3695 }
3696 break;
3697 }
3698 case ISD::MLOAD:
3699 if (isHvxPairTy(ty(Op))) {
3700 SDValue S = SplitHvxMemOp(Op, DAG);
3702 Results.push_back(S.getOperand(0));
3703 Results.push_back(S.getOperand(1));
3704 }
3705 break;
3706 case ISD::MSTORE:
3707 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3708 SDValue S = SplitHvxMemOp(Op, DAG);
3709 Results.push_back(S);
3710 }
3711 break;
3712 case ISD::SINT_TO_FP:
3713 case ISD::UINT_TO_FP:
3714 case ISD::FP_TO_SINT:
3715 case ISD::FP_TO_UINT:
3716 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3717 SDValue T = EqualizeFpIntConversion(Op, DAG);
3718 Results.push_back(T);
3719 }
3720 break;
3721 case HexagonISD::SSAT:
3722 case HexagonISD::USAT:
3725 Results.push_back(LegalizeHvxResize(Op, DAG));
3726 break;
3727 default:
3728 break;
3729 }
3730}
3731
3732void
3733HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3735 unsigned Opc = N->getOpcode();
3736 SDValue Op(N, 0);
3737 SDValue Inp0; // Optional first argument.
3738 if (N->getNumOperands() > 0)
3739 Inp0 = Op.getOperand(0);
3740
3741 switch (Opc) {
3742 case ISD::ANY_EXTEND:
3743 case ISD::SIGN_EXTEND:
3744 case ISD::ZERO_EXTEND:
3745 case ISD::TRUNCATE:
3746 if (Subtarget.isHVXElementType(ty(Op)) &&
3747 Subtarget.isHVXElementType(ty(Inp0))) {
3748 Results.push_back(CreateTLWrapper(Op, DAG));
3749 }
3750 break;
3751 case ISD::SETCC:
3752 if (shouldWidenToHvx(ty(Op), DAG)) {
3753 if (SDValue T = WidenHvxSetCC(Op, DAG))
3754 Results.push_back(T);
3755 }
3756 break;
3757 case ISD::LOAD: {
3758 if (shouldWidenToHvx(ty(Op), DAG)) {
3759 SDValue Load = WidenHvxLoad(Op, DAG);
3760 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3761 Results.push_back(Load.getOperand(0));
3762 Results.push_back(Load.getOperand(1));
3763 }
3764 break;
3765 }
3766 case ISD::BITCAST:
3767 if (isHvxBoolTy(ty(Inp0))) {
3768 SDValue C = LowerHvxBitcast(Op, DAG);
3769 Results.push_back(C);
3770 }
3771 break;
3772 case ISD::FP_TO_SINT:
3773 case ISD::FP_TO_UINT:
3774 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3775 SDValue T = EqualizeFpIntConversion(Op, DAG);
3776 Results.push_back(T);
3777 }
3778 break;
3779 case HexagonISD::SSAT:
3780 case HexagonISD::USAT:
3783 Results.push_back(LegalizeHvxResize(Op, DAG));
3784 break;
3785 default:
3786 break;
3787 }
3788}
3789
3790SDValue
3791HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3792 DAGCombinerInfo &DCI) const {
3793 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3794 // to extract-subvector (shuffle V, pick even, pick odd)
3795
3796 assert(Op.getOpcode() == ISD::TRUNCATE);
3797 SelectionDAG &DAG = DCI.DAG;
3798 const SDLoc &dl(Op);
3799
3800 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3801 return SDValue();
3802 SDValue Cast = Op.getOperand(0);
3803 SDValue Src = Cast.getOperand(0);
3804
3805 EVT TruncTy = Op.getValueType();
3806 EVT CastTy = Cast.getValueType();
3807 EVT SrcTy = Src.getValueType();
3808 if (SrcTy.isSimple())
3809 return SDValue();
3810 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3811 return SDValue();
3812 unsigned SrcLen = SrcTy.getVectorNumElements();
3813 unsigned CastLen = CastTy.getVectorNumElements();
3814 if (2 * CastLen != SrcLen)
3815 return SDValue();
3816
3817 SmallVector<int, 128> Mask(SrcLen);
3818 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3819 Mask[i] = 2 * i;
3820 Mask[i + CastLen] = 2 * i + 1;
3821 }
3822 SDValue Deal =
3823 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3824 return opSplit(Deal, dl, DAG).first;
3825}
3826
3827SDValue
3828HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3829 SDValue Op, DAGCombinerInfo &DCI) const {
3830 // Fold
3831 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3832 // into
3833 // shuffle (concat x, y), undef, m3
3834 if (Op.getNumOperands() != 2)
3835 return SDValue();
3836
3837 SelectionDAG &DAG = DCI.DAG;
3838 const SDLoc &dl(Op);
3839 SDValue V0 = Op.getOperand(0);
3840 SDValue V1 = Op.getOperand(1);
3841
3842 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3843 return SDValue();
3844 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3845 return SDValue();
3846
3847 SetVector<SDValue> Order;
3848 Order.insert(V0.getOperand(0));
3849 Order.insert(V0.getOperand(1));
3850 Order.insert(V1.getOperand(0));
3851 Order.insert(V1.getOperand(1));
3852
3853 if (Order.size() > 2)
3854 return SDValue();
3855
3856 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3857 // result must be the same.
3858 EVT InpTy = V0.getValueType();
3859 assert(InpTy.isVector());
3860 unsigned InpLen = InpTy.getVectorNumElements();
3861
3862 SmallVector<int, 128> LongMask;
3863 auto AppendToMask = [&](SDValue Shuffle) {
3864 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3865 ArrayRef<int> Mask = SV->getMask();
3866 SDValue X = Shuffle.getOperand(0);
3867 SDValue Y = Shuffle.getOperand(1);
3868 for (int M : Mask) {
3869 if (M == -1) {
3870 LongMask.push_back(M);
3871 continue;
3872 }
3873 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3874 if (static_cast<unsigned>(M) >= InpLen)
3875 M -= InpLen;
3876
3877 int OutOffset = Order[0] == Src ? 0 : InpLen;
3878 LongMask.push_back(M + OutOffset);
3879 }
3880 };
3881
3882 AppendToMask(V0);
3883 AppendToMask(V1);
3884
3885 SDValue C0 = Order.front();
3886 SDValue C1 = Order.back(); // Can be same as front
3887 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3888
3889 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3890 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3891}
3892
3893SDValue
3894HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3895 const {
3896 const SDLoc &dl(N);
3897 SelectionDAG &DAG = DCI.DAG;
3898 SDValue Op(N, 0);
3899 unsigned Opc = Op.getOpcode();
3900
3902
3903 if (Opc == ISD::TRUNCATE)
3904 return combineTruncateBeforeLegal(Op, DCI);
3905 if (Opc == ISD::CONCAT_VECTORS)
3906 return combineConcatVectorsBeforeLegal(Op, DCI);
3907
3908 if (DCI.isBeforeLegalizeOps())
3909 return SDValue();
3910
3911 switch (Opc) {
3912 case ISD::VSELECT: {
3913 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3914 SDValue Cond = Ops[0];
3915 if (Cond->getOpcode() == ISD::XOR) {
3916 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3917 if (C1->getOpcode() == HexagonISD::QTRUE)
3918 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3919 }
3920 break;
3921 }
3922 case HexagonISD::V2Q:
3923 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3924 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3925 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3926 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3927 }
3928 break;
3929 case HexagonISD::Q2V:
3930 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3931 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3932 DAG.getAllOnesConstant(dl, MVT::i32));
3933 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3934 return getZero(dl, ty(Op), DAG);
3935 break;
3936 case HexagonISD::VINSERTW0:
3937 if (isUndef(Ops[1]))
3938 return Ops[0];
3939 break;
3940 case HexagonISD::VROR: {
3941 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3942 SDValue Vec = Ops[0].getOperand(0);
3943 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3944 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3945 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3946 }
3947 break;
3948 }
3949 }
3950
3951 return SDValue();
3952}
3953
3954bool
3955HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3956 if (Subtarget.isHVXVectorType(Ty, true))
3957 return false;
3958 auto Action = getPreferredHvxVectorAction(Ty);
3960 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3961 return false;
3962}
3963
3964bool
3965HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3966 if (Subtarget.isHVXVectorType(Ty, true))
3967 return false;
3968 auto Action = getPreferredHvxVectorAction(Ty);
3970 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3971 return false;
3972}
3973
3974bool
3975HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3976 if (!Subtarget.useHVXOps())
3977 return false;
3978 // If the type of any result, or any operand type are HVX vector types,
3979 // this is an HVX operation.
3980 auto IsHvxTy = [this](EVT Ty) {
3981 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3982 };
3983 auto IsHvxOp = [this](SDValue Op) {
3984 return Op.getValueType().isSimple() &&
3985 Subtarget.isHVXVectorType(ty(Op), true);
3986 };
3987 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3988 return true;
3989
3990 // Check if this could be an HVX operation after type widening.
3991 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3992 if (!Op.getValueType().isSimple())
3993 return false;
3994 MVT ValTy = ty(Op);
3995 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3996 };
3997
3998 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3999 if (IsWidenedToHvx(SDValue(N, i)))
4000 return true;
4001 }
4002 return llvm::any_of(N->ops(), IsWidenedToHvx);
4003}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
static cl::opt< bool > EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false), cl::desc("Enable FP fast conversion routine."))
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5975
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:132
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:138
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:818
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:778
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:852
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:879
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:746
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:909
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:992
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:843
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:795
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:671
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:703
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:764
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:849
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:898
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:887
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:726
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:977
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:804
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:925
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:958
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:920
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:855
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.