LLVM 22.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
34static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(5, 15, 10);
45 case MVT::f32:
46 return std::make_tuple(8, 127, 23);
47 case MVT::f64:
48 return std::make_tuple(11, 1023, 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
54
55void
56HexagonTargetLowering::initializeHVXLowering() {
57 if (Subtarget.useHVX64BOps()) {
58 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
59 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
60 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
61 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
62 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
63 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
71 // as possible.
72 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
73 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
74 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
75 } else if (Subtarget.useHVX128BOps()) {
76 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
77 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
78 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
79 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
80 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
81 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
82 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
85 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90 }
91 if (Subtarget.useHVXV81Ops()) {
92 addRegisterClass(MVT::v64bf16, &Hexagon::HvxVRRegClass);
93 addRegisterClass(MVT::v128bf16, &Hexagon::HvxWRRegClass);
94 }
95 }
96
97 // Set up operation actions.
98
99 bool Use64b = Subtarget.useHVX64BOps();
100 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
101 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
102 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
103 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
104 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
105
106 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
108 AddPromotedToType(Opc, FromTy, ToTy);
109 };
110
111 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
112 // Note: v16i1 -> i16 is handled in type legalization instead of op
113 // legalization.
114 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
115 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
116 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
117 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
118 setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
119 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
123
124 if (Subtarget.useHVX128BOps()) {
125 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
126 setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
127 }
128 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
129 Subtarget.useHVXFloatingPoint()) {
130
131 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
132 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
133
134 for (MVT T : FloatV) {
138 setOperationAction(ISD::FMINIMUMNUM, T, Legal);
139 setOperationAction(ISD::FMAXIMUMNUM, T, Legal);
140
143
146
147 setOperationAction(ISD::MLOAD, T, Custom);
148 setOperationAction(ISD::MSTORE, T, Custom);
149 // Custom-lower BUILD_VECTOR. The standard (target-independent)
150 // handling of it would convert it to a load, which is not always
151 // the optimal choice.
153 }
154
155
156 // BUILD_VECTOR with f16 operands cannot be promoted without
157 // promoting the result, so lower the node to vsplat or constant pool
161
162 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
163 // generated.
164 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
165 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
166 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
167 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
168
169 if (Subtarget.useHVXV81Ops()) {
170 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
171 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
172 setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
173 setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
174 setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
175 setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
176 setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
177 setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);
178
182
183 setOperationAction(ISD::MLOAD, MVT::v64bf16, Custom);
184 setOperationAction(ISD::MSTORE, MVT::v64bf16, Custom);
187
191 }
192
193 for (MVT P : FloatW) {
194 setOperationAction(ISD::LOAD, P, Custom);
195 setOperationAction(ISD::STORE, P, Custom);
199 setOperationAction(ISD::FMINIMUMNUM, P, Custom);
200 setOperationAction(ISD::FMAXIMUMNUM, P, Custom);
203
204 // Custom-lower BUILD_VECTOR. The standard (target-independent)
205 // handling of it would convert it to a load, which is not always
206 // the optimal choice.
208 // Make concat-vectors custom to handle concats of more than 2 vectors.
210
211 setOperationAction(ISD::MLOAD, P, Custom);
212 setOperationAction(ISD::MSTORE, P, Custom);
213 }
214
215 if (Subtarget.useHVXQFloatOps()) {
216 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
218 } else if (Subtarget.useHVXIEEEFPOps()) {
219 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
221 }
222 }
223
224 for (MVT T : LegalV) {
227
243 if (T != ByteV) {
247 }
248
251 if (T.getScalarType() != MVT::i32) {
254 }
255
257 setOperationAction(ISD::LOAD, T, Custom);
258 setOperationAction(ISD::MLOAD, T, Custom);
259 setOperationAction(ISD::MSTORE, T, Custom);
260 if (T.getScalarType() != MVT::i32) {
263 }
264
266 // Make concat-vectors custom to handle concats of more than 2 vectors.
277 if (T != ByteV) {
279 // HVX only has shifts of words and halfwords.
283
284 // Promote all shuffles to operate on vectors of bytes.
285 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
286 }
287
288 if (Subtarget.useHVXFloatingPoint()) {
289 // Same action for both QFloat and IEEE.
294 }
295
303 }
304
305 for (MVT T : LegalW) {
306 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
307 // independent) handling of it would convert it to a load, which is
308 // not always the optimal choice.
310 // Make concat-vectors custom to handle concats of more than 2 vectors.
312
313 // Custom-lower these operations for pairs. Expand them into a concat
314 // of the corresponding operations on individual vectors.
323
324 setOperationAction(ISD::LOAD, T, Custom);
325 setOperationAction(ISD::STORE, T, Custom);
326 setOperationAction(ISD::MLOAD, T, Custom);
327 setOperationAction(ISD::MSTORE, T, Custom);
332
347 if (T != ByteW) {
351
352 // Promote all shuffles to operate on vectors of bytes.
353 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
354 }
357
360 if (T.getScalarType() != MVT::i32) {
363 }
364
365 if (Subtarget.useHVXFloatingPoint()) {
366 // Same action for both QFloat and IEEE.
371 }
372 }
373
374 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
375 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
376 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
379
380 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
381 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
382 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
383 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
384 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
385 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
386 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
387 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
388 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
389 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
390 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
391 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
392 setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
393 setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
394
395 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
396 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
397 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
398 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
399 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
400 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
401 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
402 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
403 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
404 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
405 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
406 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
407 setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
408 setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
409
410 // Boolean vectors.
411
412 for (MVT T : LegalW) {
413 // Boolean types for vector pairs will overlap with the boolean
414 // types for single vectors, e.g.
415 // v64i8 -> v64i1 (single)
416 // v64i16 -> v64i1 (pair)
417 // Set these actions first, and allow the single actions to overwrite
418 // any duplicates.
419 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
424 // Masked load/store takes a mask that may need splitting.
425 setOperationAction(ISD::MLOAD, BoolW, Custom);
426 setOperationAction(ISD::MSTORE, BoolW, Custom);
427 }
428
429 for (MVT T : LegalV) {
430 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
441 }
442
443 if (Use64b) {
444 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
446 } else {
447 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
449 }
450
451 // Handle store widening for short vectors.
452 unsigned HwLen = Subtarget.getVectorLength();
453 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
454 if (ElemTy == MVT::i1)
455 continue;
456 int ElemWidth = ElemTy.getFixedSizeInBits();
457 int MaxElems = (8*HwLen) / ElemWidth;
458 for (int N = 2; N < MaxElems; N *= 2) {
459 MVT VecTy = MVT::getVectorVT(ElemTy, N);
460 auto Action = getPreferredVectorAction(VecTy);
462 setOperationAction(ISD::LOAD, VecTy, Custom);
463 setOperationAction(ISD::STORE, VecTy, Custom);
469 if (Subtarget.useHVXFloatingPoint()) {
474 }
475
476 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
477 if (!isTypeLegal(BoolTy))
479 }
480 }
481 }
482
 483  // Include cases which are not handled earlier
487
489}
490
491unsigned
492HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
493 // Early exit for invalid input types
494 if (!VecTy.isVector())
495 return ~0u;
496
497 MVT ElemTy = VecTy.getVectorElementType();
498 unsigned VecLen = VecTy.getVectorNumElements();
499 unsigned HwLen = Subtarget.getVectorLength();
500
501 // Split vectors of i1 that exceed byte vector length.
502 if (ElemTy == MVT::i1 && VecLen > HwLen)
504
505 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
506 // For shorter vectors of i1, widen them if any of the corresponding
507 // vectors of integers needs to be widened.
508 if (ElemTy == MVT::i1) {
509 for (MVT T : Tys) {
510 assert(T != MVT::i1);
511 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
512 if (A != ~0u)
513 return A;
514 }
515 return ~0u;
516 }
517
518 // If the size of VecTy is at least half of the vector length,
519 // widen the vector. Note: the threshold was not selected in
520 // any scientific way.
521 if (llvm::is_contained(Tys, ElemTy)) {
522 unsigned VecWidth = VecTy.getSizeInBits();
523 unsigned HwWidth = 8*HwLen;
524 if (VecWidth > 2*HwWidth)
526
527 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
528 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
530 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
532 }
533
534 // Defer to default.
535 return ~0u;
536}
537
538unsigned
539HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
540 unsigned Opc = Op.getOpcode();
541 switch (Opc) {
546 }
548}
549
551HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
552 const SDLoc &dl, SelectionDAG &DAG) const {
554 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
555 append_range(IntOps, Ops);
556 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
557}
558
559MVT
560HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
561 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
562
563 MVT ElemTy = Tys.first.getVectorElementType();
564 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
565 Tys.second.getVectorNumElements());
566}
567
568HexagonTargetLowering::TypePair
569HexagonTargetLowering::typeSplit(MVT VecTy) const {
570 assert(VecTy.isVector());
571 unsigned NumElem = VecTy.getVectorNumElements();
572 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
573 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
574 return { HalfTy, HalfTy };
575}
576
577MVT
578HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
579 MVT ElemTy = VecTy.getVectorElementType();
580 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
581 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
582}
583
584MVT
585HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
586 MVT ElemTy = VecTy.getVectorElementType();
587 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
588 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
589}
590
592HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
593 SelectionDAG &DAG) const {
594 if (ty(Vec).getVectorElementType() == ElemTy)
595 return Vec;
596 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
597 return DAG.getBitcast(CastTy, Vec);
598}
599
601HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
602 SelectionDAG &DAG) const {
603 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
604 Ops.first, Ops.second);
605}
606
607HexagonTargetLowering::VectorPair
608HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
609 SelectionDAG &DAG) const {
610 TypePair Tys = typeSplit(ty(Vec));
611 if (Vec.getOpcode() == HexagonISD::QCAT)
612 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
613 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
614}
615
616bool
617HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
618 return Subtarget.isHVXVectorType(Ty) &&
619 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
620}
621
622bool
623HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
624 return Subtarget.isHVXVectorType(Ty) &&
625 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
626}
627
628bool
629HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
630 return Subtarget.isHVXVectorType(Ty, true) &&
631 Ty.getVectorElementType() == MVT::i1;
632}
633
634bool HexagonTargetLowering::allowsHvxMemoryAccess(
635 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
636 // Bool vectors are excluded by default, but make it explicit to
637 // emphasize that bool vectors cannot be loaded or stored.
638 // Also, disallow double vector stores (to prevent unnecessary
639 // store widening in DAG combiner).
640 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
641 return false;
642 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
643 return false;
644 if (Fast)
645 *Fast = 1;
646 return true;
647}
648
649bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
650 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
651 if (!Subtarget.isHVXVectorType(VecTy))
652 return false;
653 // XXX Should this be false? vmemu are a bit slower than vmem.
654 if (Fast)
655 *Fast = 1;
656 return true;
657}
658
659void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
660 MachineInstr &MI, SDNode *Node) const {
661 unsigned Opc = MI.getOpcode();
662 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
663 MachineBasicBlock &MB = *MI.getParent();
664 MachineFunction &MF = *MB.getParent();
665 MachineRegisterInfo &MRI = MF.getRegInfo();
666 DebugLoc DL = MI.getDebugLoc();
667 auto At = MI.getIterator();
668
669 switch (Opc) {
670 case Hexagon::PS_vsplatib:
671 if (Subtarget.useHVXV62Ops()) {
672 // SplatV = A2_tfrsi #imm
673 // OutV = V6_lvsplatb SplatV
674 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
675 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
676 .add(MI.getOperand(1));
677 Register OutV = MI.getOperand(0).getReg();
678 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
679 .addReg(SplatV);
680 } else {
681 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
682 // OutV = V6_lvsplatw SplatV
683 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
684 const MachineOperand &InpOp = MI.getOperand(1);
685 assert(InpOp.isImm());
686 uint32_t V = InpOp.getImm() & 0xFF;
687 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
688 .addImm(V << 24 | V << 16 | V << 8 | V);
689 Register OutV = MI.getOperand(0).getReg();
690 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
691 }
692 MB.erase(At);
693 break;
694 case Hexagon::PS_vsplatrb:
695 if (Subtarget.useHVXV62Ops()) {
696 // OutV = V6_lvsplatb Inp
697 Register OutV = MI.getOperand(0).getReg();
698 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
699 .add(MI.getOperand(1));
700 } else {
701 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
702 const MachineOperand &InpOp = MI.getOperand(1);
703 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
704 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
705 Register OutV = MI.getOperand(0).getReg();
706 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
707 .addReg(SplatV);
708 }
709 MB.erase(At);
710 break;
711 case Hexagon::PS_vsplatih:
712 if (Subtarget.useHVXV62Ops()) {
713 // SplatV = A2_tfrsi #imm
714 // OutV = V6_lvsplath SplatV
715 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
716 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
717 .add(MI.getOperand(1));
718 Register OutV = MI.getOperand(0).getReg();
719 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
720 .addReg(SplatV);
721 } else {
722 // SplatV = A2_tfrsi #imm:#imm
723 // OutV = V6_lvsplatw SplatV
724 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
725 const MachineOperand &InpOp = MI.getOperand(1);
726 assert(InpOp.isImm());
727 uint32_t V = InpOp.getImm() & 0xFFFF;
728 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
729 .addImm(V << 16 | V);
730 Register OutV = MI.getOperand(0).getReg();
731 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
732 }
733 MB.erase(At);
734 break;
735 case Hexagon::PS_vsplatrh:
736 if (Subtarget.useHVXV62Ops()) {
737 // OutV = V6_lvsplath Inp
738 Register OutV = MI.getOperand(0).getReg();
739 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
740 .add(MI.getOperand(1));
741 } else {
742 // SplatV = A2_combine_ll Inp, Inp
743 // OutV = V6_lvsplatw SplatV
744 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
745 const MachineOperand &InpOp = MI.getOperand(1);
746 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
747 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
748 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
749 Register OutV = MI.getOperand(0).getReg();
750 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
751 }
752 MB.erase(At);
753 break;
754 case Hexagon::PS_vsplatiw:
755 case Hexagon::PS_vsplatrw:
756 if (Opc == Hexagon::PS_vsplatiw) {
757 // SplatV = A2_tfrsi #imm
758 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
759 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
760 .add(MI.getOperand(1));
761 MI.getOperand(1).ChangeToRegister(SplatV, false);
762 }
763 // OutV = V6_lvsplatw SplatV/Inp
764 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
765 break;
766 }
767}
768
770HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
771 SelectionDAG &DAG) const {
772 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
773 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
774
775 unsigned ElemWidth = ElemTy.getSizeInBits();
776 if (ElemWidth == 8)
777 return ElemIdx;
778
779 unsigned L = Log2_32(ElemWidth/8);
780 const SDLoc &dl(ElemIdx);
781 return DAG.getNode(ISD::SHL, dl, MVT::i32,
782 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
783}
784
786HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
787 SelectionDAG &DAG) const {
788 unsigned ElemWidth = ElemTy.getSizeInBits();
789 assert(ElemWidth >= 8 && ElemWidth <= 32);
790 if (ElemWidth == 32)
791 return Idx;
792
793 if (ty(Idx) != MVT::i32)
794 Idx = DAG.getBitcast(MVT::i32, Idx);
795 const SDLoc &dl(Idx);
796 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
797 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
798 return SubIdx;
799}
800
802HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
803 SDValue Op1, ArrayRef<int> Mask,
804 SelectionDAG &DAG) const {
805 MVT OpTy = ty(Op0);
806 assert(OpTy == ty(Op1));
807
808 MVT ElemTy = OpTy.getVectorElementType();
809 if (ElemTy == MVT::i8)
810 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
811 assert(ElemTy.getSizeInBits() >= 8);
812
813 MVT ResTy = tyVector(OpTy, MVT::i8);
814 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
815
816 SmallVector<int,128> ByteMask;
817 for (int M : Mask) {
818 if (M < 0) {
819 for (unsigned I = 0; I != ElemSize; ++I)
820 ByteMask.push_back(-1);
821 } else {
822 int NewM = M*ElemSize;
823 for (unsigned I = 0; I != ElemSize; ++I)
824 ByteMask.push_back(NewM+I);
825 }
826 }
827 assert(ResTy.getVectorNumElements() == ByteMask.size());
828 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
829 opCastElem(Op1, MVT::i8, DAG), ByteMask);
830}
831
833HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
834 const SDLoc &dl, MVT VecTy,
835 SelectionDAG &DAG) const {
836 unsigned VecLen = Values.size();
837 MachineFunction &MF = DAG.getMachineFunction();
838 MVT ElemTy = VecTy.getVectorElementType();
839 unsigned ElemWidth = ElemTy.getSizeInBits();
840 unsigned HwLen = Subtarget.getVectorLength();
841
842 unsigned ElemSize = ElemWidth / 8;
843 assert(ElemSize*VecLen == HwLen);
845
846 if (VecTy.getVectorElementType() != MVT::i32 &&
847 !(Subtarget.useHVXFloatingPoint() &&
848 VecTy.getVectorElementType() == MVT::f32)) {
849 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
850 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
851 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
852 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
853 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
854 Words.push_back(DAG.getBitcast(MVT::i32, W));
855 }
856 } else {
857 for (SDValue V : Values)
858 Words.push_back(DAG.getBitcast(MVT::i32, V));
859 }
860 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
861 unsigned NumValues = Values.size();
862 assert(NumValues > 0);
863 bool IsUndef = true;
864 for (unsigned i = 0; i != NumValues; ++i) {
865 if (Values[i].isUndef())
866 continue;
867 IsUndef = false;
868 if (!SplatV.getNode())
869 SplatV = Values[i];
870 else if (SplatV != Values[i])
871 return false;
872 }
873 if (IsUndef)
874 SplatV = Values[0];
875 return true;
876 };
877
878 unsigned NumWords = Words.size();
879 SDValue SplatV;
880 bool IsSplat = isSplat(Words, SplatV);
881 if (IsSplat && isUndef(SplatV))
882 return DAG.getUNDEF(VecTy);
883 if (IsSplat) {
884 assert(SplatV.getNode());
885 if (isNullConstant(SplatV))
886 return getZero(dl, VecTy, DAG);
887 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
888 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
889 return DAG.getBitcast(VecTy, S);
890 }
891
892 // Delay recognizing constant vectors until here, so that we can generate
893 // a vsplat.
894 SmallVector<ConstantInt*, 128> Consts(VecLen);
895 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
896 if (AllConst) {
897 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
898 (Constant**)Consts.end());
899 Constant *CV = ConstantVector::get(Tmp);
900 Align Alignment(HwLen);
901 SDValue CP =
902 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
903 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
905 }
906
907 // A special case is a situation where the vector is built entirely from
908 // elements extracted from another vector. This could be done via a shuffle
909 // more efficiently, but typically, the size of the source vector will not
910 // match the size of the vector being built (which precludes the use of a
911 // shuffle directly).
912 // This only handles a single source vector, and the vector being built
913 // should be of a sub-vector type of the source vector type.
914 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
915 SmallVectorImpl<int> &SrcIdx) {
916 SDValue Vec;
917 for (SDValue V : Values) {
918 if (isUndef(V)) {
919 SrcIdx.push_back(-1);
920 continue;
921 }
922 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
923 return false;
924 // All extracts should come from the same vector.
925 SDValue T = V.getOperand(0);
926 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
927 return false;
928 Vec = T;
929 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
930 if (C == nullptr)
931 return false;
932 int I = C->getSExtValue();
933 assert(I >= 0 && "Negative element index");
934 SrcIdx.push_back(I);
935 }
936 SrcVec = Vec;
937 return true;
938 };
939
940 SmallVector<int,128> ExtIdx;
941 SDValue ExtVec;
942 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
943 MVT ExtTy = ty(ExtVec);
944 unsigned ExtLen = ExtTy.getVectorNumElements();
945 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
946 // Construct a new shuffle mask that will produce a vector with the same
947 // number of elements as the input vector, and such that the vector we
948 // want will be the initial subvector of it.
949 SmallVector<int,128> Mask;
950 BitVector Used(ExtLen);
951
952 for (int M : ExtIdx) {
953 Mask.push_back(M);
954 if (M >= 0)
955 Used.set(M);
956 }
957 // Fill the rest of the mask with the unused elements of ExtVec in hopes
958 // that it will result in a permutation of ExtVec's elements. It's still
959 // fine if it doesn't (e.g. if undefs are present, or elements are
960 // repeated), but permutations can always be done efficiently via vdelta
961 // and vrdelta.
962 for (unsigned I = 0; I != ExtLen; ++I) {
963 if (Mask.size() == ExtLen)
964 break;
965 if (!Used.test(I))
966 Mask.push_back(I);
967 }
968
969 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
970 DAG.getUNDEF(ExtTy), Mask);
971 return ExtLen == VecLen ? S : LoHalf(S, DAG);
972 }
973 }
974
975 // Find most common element to initialize vector with. This is to avoid
976 // unnecessary vinsert/valign for cases where the same value is present
977 // many times. Creates a histogram of the vector's elements to find the
978 // most common element n.
979 assert(4*Words.size() == Subtarget.getVectorLength());
980 int VecHist[32];
981 int n = 0;
982 for (unsigned i = 0; i != NumWords; ++i) {
983 VecHist[i] = 0;
984 if (Words[i].isUndef())
985 continue;
986 for (unsigned j = i; j != NumWords; ++j)
987 if (Words[i] == Words[j])
988 VecHist[i]++;
989
990 if (VecHist[i] > VecHist[n])
991 n = i;
992 }
993
994 SDValue HalfV = getZero(dl, VecTy, DAG);
995 if (VecHist[n] > 1) {
996 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
997 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
998 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
999 }
1000 SDValue HalfV0 = HalfV;
1001 SDValue HalfV1 = HalfV;
1002
1003 // Construct two halves in parallel, then or them together. Rn and Rm count
1004 // number of rotations needed before the next element. One last rotation is
1005 // performed post-loop to position the last element.
1006 int Rn = 0, Rm = 0;
1007 SDValue Sn, Sm;
1008 SDValue N = HalfV0;
1009 SDValue M = HalfV1;
1010 for (unsigned i = 0; i != NumWords/2; ++i) {
1011 // Rotate by element count since last insertion.
1012 if (Words[i] != Words[n] || VecHist[n] <= 1) {
1013 Sn = DAG.getConstant(Rn, dl, MVT::i32);
1014 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
1015 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
1016 {HalfV0, Words[i]});
1017 Rn = 0;
1018 }
1019 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
1020 Sm = DAG.getConstant(Rm, dl, MVT::i32);
1021 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1022 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
1023 {HalfV1, Words[i+NumWords/2]});
1024 Rm = 0;
1025 }
1026 Rn += 4;
1027 Rm += 4;
1028 }
1029 // Perform last rotation.
1030 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
1031 Sm = DAG.getConstant(Rm, dl, MVT::i32);
1032 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
1033 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1034
1035 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
1036 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
1037
1038 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
1039
1040 SDValue OutV =
1041 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
1042 return OutV;
1043}
1044
// Create a full-width byte vector in which the i1 elements of the predicate
// PredV are laid out "prefix-style": each source i1 element occupies BitBytes
// consecutive bytes at the front of the result. If ZeroFill is true, the
// bytes past that prefix are zeroed; otherwise they are unspecified.
// PredV may be an HVX vector predicate, or a short scalar predicate
// (v2i1/v4i1/v8i1).
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  MVT PredTy = ty(PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);

  if (Subtarget.isHVXVectorType(PredTy, true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Shuffle mask: byte i of the Q2V image goes to position
    // BlockLen*(i%Scale) + i/Scale, i.e. every Scale-th byte is gathered
    // to the front, with the remaining "phases" following in later blocks.
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
    SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                         {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
    SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
    return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // In the P2D image of a scalar predicate (an i64), each i1 element covers
  // Bytes bytes. The loops below repeatedly expand or contract the 32-bit
  // words of that image until each element covers exactly BitBytes bytes.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Two word buffers, used alternately as source and destination of each
  // expansion/contraction step; IdxW selects the current destination.
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  SDValue W0 = isUndef(PredV)
                  ? DAG.getUNDEF(MVT::i64)
                  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
  if (Bytes < BitBytes) {
    Words[IdxW].push_back(HiHalf(W0, DAG));
    Words[IdxW].push_back(LoHalf(W0, DAG));
  } else
    Words[IdxW].push_back(W0);

  // Expand: double the number of bytes per element until it reaches
  // BitBytes. Below 4 bytes/element an actual predicate expansion is
  // needed; at 4 or more, duplicating whole words suffices.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(W, dl, DAG);
        Words[IdxW].push_back(HiHalf(T, DAG));
        Words[IdxW].push_back(LoHalf(T, DAG));
      }
    } else {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
        Words[IdxW].push_back(W);
      }
    }
    Bytes *= 2;
  }

  // Contract: halve the number of bytes per element until it reaches
  // BitBytes (mirror image of the expansion loop above).
  while (Bytes > BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes <= 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = contractPredicate(W, dl, DAG);
        Words[IdxW].push_back(T);
      }
    } else {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
      }
    }
    Bytes /= 2;
  }

  assert(Bytes == BitBytes);
  // NOTE(review): special case for v2i1 with 1 byte per bit widens the
  // build type to i16 elements — presumably to accommodate both words of
  // the P2D image; confirm intent against the VINSERTW0 lowering.
  if (BitBytes == 1 && PredTy == MVT::v2i1)
    ByteTy = MVT::getVectorVT(MVT::i16, HwLen);

  // Build the result by rotating and inserting one 32-bit word at a time
  // (each insertion lands in element 0 after the rotation).
  SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
  SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
    Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
  }

  return Vec;
}
1151
1152SDValue
1153HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1154 const SDLoc &dl, MVT VecTy,
1155 SelectionDAG &DAG) const {
1156 // Construct a vector V of bytes, such that a comparison V >u 0 would
1157 // produce the required vector predicate.
1158 unsigned VecLen = Values.size();
1159 unsigned HwLen = Subtarget.getVectorLength();
1160 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1162 bool AllT = true, AllF = true;
1163
1164 auto IsTrue = [] (SDValue V) {
1165 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1166 return !N->isZero();
1167 return false;
1168 };
1169 auto IsFalse = [] (SDValue V) {
1170 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1171 return N->isZero();
1172 return false;
1173 };
1174
1175 if (VecLen <= HwLen) {
1176 // In the hardware, each bit of a vector predicate corresponds to a byte
1177 // of a vector register. Calculate how many bytes does a bit of VecTy
1178 // correspond to.
1179 assert(HwLen % VecLen == 0);
1180 unsigned BitBytes = HwLen / VecLen;
1181 for (SDValue V : Values) {
1182 AllT &= IsTrue(V);
1183 AllF &= IsFalse(V);
1184
1185 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1186 : DAG.getUNDEF(MVT::i8);
1187 for (unsigned B = 0; B != BitBytes; ++B)
1188 Bytes.push_back(Ext);
1189 }
1190 } else {
1191 // There are as many i1 values, as there are bits in a vector register.
1192 // Divide the values into groups of 8 and check that each group consists
1193 // of the same value (ignoring undefs).
1194 for (unsigned I = 0; I != VecLen; I += 8) {
1195 unsigned B = 0;
1196 // Find the first non-undef value in this group.
1197 for (; B != 8; ++B) {
1198 if (!Values[I+B].isUndef())
1199 break;
1200 }
1201 SDValue F = Values[I+B];
1202 AllT &= IsTrue(F);
1203 AllF &= IsFalse(F);
1204
1205 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1206 : DAG.getUNDEF(MVT::i8);
1207 Bytes.push_back(Ext);
1208 // Verify that the rest of values in the group are the same as the
1209 // first.
1210 for (; B != 8; ++B)
1211 assert(Values[I+B].isUndef() || Values[I+B] == F);
1212 }
1213 }
1214
1215 if (AllT)
1216 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1217 if (AllF)
1218 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1219
1220 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1221 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1222 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1223}
1224
1225SDValue
1226HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1227 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1228 MVT ElemTy = ty(VecV).getVectorElementType();
1229
1230 unsigned ElemWidth = ElemTy.getSizeInBits();
1231 assert(ElemWidth >= 8 && ElemWidth <= 32);
1232 (void)ElemWidth;
1233
1234 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1235 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1236 {VecV, ByteIdx});
1237 if (ElemTy == MVT::i32)
1238 return ExWord;
1239
1240 // Have an extracted word, need to extract the smaller element out of it.
1241 // 1. Extract the bits of (the original) IdxV that correspond to the index
1242 // of the desired element in the 32-bit word.
1243 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1244 // 2. Extract the element from the word.
1245 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1246 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1247}
1248
1249SDValue
1250HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1251 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1252 // Implement other return types if necessary.
1253 assert(ResTy == MVT::i1);
1254
1255 unsigned HwLen = Subtarget.getVectorLength();
1256 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1257 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1258
1259 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1260 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1261 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1262
1263 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1264 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1265 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1266}
1267
1268SDValue
1269HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1270 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1271 MVT ElemTy = ty(VecV).getVectorElementType();
1272
1273 unsigned ElemWidth = ElemTy.getSizeInBits();
1274 assert(ElemWidth >= 8 && ElemWidth <= 32);
1275 (void)ElemWidth;
1276
1277 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1278 SDValue ByteIdxV) {
1279 MVT VecTy = ty(VecV);
1280 unsigned HwLen = Subtarget.getVectorLength();
1281 SDValue MaskV =
1282 DAG.getNode(ISD::AND, dl, MVT::i32,
1283 {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
1284 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1285 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1286 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1287 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1288 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1289 return TorV;
1290 };
1291
1292 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1293 if (ElemTy == MVT::i32)
1294 return InsertWord(VecV, ValV, ByteIdx);
1295
1296 // If this is not inserting a 32-bit word, convert it into such a thing.
1297 // 1. Extract the existing word from the target vector.
1298 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1299 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1300 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1301 dl, MVT::i32, DAG);
1302
1303 // 2. Treating the extracted word as a 32-bit vector, insert the given
1304 // value into it.
1305 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1306 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1307 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1308 ValV, SubIdx, dl, ElemTy, DAG);
1309
1310 // 3. Insert the 32-bit word back into the original vector.
1311 return InsertWord(VecV, Ins, ByteIdx);
1312}
1313
1314SDValue
1315HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1316 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1317 unsigned HwLen = Subtarget.getVectorLength();
1318 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1319 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1320
1321 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1322 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1323 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1324 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1325
1326 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1327 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1328}
1329
1330SDValue
1331HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1332 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1333 MVT VecTy = ty(VecV);
1334 unsigned HwLen = Subtarget.getVectorLength();
1335 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1336 MVT ElemTy = VecTy.getVectorElementType();
1337 unsigned ElemWidth = ElemTy.getSizeInBits();
1338
1339 // If the source vector is a vector pair, get the single vector containing
1340 // the subvector of interest. The subvector will never overlap two single
1341 // vectors.
1342 if (isHvxPairTy(VecTy)) {
1343 unsigned SubIdx = Hexagon::vsub_lo;
1344 if (Idx * ElemWidth >= 8 * HwLen) {
1345 SubIdx = Hexagon::vsub_hi;
1346 Idx -= VecTy.getVectorNumElements() / 2;
1347 }
1348
1349 VecTy = typeSplit(VecTy).first;
1350 VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
1351 if (VecTy == ResTy)
1352 return VecV;
1353 }
1354
1355 // The only meaningful subvectors of a single HVX vector are those that
1356 // fit in a scalar register.
1357 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1358
1359 MVT WordTy = tyVector(VecTy, MVT::i32);
1360 SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1361 unsigned WordIdx = (Idx*ElemWidth) / 32;
1362
1363 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1364 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1365 if (ResTy.getSizeInBits() == 32)
1366 return DAG.getBitcast(ResTy, W0);
1367
1368 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1369 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1370 SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1371 return DAG.getBitcast(ResTy, WW);
1372}
1373
// Extract a subvector of predicate type ResTy from the HVX vector predicate
// VecV, starting at the constant element index IdxV. The result is either
// another HVX vector predicate or a short scalar predicate type.
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  // Work on the byte-vector image of the predicate.
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes of the image covered by one source predicate element.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  // Byte offset of the first element of the requested subvector.
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
  SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
                  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
}
1437
// Insert the subvector SubV into the HVX vector (or vector pair) VecV at
// element index IdxV. SubV is either a full single vector (when VecV is a
// pair), or a 32-/64-bit value. IdxV does not need to be a constant.
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(VecTy);
  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(VecV, DAG);
    V1 = HiHalf(VecV, DAG);

    // PickHi: does the insertion target the high half of the pair?
    SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
                                    dl, MVT::i32);
    PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
    if (isHvxSingleTy(SubTy)) {
      // Inserting a whole single vector into a pair: with a constant index
      // this is a plain subregister insert.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
      SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
      return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
    IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
    SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the vector so the insertion point lands at byte 0.
    IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                       DAG.getConstant(ElemWidth/8, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    SDValue V = DAG.getBitcast(MVT::i32, SubV);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
  } else {
    // 64-bit subvector: insert the low word, rotate by 4 bytes, then
    // insert the high word.
    SDValue V = DAG.getBitcast(MVT::i64, SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
                          DAG.getConstant(4, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
  }

  if (IsPair) {
    // Rebuild the pair, placing the updated half where it belongs.
    SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
    SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
    return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
  }
  return SingleV;
}
1524
// Insert the predicate SubV (an HVX vector predicate or a scalar predicate)
// into the HVX vector predicate VecV at element index IdxV. The work is
// done on byte-vector images: rotate, vmux in the prefix image of SubV,
// rotate back, and convert to a predicate.
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale: how many subvectors of SubTy fit in VecTy.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // BitBytes: bytes of the image per predicate element of VecTy.
  unsigned BitBytes = HwLen / VecLen;
  // BlockLen: bytes of the image occupied by the inserted subvector.
  unsigned BlockLen = HwLen / Scale;

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // Image of SubV with its bytes at the front (rest unspecified).
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the target image so the insertion point lands at byte 0.
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1570
1571SDValue
1572HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1573 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1574 // Sign- and any-extending of a vector predicate to a vector register is
1575 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1576 // a vector of 1s (where the 1s are of type matching the vector type).
1577 assert(Subtarget.isHVXVectorType(ResTy));
1578 if (!ZeroExt)
1579 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1580
1581 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1582 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1583 DAG.getConstant(1, dl, MVT::i32));
1584 SDValue False = getZero(dl, ResTy, DAG);
1585 return DAG.getSelect(dl, ResTy, VecV, True, False);
1586}
1587
1588SDValue
1589HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1590 MVT ResTy, SelectionDAG &DAG) const {
1591 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1592 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1593 // vector register. The remaining bits of the vector register are
1594 // unspecified.
1595
1596 MachineFunction &MF = DAG.getMachineFunction();
1597 unsigned HwLen = Subtarget.getVectorLength();
1598 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1599 MVT PredTy = ty(VecQ);
1600 unsigned PredLen = PredTy.getVectorNumElements();
1601 assert(HwLen % PredLen == 0);
1602 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1603
1604 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1606 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1607 // These are bytes with the LSB rotated left with respect to their index.
1608 for (unsigned i = 0; i != HwLen/8; ++i) {
1609 for (unsigned j = 0; j != 8; ++j)
1610 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1611 }
1612 Constant *CV = ConstantVector::get(Tmp);
1613 Align Alignment(HwLen);
1614 SDValue CP =
1615 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1616 SDValue Bytes =
1617 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1619
1620 // Select the bytes that correspond to true bits in the vector predicate.
1621 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1622 getZero(dl, VecTy, DAG));
1623 // Calculate the OR of all bytes in each group of 8. That will compress
1624 // all the individual bits into a single byte.
1625 // First, OR groups of 4, via vrmpy with 0x01010101.
1626 SDValue All1 =
1627 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1628 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1629 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1630 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1631 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1632 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1633
1634 // Pick every 8th byte and coalesce them at the beginning of the output.
1635 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1636 // byte and so on.
1637 SmallVector<int,128> Mask;
1638 for (unsigned i = 0; i != HwLen; ++i)
1639 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1640 SDValue Collect =
1641 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1642 return DAG.getBitcast(ResTy, Collect);
1643}
1644
1645SDValue
1646HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1647 const SDLoc &dl, SelectionDAG &DAG) const {
1648 // Take a vector and resize the element type to match the given type.
1649 MVT InpTy = ty(VecV);
1650 if (InpTy == ResTy)
1651 return VecV;
1652
1653 unsigned InpWidth = InpTy.getSizeInBits();
1654 unsigned ResWidth = ResTy.getSizeInBits();
1655
1656 if (InpTy.isFloatingPoint()) {
1657 return InpWidth < ResWidth
1658 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1659 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1660 DAG.getTargetConstant(0, dl, MVT::i32));
1661 }
1662
1663 assert(InpTy.isInteger());
1664
1665 if (InpWidth < ResWidth) {
1666 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1667 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1668 } else {
1669 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1670 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1671 }
1672}
1673
1674SDValue
1675HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1676 SelectionDAG &DAG) const {
1677 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1678
1679 const SDLoc &dl(Vec);
1680 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1681 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1682 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1683}
1684
1685SDValue
1686HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1687 const {
1688 const SDLoc &dl(Op);
1689 MVT VecTy = ty(Op);
1690
1691 unsigned Size = Op.getNumOperands();
1693 for (unsigned i = 0; i != Size; ++i)
1694 Ops.push_back(Op.getOperand(i));
1695
1696 if (VecTy.getVectorElementType() == MVT::i1)
1697 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1698
1699 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1700 // not a legal type, just bitcast the node to use i16
1701 // types and bitcast the result back to f16
1702 if (VecTy.getVectorElementType() == MVT::f16 ||
1703 VecTy.getVectorElementType() == MVT::bf16) {
1705 for (unsigned i = 0; i != Size; i++)
1706 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1707
1708 SDValue T0 =
1709 DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps);
1710 return DAG.getBitcast(tyVector(VecTy, VecTy.getVectorElementType()), T0);
1711 }
1712
1713 // First, split the BUILD_VECTOR for vector pairs. We could generate
1714 // some pairs directly (via splat), but splats should be generated
1715 // by the combiner prior to getting here.
1716 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1718 MVT SingleTy = typeSplit(VecTy).first;
1719 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1720 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1721 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1722 }
1723
1724 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1725}
1726
1727SDValue
1728HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1729 const {
1730 const SDLoc &dl(Op);
1731 MVT VecTy = ty(Op);
1732 MVT ArgTy = ty(Op.getOperand(0));
1733
1734 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1735 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1736 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1737 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1738 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1739 return DAG.getBitcast(VecTy, Splat);
1740 }
1741
1742 return SDValue();
1743}
1744
1745SDValue
1746HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1747 const {
1748 // Vector concatenation of two integer (non-bool) vectors does not need
1749 // special lowering. Custom-lower concats of bool vectors and expand
1750 // concats of more than 2 vectors.
1751 MVT VecTy = ty(Op);
1752 const SDLoc &dl(Op);
1753 unsigned NumOp = Op.getNumOperands();
1754 if (VecTy.getVectorElementType() != MVT::i1) {
1755 if (NumOp == 2)
1756 return Op;
1757 // Expand the other cases into a build-vector.
1759 for (SDValue V : Op.getNode()->ops())
1760 DAG.ExtractVectorElements(V, Elems);
1761 // A vector of i16 will be broken up into a build_vector of i16's.
1762 // This is a problem, since at the time of operation legalization,
1763 // all operations are expected to be type-legalized, and i16 is not
1764 // a legal type. If any of the extracted elements is not of a valid
1765 // type, sign-extend it to a valid one.
1766 for (SDValue &V : Elems) {
1767 MVT Ty = ty(V);
1768 if (!isTypeLegal(Ty)) {
1769 MVT NTy = typeLegalize(Ty, DAG);
1770 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1771 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1772 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1773 V.getOperand(0), V.getOperand(1)),
1774 DAG.getValueType(Ty));
1775 continue;
1776 }
1777 // A few less complicated cases.
1778 switch (V.getOpcode()) {
1779 case ISD::Constant:
1780 V = DAG.getSExtOrTrunc(V, dl, NTy);
1781 break;
1782 case ISD::UNDEF:
1783 V = DAG.getUNDEF(NTy);
1784 break;
1785 case ISD::TRUNCATE:
1786 V = V.getOperand(0);
1787 break;
1788 default:
1789 llvm_unreachable("Unexpected vector element");
1790 }
1791 }
1792 }
1793 return DAG.getBuildVector(VecTy, dl, Elems);
1794 }
1795
1796 assert(VecTy.getVectorElementType() == MVT::i1);
1797 unsigned HwLen = Subtarget.getVectorLength();
1798 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1799
1800 SDValue Op0 = Op.getOperand(0);
1801
1802 // If the operands are HVX types (i.e. not scalar predicates), then
1803 // defer the concatenation, and create QCAT instead.
1804 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1805 if (NumOp == 2)
1806 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1807
1808 ArrayRef<SDUse> U(Op.getNode()->ops());
1811
1812 MVT HalfTy = typeSplit(VecTy).first;
1813 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1814 Ops.take_front(NumOp/2));
1815 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1816 Ops.take_back(NumOp/2));
1817 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1818 }
1819
1820 // Count how many bytes (in a vector register) each bit in VecTy
1821 // corresponds to.
1822 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1823
1824 SmallVector<SDValue,8> Prefixes;
1825 for (SDValue V : Op.getNode()->op_values()) {
1826 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1827 Prefixes.push_back(P);
1828 }
1829
1830 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1831 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1832 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1833 SDValue Res = getZero(dl, ByteTy, DAG);
1834 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1835 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1836 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1837 }
1838 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1839}
1840
1841SDValue
1842HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1843 const {
1844 // Change the type of the extracted element to i32.
1845 SDValue VecV = Op.getOperand(0);
1846 MVT ElemTy = ty(VecV).getVectorElementType();
1847 const SDLoc &dl(Op);
1848 SDValue IdxV = Op.getOperand(1);
1849 if (ElemTy == MVT::i1)
1850 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1851
1852 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1853}
1854
1855SDValue
1856HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1857 const {
1858 const SDLoc &dl(Op);
1859 MVT VecTy = ty(Op);
1860 SDValue VecV = Op.getOperand(0);
1861 SDValue ValV = Op.getOperand(1);
1862 SDValue IdxV = Op.getOperand(2);
1863 MVT ElemTy = ty(VecV).getVectorElementType();
1864 if (ElemTy == MVT::i1)
1865 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1866
1867 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1869 tyVector(VecTy, MVT::i16),
1870 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1871 DAG.getBitcast(MVT::i16, ValV), IdxV);
1872 return DAG.getBitcast(tyVector(VecTy, ElemTy), T0);
1873 }
1874
1875 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1876}
1877
1878SDValue
1879HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1880 const {
1881 SDValue SrcV = Op.getOperand(0);
1882 MVT SrcTy = ty(SrcV);
1883 MVT DstTy = ty(Op);
1884 SDValue IdxV = Op.getOperand(1);
1885 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1886 assert(Idx % DstTy.getVectorNumElements() == 0);
1887 (void)Idx;
1888 const SDLoc &dl(Op);
1889
1890 MVT ElemTy = SrcTy.getVectorElementType();
1891 if (ElemTy == MVT::i1)
1892 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1893
1894 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1895}
1896
1897SDValue
1898HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1899 const {
1900 // Idx does not need to be a constant.
1901 SDValue VecV = Op.getOperand(0);
1902 SDValue ValV = Op.getOperand(1);
1903 SDValue IdxV = Op.getOperand(2);
1904
1905 const SDLoc &dl(Op);
1906 MVT VecTy = ty(VecV);
1907 MVT ElemTy = VecTy.getVectorElementType();
1908 if (ElemTy == MVT::i1)
1909 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1910
1911 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1912}
1913
1914SDValue
1915HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1916 // Lower any-extends of boolean vectors to sign-extends, since they
1917 // translate directly to Q2V. Zero-extending could also be done equally
1918 // fast, but Q2V is used/recognized in more places.
1919 // For all other vectors, use zero-extend.
1920 MVT ResTy = ty(Op);
1921 SDValue InpV = Op.getOperand(0);
1922 MVT ElemTy = ty(InpV).getVectorElementType();
1923 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1924 return LowerHvxSignExt(Op, DAG);
1925 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1926}
1927
1928SDValue
1929HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1930 MVT ResTy = ty(Op);
1931 SDValue InpV = Op.getOperand(0);
1932 MVT ElemTy = ty(InpV).getVectorElementType();
1933 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1934 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1935 return Op;
1936}
1937
1938SDValue
1939HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1940 MVT ResTy = ty(Op);
1941 SDValue InpV = Op.getOperand(0);
1942 MVT ElemTy = ty(InpV).getVectorElementType();
1943 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1944 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1945 return Op;
1946}
1947
1948SDValue
1949HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1950 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1951 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1952 const SDLoc &dl(Op);
1953 MVT ResTy = ty(Op);
1954 SDValue InpV = Op.getOperand(0);
1955 assert(ResTy == ty(InpV));
1956
1957 // Calculate the vectors of 1 and bitwidth(x).
1958 MVT ElemTy = ty(InpV).getVectorElementType();
1959 unsigned ElemWidth = ElemTy.getSizeInBits();
1960
1961 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1962 DAG.getConstant(1, dl, MVT::i32));
1963 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1964 DAG.getConstant(ElemWidth, dl, MVT::i32));
1965 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1966 DAG.getAllOnesConstant(dl, MVT::i32));
1967
1968 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1969 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1970 // it separately in custom combine or selection).
1971 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1972 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1973 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1974 return DAG.getNode(ISD::SUB, dl, ResTy,
1975 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1976}
1977
1978SDValue
1979HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1980 const SDLoc &dl(Op);
1981 MVT ResTy = ty(Op);
1982 assert(ResTy.getVectorElementType() == MVT::i32);
1983
1984 SDValue Vs = Op.getOperand(0);
1985 SDValue Vt = Op.getOperand(1);
1986
1987 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1988 unsigned Opc = Op.getOpcode();
1989
1990 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1991 if (Opc == ISD::MULHU)
1992 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1993 if (Opc == ISD::MULHS)
1994 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1995
1996#ifndef NDEBUG
1997 Op.dump(&DAG);
1998#endif
1999 llvm_unreachable("Unexpected mulh operation");
2000}
2001
2002SDValue
2003HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
2004 const SDLoc &dl(Op);
2005 unsigned Opc = Op.getOpcode();
2006 SDValue Vu = Op.getOperand(0);
2007 SDValue Vv = Op.getOperand(1);
2008
2009 // If the HI part is not used, convert it to a regular MUL.
2010 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
2011 // Need to preserve the types and the number of values.
2012 SDValue Hi = DAG.getUNDEF(ty(HiVal));
2013 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
2014 return DAG.getMergeValues({Lo, Hi}, dl);
2015 }
2016
2017 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
2018 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
2019
2020 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
2021 // valued nodes.
2022 if (Subtarget.useHVXV62Ops())
2023 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
2024
2025 if (Opc == HexagonISD::SMUL_LOHI) {
2026 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
2027 // for other signedness LOHI is cheaper.
2028 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
2029 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
2030 SDValue Lo = DAG.getUNDEF(ty(LoVal));
2031 return DAG.getMergeValues({Lo, Hi}, dl);
2032 }
2033 }
2034
2035 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
2036}
2037
2038SDValue
2039HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
2040 SDValue Val = Op.getOperand(0);
2041 MVT ResTy = ty(Op);
2042 MVT ValTy = ty(Val);
2043 const SDLoc &dl(Op);
2044
2045 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
2046 unsigned HwLen = Subtarget.getVectorLength();
2047 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
2048 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2049 unsigned BitWidth = ResTy.getSizeInBits();
2050
2051 if (BitWidth < 64) {
2052 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
2053 dl, MVT::i32, DAG);
2054 if (BitWidth == 32)
2055 return W0;
2056 assert(BitWidth < 32u);
2057 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2058 }
2059
2060 // The result is >= 64 bits. The only options are 64 or 128.
2061 assert(BitWidth == 64 || BitWidth == 128);
2063 for (unsigned i = 0; i != BitWidth/32; ++i) {
2064 SDValue W = extractHvxElementReg(
2065 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2066 Words.push_back(W);
2067 }
2068 SmallVector<SDValue,2> Combines;
2069 assert(Words.size() % 2 == 0);
2070 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2071 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2072 Combines.push_back(C);
2073 }
2074
2075 if (BitWidth == 64)
2076 return Combines[0];
2077
2078 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2079 }
2080
2081 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2082 // Splat the input into a 32-element i32 vector, then AND each element
2083 // with a unique bitmask to isolate individual bits.
2084 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2085 assert(Val32.getValueType().getSizeInBits() == 32 &&
2086 "Input must be 32 bits");
2087 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2088 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2090 for (unsigned i = 0; i < 32; ++i)
2091 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2092
2093 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2094 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2095 return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2096 };
2097 // === Case: v32i1 ===
2098 if (ResTy == MVT::v32i1 &&
2099 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2100 Subtarget.useHVX128BOps()) {
2101 SDValue Val32 = Val;
2102 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2103 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2104 return bitcastI32ToV32I1(Val32);
2105 }
2106 // === Case: v64i1 ===
2107 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2108 // Split i64 into lo/hi 32-bit halves.
2109 SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2110 SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2111 DAG.getConstant(32, dl, MVT::i64));
2112 SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2113
2114 // Reuse the same 32-bit logic twice.
2115 SDValue LoRes = bitcastI32ToV32I1(Lo);
2116 SDValue HiRes = bitcastI32ToV32I1(Hi);
2117
2118 // Concatenate into a v64i1 predicate.
2119 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
2120 }
2121
2122 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2123 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2124 unsigned BitWidth = ValTy.getSizeInBits();
2125 unsigned HwLen = Subtarget.getVectorLength();
2126 assert(BitWidth == HwLen);
2127
2128 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2129 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2130 // Splat each byte of Val 8 times.
2131 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2132 // where b0, b1,..., b15 are least to most significant bytes of I.
2134 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2135 // These are bytes with the LSB rotated left with respect to their index.
2137 for (unsigned I = 0; I != HwLen / 8; ++I) {
2138 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2139 SDValue Byte =
2140 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2141 for (unsigned J = 0; J != 8; ++J) {
2142 Bytes.push_back(Byte);
2143 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2144 }
2145 }
2146
2147 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2148 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2149 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2150
2151 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2152 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2153 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2154 }
2155
2156 return Op;
2157}
2158
2159SDValue
2160HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2161 // Sign- and zero-extends are legal.
2162 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2163 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2164 Op.getOperand(0));
2165}
2166
2167SDValue
2168HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2169 MVT ResTy = ty(Op);
2170 if (ResTy.getVectorElementType() != MVT::i1)
2171 return Op;
2172
2173 const SDLoc &dl(Op);
2174 unsigned HwLen = Subtarget.getVectorLength();
2175 unsigned VecLen = ResTy.getVectorNumElements();
2176 assert(HwLen % VecLen == 0);
2177 unsigned ElemSize = HwLen / VecLen;
2178
2179 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2180 SDValue S =
2181 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2182 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2183 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2184 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2185}
2186
2187SDValue
2188HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2189 if (SDValue S = getVectorShiftByInt(Op, DAG))
2190 return S;
2191 return Op;
2192}
2193
2194SDValue
2195HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2196 SelectionDAG &DAG) const {
2197 unsigned Opc = Op.getOpcode();
2198 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2199
2200 // Make sure the shift amount is within the range of the bitwidth
2201 // of the element type.
2202 SDValue A = Op.getOperand(0);
2203 SDValue B = Op.getOperand(1);
2204 SDValue S = Op.getOperand(2);
2205
2206 MVT InpTy = ty(A);
2207 MVT ElemTy = InpTy.getVectorElementType();
2208
2209 const SDLoc &dl(Op);
2210 unsigned ElemWidth = ElemTy.getSizeInBits();
2211 bool IsLeft = Opc == ISD::FSHL;
2212
2213 // The expansion into regular shifts produces worse code for i8 and for
2214 // right shift of i32 on v65+.
2215 bool UseShifts = ElemTy != MVT::i8;
2216 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2217 UseShifts = false;
2218
2219 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2220 // If this is a funnel shift by a scalar, lower it into regular shifts.
2221 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2222 SDValue ModS =
2223 DAG.getNode(ISD::AND, dl, MVT::i32,
2224 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2225 SDValue NegS =
2226 DAG.getNode(ISD::SUB, dl, MVT::i32,
2227 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2228 SDValue IsZero =
2229 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2230 // FSHL A, B => A << | B >>n
2231 // FSHR A, B => A <<n | B >>
2232 SDValue Part1 =
2233 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2234 SDValue Part2 =
2235 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2236 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2237 // If the shift amount was 0, pick A or B, depending on the direction.
2238 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2239 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2240 }
2241
2243 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2244
2245 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2246 return DAG.getNode(MOpc, dl, ty(Op),
2247 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2248}
2249
2250SDValue
2251HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2252 const SDLoc &dl(Op);
2253 unsigned IntNo = Op.getConstantOperandVal(0);
2254 SmallVector<SDValue> Ops(Op->ops());
2255
2256 auto Swap = [&](SDValue P) {
2257 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2258 };
2259
2260 switch (IntNo) {
2261 case Intrinsic::hexagon_V6_pred_typecast:
2262 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2263 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2264 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2265 if (ResTy == InpTy)
2266 return Ops[1];
2267 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2268 }
2269 break;
2270 }
2271 case Intrinsic::hexagon_V6_vmpyss_parts:
2272 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2273 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2274 {Ops[1], Ops[2]}));
2275 case Intrinsic::hexagon_V6_vmpyuu_parts:
2276 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2277 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2278 {Ops[1], Ops[2]}));
2279 case Intrinsic::hexagon_V6_vmpyus_parts:
2280 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2281 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2282 {Ops[1], Ops[2]}));
2283 }
2284 } // switch
2285
2286 return Op;
2287}
2288
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower masked loads and stores (MLOAD/MSTORE) of HVX vectors.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  // Memory operand covering one full vector register starting at Base.
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    // A masked load is lowered as an unconditional load followed by a
    // VSELECT with the pass-through value (skipped if it is undef).
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    if (isUndef(Thru))
      return Load;
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    // Return {value, chain}, matching the MLOAD's two results.
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  // Fully aligned case: a single predicated store does the job.
  if (MaskN->getAlign().value() % HwLen == 0) {
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case.
  // Align both the mask (as a byte vector) and the value to Base, then
  // emit two predicated stores covering the two registers spanned.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  // Expand the predicate to bytes so it can be vlalign'ed, then convert
  // the two aligned halves back to predicates.
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  // Join the chains of the two partial stores.
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
2358
2359SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2360 SelectionDAG &DAG) const {
2361 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2362 // is legal (done via a pattern).
2363 assert(Subtarget.useHVXQFloatOps());
2364
2365 assert(Op->getOpcode() == ISD::FP_EXTEND);
2366
2367 MVT VecTy = ty(Op);
2368 MVT ArgTy = ty(Op.getOperand(0));
2369 const SDLoc &dl(Op);
2370
2371 if (ArgTy == MVT::v64bf16) {
2372 MVT HalfTy = typeSplit(VecTy).first;
2373 SDValue BF16Vec = Op.getOperand(0);
2374 SDValue Zeroes =
2375 getInstr(Hexagon::V6_vxor, dl, HalfTy, {BF16Vec, BF16Vec}, DAG);
2376 // Interleave zero vector with the bf16 vector, with zeroes in the lower
2377 // half of each 32 bit lane, effectively extending the bf16 values to fp32
2378 // values.
2379 SDValue ShuffVec =
2380 getInstr(Hexagon::V6_vshufoeh, dl, VecTy, {BF16Vec, Zeroes}, DAG);
2381 VectorPair VecPair = opSplit(ShuffVec, dl, DAG);
2382 SDValue Result = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2383 {VecPair.second, VecPair.first,
2384 DAG.getSignedConstant(-4, dl, MVT::i32)},
2385 DAG);
2386 return Result;
2387 }
2388
2389 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2390
2391 SDValue F16Vec = Op.getOperand(0);
2392
2393 APFloat FloatVal = APFloat(1.0f);
2394 bool Ignored;
2396 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2397 SDValue VmpyVec =
2398 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2399
2400 MVT HalfTy = typeSplit(VecTy).first;
2401 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2402 SDValue LoVec =
2403 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2404 SDValue HiVec =
2405 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2406
2407 SDValue ShuffVec =
2408 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2409 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2410
2411 return ShuffVec;
2412}
2413
2414SDValue
2415HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2416 // Catch invalid conversion ops (just in case).
2417 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2418 Op.getOpcode() == ISD::FP_TO_UINT);
2419
2420 MVT ResTy = ty(Op);
2421 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2422 MVT IntTy = ResTy.getVectorElementType();
2423
2424 if (Subtarget.useHVXIEEEFPOps()) {
2425 // There are only conversions from f16.
2426 if (FpTy == MVT::f16) {
2427 // Other int types aren't legal in HVX, so we shouldn't see them here.
2428 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2429 // Conversions to i8 and i16 are legal.
2430 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2431 return Op;
2432 }
2433 }
2434
2435 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2436 return EqualizeFpIntConversion(Op, DAG);
2437
2438 return ExpandHvxFpToInt(Op, DAG);
2439}
2440
// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
// R1 = #1, R2 holds the v32i1 param
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V2 = vand(V2,V1)
// V2.sf = V2.w
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // Splat the constant 1 into a v32i32 vector (V1 in the scheme above).
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Q0 = vand(V1,R1): an all-true lane predicate used for the prefix sum.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Running sum over the predicate lanes: lane i gets i+1.
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Splat the 32-bit scalar holding the v32i1 input into every lane.
  SDNode *SplatParam = DAG.getMachineNode(
      Hexagon::V6_lvsplatw, dl, MVT::v32i32,
      DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
  // Subtract 1 from each lane, giving per-lane shift amounts 0..31.
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Shift lane i's copy of the input right by i, moving bit i to bit 0.
  SDNode *IndexShift =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatParam, 0), SDValue(Vsub, 0));
  // Mask off everything but bit 0, leaving 0 or 1 per lane.
  SDNode *MaskOff =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift, 0), SDValue(SplatConst, 0));
  // Convert the 0/1 integer lanes to single-precision float.
  SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
                                       SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2482
// For vector type v64i1 uint_to_fp to v64f16:
// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
// R3 = subreg_high (R32)
// R2 = subreg_low (R32)
// R1 = #1
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// V3 = vsplat(R3)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V3.w = vlsr(V3.w,V0.w)
// V2 = vand(V2,V1)
// V3 = vand(V3,V1)
// V2.h = vpacke(V3.w,V2.w)
// V2.hf = V2.h
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // View the v64i1 input as an i64 scalar.
  SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
  // vand between splatted const and const: an all-true lane predicate.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Get the prefixsum: lane i receives i+1.
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Get the vsub: per-lane shift amounts 0..31.
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Get vlsr for hi and lo: move bit i of each half into bit 0 of lane i.
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatHi, 0), SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatLo, 0), SDValue(Vsub, 0));
  // Get vand of hi and lo: isolate bit 0, leaving 0 or 1 per lane.
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
  // Pack them: merge the two word vectors into one v64i16 vector.
  SDNode *Pack =
      DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
                         SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
  // Convert the 0/1 halfword lanes to half-precision float.
  SDNode *Convert =
      DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2557
2558SDValue
2559HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2560 // Catch invalid conversion ops (just in case).
2561 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2562 Op.getOpcode() == ISD::UINT_TO_FP);
2563
2564 MVT ResTy = ty(Op);
2565 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2566 MVT FpTy = ResTy.getVectorElementType();
2567
2568 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2569 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2570 return LowerHvxPred32ToFp(Op, DAG);
2571 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2572 return LowerHvxPred64ToFp(Op, DAG);
2573 }
2574
2575 if (Subtarget.useHVXIEEEFPOps()) {
2576 // There are only conversions to f16.
2577 if (FpTy == MVT::f16) {
2578 // Other int types aren't legal in HVX, so we shouldn't see them here.
2579 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2580 // i8, i16 -> f16 is legal.
2581 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2582 return Op;
2583 }
2584 }
2585
2586 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2587 return EqualizeFpIntConversion(Op, DAG);
2588
2589 return ExpandHvxIntToFp(Op, DAG);
2590}
2591
2592HexagonTargetLowering::TypePair
2593HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2594 // Compare the widths of elements of the two types, and extend the narrower
2595 // type to match the with of the wider type. For vector types, apply this
2596 // to the element type.
2597 assert(Ty0.isVector() == Ty1.isVector());
2598
2599 MVT ElemTy0 = Ty0.getScalarType();
2600 MVT ElemTy1 = Ty1.getScalarType();
2601
2602 unsigned Width0 = ElemTy0.getSizeInBits();
2603 unsigned Width1 = ElemTy1.getSizeInBits();
2604 unsigned MaxWidth = std::max(Width0, Width1);
2605
2606 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2607 if (ScalarTy.isInteger())
2608 return MVT::getIntegerVT(Width);
2609 assert(ScalarTy.isFloatingPoint());
2610 return MVT::getFloatingPointVT(Width);
2611 };
2612
2613 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2614 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2615
2616 if (!Ty0.isVector()) {
2617 // Both types are scalars.
2618 return {WideETy0, WideETy1};
2619 }
2620
2621 // Vector types.
2622 unsigned NumElem = Ty0.getVectorNumElements();
2623 assert(NumElem == Ty1.getVectorNumElements());
2624
2625 return {MVT::getVectorVT(WideETy0, NumElem),
2626 MVT::getVectorVT(WideETy1, NumElem)};
2627}
2628
2629HexagonTargetLowering::TypePair
2630HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2631 // Compare the numbers of elements of two vector types, and widen the
2632 // narrower one to match the number of elements in the wider one.
2633 assert(Ty0.isVector() && Ty1.isVector());
2634
2635 unsigned Len0 = Ty0.getVectorNumElements();
2636 unsigned Len1 = Ty1.getVectorNumElements();
2637 if (Len0 == Len1)
2638 return {Ty0, Ty1};
2639
2640 unsigned MaxLen = std::max(Len0, Len1);
2641 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2642 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2643}
2644
2645MVT
2646HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2647 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2648 assert(LegalTy.isSimple());
2649 return LegalTy.getSimpleVT();
2650}
2651
2652MVT
2653HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2654 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2655 assert(Ty.getSizeInBits() <= HwWidth);
2656 if (Ty.getSizeInBits() == HwWidth)
2657 return Ty;
2658
2659 MVT ElemTy = Ty.getScalarType();
2660 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2661}
2662
2663HexagonTargetLowering::VectorPair
2664HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2665 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2666 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2667 // whether an overflow has occurred.
2668 MVT ResTy = ty(A);
2669 assert(ResTy == ty(B));
2670 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2671
2672 if (!Signed) {
2673 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2674 // save any instructions.
2675 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2676 SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2677 return {Add, Ovf};
2678 }
2679
2680 // Signed overflow has happened, if:
2681 // (A, B have the same sign) and (A+B has a different sign from either)
2682 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2683 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2684 SDValue NotA =
2685 DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
2686 SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2687 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2688 SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
2689 SDValue MSB =
2690 DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2691 return {Add, MSB};
2692}
2693
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {rounded value, Ovf}, where Ovf is the overflow predicate of
  // the rounding addition below.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  // bool ovf = (inp > tmp0);
  // uint rup = inp & (1 << (Amt+1));
  // NOTE(review): the pseudo-code line above says (1 << (Amt+1)), but the
  // code below tests bit Amt of the input -- the bit that becomes the LSB
  // of the final result -- which is what round-half-to-even needs. The
  // pseudo-code comment appears off by one; confirm against the code.
  //
  // uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(ElemWidth);
  MVT IntTy = tyVector(ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
  // Use arithmetic shifts for signed inputs to preserve the sign bit.
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  // Reinterpret the input as an integer vector, and form the rounding
  // increment (1 << (Amt-1)) - 1.
  SDValue Inp = DAG.getBitcast(IntTy, Val);
  SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);

  // Rup = 1 in the lanes where bit Amt of the input is set; used as the
  // tie-break ("round up") increment below.
  SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
  SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
  SDValue Zero = getZero(dl, IntTy, DAG);
  SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);

  SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
  SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
  SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);

  // frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1 (see pseudo-code above).
  SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
  SDValue One = DAG.getConstant(1, dl, IntTy);
  SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
  SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2741
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Emit the high half of the element-wise signed 32x32 product of A and B
  // (i.e. mulhs), using instructions available on HVX v60.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // mulhs(A,B) =
  //   = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //      + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //   Hi(B) Lo(B)
  //   Hi(A) Lo(A)
  //  --------------
  //   Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //   Hi(B)*Lo(A)  |      + dropping the low 16 bits
  //   Hi(A)*Lo(B)  | T2
  //   Hi(B)*Hi(A)

  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
  // T4 = get Hi(B) into low halves.
  SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
  SDValue T5 = LoHalf(P3, DAG);
  // Add:
  SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
  return T6;
}
2798
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Emit the full 64-bit element-wise product of the 32-bit elements of
  // A and B, returned via getMergeValues as {Lo, Hi}. SignedA/SignedB give
  // the signedness of each input. Uses only HVX v60 instructions.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);

  // Swap low/high halves in B
  SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
                        {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
                {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
  Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Signed*signed correction: subtract (A if B<0) + (B if A<0) from
    // the high half of the unsigned product.
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
    SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2874
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Emit the full 64-bit element-wise product of the 32-bit elements of
  // A and B, returned via getMergeValues as {Lo, Hi}. SignedA/SignedB give
  // the signedness of each input. Uses the V62 64-bit multiply ops.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
  SDValue P1 =
      getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
  SDValue Lo = LoHalf(P1, DAG);
  SDValue Hi = HiHalf(P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2928
SDValue
HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
    const {
  // Rewrite conversion between integer and floating-point in such a way that
  // the integer type is extended/narrowed to match the bitwidth of the
  // floating-point type, combined with additional integer-integer extensions
  // or narrowings to match the original input/result types.
  // E.g. f32 -> i8 ==> f32 -> i32 -> i8
  //
  // The input/result types are not required to be legal, but if they are
  // legal, this function should not introduce illegal types.

  unsigned Opc = Op.getOpcode();

  SDValue Inp = Op.getOperand(0);
  MVT InpTy = ty(Inp);
  MVT ResTy = ty(Op);

  // Nothing to do if input and result already match.
  if (InpTy == ResTy)
    return Op;

  const SDLoc &dl(Op);

  // Widen the narrower of the two types to the wider one's bit width,
  // perform the conversion there, then resize back to the requested type.
  auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
  SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
  SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
  SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
  return Res;
}
2961
SDValue
HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
  // Expand FP_TO_SINT/FP_TO_UINT into integer bit manipulation of the
  // IEEE representation, following the scalar algorithm shown below.
  unsigned Opc = Op.getOpcode();

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0);
  MVT InpTy = ty(Op0);
  MVT ResTy = ty(Op);
  assert(InpTy.changeTypeToInteger() == ResTy);

  // int32_t conv_f32_to_i32(uint32_t inp) {
  //   // s | exp8 | frac23
  //
  //   int neg = (int32_t)inp < 0;
  //
  //   // "expm1" is the actual exponent minus 1: instead of "bias", subtract
  //   // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
  //   // produce a large positive "expm1", which will result in max u/int.
  //   // In all IEEE formats, bias is the largest positive number that can be
  //   // represented in bias-width bits (i.e. 011..1).
  //   int32_t expm1 = (inp << 1) - 0x80000000;
  //   expm1 >>= 24;
  //
  //   // Always insert the "implicit 1". Subnormal numbers will become 0
  //   // regardless.
  //   uint32_t frac = (inp << 8) | 0x80000000;
  //
  //   // "frac" is the fraction part represented as Q1.31. If it was
  //   // interpreted as uint32_t, it would be the fraction part multiplied
  //   // by 2^31.
  //
  //   // Calculate the amount of right shift, since shifting further to the
  //   // left would lose significant bits. Limit it to 32, because we want
  //   // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
  //   // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
  //   // left by 31). "rsh" can be negative.
  //   int32_t rsh = min(31 - (expm1 + 1), 32);
  //
  //   frac >>= rsh;   // rsh == 32 will produce 0
  //
  //   // Everything up to this point is the same for conversion to signed
  //   // unsigned integer.
  //
  //   if (neg)                 // Only for signed int
  //     frac = -frac;          //
  //   if (rsh <= 0 && neg)     //   bound = neg ? 0x80000000 : 0x7fffffff
  //     frac = 0x80000000;     //   frac = rsh <= 0 ? bound : frac
  //   if (rsh <= 0 && !neg)    //
  //     frac = 0x7fffffff;     //
  //
  //   if (neg)                 // Only for unsigned int
  //     frac = 0;              //
  //   if (rsh < 0 && !neg)     //   frac = rsh < 0 ? 0x7fffffff : frac;
  //     frac = 0x7fffffff;     //   frac = neg ? 0 : frac;
  //
  //   return frac;
  // }

  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());

  // The instruction sequence below corresponds to the algorithm above:
  // Zero = V6_vd0();
  // Neg = V6_vgtw(Zero, Inp);
  // One = V6_lvsplatw(1);
  // M80 = V6_lvsplatw(0x80000000);
  // Exp00 = V6_vaslwv(Inp, One);
  // Exp01 = V6_vsubw(Exp00, M80);
  // ExpM1 = V6_vasrw(Exp01, 24);
  // Frc00 = V6_vaslw(Inp, 8);
  // Frc01 = V6_vor(Frc00, M80);
  // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
  // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
  // Frc02 = V6_vlsrwv(Frc01, Rsh01);

  // if signed int:
  // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
  // Pos = V6_vgtw(Rsh01, Zero);
  // Frc13 = V6_vsubw(Zero, Frc02);
  // Frc14 = V6_vmux(Neg, Frc13, Frc02);
  // Int = V6_vmux(Pos, Frc14, Bnd);
  //
  // if unsigned int:
  // Rsn = V6_vgtw(Zero, Rsh01)
  // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
  // Int = V6_vmux(Neg, Zero, Frc23)

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;
  assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));

  // Work on the integer interpretation of the float input.
  SDValue Inp = DAG.getBitcast(ResTy, Op0);
  SDValue Zero = getZero(dl, ResTy, DAG);
  SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
  SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
  SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
  SDValue One = DAG.getConstant(1, dl, ResTy);
  SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
  SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
  SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
  SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});

  SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
  SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
  SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});

  SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
  SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
  SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
  SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
  SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});

  SDValue Int;

  if (Opc == ISD::FP_TO_SINT) {
    SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
    SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
    SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
    SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
    Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
  } else {
    SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
    SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
    Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
  }

  return Int;
}
3090
SDValue
HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
  // Expand SINT_TO_FP/UINT_TO_FP into integer bit manipulation that builds
  // the IEEE representation directly, following the scalar algorithm below.
  unsigned Opc = Op.getOpcode();

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0);
  MVT InpTy = ty(Op0);
  MVT ResTy = ty(Op);
  assert(ResTy.changeTypeToInteger() == InpTy);

  // uint32_t vnoc1_rnd(int32_t w) {
  //   int32_t iszero = w == 0;
  //   int32_t isneg = w < 0;
  //   uint32_t u = __builtin_HEXAGON_A2_abs(w);
  //
  //   uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
  //   uint32_t frac0 = (uint64_t)u << norm_left;
  //
  //   // Rounding:
  //   uint32_t frac1 = frac0 + ((1 << 8) - 1);
  //   uint32_t renorm = (frac0 > frac1);
  //   uint32_t rup = (int)(frac0 << 22) < 0;
  //
  //   uint32_t frac2 = frac0 >> 8;
  //   uint32_t frac3 = frac1 >> 8;
  //   uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
  //
  //   int32_t exp = 32 - norm_left + renorm + 127;
  //   exp <<= 23;
  //
  //   uint32_t sign = 0x80000000 * isneg;
  //   uint32_t f = sign | exp | frac;
  //   return iszero ? 0 : f;
  // }

  MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
  bool Signed = Opc == ISD::SINT_TO_FP;

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;

  // Normalize |input| so the leading 1 is shifted out: frac0 = abs << (clz+1).
  SDValue Zero = getZero(dl, InpTy, DAG);
  SDValue One = DAG.getConstant(1, dl, InpTy);
  SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
  SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
  SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
  SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
  SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});

  // Round the fraction to FracWidth bits; Ovf flags lanes that need
  // renormalization (the "renorm" in the pseudo-code above).
  auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
  if (Signed) {
    // Set the sign bit for negative inputs.
    SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
    SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
    SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
    Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
  }

  // exp = ElemWidth + bias + renorm - norm_left, placed into the exponent
  // field by shifting left by FracWidth.
  SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
  SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
  SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
  SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
  SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
                             {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
  SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
  // Zero input maps to +0.0 regardless of sign processing above.
  SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
  SDValue Flt = DAG.getBitcast(ResTy, Flt1);

  return Flt;
}
3161
SDValue
HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
  // Wrap an extend/truncate into a target-specific TL_* node, recording
  // the original ISD opcode as a constant operand so RemoveTLWrapper can
  // re-create the original operation later.
  unsigned Opc = Op.getOpcode();
  unsigned TLOpc;
  switch (Opc) {
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    TLOpc = HexagonISD::TL_EXTEND;
    break;
  case ISD::TRUNCATE:
    break;
#ifndef NDEBUG
    Op.dump(&DAG);
#endif
    llvm_unreachable("Unexpected operator");
  }

  const SDLoc &dl(Op);
  // NOTE(review): the extra undef operand is deliberately of an illegal
  // type (see the "// illegal type" comment) -- presumably to keep the
  // wrapper node from being considered legal; confirm against the TL_*
  // handling elsewhere in this file.
  return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
                     DAG.getUNDEF(MVT::i128), // illegal type
                     DAG.getConstant(Opc, dl, MVT::i32));
}
3186
3187SDValue
3188HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3189 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3190 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3191 unsigned Opc = Op.getConstantOperandVal(2);
3192 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3193}
3194
3195HexagonTargetLowering::VectorPair
3196HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
3197 assert(!Op.isMachineOpcode());
3198 SmallVector<SDValue, 2> OpsL, OpsH;
3199 const SDLoc &dl(Op);
3200
3201 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
3202 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
3203 SDValue TV = DAG.getValueType(Ty);
3204 return std::make_pair(TV, TV);
3205 };
3206
3207 for (SDValue A : Op.getNode()->ops()) {
3208 auto [Lo, Hi] =
3209 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
3210 // Special case for type operand.
3211 switch (Op.getOpcode()) {
3212 case ISD::SIGN_EXTEND_INREG:
3213 case HexagonISD::SSAT:
3214 case HexagonISD::USAT:
3215 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
3216 std::tie(Lo, Hi) = SplitVTNode(N);
3217 break;
3218 }
3219 OpsL.push_back(Lo);
3220 OpsH.push_back(Hi);
3221 }
3222
3223 MVT ResTy = ty(Op);
3224 MVT HalfTy = typeSplit(ResTy).first;
3225 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
3226 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
3227 return {L, H};
3228}
3229
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a load/store (plain or masked) of an HVX vector pair into two
  // operations on single HVX vectors, at addresses Base and Base+HwLen.
  auto *MemN = cast<MemSDNode>(Op.getNode());

  if (!MemN->getMemoryVT().isSimple())
    return Op;

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  // Build memory operands for the two halves (offsets 0 and HwLen).
  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    // For masked ops the number of bytes actually accessed depends on the
    // mask, so mark the size as unknown.
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    // Result: concatenated halves, plus a TokenFactor of the two chains.
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  // Masked ops: split the mask (and pass-through/value) as well.
  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}
3314
SDValue
HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
  // Widen a short vector load into a full-HVX-vector masked load, with a
  // mask that enables only the originally requested bytes.
  const SDLoc &dl(Op);
  auto *LoadN = cast<LoadSDNode>(Op.getNode());
  assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
  assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening loads of i1 yet");

  SDValue Chain = LoadN->getChain();
  SDValue Base = LoadN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  MVT ResTy = ty(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned ResLen = ResTy.getStoreSize();
  assert(ResLen < HwLen && "vsetq(v1) prerequisite");

  // Predicate enabling the first ResLen bytes of the vector.
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);

  // Load as a full vector of bytes, then cast to the requested element type.
  MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);

  SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
                                   DAG.getUNDEF(LoadTy), LoadTy, MemOp,
  SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
  return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
}
3346
SDValue
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
  // Widen a short vector store into a full-HVX-vector masked store: pad
  // the value with undef up to a full register and store through a mask
  // that enables only the original bytes.
  const SDLoc &dl(Op);
  auto *StoreN = cast<StoreSDNode>(Op.getNode());
  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening stores of i1 yet");

  SDValue Chain = StoreN->getChain();
  SDValue Base = StoreN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  // Treat the stored value as a vector of bytes.
  SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
  MVT ValueTy = ty(Value);
  unsigned ValueLen = ValueTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(ValueLen));

  // Repeatedly double the vector by appending undef until it fills a full
  // HVX register.
  for (unsigned Len = ValueLen; Len < HwLen; ) {
    Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
    Len = ty(Value).getVectorNumElements(); // This is Len *= 2
  }
  assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia

  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
  // Predicate enabling the first ValueLen bytes.
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
  return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
                            MemOp, ISD::UNINDEXED, false, false);
}
3380
SDValue
HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
  // Widen a setcc on short vectors: pad both operands with undef to a full
  // HVX type, compare, then extract the originally requested part of the
  // result. Returns SDValue() if the widened type is not an HVX type.
  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
  MVT ElemTy = ty(Op0).getVectorElementType();
  unsigned HwLen = Subtarget.getVectorLength();

  // Element count of a full HVX vector with this element type.
  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
  MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
  if (!Subtarget.isHVXVectorType(WideOpTy, true))
    return SDValue();

  SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
  SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
  EVT ResTy =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
  SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
                              {WideOp0, WideOp1, Op.getOperand(2)});

  // Extract the narrow result, starting at element 0.
  EVT RetTy = typeLegalize(ty(Op), DAG);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
                     {SetCC, getZero(dl, MVT::i32, DAG)});
}
3405
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  // Main dispatch for HVX lowering: operations involving vector pairs are
  // split first (where legal), then each opcode is routed to its dedicated
  // lowering routine.
  unsigned Opc = Op.getOpcode();
  bool IsPairOp = isHvxPairTy(ty(Op)) ||
                  llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
                    return isHvxPairTy(ty(V));
                  });

  if (IsPairOp) {
    switch (Opc) {
      default:
        break;
      case ISD::LOAD:
      case ISD::STORE:
      case ISD::MLOAD:
      case ISD::MSTORE:
        return SplitHvxMemOp(Op, DAG);
      case ISD::SINT_TO_FP:
      case ISD::UINT_TO_FP:
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT:
        // Split only when input and result have the same total size.
        if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
          return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
        break;
      case ISD::ABS:
      case ISD::CTPOP:
      case ISD::CTLZ:
      case ISD::CTTZ:
      case ISD::MUL:
      case ISD::FADD:
      case ISD::FSUB:
      case ISD::FMUL:
      case ISD::FMINIMUMNUM:
      case ISD::FMAXIMUMNUM:
      case ISD::MULHS:
      case ISD::MULHU:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::SRA:
      case ISD::SHL:
      case ISD::SRL:
      case ISD::FSHL:
      case ISD::FSHR:
      case ISD::SMIN:
      case ISD::SMAX:
      case ISD::UMIN:
      case ISD::UMAX:
      case ISD::SETCC:
      case ISD::VSELECT:
      case ISD::SPLAT_VECTOR:
        return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
      case ISD::SIGN_EXTEND:
      case ISD::ZERO_EXTEND:
        // In general, sign- and zero-extends can't be split and still
        // be legal. The only exception is extending bool vectors.
        if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
          return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
        break;
    }
  }

  switch (Opc) {
    default:
      break;
    case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
    case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
    case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
    case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
    case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
    case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
    case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
    case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
    case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
    case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                     return LowerHvxShift(Op, DAG);
    case ISD::FSHL:
    case ISD::FSHR:                    return LowerHvxFunnelShift(Op, DAG);
    case ISD::MULHS:
    case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI:               return LowerHvxMulLoHi(Op, DAG);
    case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
    case ISD::SETCC:
    case ISD::INTRINSIC_VOID:          return Op;
    case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
    case ISD::MLOAD:
    case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
    // Unaligned loads will be handled by the default lowering.
    case ISD::LOAD:                    return SDValue();
    case ISD::FP_EXTEND:               return LowerHvxFpExtend(Op, DAG);
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:              return LowerHvxFpToInt(Op, DAG);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:              return LowerHvxIntToFp(Op, DAG);

    // Special nodes:
    case HexagonISD::USMUL_LOHI:       return LowerHvxMulLoHi(Op, DAG);
  }
#ifndef NDEBUG
  Op.dumpr(&DAG);
#endif
  llvm_unreachable("Unhandled HVX operation");
}
3518
// Rewrite an HVX resize operation (saturation, and presumably the TL_*
// wrapped extend/truncate forms) whose input/result element widths differ
// by more than a factor of two into a chain of 2x steps.
3519SDValue
3520HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3521 const {
3522 // Rewrite the extension/truncation/saturation op into steps where each
3523 // step changes the type widths by a factor of 2.
3524 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3525 //
3526 // Some of the vector types in Op may not be legal.
3527
3528 unsigned Opc = Op.getOpcode();
 // Validate the opcode: plain ISD extends/truncates never reach this point
 // because the DAG auto-folds them.
 // NOTE(review): source lines 3532-3533 (presumably HexagonISD::TL_EXTEND /
 // TL_TRUNCATE case labels) and the `default:` label (line 3541) appear to
 // have been dropped by the extraction -- confirm against the upstream file.
3529 switch (Opc) {
3530 case HexagonISD::SSAT:
3531 case HexagonISD::USAT:
3534 break;
3535 case ISD::ANY_EXTEND:
3536 case ISD::ZERO_EXTEND:
3537 case ISD::SIGN_EXTEND:
3538 case ISD::TRUNCATE:
3539 llvm_unreachable("ISD:: ops will be auto-folded");
3540 break;
3541#ifndef NDEBUG
3542 Op.dump(&DAG);
3543#endif
3544 llvm_unreachable("Unexpected operation");
3545 }
3546
3547 SDValue Inp = Op.getOperand(0);
3548 MVT InpTy = ty(Inp);
3549 MVT ResTy = ty(Op);
3550
 // Element widths of input and result; both vectors must have the same
 // element count (asserted below).
3551 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3552 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3553 assert(InpWidth != ResWidth);
3554
 // A single 2x resize needs no rewriting -- return the op unchanged.
3555 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3556 return Op;
3557
3558 const SDLoc &dl(Op);
3559 unsigned NumElems = InpTy.getVectorNumElements();
3560 assert(NumElems == ResTy.getVectorNumElements());
3561
 // Emit one step of the chain: the same opcode, retargeted at an integer
 // element type of NewWidth bits.
 // NOTE(review): lines 3568-3569 (presumably the case labels matching the
 // three-operand form on line 3570) are missing from this extraction.
3562 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3563 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3564 switch (Opc) {
3565 case HexagonISD::SSAT:
3566 case HexagonISD::USAT:
3567 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3570 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3571 default:
3572 llvm_unreachable("Unexpected opcode");
3573 }
3574 };
3575
3576 SDValue S = Inp;
 // Walk the element width up (doubling) or down (halving) one step at a
 // time until the result width is reached. Widths must be related by a
 // power-of-2 factor (asserted).
3577 if (InpWidth < ResWidth) {
3578 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3579 while (InpWidth * 2 <= ResWidth)
3580 S = repeatOp(InpWidth *= 2, S);
3581 } else {
3582 // InpWidth > ResWidth
3583 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3584 while (InpWidth / 2 >= ResWidth)
3585 S = repeatOp(InpWidth /= 2, S);
3586 }
3587 return S;
3588}
3589
// Legalize an HVX resize node whose input or result type is not HVX-legal:
// widen to an HVX type, split into an HVX pair, or (if both types are
// already legal) just strip the TL wrapper.
3590SDValue
3591HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3592 SDValue Inp0 = Op.getOperand(0);
3593 MVT InpTy = ty(Inp0);
3594 MVT ResTy = ty(Op);
3595 unsigned InpWidth = InpTy.getSizeInBits();
3596 unsigned ResWidth = ResTy.getSizeInBits();
3597 unsigned Opc = Op.getOpcode();
3598
3599 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3600 // First, make sure that the narrower type is widened to HVX.
3601 // This may cause the result to be wider than what the legalizer
3602 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3603 // desired type.
3604 auto [WInpTy, WResTy] =
3605 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3606 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
 // Pad the input with undef lanes up to the widened input type.
3607 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3608 SDValue S;
 // NOTE(review): source line 3609 (the `if` selecting the three-operand
 // form below, presumably for the TL_* opcodes) is missing from this
 // extraction -- confirm against the upstream file.
3610 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3611 Op.getOperand(2));
3612 } else {
3613 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3614 }
 // Break a >2x element resize into 2x steps, then trim the widened result
 // back to the type the legalizer expects for ResTy.
3615 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3616 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3617 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3618 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3619 } else {
3620 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3621 return RemoveTLWrapper(Op, DAG);
3622 }
3623 llvm_unreachable("Unexpected situation");
3624}
3625
// Custom operation-legalization hook: pushes replacement value(s) for N
// (if any) into Results. Leaving Results empty means "no custom lowering"
// for this node.
// NOTE(review): source line 3628 (the remainder of the parameter list,
// presumably `SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {`)
// is missing from this extraction -- the body uses both Results and DAG.
3626void
3627HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3629 unsigned Opc = N->getOpcode();
3630 SDValue Op(N, 0);
3631 SDValue Inp0; // Optional first argument.
3632 if (N->getNumOperands() > 0)
3633 Inp0 = Op.getOperand(0);
3634
3635 switch (Opc) {
3636 case ISD::ANY_EXTEND:
3637 case ISD::SIGN_EXTEND:
3638 case ISD::ZERO_EXTEND:
3639 case ISD::TRUNCATE:
 // Wrap extends/truncates between HVX element types in a TL wrapper so
 // the HVX lowering can process them later.
3640 if (Subtarget.isHVXElementType(ty(Op)) &&
3641 Subtarget.isHVXElementType(ty(Inp0))) {
3642 Results.push_back(CreateTLWrapper(Op, DAG));
3643 }
3644 break;
3645 case ISD::SETCC:
 // For setcc it is the *operand* type that decides whether widening to
 // an HVX vector is needed.
3646 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3647 if (SDValue T = WidenHvxSetCC(Op, DAG))
3648 Results.push_back(T);
3649 }
3650 break;
3651 case ISD::STORE: {
3652 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3653 SDValue Store = WidenHvxStore(Op, DAG);
3654 Results.push_back(Store);
3655 }
3656 break;
3657 }
3658 case ISD::MLOAD:
 // Split a masked load of an HVX register pair into two single loads.
 // NOTE(review): source line 3661 appears missing here (presumably an
 // assert that S is a MERGE_VALUES node) -- confirm upstream.
3659 if (isHvxPairTy(ty(Op))) {
3660 SDValue S = SplitHvxMemOp(Op, DAG);
3662 Results.push_back(S.getOperand(0));
3663 Results.push_back(S.getOperand(1));
3664 }
3665 break;
3666 case ISD::MSTORE:
3667 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3668 SDValue S = SplitHvxMemOp(Op, DAG);
3669 Results.push_back(S);
3670 }
3671 break;
3672 case ISD::SINT_TO_FP:
3673 case ISD::UINT_TO_FP:
3674 case ISD::FP_TO_SINT:
3675 case ISD::FP_TO_UINT:
 // Make the FP and integer sides of a conversion the same bit width
 // before lowering the conversion itself.
3676 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3677 SDValue T = EqualizeFpIntConversion(Op, DAG);
3678 Results.push_back(T);
3679 }
3680 break;
3681 case HexagonISD::SSAT:
3682 case HexagonISD::USAT:
 // NOTE(review): source lines 3683-3684 (presumably TL_EXTEND /
 // TL_TRUNCATE case labels) are missing from this extraction.
3685 Results.push_back(LegalizeHvxResize(Op, DAG));
3686 break;
3687 default:
3688 break;
3689 }
3690}
3691
// Type-legalization hook: produce replacement value(s) for a node with an
// illegal result type, appending them to Results.
// NOTE(review): source line 3694 (the remainder of the parameter list,
// presumably `SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {`)
// is missing from this extraction -- the body uses both Results and DAG.
3692void
3693HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3695 unsigned Opc = N->getOpcode();
3696 SDValue Op(N, 0);
3697 SDValue Inp0; // Optional first argument.
3698 if (N->getNumOperands() > 0)
3699 Inp0 = Op.getOperand(0);
3700
3701 switch (Opc) {
3702 case ISD::ANY_EXTEND:
3703 case ISD::SIGN_EXTEND:
3704 case ISD::ZERO_EXTEND:
3705 case ISD::TRUNCATE:
3706 if (Subtarget.isHVXElementType(ty(Op)) &&
3707 Subtarget.isHVXElementType(ty(Inp0))) {
3708 Results.push_back(CreateTLWrapper(Op, DAG));
3709 }
3710 break;
3711 case ISD::SETCC:
 // Here the *result* type is the one being legalized (contrast with
 // LowerHvxOperationWrapper, which checks the operand type).
3712 if (shouldWidenToHvx(ty(Op), DAG)) {
3713 if (SDValue T = WidenHvxSetCC(Op, DAG))
3714 Results.push_back(T);
3715 }
3716 break;
3717 case ISD::LOAD: {
3718 if (shouldWidenToHvx(ty(Op), DAG)) {
3719 SDValue Load = WidenHvxLoad(Op, DAG);
3720 assert(Load->getOpcode() == ISD::MERGE_VALUES);
 // The MERGE_VALUES carries two results (per the assert); report both.
3721 Results.push_back(Load.getOperand(0));
3722 Results.push_back(Load.getOperand(1));
3723 }
3724 break;
3725 }
3726 case ISD::BITCAST:
 // Bitcasts from HVX predicate (bool) vectors need custom expansion.
3727 if (isHvxBoolTy(ty(Inp0))) {
3728 SDValue C = LowerHvxBitcast(Op, DAG);
3729 Results.push_back(C);
3730 }
3731 break;
3732 case ISD::FP_TO_SINT:
3733 case ISD::FP_TO_UINT:
 // Equalize bit widths of the FP source and integer result first.
3734 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3735 SDValue T = EqualizeFpIntConversion(Op, DAG);
3736 Results.push_back(T);
3737 }
3738 break;
3739 case HexagonISD::SSAT:
3740 case HexagonISD::USAT:
 // NOTE(review): source lines 3741-3742 (presumably TL_EXTEND /
 // TL_TRUNCATE case labels) are missing from this extraction.
3743 Results.push_back(LegalizeHvxResize(Op, DAG));
3744 break;
3745 default:
3746 break;
3747 }
3748}
3749
3750SDValue
3751HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3752 DAGCombinerInfo &DCI) const {
3753 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3754 // to extract-subvector (shuffle V, pick even, pick odd)
3755
3756 assert(Op.getOpcode() == ISD::TRUNCATE);
3757 SelectionDAG &DAG = DCI.DAG;
3758 const SDLoc &dl(Op);
3759
3760 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3761 return SDValue();
3762 SDValue Cast = Op.getOperand(0);
3763 SDValue Src = Cast.getOperand(0);
3764
3765 EVT TruncTy = Op.getValueType();
3766 EVT CastTy = Cast.getValueType();
3767 EVT SrcTy = Src.getValueType();
3768 if (SrcTy.isSimple())
3769 return SDValue();
3770 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3771 return SDValue();
3772 unsigned SrcLen = SrcTy.getVectorNumElements();
3773 unsigned CastLen = CastTy.getVectorNumElements();
3774 if (2 * CastLen != SrcLen)
3775 return SDValue();
3776
3777 SmallVector<int, 128> Mask(SrcLen);
3778 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3779 Mask[i] = 2 * i;
3780 Mask[i + CastLen] = 2 * i + 1;
3781 }
3782 SDValue Deal =
3783 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3784 return opSplit(Deal, dl, DAG).first;
3785}
3786
3787SDValue
3788HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3789 SDValue Op, DAGCombinerInfo &DCI) const {
3790 // Fold
3791 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3792 // into
3793 // shuffle (concat x, y), undef, m3
3794 if (Op.getNumOperands() != 2)
3795 return SDValue();
3796
3797 SelectionDAG &DAG = DCI.DAG;
3798 const SDLoc &dl(Op);
3799 SDValue V0 = Op.getOperand(0);
3800 SDValue V1 = Op.getOperand(1);
3801
3802 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3803 return SDValue();
3804 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3805 return SDValue();
3806
3807 SetVector<SDValue> Order;
3808 Order.insert(V0.getOperand(0));
3809 Order.insert(V0.getOperand(1));
3810 Order.insert(V1.getOperand(0));
3811 Order.insert(V1.getOperand(1));
3812
3813 if (Order.size() > 2)
3814 return SDValue();
3815
3816 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3817 // result must be the same.
3818 EVT InpTy = V0.getValueType();
3819 assert(InpTy.isVector());
3820 unsigned InpLen = InpTy.getVectorNumElements();
3821
3822 SmallVector<int, 128> LongMask;
3823 auto AppendToMask = [&](SDValue Shuffle) {
3824 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3825 ArrayRef<int> Mask = SV->getMask();
3826 SDValue X = Shuffle.getOperand(0);
3827 SDValue Y = Shuffle.getOperand(1);
3828 for (int M : Mask) {
3829 if (M == -1) {
3830 LongMask.push_back(M);
3831 continue;
3832 }
3833 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3834 if (static_cast<unsigned>(M) >= InpLen)
3835 M -= InpLen;
3836
3837 int OutOffset = Order[0] == Src ? 0 : InpLen;
3838 LongMask.push_back(M + OutOffset);
3839 }
3840 };
3841
3842 AppendToMask(V0);
3843 AppendToMask(V1);
3844
3845 SDValue C0 = Order.front();
3846 SDValue C1 = Order.back(); // Can be same as front
3847 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3848
3849 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3850 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3851}
3852
// Target DAG-combine entry point for HVX nodes. Returns the replacement
// value, or an empty SDValue when no combine applies.
3853SDValue
3854HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3855 const {
3856 const SDLoc &dl(N);
3857 SelectionDAG &DAG = DCI.DAG;
3858 SDValue Op(N, 0);
3859 unsigned Opc = Op.getOpcode();
3860
 // NOTE(review): source line 3861 is missing from this extraction --
 // presumably the declaration of `Ops` (a copy of N's operand list) that
 // the switch below indexes into. Confirm against the upstream file.
3862
 // These two combines run even before operation legalization.
3863 if (Opc == ISD::TRUNCATE)
3864 return combineTruncateBeforeLegal(Op, DCI);
3865 if (Opc == ISD::CONCAT_VECTORS)
3866 return combineConcatVectorsBeforeLegal(Op, DCI);
3867
3868 if (DCI.isBeforeLegalizeOps())
3869 return SDValue();
3870
3871 switch (Opc) {
3872 case ISD::VSELECT: {
3873 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3874 SDValue Cond = Ops[0];
3875 if (Cond->getOpcode() == ISD::XOR) {
3876 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3877 if (C1->getOpcode() == HexagonISD::QTRUE)
3878 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3879 }
3880 break;
3881 }
3882 case HexagonISD::V2Q:
 // V2Q of a constant splat folds to QFALSE (zero) or QTRUE (nonzero).
3883 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3884 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3885 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3886 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3887 }
3888 break;
3889 case HexagonISD::Q2V:
 // Q2V of a known predicate folds to an all-ones/all-zeros splat.
3890 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3891 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3892 DAG.getAllOnesConstant(dl, MVT::i32))
3893 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3894 return getZero(dl, ty(Op), DAG);
3895 break;
 // NOTE(review): source line 3896 (a case label for the combine below)
 // is missing from this extraction -- confirm which HexagonISD opcode it
 // names before relying on this listing.
3897 if (isUndef(Ops[1]))
3898 return Ops[0];
3899 break;
3900 case HexagonISD::VROR: {
 // Merge nested rotates: VROR (VROR v, r1), r0 -> VROR v, (r0 + r1).
3901 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3902 SDValue Vec = Ops[0].getOperand(0);
3903 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3904 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3905 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3906 }
3907 break;
3908 }
3909 }
3910
3911 return SDValue();
3912}
3913
// Returns true if Ty is not itself an HVX-legal type but type
// legalization would split it into HVX vector types.
3914bool
3915HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3916 if (Subtarget.isHVXVectorType(Ty, true))
3917 return false;
3918 auto Action = getPreferredHvxVectorAction(Ty);
 // NOTE(review): source line 3919 is missing from this extraction --
 // presumably a check that Action is the split-vector action, guarding
 // the return below (otherwise the trailing `return false` would be
 // unreachable). Confirm against the upstream file.
3920 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3921 return false;
3922}
3923
// Returns true if Ty is not itself an HVX-legal type but type
// legalization would widen it to an HVX vector type.
3924bool
3925HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3926 if (Subtarget.isHVXVectorType(Ty, true))
3927 return false;
3928 auto Action = getPreferredHvxVectorAction(Ty);
 // NOTE(review): source line 3929 is missing from this extraction --
 // presumably a check that Action is the widen-vector action, guarding
 // the return below (otherwise the trailing `return false` would be
 // unreachable). Confirm against the upstream file.
3930 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3931 return false;
3932}
3933
3934bool
3935HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3936 if (!Subtarget.useHVXOps())
3937 return false;
3938 // If the type of any result, or any operand type are HVX vector types,
3939 // this is an HVX operation.
3940 auto IsHvxTy = [this](EVT Ty) {
3941 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3942 };
3943 auto IsHvxOp = [this](SDValue Op) {
3944 return Op.getValueType().isSimple() &&
3945 Subtarget.isHVXVectorType(ty(Op), true);
3946 };
3947 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3948 return true;
3949
3950 // Check if this could be an HVX operation after type widening.
3951 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3952 if (!Op.getValueType().isSimple())
3953 return false;
3954 MVT ValTy = ty(Op);
3955 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3956 };
3957
3958 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3959 if (IsWidenedToHvx(SDValue(N, i)))
3960 return true;
3961 }
3962 return llvm::any_of(N->ops(), IsWidenedToHvx);
3963}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:101
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:130
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:136
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:149
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.