LLVM 22.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
// Threshold (in bytes) below which short vectors are widened into full HVX
// vectors; consulted in getPreferredHvxVectorAction.
// NOTE(review): the cl::Hidden/cl::init arguments of this option are on a
// line not visible in this chunk.
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
  cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));

// Off-by-default switch for the faster FP conversion sequences.
static cl::opt<bool>
    EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false),
                        cl::desc("Enable FP fast conversion routine."));

// Legal single-register (V) and register-pair (W) integer vector types for
// the 64-byte and 128-byte HVX modes.
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
42
43static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
44 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
45 MVT ElemTy = Ty.getScalarType();
46 switch (ElemTy.SimpleTy) {
47 case MVT::f16:
48 return std::make_tuple(5, 15, 10);
49 case MVT::f32:
50 return std::make_tuple(8, 127, 23);
51 case MVT::f64:
52 return std::make_tuple(11, 1023, 52);
53 default:
54 break;
55 }
56 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
57}
58
// Configure HVX-specific lowering: register classes for all legal HVX
// vector types, operation and condition-code actions, promotions of
// shuffles to byte vectors, and custom widening of short vectors.
void
HexagonTargetLowering::initializeHVXLowering() {
  if (Subtarget.useHVX64BOps()) {
    // 64-byte mode: single vectors in HvxVR, pairs in HvxWR.
    addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
    // These "short" boolean vector types should be legal because
    // they will appear as results of vector compares. If they were
    // not legal, type legalization would try to make them legal
    // and that would require using operations that do not use or
    // produce such types. That, in turn, would imply using custom
    // nodes, which would be unoptimizable by the DAG combiner.
    // The idea is to rely on target-independent operations as much
    // as possible.
    addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
  } else if (Subtarget.useHVX128BOps()) {
    // 128-byte mode: same structure with doubled element counts.
    addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
    if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
      // FP vector registers only exist with v68+ HVX floating point.
      addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
      addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
    }
    if (Subtarget.useHVXV81Ops()) {
      // bfloat16 vectors require v81+.
      addRegisterClass(MVT::v64bf16, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v128bf16, &Hexagon::HvxWRRegClass);
    }
  }

  // Set up operation actions.

  bool Use64b = Subtarget.useHVX64BOps();
  ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
  ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
  MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
  MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
  MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;

  // Helper: mark Opc on FromTy as promoted to ToTy.
  auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
    AddPromotedToType(Opc, FromTy, ToTy);
  };

  // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
  // Note: v16i1 -> i16 is handled in type legalization instead of op
  // legalization.
  setOperationAction(ISD::BITCAST, MVT::i16, Custom);
  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
  setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
  setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
  setOperationAction(ISD::BITCAST, MVT::i128, Custom);

  if (Subtarget.useHVX128BOps()) {
    setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
    setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
  }
  if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
      Subtarget.useHVXFloatingPoint()) {

    static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
    static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };

    // Actions for single FP vectors.
    for (MVT T : FloatV) {
      setOperationAction(ISD::FMINIMUMNUM, T, Legal);
      setOperationAction(ISD::FMAXIMUMNUM, T, Legal);

      setOperationAction(ISD::MLOAD, T, Custom);
      setOperationAction(ISD::MSTORE, T, Custom);
      // Custom-lower BUILD_VECTOR. The standard (target-independent)
      // handling of it would convert it to a load, which is not always
      // the optimal choice.
    }


    // BUILD_VECTOR with f16 operands cannot be promoted without
    // promoting the result, so lower the node to vsplat or constant pool

    // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
    // generated.
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);

    if (Subtarget.useHVXV81Ops()) {
      // bf16 arithmetic/compares are done in f32 via promotion.
      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
      setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);

      setOperationAction(ISD::MLOAD, MVT::v64bf16, Custom);
      setOperationAction(ISD::MSTORE, MVT::v64bf16, Custom);
    }

    // Actions for FP vector pairs.
    for (MVT P : FloatW) {
      setOperationAction(ISD::LOAD, P, Custom);
      setOperationAction(ISD::STORE, P, Custom);
      setOperationAction(ISD::FMINIMUMNUM, P, Custom);
      setOperationAction(ISD::FMAXIMUMNUM, P, Custom);

      // Custom-lower BUILD_VECTOR. The standard (target-independent)
      // handling of it would convert it to a load, which is not always
      // the optimal choice.
      // Make concat-vectors custom to handle concats of more than 2 vectors.

      setOperationAction(ISD::MLOAD, P, Custom);
      setOperationAction(ISD::MSTORE, P, Custom);
    }

    if (Subtarget.useHVXQFloatOps()) {
      setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
    } else if (Subtarget.useHVXIEEEFPOps()) {
      setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
    }
  }

  // Actions for legal single integer vectors.
  for (MVT T : LegalV) {

    if (T != ByteV) {
    }

    if (T.getScalarType() != MVT::i32) {
    }

    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::MLOAD, T, Custom);
    setOperationAction(ISD::MSTORE, T, Custom);
    if (T.getScalarType() != MVT::i32) {
    }

    // Make concat-vectors custom to handle concats of more than 2 vectors.
    if (T != ByteV) {
      // HVX only has shifts of words and halfwords.

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
    }

    if (Subtarget.useHVXFloatingPoint()) {
      // Same action for both QFloat and IEEE.
    }

  }

  // Actions for legal vector pairs.
  for (MVT T : LegalW) {
    // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
    // independent) handling of it would convert it to a load, which is
    // not always the optimal choice.
    // Make concat-vectors custom to handle concats of more than 2 vectors.

    // Custom-lower these operations for pairs. Expand them into a concat
    // of the corresponding operations on individual vectors.

    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::STORE, T, Custom);
    setOperationAction(ISD::MLOAD, T, Custom);
    setOperationAction(ISD::MSTORE, T, Custom);

    if (T != ByteW) {

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
    }

    if (T.getScalarType() != MVT::i32) {
    }

    if (Subtarget.useHVXFloatingPoint()) {
      // Same action for both QFloat and IEEE.
    }
  }

  // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
  setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
  setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI

  // Only a subset of FP condition codes is directly supported; expand the
  // rest into supported ones.
  setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);

  setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);

  // Boolean vectors.

  for (MVT T : LegalW) {
    // Boolean types for vector pairs will overlap with the boolean
    // types for single vectors, e.g.
    //   v64i8 -> v64i1 (single)
    //   v64i16 -> v64i1 (pair)
    // Set these actions first, and allow the single actions to overwrite
    // any duplicates.
    MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    // Masked load/store takes a mask that may need splitting.
    setOperationAction(ISD::MLOAD, BoolW, Custom);
    setOperationAction(ISD::MSTORE, BoolW, Custom);
  }

  for (MVT T : LegalV) {
    MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
  }

  if (Use64b) {
    for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
  } else {
    for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
  }

  // Handle store widening for short vectors.
  unsigned HwLen = Subtarget.getVectorLength();
  for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
    if (ElemTy == MVT::i1)
      continue;
    int ElemWidth = ElemTy.getFixedSizeInBits();
    int MaxElems = (8*HwLen) / ElemWidth;
    // Walk all power-of-2 short vector lengths below a full HVX vector.
    for (int N = 2; N < MaxElems; N *= 2) {
      MVT VecTy = MVT::getVectorVT(ElemTy, N);
      auto Action = getPreferredVectorAction(VecTy);
      setOperationAction(ISD::LOAD, VecTy, Custom);
      setOperationAction(ISD::STORE, VecTy, Custom);
      if (Subtarget.useHVXFloatingPoint()) {
      }

      MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
      if (!isTypeLegal(BoolTy))
    }
  }

  // Include cases which are not handled earlier
}
494
// Return the preferred type-legalization action for VecTy from HVX's point
// of view, or ~0u to defer to the target-independent default.
unsigned
HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
  // Early exit for invalid input types
  if (!VecTy.isVector())
    return ~0u;

  MVT ElemTy = VecTy.getVectorElementType();
  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();

  // Split vectors of i1 that exceed byte vector length.
  if (ElemTy == MVT::i1 && VecLen > HwLen)

  ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
  // For shorter vectors of i1, widen them if any of the corresponding
  // vectors of integers needs to be widened.
  if (ElemTy == MVT::i1) {
    for (MVT T : Tys) {
      assert(T != MVT::i1);
      // Recurse with the i1 element replaced by each legal HVX element
      // type; adopt the first non-default preference found.
      auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
      if (A != ~0u)
        return A;
    }
    return ~0u;
  }

  // If the size of VecTy is at least half of the vector length,
  // widen the vector. Note: the threshold was not selected in
  // any scientific way.
  if (llvm::is_contained(Tys, ElemTy)) {
    unsigned VecWidth = VecTy.getSizeInBits();
    unsigned HwWidth = 8*HwLen;
    if (VecWidth > 2*HwWidth)

    // An explicit -hexagon-hvx-widen threshold takes precedence.
    bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
    if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
    if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
  }

  // Defer to default.
  return ~0u;
}
541
// Return the operation action for a node whose action is nominally Custom;
// dispatches on the opcode (cases not visible in this chunk).
unsigned
HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  }
}
553
// Build an INTRINSIC_WO_CHAIN node invoking Hexagon intrinsic IntId with
// the given operands and result type.
HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
                              const SDLoc &dl, SelectionDAG &DAG) const {
  // The first operand of INTRINSIC_WO_CHAIN is the intrinsic id.
  IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
  append_range(IntOps, Ops);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
}
562
563MVT
564HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
565 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
566
567 MVT ElemTy = Tys.first.getVectorElementType();
568 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
569 Tys.second.getVectorNumElements());
570}
571
572HexagonTargetLowering::TypePair
573HexagonTargetLowering::typeSplit(MVT VecTy) const {
574 assert(VecTy.isVector());
575 unsigned NumElem = VecTy.getVectorNumElements();
576 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
577 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
578 return { HalfTy, HalfTy };
579}
580
581MVT
582HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
583 MVT ElemTy = VecTy.getVectorElementType();
584 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
585 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
586}
587
588MVT
589HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
590 MVT ElemTy = VecTy.getVectorElementType();
591 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
592 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
593}
594
// Bitcast Vec to a same-width vector with element type ElemTy; no-op when
// the element type already matches.
HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
                                  SelectionDAG &DAG) const {
  if (ty(Vec).getVectorElementType() == ElemTy)
    return Vec;
  MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
  return DAG.getBitcast(CastTy, Vec);
}
603
// Concatenate a pair of vector values into one vector of the joined type.
HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
                              SelectionDAG &DAG) const {
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
                     Ops.first, Ops.second);
}
610
611HexagonTargetLowering::VectorPair
612HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
613 SelectionDAG &DAG) const {
614 TypePair Tys = typeSplit(ty(Vec));
615 if (Vec.getOpcode() == HexagonISD::QCAT)
616 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
617 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
618}
619
620bool
621HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
622 return Subtarget.isHVXVectorType(Ty) &&
623 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
624}
625
626bool
627HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
628 return Subtarget.isHVXVectorType(Ty) &&
629 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
630}
631
632bool
633HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
634 return Subtarget.isHVXVectorType(Ty, true) &&
635 Ty.getVectorElementType() == MVT::i1;
636}
637
638bool HexagonTargetLowering::allowsHvxMemoryAccess(
639 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
640 // Bool vectors are excluded by default, but make it explicit to
641 // emphasize that bool vectors cannot be loaded or stored.
642 // Also, disallow double vector stores (to prevent unnecessary
643 // store widening in DAG combiner).
644 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
645 return false;
646 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
647 return false;
648 if (Fast)
649 *Fast = 1;
650 return true;
651}
652
653bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
654 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
655 if (!Subtarget.isHVXVectorType(VecTy))
656 return false;
657 // XXX Should this be false? vmemu are a bit slower than vmem.
658 if (Fast)
659 *Fast = 1;
660 return true;
661}
662
// Expand the PS_vsplat* pseudo-instructions after instruction selection
// into real scalar-move + V6_lvsplat{b,h,w} sequences. On pre-v62
// subtargets only the word splat exists, so byte/half splats are emulated
// by replicating the value across a 32-bit scalar first.
void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
    MachineInstr &MI, SDNode *Node) const {
  unsigned Opc = MI.getOpcode();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock &MB = *MI.getParent();
  MachineFunction &MF = *MB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  auto At = MI.getIterator();

  switch (Opc) {
  case Hexagon::PS_vsplatib:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplatb SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
        .add(MI.getOperand(1));
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
        .addReg(SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      assert(InpOp.isImm());
      // Replicate the low byte into all four byte lanes of the word.
      uint32_t V = InpOp.getImm() & 0xFF;
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .addImm(V << 24 | V << 16 | V << 8 | V);
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatrb:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplatb Inp
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
        .add(MI.getOperand(1));
    } else {
      // Pre-v62: replicate the byte across the scalar with S2_vsplatrb,
      // then splat the word.
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
          .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
        .addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatih:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplath SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
        .add(MI.getOperand(1));
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
        .addReg(SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      assert(InpOp.isImm());
      // Replicate the low halfword into both halves of the word.
      uint32_t V = InpOp.getImm() & 0xFFFF;
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .addImm(V << 16 | V);
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatrh:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplath Inp
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
        .add(MI.getOperand(1));
    } else {
      // SplatV = A2_combine_ll Inp, Inp
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
          .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
          .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatiw:
  case Hexagon::PS_vsplatrw:
    if (Opc == Hexagon::PS_vsplatiw) {
      // SplatV = A2_tfrsi #imm
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
        .add(MI.getOperand(1));
      MI.getOperand(1).ChangeToRegister(SplatV, false);
    }
    // OutV = V6_lvsplatw SplatV/Inp
    // Word splats reuse MI in place instead of erasing it.
    MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
    break;
  }
}
772
// Convert an element index into a byte index by scaling it with the
// element size; the index is first normalized to i32.
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
                                          SelectionDAG &DAG) const {
  if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
    ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);

  // Byte elements need no scaling.
  unsigned ElemWidth = ElemTy.getSizeInBits();
  if (ElemWidth == 8)
    return ElemIdx;

  // Multiply by bytes-per-element via a left shift.
  unsigned L = Log2_32(ElemWidth/8);
  const SDLoc &dl(ElemIdx);
  return DAG.getNode(ISD::SHL, dl, MVT::i32,
                     {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
}
788
// Compute the sub-index of an element within its containing 32-bit word,
// for element widths of 8, 16, or 32 bits.
HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
                                        SelectionDAG &DAG) const {
  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  // A 32-bit element is the whole word: index is already the answer.
  if (ElemWidth == 32)
    return Idx;

  if (ty(Idx) != MVT::i32)
    Idx = DAG.getBitcast(MVT::i32, Idx);
  const SDLoc &dl(Idx);
  // There are 32/ElemWidth elements per word; mask keeps the position
  // within the word.
  SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
  SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
  return SubIdx;
}
804
// Lower a vector shuffle of wider-than-byte elements into an equivalent
// shuffle of bytes: each mask entry is expanded into ElemSize consecutive
// byte indices, and the operands are bitcast to byte vectors.
HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
                                      SDValue Op1, ArrayRef<int> Mask,
                                      SelectionDAG &DAG) const {
  MVT OpTy = ty(Op0);
  assert(OpTy == ty(Op1));

  MVT ElemTy = OpTy.getVectorElementType();
  // Already a byte shuffle: nothing to expand.
  if (ElemTy == MVT::i8)
    return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
  assert(ElemTy.getSizeInBits() >= 8);

  MVT ResTy = tyVector(OpTy, MVT::i8);
  unsigned ElemSize = ElemTy.getSizeInBits() / 8;

  SmallVector<int,128> ByteMask;
  for (int M : Mask) {
    if (M < 0) {
      // Undef lane: all of its bytes stay undef.
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(-1);
    } else {
      int NewM = M*ElemSize;
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(NewM+I);
    }
  }
  assert(ResTy.getVectorNumElements() == ByteMask.size());
  return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
                              opCastElem(Op1, MVT::i8, DAG), ByteMask);
}
835
// Build a single HVX vector register from the given scalar Values.
// Strategy, in order of preference: splat, constant pool load, shuffle of
// an existing vector's extracts, and finally a two-half rotate/insert
// sequence OR-ed together.
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);

  // Pack the values into 32-bit words, since the insert/rotate machinery
  // below operates on words.
  if (VecTy.getVectorElementType() != MVT::i32 &&
      !(Subtarget.useHVXFloatingPoint() &&
        VecTy.getVectorElementType() == MVT::f32)) {
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
      Words.push_back(DAG.getBitcast(MVT::i32, W));
    }
  } else {
    for (SDValue V : Values)
      Words.push_back(DAG.getBitcast(MVT::i32, V));
  }
  // True when all non-undef values are the same; SplatV receives that
  // value (or Values[0] if everything is undef).
  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
    unsigned NumValues = Values.size();
    assert(NumValues > 0);
    bool IsUndef = true;
    for (unsigned i = 0; i != NumValues; ++i) {
      if (Values[i].isUndef())
        continue;
      IsUndef = false;
      if (!SplatV.getNode())
        SplatV = Values[i];
      else if (SplatV != Values[i])
        return false;
    }
    if (IsUndef)
      SplatV = Values[0];
    return true;
  };

  unsigned NumWords = Words.size();
  SDValue SplatV;
  bool IsSplat = isSplat(Words, SplatV);
  if (IsSplat && isUndef(SplatV))
    return DAG.getUNDEF(VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    if (isNullConstant(SplatV))
      return getZero(dl, VecTy, DAG);
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
    return DAG.getBitcast(VecTy, S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a vsplat.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    // All-constant vector: materialize it via a constant-pool load.
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(Tmp);
    Align Alignment(HwLen);
    SDValue CP =
        LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
    return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(V)) {
        SrcIdx.push_back(-1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      // Only constant extract indices can be turned into a mask.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(I))
          Mask.push_back(I);
      }

      SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
                                       DAG.getUNDEF(ExtTy), Mask);
      return ExtLen == VecLen ? S : LoHalf(S, DAG);
    }
  }

  // Find most common element to initialize vector with. This is to avoid
  // unnecessary vinsert/valign for cases where the same value is present
  // many times. Creates a histogram of the vector's elements to find the
  // most common element n.
  assert(4*Words.size() == Subtarget.getVectorLength());
  int VecHist[32];
  int n = 0;
  for (unsigned i = 0; i != NumWords; ++i) {
    VecHist[i] = 0;
    if (Words[i].isUndef())
      continue;
    for (unsigned j = i; j != NumWords; ++j)
      if (Words[i] == Words[j])
        VecHist[i]++;

    if (VecHist[i] > VecHist[n])
      n = i;
  }

  SDValue HalfV = getZero(dl, VecTy, DAG);
  if (VecHist[n] > 1) {
    // Pre-fill both halves with the most common word.
    SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
    HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
                        {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
  }
  SDValue HalfV0 = HalfV;
  SDValue HalfV1 = HalfV;

  // Construct two halves in parallel, then or them together. Rn and Rm count
  // number of rotations needed before the next element. One last rotation is
  // performed post-loop to position the last element.
  int Rn = 0, Rm = 0;
  SDValue Sn, Sm;
  SDValue N = HalfV0;
  SDValue M = HalfV1;
  for (unsigned i = 0; i != NumWords/2; ++i) {
    // Rotate by element count since last insertion.
    if (Words[i] != Words[n] || VecHist[n] <= 1) {
      Sn = DAG.getConstant(Rn, dl, MVT::i32);
      HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
      N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV0, Words[i]});
      Rn = 0;
    }
    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
      Sm = DAG.getConstant(Rm, dl, MVT::i32);
      HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
      M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV1, Words[i+NumWords/2]});
      Rm = 0;
    }
    Rn += 4;
    Rm += 4;
  }
  // Perform last rotation.
  Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
  Sm = DAG.getConstant(Rm, dl, MVT::i32);
  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});

  // Combine the halves as integer words, then cast back to the element type.
  SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
  SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);

  SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});

  SDValue OutV =
      DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
  return OutV;
}
1048
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  // Build a full-size byte vector whose leading bytes hold the "prefix"
  // representation of the predicate PredV: each i1 element of PredV is
  // expanded to BitBytes consecutive bytes at the front of the result.
  // If ZeroFill is set, the bytes past that prefix are cleared to 0;
  // otherwise their contents are unspecified.
  MVT PredTy = ty(PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);

  if (Subtarget.isHVXVectorType(PredTy, true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Shuffle mask: source byte i goes to position BlockLen*(i%Scale)+i/Scale,
    // which places every Scale-th byte consecutively in positions
    // 0..BlockLen-1.
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
    // Q has the first BlockLen lanes set; AND with its Q2V keeps the prefix
    // and zeroes everything past it.
    SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                         {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
    SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
    return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Bytes each i1 occupies in the 8-byte (P2D) scalar representation.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Two word lists used alternately as source/destination each round.
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  // Transfer the predicate into a 64-bit scalar.
  SDValue W0 = isUndef(PredV)
                  ? DAG.getUNDEF(MVT::i64)
                  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
  if (Bytes < BitBytes) {
    Words[IdxW].push_back(HiHalf(W0, DAG));
    Words[IdxW].push_back(LoHalf(W0, DAG));
  } else
    Words[IdxW].push_back(W0);

  // Widen the per-element byte count until it reaches BitBytes, doubling
  // each round: below 4 bytes, expand each word; at 4+, duplicate it.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(W, dl, DAG);
        Words[IdxW].push_back(HiHalf(T, DAG));
        Words[IdxW].push_back(LoHalf(T, DAG));
      }
    } else {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
        Words[IdxW].push_back(W);
      }
    }
    Bytes *= 2;
  }

  // Conversely, narrow the representation if it is wider than BitBytes,
  // halving each round via contractPredicate.
  while (Bytes > BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes <= 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = contractPredicate(W, dl, DAG);
        Words[IdxW].push_back(T);
      }
    } else {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
      }
    }
    Bytes /= 2;
  }

  assert(Bytes == BitBytes);
  // NOTE(review): for v2i1 with BitBytes==1 the insertion below is done on
  // i16 lanes instead of bytes — presumably to match the packed
  // representation expected by the caller; confirm before changing.
  if (BitBytes == 1 && PredTy == MVT::v2i1)
    ByteTy = MVT::getVectorVT(MVT::i16, HwLen);

  // Rotate by HwLen-4 before each insertion; each word is written into
  // lane 0 via VINSERTW0.
  SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
  SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
    Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
  }

  return Vec;
}
1155
1156SDValue
1157HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1158 const SDLoc &dl, MVT VecTy,
1159 SelectionDAG &DAG) const {
1160 // Construct a vector V of bytes, such that a comparison V >u 0 would
1161 // produce the required vector predicate.
1162 unsigned VecLen = Values.size();
1163 unsigned HwLen = Subtarget.getVectorLength();
1164 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1166 bool AllT = true, AllF = true;
1167
1168 auto IsTrue = [] (SDValue V) {
1169 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1170 return !N->isZero();
1171 return false;
1172 };
1173 auto IsFalse = [] (SDValue V) {
1174 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1175 return N->isZero();
1176 return false;
1177 };
1178
1179 if (VecLen <= HwLen) {
1180 // In the hardware, each bit of a vector predicate corresponds to a byte
1181 // of a vector register. Calculate how many bytes does a bit of VecTy
1182 // correspond to.
1183 assert(HwLen % VecLen == 0);
1184 unsigned BitBytes = HwLen / VecLen;
1185 for (SDValue V : Values) {
1186 AllT &= IsTrue(V);
1187 AllF &= IsFalse(V);
1188
1189 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1190 : DAG.getUNDEF(MVT::i8);
1191 for (unsigned B = 0; B != BitBytes; ++B)
1192 Bytes.push_back(Ext);
1193 }
1194 } else {
1195 // There are as many i1 values, as there are bits in a vector register.
1196 // Divide the values into groups of 8 and check that each group consists
1197 // of the same value (ignoring undefs).
1198 for (unsigned I = 0; I != VecLen; I += 8) {
1199 unsigned B = 0;
1200 // Find the first non-undef value in this group.
1201 for (; B != 8; ++B) {
1202 if (!Values[I+B].isUndef())
1203 break;
1204 }
1205 SDValue F = Values[I+B];
1206 AllT &= IsTrue(F);
1207 AllF &= IsFalse(F);
1208
1209 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1210 : DAG.getUNDEF(MVT::i8);
1211 Bytes.push_back(Ext);
1212 // Verify that the rest of values in the group are the same as the
1213 // first.
1214 for (; B != 8; ++B)
1215 assert(Values[I+B].isUndef() || Values[I+B] == F);
1216 }
1217 }
1218
1219 if (AllT)
1220 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1221 if (AllF)
1222 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1223
1224 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1225 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1226 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1227}
1228
1229SDValue
1230HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1231 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1232 MVT ElemTy = ty(VecV).getVectorElementType();
1233
1234 unsigned ElemWidth = ElemTy.getSizeInBits();
1235 assert(ElemWidth >= 8 && ElemWidth <= 32);
1236 (void)ElemWidth;
1237
1238 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1239 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1240 {VecV, ByteIdx});
1241 if (ElemTy == MVT::i32)
1242 return ExWord;
1243
1244 // Have an extracted word, need to extract the smaller element out of it.
1245 // 1. Extract the bits of (the original) IdxV that correspond to the index
1246 // of the desired element in the 32-bit word.
1247 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1248 // 2. Extract the element from the word.
1249 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1250 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1251}
1252
1253SDValue
1254HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1255 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1256 // Implement other return types if necessary.
1257 assert(ResTy == MVT::i1);
1258
1259 unsigned HwLen = Subtarget.getVectorLength();
1260 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1261 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1262
1263 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1264 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1265 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1266
1267 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1268 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1269 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1270}
1271
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the element ValV at index IdxV into the HVX vector VecV.
  // Elements may be 8, 16, or 32 bits wide.
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert a 32-bit word at a byte index: rotate the containing word down
  // to lane 0, write it with VINSERTW0, then rotate back.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Align the byte index down to a word boundary (clear the low 2 bits).
    SDValue MaskV =
        DAG.getNode(ISD::AND, dl, MVT::i32,
                    {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
    SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
    SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
    // Rotate by HwLen - aligned-index to restore the original ordering.
    SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
                                     dl, MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(ty(Ext), ElemTy);
  SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
                             ValV, SubIdx, dl, ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}
1317
1318SDValue
1319HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1320 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1321 unsigned HwLen = Subtarget.getVectorLength();
1322 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1323 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1324
1325 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1326 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1327 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1328 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1329
1330 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1331 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1332}
1333
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
      SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Extract the subvector ResTy starting at constant element index IdxV
  // from the HVX vector (or vector pair) VecV.
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  // The index must be a compile-time constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
  if (isHvxPairTy(VecTy)) {
    unsigned SubIdx = Hexagon::vsub_lo;
    // 8*HwLen is the bit width of one single vector.
    if (Idx * ElemWidth >= 8 * HwLen) {
      SubIdx = Hexagon::vsub_hi;
      // Make the index relative to the start of the high half.
      Idx -= VecTy.getVectorNumElements() / 2;
    }

    VecTy = typeSplit(VecTy).first;
    VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
    if (VecTy == ResTy)
      return VecV;
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);

  // View the vector as i32 words and locate the word containing the
  // subvector's first element.
  MVT WordTy = tyVector(VecTy, MVT::i32);
  SDValue WordVec = DAG.getBitcast(WordTy, VecV);
  unsigned WordIdx = (Idx*ElemWidth) / 32;

  SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
  SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
  if (ResTy.getSizeInBits() == 32)
    return DAG.getBitcast(ResTy, W0);

  // 64-bit result: also extract the adjacent word and combine the pair
  // into a single i64 value.
  SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
  SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
  SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
  return DAG.getBitcast(ResTy, WW);
}
1377
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Extract a subvector from a vector predicate VecV. The result ResTy is
  // either a shorter vector predicate, or a scalar predicate type
  // (v2i1/v4i1/v8i1).
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  // Work in the byte-vector domain: one byte per predicate bit.
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes of ByteVec corresponding to a single i1 element of VecTy.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      // Each source byte is repeated Rep times in the result.
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
  SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
                  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
}
1441
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the subvector SubV into VecV at element index IdxV. VecV may be
  // a single HVX vector or a vector pair; SubV is either a full single
  // vector (pair case only), or a subvector that fits in 32 or 64 bits.
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(VecTy);
  // Type of one single vector with VecTy's element type.
  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(VecV, DAG);
    V1 = HiHalf(VecV, DAG);

    // PickHi <=> IdxV > (number of elements in one single vector).
    SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
                                    dl, MVT::i32);
    PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
    if (isHvxSingleTy(SubTy)) {
      // Inserting a whole single vector: it replaces the low or high half.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
      SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
      return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
    IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
    SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to lane 0.
    IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                       DAG.getConstant(ElemWidth/8, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    SDValue V = DAG.getBitcast(MVT::i32, SubV);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
  } else {
    // 64-bit subvector: insert the low word, shift by one word, then
    // insert the high word.
    SDValue V = DAG.getBitcast(MVT::i64, SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
                          DAG.getConstant(4, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
  }

  if (IsPair) {
    // Reassemble the pair with the updated half in the right position.
    SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
    SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
    return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
  }
  return SingleV;
}
1528
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the predicate subvector SubV into the vector predicate VecV at
  // element index IdxV.
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale: elements of VecV covered per element of SubV's type ratio.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // BitBytes: bytes (in the Q2V byte representation) per predicate bit.
  unsigned BitBytes = HwLen / VecLen;
  // BlockLen: bytes occupied by the subvector's representation.
  unsigned BlockLen = HwLen / Scale;

  // Perform the insertion in the byte-vector domain.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to byte 0.
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  // Q selects the first BlockLen bytes (taken from ByteSub).
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1574
1575SDValue
1576HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1577 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1578 // Sign- and any-extending of a vector predicate to a vector register is
1579 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1580 // a vector of 1s (where the 1s are of type matching the vector type).
1581 assert(Subtarget.isHVXVectorType(ResTy));
1582 if (!ZeroExt)
1583 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1584
1585 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1586 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1587 DAG.getConstant(1, dl, MVT::i32));
1588 SDValue False = getZero(dl, ResTy, DAG);
1589 return DAG.getSelect(dl, ResTy, VecV, True, False);
1590}
1591
1592SDValue
1593HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1594 MVT ResTy, SelectionDAG &DAG) const {
1595 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1596 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1597 // vector register. The remaining bits of the vector register are
1598 // unspecified.
1599
1600 MachineFunction &MF = DAG.getMachineFunction();
1601 unsigned HwLen = Subtarget.getVectorLength();
1602 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1603 MVT PredTy = ty(VecQ);
1604 unsigned PredLen = PredTy.getVectorNumElements();
1605 assert(HwLen % PredLen == 0);
1606 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1607
1608 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1610 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1611 // These are bytes with the LSB rotated left with respect to their index.
1612 for (unsigned i = 0; i != HwLen/8; ++i) {
1613 for (unsigned j = 0; j != 8; ++j)
1614 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1615 }
1616 Constant *CV = ConstantVector::get(Tmp);
1617 Align Alignment(HwLen);
1618 SDValue CP =
1619 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1620 SDValue Bytes =
1621 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1623
1624 // Select the bytes that correspond to true bits in the vector predicate.
1625 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1626 getZero(dl, VecTy, DAG));
1627 // Calculate the OR of all bytes in each group of 8. That will compress
1628 // all the individual bits into a single byte.
1629 // First, OR groups of 4, via vrmpy with 0x01010101.
1630 SDValue All1 =
1631 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1632 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1633 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1634 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1635 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1636 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1637
1638 // Pick every 8th byte and coalesce them at the beginning of the output.
1639 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1640 // byte and so on.
1641 SmallVector<int,128> Mask;
1642 for (unsigned i = 0; i != HwLen; ++i)
1643 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1644 SDValue Collect =
1645 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1646 return DAG.getBitcast(ResTy, Collect);
1647}
1648
1649SDValue
1650HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1651 const SDLoc &dl, SelectionDAG &DAG) const {
1652 // Take a vector and resize the element type to match the given type.
1653 MVT InpTy = ty(VecV);
1654 if (InpTy == ResTy)
1655 return VecV;
1656
1657 unsigned InpWidth = InpTy.getSizeInBits();
1658 unsigned ResWidth = ResTy.getSizeInBits();
1659
1660 if (InpTy.isFloatingPoint()) {
1661 return InpWidth < ResWidth
1662 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1663 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1664 DAG.getTargetConstant(0, dl, MVT::i32));
1665 }
1666
1667 assert(InpTy.isInteger());
1668
1669 if (InpWidth < ResWidth) {
1670 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1671 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1672 } else {
1673 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1674 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1675 }
1676}
1677
1678SDValue
1679HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1680 SelectionDAG &DAG) const {
1681 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1682
1683 const SDLoc &dl(Vec);
1684 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1685 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1686 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1687}
1688
1689SDValue
1690HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1691 const {
1692 const SDLoc &dl(Op);
1693 MVT VecTy = ty(Op);
1694
1695 unsigned Size = Op.getNumOperands();
1697 for (unsigned i = 0; i != Size; ++i)
1698 Ops.push_back(Op.getOperand(i));
1699
1700 if (VecTy.getVectorElementType() == MVT::i1)
1701 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1702
1703 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1704 // not a legal type, just bitcast the node to use i16
1705 // types and bitcast the result back to f16
1706 if (VecTy.getVectorElementType() == MVT::f16 ||
1707 VecTy.getVectorElementType() == MVT::bf16) {
1709 for (unsigned i = 0; i != Size; i++)
1710 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1711
1712 SDValue T0 =
1713 DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps);
1714 return DAG.getBitcast(tyVector(VecTy, VecTy.getVectorElementType()), T0);
1715 }
1716
1717 // First, split the BUILD_VECTOR for vector pairs. We could generate
1718 // some pairs directly (via splat), but splats should be generated
1719 // by the combiner prior to getting here.
1720 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1722 MVT SingleTy = typeSplit(VecTy).first;
1723 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1724 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1725 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1726 }
1727
1728 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1729}
1730
1731SDValue
1732HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1733 const {
1734 const SDLoc &dl(Op);
1735 MVT VecTy = ty(Op);
1736 MVT ArgTy = ty(Op.getOperand(0));
1737
1738 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1739 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1740 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1741 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1742 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1743 return DAG.getBitcast(VecTy, Splat);
1744 }
1745
1746 return SDValue();
1747}
1748
1749SDValue
1750HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1751 const {
1752 // Vector concatenation of two integer (non-bool) vectors does not need
1753 // special lowering. Custom-lower concats of bool vectors and expand
1754 // concats of more than 2 vectors.
1755 MVT VecTy = ty(Op);
1756 const SDLoc &dl(Op);
1757 unsigned NumOp = Op.getNumOperands();
1758 if (VecTy.getVectorElementType() != MVT::i1) {
1759 if (NumOp == 2)
1760 return Op;
1761 // Expand the other cases into a build-vector.
1763 for (SDValue V : Op.getNode()->ops())
1764 DAG.ExtractVectorElements(V, Elems);
1765 // A vector of i16 will be broken up into a build_vector of i16's.
1766 // This is a problem, since at the time of operation legalization,
1767 // all operations are expected to be type-legalized, and i16 is not
1768 // a legal type. If any of the extracted elements is not of a valid
1769 // type, sign-extend it to a valid one.
1770 for (SDValue &V : Elems) {
1771 MVT Ty = ty(V);
1772 if (!isTypeLegal(Ty)) {
1773 MVT NTy = typeLegalize(Ty, DAG);
1774 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1775 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1776 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1777 V.getOperand(0), V.getOperand(1)),
1778 DAG.getValueType(Ty));
1779 continue;
1780 }
1781 // A few less complicated cases.
1782 switch (V.getOpcode()) {
1783 case ISD::Constant:
1784 V = DAG.getSExtOrTrunc(V, dl, NTy);
1785 break;
1786 case ISD::UNDEF:
1787 V = DAG.getUNDEF(NTy);
1788 break;
1789 case ISD::TRUNCATE:
1790 V = V.getOperand(0);
1791 break;
1792 default:
1793 llvm_unreachable("Unexpected vector element");
1794 }
1795 }
1796 }
1797 return DAG.getBuildVector(VecTy, dl, Elems);
1798 }
1799
1800 assert(VecTy.getVectorElementType() == MVT::i1);
1801 unsigned HwLen = Subtarget.getVectorLength();
1802 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1803
1804 SDValue Op0 = Op.getOperand(0);
1805
1806 // If the operands are HVX types (i.e. not scalar predicates), then
1807 // defer the concatenation, and create QCAT instead.
1808 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1809 if (NumOp == 2)
1810 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1811
1812 ArrayRef<SDUse> U(Op.getNode()->ops());
1815
1816 MVT HalfTy = typeSplit(VecTy).first;
1817 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1818 Ops.take_front(NumOp/2));
1819 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1820 Ops.take_back(NumOp/2));
1821 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1822 }
1823
1824 // Count how many bytes (in a vector register) each bit in VecTy
1825 // corresponds to.
1826 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1827
1828 SmallVector<SDValue,8> Prefixes;
1829 for (SDValue V : Op.getNode()->op_values()) {
1830 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1831 Prefixes.push_back(P);
1832 }
1833
1834 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1835 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1836 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1837 SDValue Res = getZero(dl, ByteTy, DAG);
1838 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1839 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1840 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1841 }
1842 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1843}
1844
1845SDValue
1846HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1847 const {
1848 // Change the type of the extracted element to i32.
1849 SDValue VecV = Op.getOperand(0);
1850 MVT ElemTy = ty(VecV).getVectorElementType();
1851 const SDLoc &dl(Op);
1852 SDValue IdxV = Op.getOperand(1);
1853 if (ElemTy == MVT::i1)
1854 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1855
1856 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1857}
1858
1859SDValue
1860HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1861 const {
1862 const SDLoc &dl(Op);
1863 MVT VecTy = ty(Op);
1864 SDValue VecV = Op.getOperand(0);
1865 SDValue ValV = Op.getOperand(1);
1866 SDValue IdxV = Op.getOperand(2);
1867 MVT ElemTy = ty(VecV).getVectorElementType();
1868 if (ElemTy == MVT::i1)
1869 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1870
1871 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1873 tyVector(VecTy, MVT::i16),
1874 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1875 DAG.getBitcast(MVT::i16, ValV), IdxV);
1876 return DAG.getBitcast(tyVector(VecTy, ElemTy), T0);
1877 }
1878
1879 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1880}
1881
1882SDValue
1883HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1884 const {
1885 SDValue SrcV = Op.getOperand(0);
1886 MVT SrcTy = ty(SrcV);
1887 MVT DstTy = ty(Op);
1888 SDValue IdxV = Op.getOperand(1);
1889 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1890 assert(Idx % DstTy.getVectorNumElements() == 0);
1891 (void)Idx;
1892 const SDLoc &dl(Op);
1893
1894 MVT ElemTy = SrcTy.getVectorElementType();
1895 if (ElemTy == MVT::i1)
1896 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1897
1898 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1899}
1900
1901SDValue
1902HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1903 const {
1904 // Idx does not need to be a constant.
1905 SDValue VecV = Op.getOperand(0);
1906 SDValue ValV = Op.getOperand(1);
1907 SDValue IdxV = Op.getOperand(2);
1908
1909 const SDLoc &dl(Op);
1910 MVT VecTy = ty(VecV);
1911 MVT ElemTy = VecTy.getVectorElementType();
1912 if (ElemTy == MVT::i1)
1913 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1914
1915 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1916}
1917
1918SDValue
1919HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1920 // Lower any-extends of boolean vectors to sign-extends, since they
1921 // translate directly to Q2V. Zero-extending could also be done equally
1922 // fast, but Q2V is used/recognized in more places.
1923 // For all other vectors, use zero-extend.
1924 MVT ResTy = ty(Op);
1925 SDValue InpV = Op.getOperand(0);
1926 MVT ElemTy = ty(InpV).getVectorElementType();
1927 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1928 return LowerHvxSignExt(Op, DAG);
1929 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1930}
1931
1932SDValue
1933HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1934 MVT ResTy = ty(Op);
1935 SDValue InpV = Op.getOperand(0);
1936 MVT ElemTy = ty(InpV).getVectorElementType();
1937 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1938 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1939 return Op;
1940}
1941
1942SDValue
1943HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1944 MVT ResTy = ty(Op);
1945 SDValue InpV = Op.getOperand(0);
1946 MVT ElemTy = ty(InpV).getVectorElementType();
1947 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1948 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1949 return Op;
1950}
1951
1952SDValue
1953HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1954 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1955 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1956 const SDLoc &dl(Op);
1957 MVT ResTy = ty(Op);
1958 SDValue InpV = Op.getOperand(0);
1959 assert(ResTy == ty(InpV));
1960
1961 // Calculate the vectors of 1 and bitwidth(x).
1962 MVT ElemTy = ty(InpV).getVectorElementType();
1963 unsigned ElemWidth = ElemTy.getSizeInBits();
1964
1965 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1966 DAG.getConstant(1, dl, MVT::i32));
1967 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1968 DAG.getConstant(ElemWidth, dl, MVT::i32));
1969 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1970 DAG.getAllOnesConstant(dl, MVT::i32));
1971
1972 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1973 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1974 // it separately in custom combine or selection).
1975 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1976 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1977 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1978 return DAG.getNode(ISD::SUB, dl, ResTy,
1979 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1980}
1981
1982SDValue
1983HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1984 const SDLoc &dl(Op);
1985 MVT ResTy = ty(Op);
1986 assert(ResTy.getVectorElementType() == MVT::i32);
1987
1988 SDValue Vs = Op.getOperand(0);
1989 SDValue Vt = Op.getOperand(1);
1990
1991 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1992 unsigned Opc = Op.getOpcode();
1993
1994 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1995 if (Opc == ISD::MULHU)
1996 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1997 if (Opc == ISD::MULHS)
1998 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1999
2000#ifndef NDEBUG
2001 Op.dump(&DAG);
2002#endif
2003 llvm_unreachable("Unexpected mulh operation");
2004}
2005
SDValue
HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
  // Lower the two-result multiply nodes (SMUL_LOHI, UMUL_LOHI, USMUL_LOHI):
  // value 0 is the low half of the product, value 1 is the high half.
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Vu = Op.getOperand(0);
  SDValue Vv = Op.getOperand(1);

  // If the HI part is not used, convert it to a regular MUL.
  if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
    // Need to preserve the types and the number of values.
    SDValue Hi = DAG.getUNDEF(ty(HiVal));
    SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
    return DAG.getMergeValues({Lo, Hi}, dl);
  }

  // For USMUL_LOHI the first operand is unsigned and the second is signed.
  bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
  bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;

  // Legal on HVX v62+, but lower it here because patterns can't handle multi-
  // valued nodes.
  if (Subtarget.useHVXV62Ops())
    return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);

  if (Opc == HexagonISD::SMUL_LOHI) {
    // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
    // for other signedness LOHI is cheaper.
    if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
      SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
      SDValue Lo = DAG.getUNDEF(ty(LoVal));
      return DAG.getMergeValues({Lo, Hi}, dl);
    }
  }

  return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
}
2041
2042SDValue
2043HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
2044 SDValue Val = Op.getOperand(0);
2045 MVT ResTy = ty(Op);
2046 MVT ValTy = ty(Val);
2047 const SDLoc &dl(Op);
2048
2049 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
2050 unsigned HwLen = Subtarget.getVectorLength();
2051 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
2052 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2053 unsigned BitWidth = ResTy.getSizeInBits();
2054
2055 if (BitWidth < 64) {
2056 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
2057 dl, MVT::i32, DAG);
2058 if (BitWidth == 32)
2059 return W0;
2060 assert(BitWidth < 32u);
2061 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2062 }
2063
2064 // The result is >= 64 bits. The only options are 64 or 128.
2065 assert(BitWidth == 64 || BitWidth == 128);
2067 for (unsigned i = 0; i != BitWidth/32; ++i) {
2068 SDValue W = extractHvxElementReg(
2069 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2070 Words.push_back(W);
2071 }
2072 SmallVector<SDValue,2> Combines;
2073 assert(Words.size() % 2 == 0);
2074 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2075 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2076 Combines.push_back(C);
2077 }
2078
2079 if (BitWidth == 64)
2080 return Combines[0];
2081
2082 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2083 }
2084
2085 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2086 // Splat the input into a 32-element i32 vector, then AND each element
2087 // with a unique bitmask to isolate individual bits.
2088 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2089 assert(Val32.getValueType().getSizeInBits() == 32 &&
2090 "Input must be 32 bits");
2091 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2092 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2094 for (unsigned i = 0; i < 32; ++i)
2095 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2096
2097 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2098 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2099 return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2100 };
2101 // === Case: v32i1 ===
2102 if (ResTy == MVT::v32i1 &&
2103 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2104 Subtarget.useHVX128BOps()) {
2105 SDValue Val32 = Val;
2106 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2107 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2108 return bitcastI32ToV32I1(Val32);
2109 }
2110 // === Case: v64i1 ===
2111 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2112 // Split i64 into lo/hi 32-bit halves.
2113 SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2114 SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2115 DAG.getConstant(32, dl, MVT::i64));
2116 SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2117
2118 // Reuse the same 32-bit logic twice.
2119 SDValue LoRes = bitcastI32ToV32I1(Lo);
2120 SDValue HiRes = bitcastI32ToV32I1(Hi);
2121
2122 // Concatenate into a v64i1 predicate.
2123 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
2124 }
2125
2126 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2127 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2128 unsigned BitWidth = ValTy.getSizeInBits();
2129 unsigned HwLen = Subtarget.getVectorLength();
2130 assert(BitWidth == HwLen);
2131
2132 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2133 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2134 // Splat each byte of Val 8 times.
2135 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2136 // where b0, b1,..., b15 are least to most significant bytes of I.
2138 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2139 // These are bytes with the LSB rotated left with respect to their index.
2141 for (unsigned I = 0; I != HwLen / 8; ++I) {
2142 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2143 SDValue Byte =
2144 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2145 for (unsigned J = 0; J != 8; ++J) {
2146 Bytes.push_back(Byte);
2147 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2148 }
2149 }
2150
2151 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2152 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2153 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2154
2155 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2156 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2157 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2158 }
2159
2160 return Op;
2161}
2162
2163SDValue
2164HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2165 // Sign- and zero-extends are legal.
2166 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2167 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2168 Op.getOperand(0));
2169}
2170
2171SDValue
2172HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2173 MVT ResTy = ty(Op);
2174 if (ResTy.getVectorElementType() != MVT::i1)
2175 return Op;
2176
2177 const SDLoc &dl(Op);
2178 unsigned HwLen = Subtarget.getVectorLength();
2179 unsigned VecLen = ResTy.getVectorNumElements();
2180 assert(HwLen % VecLen == 0);
2181 unsigned ElemSize = HwLen / VecLen;
2182
2183 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2184 SDValue S =
2185 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2186 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2187 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2188 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2189}
2190
SDValue
HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
  // Try to convert the vector shift into a shift by an integer amount
  // (see getVectorShiftByInt); if that fails, leave the node unchanged.
  if (SDValue S = getVectorShiftByInt(Op, DAG))
    return S;
  return Op;
}
2197
2198SDValue
2199HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2200 SelectionDAG &DAG) const {
2201 unsigned Opc = Op.getOpcode();
2202 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2203
2204 // Make sure the shift amount is within the range of the bitwidth
2205 // of the element type.
2206 SDValue A = Op.getOperand(0);
2207 SDValue B = Op.getOperand(1);
2208 SDValue S = Op.getOperand(2);
2209
2210 MVT InpTy = ty(A);
2211 MVT ElemTy = InpTy.getVectorElementType();
2212
2213 const SDLoc &dl(Op);
2214 unsigned ElemWidth = ElemTy.getSizeInBits();
2215 bool IsLeft = Opc == ISD::FSHL;
2216
2217 // The expansion into regular shifts produces worse code for i8 and for
2218 // right shift of i32 on v65+.
2219 bool UseShifts = ElemTy != MVT::i8;
2220 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2221 UseShifts = false;
2222
2223 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2224 // If this is a funnel shift by a scalar, lower it into regular shifts.
2225 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2226 SDValue ModS =
2227 DAG.getNode(ISD::AND, dl, MVT::i32,
2228 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2229 SDValue NegS =
2230 DAG.getNode(ISD::SUB, dl, MVT::i32,
2231 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2232 SDValue IsZero =
2233 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2234 // FSHL A, B => A << | B >>n
2235 // FSHR A, B => A <<n | B >>
2236 SDValue Part1 =
2237 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2238 SDValue Part2 =
2239 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2240 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2241 // If the shift amount was 0, pick A or B, depending on the direction.
2242 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2243 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2244 }
2245
2247 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2248
2249 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2250 return DAG.getNode(MOpc, dl, ty(Op),
2251 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2252}
2253
SDValue
HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
  // Lower HVX-specific intrinsics that map onto existing HexagonISD nodes.
  // Intrinsics not handled here are returned unchanged.
  const SDLoc &dl(Op);
  unsigned IntNo = Op.getConstantOperandVal(0);
  SmallVector<SDValue> Ops(Op->ops());

  // The vmpy*_parts intrinsics expect their two result values in the
  // opposite order from the *MUL_LOHI nodes (which produce lo, then hi),
  // so swap the results.
  auto Swap = [&](SDValue P) {
    return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
  };

  switch (IntNo) {
  case Intrinsic::hexagon_V6_pred_typecast:
  case Intrinsic::hexagon_V6_pred_typecast_128B: {
    // Reinterpret one predicate type as another; a no-op cast when the
    // types already match.
    MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
    if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
      if (ResTy == InpTy)
        return Ops[1];
      return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
    }
    break;
  }
  case Intrinsic::hexagon_V6_vmpyss_parts:
  case Intrinsic::hexagon_V6_vmpyss_parts_128B:
    return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
                            {Ops[1], Ops[2]}));
  case Intrinsic::hexagon_V6_vmpyuu_parts:
  case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
    return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
                            {Ops[1], Ops[2]}));
  case Intrinsic::hexagon_V6_vmpyus_parts:
  case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
    return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
                            {Ops[1], Ops[2]}));
  }
  } // switch

  return Op;
}
2292
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower masked loads (MLOAD) and masked stores (MSTORE) of HVX vectors.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  // Memory operand describing one full HVX vector at the access address.
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    // Masked load: perform a full (unmasked) load, then blend the loaded
    // value with the pass-through value under the mask.
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    if (isUndef(Thru))
      return Load;
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  if (MaskN->getAlign().value() % HwLen == 0) {
    // Fully aligned: a single predicated store suffices.
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  // Align both the mask (via its byte-vector form, Q2V/V2Q) and the value
  // to the store address, then emit two aligned predicated stores.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  // Both partial stores must complete: tie their chains together.
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
2362
2363SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2364 SelectionDAG &DAG) const {
2365 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2366 // is legal (done via a pattern).
2367 assert(Subtarget.useHVXQFloatOps());
2368
2369 assert(Op->getOpcode() == ISD::FP_EXTEND);
2370
2371 MVT VecTy = ty(Op);
2372 MVT ArgTy = ty(Op.getOperand(0));
2373 const SDLoc &dl(Op);
2374
2375 if (ArgTy == MVT::v64bf16) {
2376 MVT HalfTy = typeSplit(VecTy).first;
2377 SDValue BF16Vec = Op.getOperand(0);
2378 SDValue Zeroes =
2379 getInstr(Hexagon::V6_vxor, dl, HalfTy, {BF16Vec, BF16Vec}, DAG);
2380 // Interleave zero vector with the bf16 vector, with zeroes in the lower
2381 // half of each 32 bit lane, effectively extending the bf16 values to fp32
2382 // values.
2383 SDValue ShuffVec =
2384 getInstr(Hexagon::V6_vshufoeh, dl, VecTy, {BF16Vec, Zeroes}, DAG);
2385 VectorPair VecPair = opSplit(ShuffVec, dl, DAG);
2386 SDValue Result = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2387 {VecPair.second, VecPair.first,
2388 DAG.getSignedConstant(-4, dl, MVT::i32)},
2389 DAG);
2390 return Result;
2391 }
2392
2393 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2394
2395 SDValue F16Vec = Op.getOperand(0);
2396
2397 APFloat FloatVal = APFloat(1.0f);
2398 bool Ignored;
2400 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2401 SDValue VmpyVec =
2402 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2403
2404 MVT HalfTy = typeSplit(VecTy).first;
2405 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2406 SDValue LoVec =
2407 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2408 SDValue HiVec =
2409 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2410
2411 SDValue ShuffVec =
2412 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2413 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2414
2415 return ShuffVec;
2416}
2417
2418SDValue
2419HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2420 // Catch invalid conversion ops (just in case).
2421 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2422 Op.getOpcode() == ISD::FP_TO_UINT);
2423
2424 MVT ResTy = ty(Op);
2425 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2426 MVT IntTy = ResTy.getVectorElementType();
2427
2428 if (Subtarget.useHVXIEEEFPOps()) {
2429 // There are only conversions from f16.
2430 if (FpTy == MVT::f16) {
2431 // Other int types aren't legal in HVX, so we shouldn't see them here.
2432 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2433 // Conversions to i8 and i16 are legal.
2434 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2435 return Op;
2436 }
2437 }
2438
2439 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2440 return EqualizeFpIntConversion(Op, DAG);
2441
2442 return ExpandHvxFpToInt(Op, DAG);
2443}
2444
// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
// R1 = #1, R2 holds the v32i1 param
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V2 = vand(V2,V1)
// V2.sf = V2.w
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Convert a v32i1 predicate into v32f32: isolate predicate bit i in the
  // LSB of lane i, then convert each word to single-precision float.
  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // Splat the constant 1 across a vector register (V1 in the sketch above).
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Q0 = vand(V1,R1): predicate feeding the per-lane prefix sum.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Broadcast the 32-bit word that holds the original predicate bits.
  SDNode *SplatParam = DAG.getMachineNode(
      Hexagon::V6_lvsplatw, dl, MVT::v32i32,
      DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
  // Subtract 1 from the prefix sum to get the per-lane shift amounts.
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Logical-shift each lane right so its own predicate bit lands in the LSB.
  SDNode *IndexShift =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatParam, 0), SDValue(Vsub, 0));
  // Mask off everything above the LSB, leaving 0 or 1 per lane.
  SDNode *MaskOff =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift, 0), SDValue(SplatConst, 0));
  // Convert each 32-bit word to a single-precision float.
  SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
                                       SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2486
// For vector type v64i1 uint_to_fp to v64f16:
// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
// R3 = subreg_high (R32)
// R2 = subreg_low (R32)
// R1 = #1
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// V3 = vsplat(R3)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V3.w = vlsr(V3.w,V0.w)
// V2 = vand(V2,V1)
// V3 = vand(V3,V1)
// V2.h = vpacke(V3.w,V2.w)
// V2.hf = V2.h
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Convert a v64i1 predicate into v64f16: split the 64 predicate bits into
  // two 32-bit halves, isolate each bit in its own 32-bit lane (same scheme
  // as LowerHvxPred32ToFp), pack the two halves into halfwords, and convert.
  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
  // vand between splatted const and const
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Get the prefixsum
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Get the vsub (per-lane shift amounts)
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Get vlsr for hi and lo
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatHi, 0), SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatLo, 0), SDValue(Vsub, 0));
  // Get vand of hi and lo (keep only the LSB of each lane)
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
  // Pack them into halfwords
  SDNode *Pack =
      DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
                         SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
  // Convert the packed halfwords to half-precision floats.
  SDNode *Convert =
      DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2561
2562SDValue
2563HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2564 // Catch invalid conversion ops (just in case).
2565 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2566 Op.getOpcode() == ISD::UINT_TO_FP);
2567
2568 MVT ResTy = ty(Op);
2569 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2570 MVT FpTy = ResTy.getVectorElementType();
2571
2572 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2573 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2574 return LowerHvxPred32ToFp(Op, DAG);
2575 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2576 return LowerHvxPred64ToFp(Op, DAG);
2577 }
2578
2579 if (Subtarget.useHVXIEEEFPOps()) {
2580 // There are only conversions to f16.
2581 if (FpTy == MVT::f16) {
2582 // Other int types aren't legal in HVX, so we shouldn't see them here.
2583 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2584 // i8, i16 -> f16 is legal.
2585 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2586 return Op;
2587 }
2588 }
2589
2590 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2591 return EqualizeFpIntConversion(Op, DAG);
2592
2593 return ExpandHvxIntToFp(Op, DAG);
2594}
2595
2596HexagonTargetLowering::TypePair
2597HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2598 // Compare the widths of elements of the two types, and extend the narrower
2599 // type to match the with of the wider type. For vector types, apply this
2600 // to the element type.
2601 assert(Ty0.isVector() == Ty1.isVector());
2602
2603 MVT ElemTy0 = Ty0.getScalarType();
2604 MVT ElemTy1 = Ty1.getScalarType();
2605
2606 unsigned Width0 = ElemTy0.getSizeInBits();
2607 unsigned Width1 = ElemTy1.getSizeInBits();
2608 unsigned MaxWidth = std::max(Width0, Width1);
2609
2610 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2611 if (ScalarTy.isInteger())
2612 return MVT::getIntegerVT(Width);
2613 assert(ScalarTy.isFloatingPoint());
2614 return MVT::getFloatingPointVT(Width);
2615 };
2616
2617 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2618 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2619
2620 if (!Ty0.isVector()) {
2621 // Both types are scalars.
2622 return {WideETy0, WideETy1};
2623 }
2624
2625 // Vector types.
2626 unsigned NumElem = Ty0.getVectorNumElements();
2627 assert(NumElem == Ty1.getVectorNumElements());
2628
2629 return {MVT::getVectorVT(WideETy0, NumElem),
2630 MVT::getVectorVT(WideETy1, NumElem)};
2631}
2632
2633HexagonTargetLowering::TypePair
2634HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2635 // Compare the numbers of elements of two vector types, and widen the
2636 // narrower one to match the number of elements in the wider one.
2637 assert(Ty0.isVector() && Ty1.isVector());
2638
2639 unsigned Len0 = Ty0.getVectorNumElements();
2640 unsigned Len1 = Ty1.getVectorNumElements();
2641 if (Len0 == Len1)
2642 return {Ty0, Ty1};
2643
2644 unsigned MaxLen = std::max(Len0, Len1);
2645 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2646 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2647}
2648
2649MVT
2650HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2651 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2652 assert(LegalTy.isSimple());
2653 return LegalTy.getSimpleVT();
2654}
2655
2656MVT
2657HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2658 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2659 assert(Ty.getSizeInBits() <= HwWidth);
2660 if (Ty.getSizeInBits() == HwWidth)
2661 return Ty;
2662
2663 MVT ElemTy = Ty.getScalarType();
2664 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2665}
2666
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
      const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
  // Compute A+B, return {A+B, O}, where O = vector predicate indicating
  // whether an overflow has occurred.
  MVT ResTy = ty(A);
  assert(ResTy == ty(B));
  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());

  if (!Signed) {
    // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
    // save any instructions.
    SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
    // Unsigned overflow occurred iff the sum wrapped around, i.e. Add < A.
    SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
    return {Add, Ovf};
  }

  // Signed overflow has happened, if:
  // (A, B have the same sign) and (A+B has a different sign from either)
  // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
  SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
  SDValue NotA =
      DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
  SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
  SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
  // The sign bit of And (tested via a signed compare against zero) is set
  // exactly when signed overflow occurred.
  SDValue MSB =
      DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
  return {Add, MSB};
}
2697
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  // bool ovf = (inp > tmp0);
  // uint rup = inp & (1 << Amt);  // bit Amt is the LSB of the final result,
  //                               // so it decides the round-half-even tie
  //
  // uint tmp1 = inp >> (Amt-1);  // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);  // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(ElemWidth);
  MVT IntTy = tyVector(ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  // Work on the integer bit pattern (Val may be a float vector).
  SDValue Inp = DAG.getBitcast(IntTy, Val);
  SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);

  // NOTE: despite its name, AmtP1 holds 1 << Amt, the mask for the bit that
  // becomes the LSB of the shifted result (see "rup" above).
  SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
  SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
  SDValue Zero = getZero(dl, IntTy, DAG);
  SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
  // Tmp0 = inp + ((1 << (Amt-1)) - 1), Ovf = carry out of that addition.
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);

  SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
  SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
  SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);

  // Eq: the discarded low Amt-1 bits were all zero, i.e. the value was an
  // exact half (or exact integer) -- apply the round-to-even correction.
  SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
  SDValue One = DAG.getConstant(1, dl, IntTy);
  SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
  SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
  // Return {rounded value, overflow flag from the rounding addition}.
  return {Mux, Ovf};
}
2745
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Lane-wise high half of the signed 32x32-bit product of A and B, built
  // from 16-bit multiplies (HVX v60 has no direct 32x32 multiply).
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // mulhs(A,B) =
  // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //                            + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //                Hi(B) Lo(B)
  //                Hi(A) Lo(A)
  //               --------------
  //                Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //         Hi(B)*Lo(A)         | + dropping the low 16 bits
  //         Hi(A)*Lo(B)         | T2
  //  Hi(B)*Hi(A)

  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
  SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
  SDValue T5 = LoHalf(P3, DAG);
  // Add:
  SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
  return T6;
}
2802
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Full 64-bit lane-wise products of 32-bit A and B, returned as merged
  // {Lo, Hi} values, using only HVX v60 instructions. Signedness of each
  // operand is handled via corrections to the unsigned product.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);

  // Swap low/high halves in B
  SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
                        {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
                {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
  Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Signed*signed correction: subtract (B if A<0) + (A if B<0) from Hi.
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
    SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2878
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Same contract as emitHvxMulLoHiV60 (full 64-bit lane products as
  // {Lo, Hi}), but using the v62 64-bit multiply instructions: compute the
  // signed*signed product directly, then correct the high half for any
  // unsigned operand.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
  SDValue P1 =
      getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
  SDValue Lo = LoHalf(P1, DAG);
  SDValue Hi = HiHalf(P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2932
2933SDValue
2934HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2935 const {
2936 // Rewrite conversion between integer and floating-point in such a way that
2937 // the integer type is extended/narrowed to match the bitwidth of the
2938 // floating-point type, combined with additional integer-integer extensions
2939 // or narrowings to match the original input/result types.
2940 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2941 //
2942 // The input/result types are not required to be legal, but if they are
2943 // legal, this function should not introduce illegal types.
2944
2945 unsigned Opc = Op.getOpcode();
2948
2949 SDValue Inp = Op.getOperand(0);
2950 MVT InpTy = ty(Inp);
2951 MVT ResTy = ty(Op);
2952
2953 if (InpTy == ResTy)
2954 return Op;
2955
2956 const SDLoc &dl(Op);
2958
2959 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2960 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2961 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2962 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2963 return Res;
2964}
2965
SDValue
HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
  // Expand FP_TO_SINT/FP_TO_UINT elementwise via integer manipulation of
  // the IEEE bit pattern, unless a direct conversion instruction applies
  // (V81+ or the fast-convert flag below).
  unsigned Opc = Op.getOpcode();

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0);
  MVT InpTy = ty(Op0);
  MVT ResTy = ty(Op);
  assert(InpTy.changeTypeToInteger() == ResTy);

  // At this point this is an experiment under a flag.
  // In arch before V81 the rounding mode is towards nearest value.
  // The C/C++ standard requires rounding towards zero:
  // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
  // finite value of real floating type is converted to an integer type, the
  // fractional part is discarded (i.e., the value is truncated toward zero)."
  // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
  // floating-point type can be converted to a prvalue of an integer type. The
  // conversion truncates; that is, the fractional part is discarded."
  if (InpTy == MVT::v64f16) {
    if (Subtarget.useHVXV81Ops()) {
      // This is c/c++ compliant
      SDValue ConvVec =
          getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG);
      return ConvVec;
    } else if (EnableFpFastConvert) {
      // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
      SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG);
      return ConvVec;
    }
  } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
    // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
    SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG);
    return ConvVec;
  }

  // Reference scalar algorithm (f32 -> i32 shown; the code below is the
  // same computation generalized over the IEEE parameters of InpTy):
  //
  // int32_t conv_f32_to_i32(uint32_t inp) {
  //   // s | exp8 | frac23
  //
  //   int neg = (int32_t)inp < 0;
  //
  //   // "expm1" is the actual exponent minus 1: instead of "bias", subtract
  //   // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
  //   // produce a large positive "expm1", which will result in max u/int.
  //   // In all IEEE formats, bias is the largest positive number that can be
  //   // represented in bias-width bits (i.e. 011..1).
  //   int32_t expm1 = (inp << 1) - 0x80000000;
  //   expm1 >>= 24;
  //
  //   // Always insert the "implicit 1". Subnormal numbers will become 0
  //   // regardless.
  //   uint32_t frac = (inp << 8) | 0x80000000;
  //
  //   // "frac" is the fraction part represented as Q1.31. If it was
  //   // interpreted as uint32_t, it would be the fraction part multiplied
  //   // by 2^31.
  //
  //   // Calculate the amount of right shift, since shifting further to the
  //   // left would lose significant bits. Limit it to 32, because we want
  //   // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
  //   // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
  //   // left by 31). "rsh" can be negative.
  //   int32_t rsh = min(31 - (expm1 + 1), 32);
  //
  //   frac >>= rsh;   // rsh == 32 will produce 0
  //
  //   // Everything up to this point is the same for conversion to signed
  //   // unsigned integer.
  //
  //   if (neg)                 // Only for signed int
  //     frac = -frac;          //
  //   if (rsh <= 0 && neg)     //   bound = neg ? 0x80000000 : 0x7fffffff
  //     frac = 0x80000000;     //   frac = rsh <= 0 ? bound : frac
  //   if (rsh <= 0 && !neg)    //
  //     frac = 0x7fffffff;     //
  //
  //   if (neg)                 // Only for unsigned int
  //     frac = 0;              //
  //   if (rsh < 0 && !neg)     //   frac = rsh < 0 ? 0x7fffffff : frac;
  //     frac = 0x7fffffff;     //   frac = neg ? 0 : frac;
  //
  //   return frac;
  // }

  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());

  // Instruction-level sketch of the expansion below:
  // Zero = V6_vd0();
  // Neg = V6_vgtw(Zero, Inp);
  // One = V6_lvsplatw(1);
  // M80 = V6_lvsplatw(0x80000000);
  // Exp00 = V6_vaslwv(Inp, One);
  // Exp01 = V6_vsubw(Exp00, M80);
  // ExpM1 = V6_vasrw(Exp01, 24);
  // Frc00 = V6_vaslw(Inp, 8);
  // Frc01 = V6_vor(Frc00, M80);
  // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
  // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
  // Frc02 = V6_vlsrwv(Frc01, Rsh01);

  // if signed int:
  // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
  // Pos = V6_vgtw(Rsh01, Zero);
  // Frc13 = V6_vsubw(Zero, Frc02);
  // Frc14 = V6_vmux(Neg, Frc13, Frc02);
  // Int = V6_vmux(Pos, Frc14, Bnd);
  //
  // if unsigned int:
  // Rsn = V6_vgtw(Zero, Rsh01)
  // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
  // Int = V6_vmux(Neg, Zero, Frc23)

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;
  assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));

  // Reinterpret the float input as its integer bit pattern.
  SDValue Inp = DAG.getBitcast(ResTy, Op0);
  SDValue Zero = getZero(dl, ResTy, DAG);
  SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
  SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
  SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
  SDValue One = DAG.getConstant(1, dl, ResTy);
  SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
  SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
  SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
  SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});

  SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
  SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
  SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});

  SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
  SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
  SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
  SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
  SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});

  SDValue Int;

  if (Opc == ISD::FP_TO_SINT) {
    SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
    SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
    SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
    SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
    Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
  } else {
    SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
    SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
    Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
  }

  return Int;
}
3120
SDValue
HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
  // Expand SINT_TO_FP/UINT_TO_FP elementwise by constructing the IEEE bit
  // pattern (sign | exponent | rounded fraction) with integer operations.
  unsigned Opc = Op.getOpcode();

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0);
  MVT InpTy = ty(Op0);
  MVT ResTy = ty(Op);
  assert(ResTy.changeTypeToInteger() == InpTy);

  // Reference scalar algorithm (i32 -> f32 shown; the code below generalizes
  // it over the IEEE parameters of ResTy):
  //
  // uint32_t vnoc1_rnd(int32_t w) {
  //   int32_t iszero = w == 0;
  //   int32_t isneg = w < 0;
  //   uint32_t u = __builtin_HEXAGON_A2_abs(w);
  //
  //   uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
  //   uint32_t frac0 = (uint64_t)u << norm_left;
  //
  //   // Rounding:
  //   uint32_t frac1 = frac0 + ((1 << 8) - 1);
  //   uint32_t renorm = (frac0 > frac1);
  //   uint32_t rup = (int)(frac0 << 22) < 0;
  //
  //   uint32_t frac2 = frac0 >> 8;
  //   uint32_t frac3 = frac1 >> 8;
  //   uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
  //
  //   int32_t exp = 32 - norm_left + renorm + 127;
  //   exp <<= 23;
  //
  //   uint32_t sign = 0x80000000 * isneg;
  //   uint32_t f = sign | exp | frac;
  //   return iszero ? 0 : f;
  // }

  MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
  bool Signed = Opc == ISD::SINT_TO_FP;

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;

  SDValue Zero = getZero(dl, InpTy, DAG);
  SDValue One = DAG.getConstant(1, dl, InpTy);
  // Zero input must map to all-zero float bits (the generic path below
  // would otherwise produce a nonzero pattern).
  SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
  SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
  // Normalize: shift left so the leading 1 falls off the top; Frac0 holds
  // the fraction bits left-aligned.
  SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
  SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
  SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});

  // Round the fraction to FracWidth bits (half-to-even); Ovf indicates the
  // rounding carried out, requiring an exponent bump (renormalization).
  auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
  if (Signed) {
    SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
    SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
    SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
    Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
  }

  // Exponent: ElemWidth + ExpBias + renorm - norm_left, then shifted into
  // position above the fraction field.
  SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
  SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
  SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
  SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
  SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
                             {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
  SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
  SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
  SDValue Flt = DAG.getBitcast(ResTy, Flt1);

  return Flt;
}
3191
3192SDValue
3193HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3194 unsigned Opc = Op.getOpcode();
3195 unsigned TLOpc;
3196 switch (Opc) {
3197 case ISD::ANY_EXTEND:
3198 case ISD::SIGN_EXTEND:
3199 case ISD::ZERO_EXTEND:
3200 TLOpc = HexagonISD::TL_EXTEND;
3201 break;
3202 case ISD::TRUNCATE:
3204 break;
3205#ifndef NDEBUG
3206 Op.dump(&DAG);
3207#endif
3208 llvm_unreachable("Unexpected operator");
3209 }
3210
3211 const SDLoc &dl(Op);
3212 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
3213 DAG.getUNDEF(MVT::i128), // illegal type
3214 DAG.getConstant(Opc, dl, MVT::i32));
3215}
3216
3217SDValue
3218HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3219 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3220 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3221 unsigned Opc = Op.getConstantOperandVal(2);
3222 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3223}
3224
HexagonTargetLowering::VectorPair
HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
  // Split an operation whose result type is an HVX vector pair into two
  // identical operations on the low/high halves, splitting every vector
  // operand (and, for the ops below, any VT operand) accordingly.
  assert(!Op.isMachineOpcode());
  SmallVector<SDValue, 2> OpsL, OpsH;
  const SDLoc &dl(Op);

  // Produce the half-width VT operand used by SIGN_EXTEND_INREG/SSAT/USAT.
  auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
    MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
    SDValue TV = DAG.getValueType(Ty);
    return std::make_pair(TV, TV);
  };

  for (SDValue A : Op.getNode()->ops()) {
    // Non-vector operands are used unchanged by both halves.
    auto [Lo, Hi] =
        ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
    // Special case for type operand.
    switch (Op.getOpcode()) {
    case ISD::SIGN_EXTEND_INREG:
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
      if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
        std::tie(Lo, Hi) = SplitVTNode(N);
      break;
    }
    OpsL.push_back(Lo);
    OpsH.push_back(Hi);
  }

  MVT ResTy = ty(Op);
  MVT HalfTy = typeSplit(ResTy).first;
  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
  return {L, H};
}
3259
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a load/store/masked-load/masked-store of an HVX vector pair into
  // two single-register memory operations at Base and Base+HwLen, joined by
  // a TokenFactor (and CONCAT_VECTORS for the loaded values).
  auto *MemN = cast<MemSDNode>(Op.getNode());

  if (!MemN->getMemoryVT().isSimple())
    return Op;

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  // The second half lives one HVX register (HwLen bytes) past the first.
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  // Derive per-half memory operands from the original MMO. For masked ops
  // the accessed size is unknown (lanes may be disabled), so mark it as such.
  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  // The predicate mask splits the same way as the data.
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}
3344
3345SDValue
3346HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3347 const SDLoc &dl(Op);
3348 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3349 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3350 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3351 "Not widening loads of i1 yet");
3352
3353 SDValue Chain = LoadN->getChain();
3354 SDValue Base = LoadN->getBasePtr();
3355 SDValue Offset = DAG.getUNDEF(MVT::i32);
3356
3357 MVT ResTy = ty(Op);
3358 unsigned HwLen = Subtarget.getVectorLength();
3359 unsigned ResLen = ResTy.getStoreSize();
3360 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3361
3362 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3363 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3364 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3365
3366 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3367 MachineFunction &MF = DAG.getMachineFunction();
3368 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3369
3370 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3371 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3373 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3374 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3375}
3376
SDValue
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
  // Widen a short vector store to a full HVX register: pad the value with
  // undef up to HwLen bytes, then do a masked store with a predicate that
  // enables only the original ValueLen byte lanes.
  const SDLoc &dl(Op);
  auto *StoreN = cast<StoreSDNode>(Op.getNode());
  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening stores of i1 yet");

  SDValue Chain = StoreN->getChain();
  SDValue Base = StoreN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  // View the stored value as bytes.
  SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
  MVT ValueTy = ty(Value);
  unsigned ValueLen = ValueTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(ValueLen));

  // Repeatedly double the vector with undef halves until it fills a register.
  for (unsigned Len = ValueLen; Len < HwLen; ) {
    Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
    Len = ty(Value).getVectorNumElements(); // This is Len *= 2
  }
  assert(ty(Value).getVectorNumElements() == HwLen);  // Paranoia

  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
  // Predicate register with the first ValueLen byte lanes enabled.
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
  return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
                            MemOp, ISD::UNINDEXED, false, false);
}
3410
SDValue
HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
  // Widen a short-vector SETCC: pad both operands with undef to a full HVX
  // vector, compare there, then extract the subvector covering the original
  // number of elements.
  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
  MVT ElemTy = ty(Op0).getVectorElementType();
  unsigned HwLen = Subtarget.getVectorLength();

  // Number of elements of ElemTy filling one HVX register exactly.
  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
  MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
  if (!Subtarget.isHVXVectorType(WideOpTy, true))
    return SDValue();

  SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
  SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
  EVT ResTy =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
  SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
                              {WideOp0, WideOp1, Op.getOperand(2)});

  // Extract the leading elements as the legalized form of the original
  // result type (index 0).
  EVT RetTy = typeLegalize(ty(Op), DAG);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
                     {SetCC, getZero(dl, MVT::i32, DAG)});
}
3435
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  // Main HVX lowering dispatch. Operations involving vector-pair types are
  // split into two single-register operations where that is legal; the rest
  // are forwarded to the per-opcode LowerHvx* routines.
  unsigned Opc = Op.getOpcode();
  bool IsPairOp = isHvxPairTy(ty(Op)) ||
                  llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
                    return isHvxPairTy(ty(V));
                  });

  if (IsPairOp) {
    switch (Opc) {
    default:
      break;
    case ISD::LOAD:
    case ISD::STORE:
    case ISD::MLOAD:
    case ISD::MSTORE:
      return SplitHvxMemOp(Op, DAG);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
      // Conversions are only split here when input and result are the same
      // width; mixed-width conversions take a different path.
      if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
        return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
      break;
    case ISD::ABS:
    case ISD::CTPOP:
    case ISD::CTLZ:
    case ISD::CTTZ:
    case ISD::MUL:
    case ISD::FADD:
    case ISD::FSUB:
    case ISD::FMUL:
    case ISD::FMINIMUMNUM:
    case ISD::FMAXIMUMNUM:
    case ISD::MULHS:
    case ISD::MULHU:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:
    case ISD::FSHL:
    case ISD::FSHR:
    case ISD::SMIN:
    case ISD::SMAX:
    case ISD::UMIN:
    case ISD::UMAX:
    case ISD::SETCC:
    case ISD::VSELECT:
    case ISD::SPLAT_VECTOR:
      // Lane-wise operations split trivially.
      return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
      // In general, sign- and zero-extends can't be split and still
      // be legal. The only exception is extending bool vectors.
      if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
        return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
      break;
    }
  }

  switch (Opc) {
  default:
    break;
  case ISD::BUILD_VECTOR:          return LowerHvxBuildVector(Op, DAG);
  case ISD::SPLAT_VECTOR:          return LowerHvxSplatVector(Op, DAG);
  case ISD::CONCAT_VECTORS:        return LowerHvxConcatVectors(Op, DAG);
  case ISD::INSERT_SUBVECTOR:      return LowerHvxInsertSubvector(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:     return LowerHvxInsertElement(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:     return LowerHvxExtractSubvector(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:    return LowerHvxExtractElement(Op, DAG);
  case ISD::BITCAST:               return LowerHvxBitcast(Op, DAG);
  case ISD::ANY_EXTEND:            return LowerHvxAnyExt(Op, DAG);
  case ISD::SIGN_EXTEND:           return LowerHvxSignExt(Op, DAG);
  case ISD::ZERO_EXTEND:           return LowerHvxZeroExt(Op, DAG);
  case ISD::CTTZ:                  return LowerHvxCttz(Op, DAG);
  case ISD::SELECT:                return LowerHvxSelect(Op, DAG);
  case ISD::SRA:
  case ISD::SHL:
  case ISD::SRL:                   return LowerHvxShift(Op, DAG);
  case ISD::FSHL:
  case ISD::FSHR:                  return LowerHvxFunnelShift(Op, DAG);
  case ISD::MULHS:
  case ISD::MULHU:                 return LowerHvxMulh(Op, DAG);
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:             return LowerHvxMulLoHi(Op, DAG);
  case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
  case ISD::SETCC:
  case ISD::INTRINSIC_VOID:        return Op;
  case ISD::INTRINSIC_WO_CHAIN:    return LowerHvxIntrinsic(Op, DAG);
  case ISD::MLOAD:
  case ISD::MSTORE:                return LowerHvxMaskedOp(Op, DAG);
  // Unaligned loads will be handled by the default lowering.
  case ISD::LOAD:                  return SDValue();
  case ISD::FP_EXTEND:             return LowerHvxFpExtend(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:            return LowerHvxFpToInt(Op, DAG);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:            return LowerHvxIntToFp(Op, DAG);

  // Special nodes:
  case HexagonISD::USMUL_LOHI:     return LowerHvxMulLoHi(Op, DAG);
  }
#ifndef NDEBUG
  Op.dumpr(&DAG);
#endif
  llvm_unreachable("Unhandled HVX operation");
}
3548
3549SDValue
3550HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3551 const {
3552 // Rewrite the extension/truncation/saturation op into steps where each
3553 // step changes the type widths by a factor of 2.
3554 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3555 //
3556 // Some of the vector types in Op may not be legal.
3557
3558 unsigned Opc = Op.getOpcode();
3559 switch (Opc) {
3560 case HexagonISD::SSAT:
3561 case HexagonISD::USAT:
3564 break;
3565 case ISD::ANY_EXTEND:
3566 case ISD::ZERO_EXTEND:
3567 case ISD::SIGN_EXTEND:
3568 case ISD::TRUNCATE:
3569 llvm_unreachable("ISD:: ops will be auto-folded");
3570 break;
3571#ifndef NDEBUG
3572 Op.dump(&DAG);
3573#endif
3574 llvm_unreachable("Unexpected operation");
3575 }
3576
3577 SDValue Inp = Op.getOperand(0);
3578 MVT InpTy = ty(Inp);
3579 MVT ResTy = ty(Op);
3580
3581 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3582 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3583 assert(InpWidth != ResWidth);
3584
3585 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3586 return Op;
3587
3588 const SDLoc &dl(Op);
3589 unsigned NumElems = InpTy.getVectorNumElements();
3590 assert(NumElems == ResTy.getVectorNumElements());
3591
3592 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3593 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3594 switch (Opc) {
3595 case HexagonISD::SSAT:
3596 case HexagonISD::USAT:
3597 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3600 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3601 default:
3602 llvm_unreachable("Unexpected opcode");
3603 }
3604 };
3605
3606 SDValue S = Inp;
3607 if (InpWidth < ResWidth) {
3608 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3609 while (InpWidth * 2 <= ResWidth)
3610 S = repeatOp(InpWidth *= 2, S);
3611 } else {
3612 // InpWidth > ResWidth
3613 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3614 while (InpWidth / 2 >= ResWidth)
3615 S = repeatOp(InpWidth /= 2, S);
3616 }
3617 return S;
3618}
3619
3620SDValue
3621HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3622 SDValue Inp0 = Op.getOperand(0);
3623 MVT InpTy = ty(Inp0);
3624 MVT ResTy = ty(Op);
3625 unsigned InpWidth = InpTy.getSizeInBits();
3626 unsigned ResWidth = ResTy.getSizeInBits();
3627 unsigned Opc = Op.getOpcode();
3628
3629 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3630 // First, make sure that the narrower type is widened to HVX.
3631 // This may cause the result to be wider than what the legalizer
3632 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3633 // desired type.
3634 auto [WInpTy, WResTy] =
3635 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3636 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3637 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3638 SDValue S;
3640 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3641 Op.getOperand(2));
3642 } else {
3643 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3644 }
3645 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3646 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3647 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3648 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3649 } else {
3650 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3651 return RemoveTLWrapper(Op, DAG);
3652 }
3653 llvm_unreachable("Unexpected situation");
3654}
3655
3656void
3657HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3659 unsigned Opc = N->getOpcode();
3660 SDValue Op(N, 0);
3661 SDValue Inp0; // Optional first argument.
3662 if (N->getNumOperands() > 0)
3663 Inp0 = Op.getOperand(0);
3664
3665 switch (Opc) {
3666 case ISD::ANY_EXTEND:
3667 case ISD::SIGN_EXTEND:
3668 case ISD::ZERO_EXTEND:
3669 case ISD::TRUNCATE:
3670 if (Subtarget.isHVXElementType(ty(Op)) &&
3671 Subtarget.isHVXElementType(ty(Inp0))) {
3672 Results.push_back(CreateTLWrapper(Op, DAG));
3673 }
3674 break;
3675 case ISD::SETCC:
3676 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3677 if (SDValue T = WidenHvxSetCC(Op, DAG))
3678 Results.push_back(T);
3679 }
3680 break;
3681 case ISD::STORE: {
3682 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3683 SDValue Store = WidenHvxStore(Op, DAG);
3684 Results.push_back(Store);
3685 }
3686 break;
3687 }
3688 case ISD::MLOAD:
3689 if (isHvxPairTy(ty(Op))) {
3690 SDValue S = SplitHvxMemOp(Op, DAG);
3692 Results.push_back(S.getOperand(0));
3693 Results.push_back(S.getOperand(1));
3694 }
3695 break;
3696 case ISD::MSTORE:
3697 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3698 SDValue S = SplitHvxMemOp(Op, DAG);
3699 Results.push_back(S);
3700 }
3701 break;
3702 case ISD::SINT_TO_FP:
3703 case ISD::UINT_TO_FP:
3704 case ISD::FP_TO_SINT:
3705 case ISD::FP_TO_UINT:
3706 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3707 SDValue T = EqualizeFpIntConversion(Op, DAG);
3708 Results.push_back(T);
3709 }
3710 break;
3711 case HexagonISD::SSAT:
3712 case HexagonISD::USAT:
3715 Results.push_back(LegalizeHvxResize(Op, DAG));
3716 break;
3717 default:
3718 break;
3719 }
3720}
3721
3722void
3723HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3725 unsigned Opc = N->getOpcode();
3726 SDValue Op(N, 0);
3727 SDValue Inp0; // Optional first argument.
3728 if (N->getNumOperands() > 0)
3729 Inp0 = Op.getOperand(0);
3730
3731 switch (Opc) {
3732 case ISD::ANY_EXTEND:
3733 case ISD::SIGN_EXTEND:
3734 case ISD::ZERO_EXTEND:
3735 case ISD::TRUNCATE:
3736 if (Subtarget.isHVXElementType(ty(Op)) &&
3737 Subtarget.isHVXElementType(ty(Inp0))) {
3738 Results.push_back(CreateTLWrapper(Op, DAG));
3739 }
3740 break;
3741 case ISD::SETCC:
3742 if (shouldWidenToHvx(ty(Op), DAG)) {
3743 if (SDValue T = WidenHvxSetCC(Op, DAG))
3744 Results.push_back(T);
3745 }
3746 break;
3747 case ISD::LOAD: {
3748 if (shouldWidenToHvx(ty(Op), DAG)) {
3749 SDValue Load = WidenHvxLoad(Op, DAG);
3750 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3751 Results.push_back(Load.getOperand(0));
3752 Results.push_back(Load.getOperand(1));
3753 }
3754 break;
3755 }
3756 case ISD::BITCAST:
3757 if (isHvxBoolTy(ty(Inp0))) {
3758 SDValue C = LowerHvxBitcast(Op, DAG);
3759 Results.push_back(C);
3760 }
3761 break;
3762 case ISD::FP_TO_SINT:
3763 case ISD::FP_TO_UINT:
3764 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3765 SDValue T = EqualizeFpIntConversion(Op, DAG);
3766 Results.push_back(T);
3767 }
3768 break;
3769 case HexagonISD::SSAT:
3770 case HexagonISD::USAT:
3773 Results.push_back(LegalizeHvxResize(Op, DAG));
3774 break;
3775 default:
3776 break;
3777 }
3778}
3779
3780SDValue
3781HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3782 DAGCombinerInfo &DCI) const {
3783 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3784 // to extract-subvector (shuffle V, pick even, pick odd)
3785
3786 assert(Op.getOpcode() == ISD::TRUNCATE);
3787 SelectionDAG &DAG = DCI.DAG;
3788 const SDLoc &dl(Op);
3789
3790 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3791 return SDValue();
3792 SDValue Cast = Op.getOperand(0);
3793 SDValue Src = Cast.getOperand(0);
3794
3795 EVT TruncTy = Op.getValueType();
3796 EVT CastTy = Cast.getValueType();
3797 EVT SrcTy = Src.getValueType();
3798 if (SrcTy.isSimple())
3799 return SDValue();
3800 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3801 return SDValue();
3802 unsigned SrcLen = SrcTy.getVectorNumElements();
3803 unsigned CastLen = CastTy.getVectorNumElements();
3804 if (2 * CastLen != SrcLen)
3805 return SDValue();
3806
3807 SmallVector<int, 128> Mask(SrcLen);
3808 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3809 Mask[i] = 2 * i;
3810 Mask[i + CastLen] = 2 * i + 1;
3811 }
3812 SDValue Deal =
3813 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3814 return opSplit(Deal, dl, DAG).first;
3815}
3816
3817SDValue
3818HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3819 SDValue Op, DAGCombinerInfo &DCI) const {
3820 // Fold
3821 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3822 // into
3823 // shuffle (concat x, y), undef, m3
3824 if (Op.getNumOperands() != 2)
3825 return SDValue();
3826
3827 SelectionDAG &DAG = DCI.DAG;
3828 const SDLoc &dl(Op);
3829 SDValue V0 = Op.getOperand(0);
3830 SDValue V1 = Op.getOperand(1);
3831
3832 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3833 return SDValue();
3834 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3835 return SDValue();
3836
3837 SetVector<SDValue> Order;
3838 Order.insert(V0.getOperand(0));
3839 Order.insert(V0.getOperand(1));
3840 Order.insert(V1.getOperand(0));
3841 Order.insert(V1.getOperand(1));
3842
3843 if (Order.size() > 2)
3844 return SDValue();
3845
3846 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3847 // result must be the same.
3848 EVT InpTy = V0.getValueType();
3849 assert(InpTy.isVector());
3850 unsigned InpLen = InpTy.getVectorNumElements();
3851
3852 SmallVector<int, 128> LongMask;
3853 auto AppendToMask = [&](SDValue Shuffle) {
3854 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3855 ArrayRef<int> Mask = SV->getMask();
3856 SDValue X = Shuffle.getOperand(0);
3857 SDValue Y = Shuffle.getOperand(1);
3858 for (int M : Mask) {
3859 if (M == -1) {
3860 LongMask.push_back(M);
3861 continue;
3862 }
3863 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3864 if (static_cast<unsigned>(M) >= InpLen)
3865 M -= InpLen;
3866
3867 int OutOffset = Order[0] == Src ? 0 : InpLen;
3868 LongMask.push_back(M + OutOffset);
3869 }
3870 };
3871
3872 AppendToMask(V0);
3873 AppendToMask(V1);
3874
3875 SDValue C0 = Order.front();
3876 SDValue C1 = Order.back(); // Can be same as front
3877 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3878
3879 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3880 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3881}
3882
3883SDValue
3884HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3885 const {
3886 const SDLoc &dl(N);
3887 SelectionDAG &DAG = DCI.DAG;
3888 SDValue Op(N, 0);
3889 unsigned Opc = Op.getOpcode();
3890
3892
3893 if (Opc == ISD::TRUNCATE)
3894 return combineTruncateBeforeLegal(Op, DCI);
3895 if (Opc == ISD::CONCAT_VECTORS)
3896 return combineConcatVectorsBeforeLegal(Op, DCI);
3897
3898 if (DCI.isBeforeLegalizeOps())
3899 return SDValue();
3900
3901 switch (Opc) {
3902 case ISD::VSELECT: {
3903 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3904 SDValue Cond = Ops[0];
3905 if (Cond->getOpcode() == ISD::XOR) {
3906 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3907 if (C1->getOpcode() == HexagonISD::QTRUE)
3908 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3909 }
3910 break;
3911 }
3912 case HexagonISD::V2Q:
3913 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3914 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3915 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3916 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3917 }
3918 break;
3919 case HexagonISD::Q2V:
3920 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3921 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3922 DAG.getAllOnesConstant(dl, MVT::i32));
3923 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3924 return getZero(dl, ty(Op), DAG);
3925 break;
3927 if (isUndef(Ops[1]))
3928 return Ops[0];
3929 break;
3930 case HexagonISD::VROR: {
3931 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3932 SDValue Vec = Ops[0].getOperand(0);
3933 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3934 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3935 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3936 }
3937 break;
3938 }
3939 }
3940
3941 return SDValue();
3942}
3943
3944bool
3945HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3946 if (Subtarget.isHVXVectorType(Ty, true))
3947 return false;
3948 auto Action = getPreferredHvxVectorAction(Ty);
3950 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3951 return false;
3952}
3953
3954bool
3955HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3956 if (Subtarget.isHVXVectorType(Ty, true))
3957 return false;
3958 auto Action = getPreferredHvxVectorAction(Ty);
3960 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3961 return false;
3962}
3963
3964bool
3965HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3966 if (!Subtarget.useHVXOps())
3967 return false;
3968 // If the type of any result, or any operand type are HVX vector types,
3969 // this is an HVX operation.
3970 auto IsHvxTy = [this](EVT Ty) {
3971 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3972 };
3973 auto IsHvxOp = [this](SDValue Op) {
3974 return Op.getValueType().isSimple() &&
3975 Subtarget.isHVXVectorType(ty(Op), true);
3976 };
3977 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3978 return true;
3979
3980 // Check if this could be an HVX operation after type widening.
3981 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3982 if (!Op.getValueType().isSimple())
3983 return false;
3984 MVT ValTy = ty(Op);
3985 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3986 };
3987
3988 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3989 if (IsWidenedToHvx(SDValue(N, i)))
3990 return true;
3991 }
3992 return llvm::any_of(N->ops(), IsWidenedToHvx);
3993}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
static cl::opt< bool > EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false), cl::desc("Enable FP fast conversion routine."))
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:132
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:138
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2148
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.