LLVM 22.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
// Command-line knob used by the widening heuristic in
// getPreferredHvxVectorAction: vectors at least this many bytes wide are
// widened to full HVX vectors.
// NOTE(review): one argument line of this cl::opt (original line 31, likely
// cl::Hidden/cl::init) was elided in this extraction — confirm against the
// upstream source.
30 static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
// Opt-in flag for the fast FP conversion lowering; off by default.
34 static cl::opt<bool>
35 EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false),
36 cl::desc("Enable FP fast conversion routine."));
37
// Legal integer vector types: single vector registers (V) and vector
// register pairs (W), for the 64-byte and 128-byte HVX configurations.
// These types are registered with HvxVR/HvxWR in initializeHVXLowering.
38 static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
39 static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
40 static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
41 static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
42
43static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
44 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
45 MVT ElemTy = Ty.getScalarType();
46 switch (ElemTy.SimpleTy) {
47 case MVT::f16:
48 return std::make_tuple(5, 15, 10);
49 case MVT::f32:
50 return std::make_tuple(8, 127, 23);
51 case MVT::f64:
52 return std::make_tuple(11, 1023, 52);
53 default:
54 break;
55 }
56 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
57}
58
// Configure HVX lowering: register the HVX vector types with their register
// classes (single vector registers, register pairs, and predicate
// registers), then set the legalization/promotion actions for operations on
// those types.
// NOTE(review): this listing was extracted with many interior lines elided
// (gaps in the embedded numbering); confirm details against the upstream
// source before editing.
59 void
60 HexagonTargetLowering::initializeHVXLowering() {
61 if (Subtarget.useHVX64BOps()) {
62 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
63 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
64 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
65 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
66 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
67 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
68 // These "short" boolean vector types should be legal because
69 // they will appear as results of vector compares. If they were
70 // not legal, type legalization would try to make them legal
71 // and that would require using operations that do not use or
72 // produce such types. That, in turn, would imply using custom
73 // nodes, which would be unoptimizable by the DAG combiner.
74 // The idea is to rely on target-independent operations as much
75 // as possible.
76 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
77 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
78 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
79 } else if (Subtarget.useHVX128BOps()) {
80 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
81 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
82 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
83 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
84 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
85 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
86 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
87 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
88 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
// Floating-point HVX types require v68+ and HVX FP support.
89 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
90 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
91 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
92 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
93 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
94 }
// bfloat16 HVX types require v81+.
95 if (Subtarget.useHVXV81Ops()) {
96 addRegisterClass(MVT::v64bf16, &Hexagon::HvxVRRegClass);
97 addRegisterClass(MVT::v128bf16, &Hexagon::HvxWRRegClass);
98 }
99 }
100
101 // Set up operation actions.
102
103 bool Use64b = Subtarget.useHVX64BOps();
104 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
105 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
106 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
107 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
108 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
109
// Shorthand: mark Opc on FromTy as Promote with ToTy as the promoted type.
110 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
112 AddPromotedToType(Opc, FromTy, ToTy);
113 };
114
115 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
116 // Note: v16i1 -> i16 is handled in type legalization instead of op
117 // legalization.
118 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
119 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
120 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
121 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
122 setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
123 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
127
128 if (Subtarget.useHVX128BOps()) {
129 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
130 setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
131 }
// Actions for the HVX floating-point vector types (128B mode, v68+).
132 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
133 Subtarget.useHVXFloatingPoint()) {
134
135 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
136 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
137
138 for (MVT T : FloatV) {
142 setOperationAction(ISD::FMINIMUMNUM, T, Legal);
143 setOperationAction(ISD::FMAXIMUMNUM, T, Legal);
144
147
150
151 setOperationAction(ISD::MLOAD, T, Custom);
152 setOperationAction(ISD::MSTORE, T, Custom);
153 // Custom-lower BUILD_VECTOR. The standard (target-independent)
154 // handling of it would convert it to a load, which is not always
155 // the optimal choice.
157 }
158
159
160 // BUILD_VECTOR with f16 operands cannot be promoted without
161 // promoting the result, so lower the node to vsplat or constant pool
165
166 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
167 // generated.
168 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
169 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
170 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
171 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
172
// bf16 arithmetic is promoted to f32 vectors; shuffles go to bytes.
173 if (Subtarget.useHVXV81Ops()) {
174 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
175 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
176 setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
177 setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
178 setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
179 setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
180 setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
181 setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);
182
186
187 setOperationAction(ISD::MLOAD, MVT::v64bf16, Custom);
188 setOperationAction(ISD::MSTORE, MVT::v64bf16, Custom);
191
195 }
196
// Actions for the floating-point vector-pair types.
197 for (MVT P : FloatW) {
198 setOperationAction(ISD::LOAD, P, Custom);
199 setOperationAction(ISD::STORE, P, Custom);
203 setOperationAction(ISD::FMINIMUMNUM, P, Custom);
204 setOperationAction(ISD::FMAXIMUMNUM, P, Custom);
207
208 // Custom-lower BUILD_VECTOR. The standard (target-independent)
209 // handling of it would convert it to a load, which is not always
210 // the optimal choice.
212 // Make concat-vectors custom to handle concats of more than 2 vectors.
214
215 setOperationAction(ISD::MLOAD, P, Custom);
216 setOperationAction(ISD::MSTORE, P, Custom);
217 }
218
// FP_EXTEND handling differs between the QFloat and IEEE FP units.
219 if (Subtarget.useHVXQFloatOps()) {
220 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
222 } else if (Subtarget.useHVXIEEEFPOps()) {
223 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
225 }
226 }
227
// Actions for the legal single-vector integer types.
228 for (MVT T : LegalV) {
231
247 if (T != ByteV) {
251 }
252
255 if (T.getScalarType() != MVT::i32) {
258 }
259
261 setOperationAction(ISD::LOAD, T, Custom);
262 setOperationAction(ISD::MLOAD, T, Custom);
263 setOperationAction(ISD::MSTORE, T, Custom);
264 if (T.getScalarType() != MVT::i32) {
267 }
268
270 // Make concat-vectors custom to handle concats of more than 2 vectors.
281 if (T != ByteV) {
283 // HVX only has shifts of words and halfwords.
287
288 // Promote all shuffles to operate on vectors of bytes.
289 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
290 }
291
292 if (Subtarget.useHVXFloatingPoint()) {
293 // Same action for both QFloat and IEEE.
298 }
299
307 }
308
// Actions for the legal vector-pair integer types.
309 for (MVT T : LegalW) {
310 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
311 // independent) handling of it would convert it to a load, which is
312 // not always the optimal choice.
314 // Make concat-vectors custom to handle concats of more than 2 vectors.
316
317 // Custom-lower these operations for pairs. Expand them into a concat
318 // of the corresponding operations on individual vectors.
327
328 setOperationAction(ISD::LOAD, T, Custom);
329 setOperationAction(ISD::STORE, T, Custom);
330 setOperationAction(ISD::MLOAD, T, Custom);
331 setOperationAction(ISD::MSTORE, T, Custom);
336
351 if (T != ByteW) {
355
356 // Promote all shuffles to operate on vectors of bytes.
357 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
358 }
361
364 if (T.getScalarType() != MVT::i32) {
367 }
368
369 if (Subtarget.useHVXFloatingPoint()) {
370 // Same action for both QFloat and IEEE.
375 }
376 }
377
378 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
379 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
380 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
383
// FP condition codes that are expanded (rewritten in terms of the
// supported compares) for HVX fp vector types.
384 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
385 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
386 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
387 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
388 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
389 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
390 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
391 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
392 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
393 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
394 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
395 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
396 setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
397 setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
398
399 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
400 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
401 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
402 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
403 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
404 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
405 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
406 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
407 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
408 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
409 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
410 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
411 setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
412 setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
413
414 // Boolean vectors.
415
416 for (MVT T : LegalW) {
417 // Boolean types for vector pairs will overlap with the boolean
418 // types for single vectors, e.g.
419 // v64i8 -> v64i1 (single)
420 // v64i16 -> v64i1 (pair)
421 // Set these actions first, and allow the single actions to overwrite
422 // any duplicates.
423 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
428 // Masked load/store takes a mask that may need splitting.
429 setOperationAction(ISD::MLOAD, BoolW, Custom);
430 setOperationAction(ISD::MSTORE, BoolW, Custom);
431 }
432
433 for (MVT T : LegalV) {
434 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
445 }
446
447 if (Use64b) {
448 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
450 } else {
451 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
453 }
454
455 // Handle store widening for short vectors.
456 unsigned HwLen = Subtarget.getVectorLength();
457 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
458 if (ElemTy == MVT::i1)
459 continue;
460 int ElemWidth = ElemTy.getFixedSizeInBits();
461 int MaxElems = (8*HwLen) / ElemWidth;
// Walk the power-of-2 sub-vector sizes smaller than a full HVX vector.
462 for (int N = 2; N < MaxElems; N *= 2) {
463 MVT VecTy = MVT::getVectorVT(ElemTy, N);
464 auto Action = getPreferredVectorAction(VecTy);
466 setOperationAction(ISD::LOAD, VecTy, Custom);
467 setOperationAction(ISD::STORE, VecTy, Custom);
474 if (Subtarget.useHVXFloatingPoint()) {
479 }
480
481 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
482 if (!isTypeLegal(BoolTy))
484 }
485 }
486 }
487
488 // Include cases which are not handled earlier
492
494}
495
// Pick the preferred type-legalization action for the vector type VecTy, or
// return ~0u to defer to the default handling. The returned value is
// presumably a TargetLoweringBase::LegalizeTypeAction — confirm at the
// caller (getPreferredVectorAction).
// NOTE(review): the actual "return <action>" lines (orig. 508, 530, 534,
// 536) were elided in this extraction.
496 unsigned
497 HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
498 // Early exit for invalid input types
499 if (!VecTy.isVector())
500 return ~0u;
501
502 MVT ElemTy = VecTy.getVectorElementType();
503 unsigned VecLen = VecTy.getVectorNumElements();
504 unsigned HwLen = Subtarget.getVectorLength();
505
506 // Split vectors of i1 that exceed byte vector length.
507 if (ElemTy == MVT::i1 && VecLen > HwLen)
509
510 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
511 // For shorter vectors of i1, widen them if any of the corresponding
512 // vectors of integers needs to be widened.
513 if (ElemTy == MVT::i1) {
514 for (MVT T : Tys) {
515 assert(T != MVT::i1);
// Recurse with the same element count but an integer element type;
// the first non-default answer wins.
516 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
517 if (A != ~0u)
518 return A;
519 }
520 return ~0u;
521 }
522
523 // If the size of VecTy is at least half of the vector length,
524 // widen the vector. Note: the threshold was not selected in
525 // any scientific way.
526 if (llvm::is_contained(Tys, ElemTy)) {
527 unsigned VecWidth = VecTy.getSizeInBits();
528 unsigned HwWidth = 8*HwLen;
529 if (VecWidth > 2*HwWidth)
531
// An explicit -hexagon-hvx-widen threshold overrides the default rule.
532 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
533 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
535 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
537 }
538
539 // Defer to default.
540 return ~0u;
541}
542
// Decide the action for a custom-lowered HVX node, dispatching on its
// opcode.
// NOTE(review): the switch cases (orig. lines 547-550) and the final return
// (orig. 552) were elided in this extraction — only the function frame is
// visible here.
543 unsigned
544 HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
545 unsigned Opc = Op.getOpcode();
546 switch (Opc) {
551 }
553}
554
// Build an ISD::INTRINSIC_WO_CHAIN node for the Hexagon intrinsic IntId:
// the intrinsic id is prepended as a constant i32 operand, followed by Ops,
// matching the INTRINSIC_WO_CHAIN operand convention.
// NOTE(review): the return-type line (orig. 555) and the declaration of
// IntOps (orig. 558) were elided in this extraction.
556 HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
557 const SDLoc &dl, SelectionDAG &DAG) const {
559 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
560 append_range(IntOps, Ops);
561 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
562}
563
564MVT
565HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
566 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
567
568 MVT ElemTy = Tys.first.getVectorElementType();
569 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
570 Tys.second.getVectorNumElements());
571}
572
573HexagonTargetLowering::TypePair
574HexagonTargetLowering::typeSplit(MVT VecTy) const {
575 assert(VecTy.isVector());
576 unsigned NumElem = VecTy.getVectorNumElements();
577 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
578 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
579 return { HalfTy, HalfTy };
580}
581
582MVT
583HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
584 MVT ElemTy = VecTy.getVectorElementType();
585 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
586 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
587}
588
589MVT
590HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
591 MVT ElemTy = VecTy.getVectorElementType();
592 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
593 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
594}
595
// Bitcast Vec to a same-sized vector whose element type is ElemTy; no-op
// when the element type already matches.
// NOTE(review): the return-type line (orig. 596, presumably SDValue) was
// elided in this extraction.
597 HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
598 SelectionDAG &DAG) const {
599 if (ty(Vec).getVectorElementType() == ElemTy)
600 return Vec;
601 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
602 return DAG.getBitcast(CastTy, Vec);
603}
604
// Concatenate the two vectors of Ops into a single vector of the joined
// type (see typeJoin).
// NOTE(review): the return-type line (orig. 605, presumably SDValue) was
// elided in this extraction.
606 HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
607 SelectionDAG &DAG) const {
608 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
609 Ops.first, Ops.second);
610}
611
612HexagonTargetLowering::VectorPair
613HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
614 SelectionDAG &DAG) const {
615 TypePair Tys = typeSplit(ty(Vec));
616 if (Vec.getOpcode() == HexagonISD::QCAT)
617 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
618 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
619}
620
621bool
622HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
623 return Subtarget.isHVXVectorType(Ty) &&
624 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
625}
626
627bool
628HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
629 return Subtarget.isHVXVectorType(Ty) &&
630 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
631}
632
633bool
634HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
635 return Subtarget.isHVXVectorType(Ty, true) &&
636 Ty.getVectorElementType() == MVT::i1;
637}
638
639bool HexagonTargetLowering::allowsHvxMemoryAccess(
640 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
641 // Bool vectors are excluded by default, but make it explicit to
642 // emphasize that bool vectors cannot be loaded or stored.
643 // Also, disallow double vector stores (to prevent unnecessary
644 // store widening in DAG combiner).
645 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
646 return false;
647 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
648 return false;
649 if (Fast)
650 *Fast = 1;
651 return true;
652}
653
654bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
655 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
656 if (!Subtarget.isHVXVectorType(VecTy))
657 return false;
658 // XXX Should this be false? vmemu are a bit slower than vmem.
659 if (Fast)
660 *Fast = 1;
661 return true;
662}
663
// Expand the PS_vsplat* pseudo-instructions after instruction selection
// into real splat instructions. On HVX v62+ the byte/half splats
// (V6_lvsplatb/h) are available directly; on older targets the scalar
// operand is first replicated into a full 32-bit value and V6_lvsplatw is
// used instead. The pseudo instruction is erased (or re-described, for the
// word forms) once the replacement is emitted.
664 void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
665 MachineInstr &MI, SDNode *Node) const {
666 unsigned Opc = MI.getOpcode();
667 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
668 MachineBasicBlock &MB = *MI.getParent();
669 MachineFunction &MF = *MB.getParent();
670 MachineRegisterInfo &MRI = MF.getRegInfo();
671 DebugLoc DL = MI.getDebugLoc();
// New instructions are inserted immediately before MI.
672 auto At = MI.getIterator();
673
674 switch (Opc) {
// Splat of an immediate byte.
675 case Hexagon::PS_vsplatib:
676 if (Subtarget.useHVXV62Ops()) {
677 // SplatV = A2_tfrsi #imm
678 // OutV = V6_lvsplatb SplatV
679 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
680 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
681 .add(MI.getOperand(1));
682 Register OutV = MI.getOperand(0).getReg();
683 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
684 .addReg(SplatV);
685 } else {
686 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
687 // OutV = V6_lvsplatw SplatV
688 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
689 const MachineOperand &InpOp = MI.getOperand(1);
690 assert(InpOp.isImm());
691 uint32_t V = InpOp.getImm() & 0xFF;
// Replicate the byte into all four byte lanes of the word.
692 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
693 .addImm(V << 24 | V << 16 | V << 8 | V);
694 Register OutV = MI.getOperand(0).getReg();
695 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
696 }
697 MB.erase(At);
698 break;
// Splat of a byte held in a register.
699 case Hexagon::PS_vsplatrb:
700 if (Subtarget.useHVXV62Ops()) {
701 // OutV = V6_lvsplatb Inp
702 Register OutV = MI.getOperand(0).getReg();
703 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
704 .add(MI.getOperand(1));
705 } else {
// Pre-v62: replicate the byte across the word with S2_vsplatrb first.
706 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
707 const MachineOperand &InpOp = MI.getOperand(1);
708 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
709 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
710 Register OutV = MI.getOperand(0).getReg();
711 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
712 .addReg(SplatV);
713 }
714 MB.erase(At);
715 break;
// Splat of an immediate halfword.
716 case Hexagon::PS_vsplatih:
717 if (Subtarget.useHVXV62Ops()) {
718 // SplatV = A2_tfrsi #imm
719 // OutV = V6_lvsplath SplatV
720 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
721 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
722 .add(MI.getOperand(1));
723 Register OutV = MI.getOperand(0).getReg();
724 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
725 .addReg(SplatV);
726 } else {
727 // SplatV = A2_tfrsi #imm:#imm
728 // OutV = V6_lvsplatw SplatV
729 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
730 const MachineOperand &InpOp = MI.getOperand(1);
731 assert(InpOp.isImm());
732 uint32_t V = InpOp.getImm() & 0xFFFF;
// Replicate the halfword into both halves of the word.
733 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
734 .addImm(V << 16 | V);
735 Register OutV = MI.getOperand(0).getReg();
736 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
737 }
738 MB.erase(At);
739 break;
// Splat of a halfword held in a register.
740 case Hexagon::PS_vsplatrh:
741 if (Subtarget.useHVXV62Ops()) {
742 // OutV = V6_lvsplath Inp
743 Register OutV = MI.getOperand(0).getReg();
744 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
745 .add(MI.getOperand(1));
746 } else {
747 // SplatV = A2_combine_ll Inp, Inp
748 // OutV = V6_lvsplatw SplatV
749 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
750 const MachineOperand &InpOp = MI.getOperand(1);
751 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
752 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
753 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
754 Register OutV = MI.getOperand(0).getReg();
755 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
756 }
757 MB.erase(At);
758 break;
// Word splats: immediate form materializes the value first, then the
// pseudo itself is morphed into V6_lvsplatw in place (not erased).
759 case Hexagon::PS_vsplatiw:
760 case Hexagon::PS_vsplatrw:
761 if (Opc == Hexagon::PS_vsplatiw) {
762 // SplatV = A2_tfrsi #imm
763 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
764 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
765 .add(MI.getOperand(1));
766 MI.getOperand(1).ChangeToRegister(SplatV, false);
767 }
768 // OutV = V6_lvsplatw SplatV/Inp
769 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
770 break;
771 }
772}
773
// Convert an element index into the corresponding byte index by shifting
// left by log2(element size in bytes). The index is first normalized to
// i32; byte-sized elements are returned unchanged.
// NOTE(review): the return-type line (orig. 774, presumably SDValue) was
// elided in this extraction.
775 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
776 SelectionDAG &DAG) const {
777 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
778 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
779
780 unsigned ElemWidth = ElemTy.getSizeInBits();
781 if (ElemWidth == 8)
782 return ElemIdx;
783
784 unsigned L = Log2_32(ElemWidth/8);
785 const SDLoc &dl(ElemIdx);
786 return DAG.getNode(ISD::SHL, dl, MVT::i32,
787 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
788}
789
// Reduce Idx to the position of the element within its containing 32-bit
// word: Idx & (32/ElemWidth - 1). For 32-bit elements the index is
// returned unchanged. ElemWidth must be between 8 and 32 bits.
// NOTE(review): the return-type line (orig. 790, presumably SDValue) was
// elided in this extraction.
791 HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
792 SelectionDAG &DAG) const {
793 unsigned ElemWidth = ElemTy.getSizeInBits();
794 assert(ElemWidth >= 8 && ElemWidth <= 32);
795 if (ElemWidth == 32)
796 return Idx;
797
798 if (ty(Idx) != MVT::i32)
799 Idx = DAG.getBitcast(MVT::i32, Idx);
800 const SDLoc &dl(Idx);
// Mask keeps only the sub-word part of the index.
801 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
802 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
803 return SubIdx;
804}
805
// Emit the shuffle described by Mask as a shuffle of byte vectors: each
// mask entry for an element of ElemSize bytes is expanded into ElemSize
// consecutive byte indices (undef entries expand to undef bytes), and the
// operands are bitcast to i8 vectors. Byte-element inputs shuffle directly.
// NOTE(review): the return-type line (orig. 806, presumably SDValue) was
// elided in this extraction.
807 HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
808 SDValue Op1, ArrayRef<int> Mask,
809 SelectionDAG &DAG) const {
810 MVT OpTy = ty(Op0);
811 assert(OpTy == ty(Op1));
812
813 MVT ElemTy = OpTy.getVectorElementType();
814 if (ElemTy == MVT::i8)
815 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
816 assert(ElemTy.getSizeInBits() >= 8);
817
818 MVT ResTy = tyVector(OpTy, MVT::i8);
819 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
820
// Expand each element-level mask entry into ElemSize byte-level entries.
821 SmallVector<int,128> ByteMask;
822 for (int M : Mask) {
823 if (M < 0) {
824 for (unsigned I = 0; I != ElemSize; ++I)
825 ByteMask.push_back(-1);
826 } else {
827 int NewM = M*ElemSize;
828 for (unsigned I = 0; I != ElemSize; ++I)
829 ByteMask.push_back(NewM+I);
830 }
831 }
832 assert(ResTy.getVectorNumElements() == ByteMask.size());
833 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
834 opCastElem(Op1, MVT::i8, DAG), ByteMask);
835}
836
// Build a single HVX vector register of type VecTy from the scalar Values
// (one value per element). Strategy, in order of preference: recognize a
// splat (undef, zero, or a splatted value); recognize an all-constant
// vector and load it from the constant pool; recognize a vector built
// entirely from extracts of one source vector and turn it into a shuffle;
// otherwise assemble the vector word by word with rotate/insert-word ops,
// building two halves in parallel and OR-ing them together.
// NOTE(review): several lines were elided in this extraction (orig. 837
// return type, 849 declaration of Words, 909 load operands) — confirm
// against the upstream source.
838 HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
839 const SDLoc &dl, MVT VecTy,
840 SelectionDAG &DAG) const {
841 unsigned VecLen = Values.size();
// NOTE(review): MF appears unused in the visible lines — it is likely
// referenced by one of the elided lines; confirm before removing.
842 MachineFunction &MF = DAG.getMachineFunction();
843 MVT ElemTy = VecTy.getVectorElementType();
844 unsigned ElemWidth = ElemTy.getSizeInBits();
845 unsigned HwLen = Subtarget.getVectorLength();
846
847 unsigned ElemSize = ElemWidth / 8;
848 assert(ElemSize*VecLen == HwLen);
850
// Normalize the input into a list of i32 words: sub-word elements are
// first packed into 32-bit groups via buildVector32.
851 if (VecTy.getVectorElementType() != MVT::i32 &&
852 !(Subtarget.useHVXFloatingPoint() &&
853 VecTy.getVectorElementType() == MVT::f32)) {
854 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
855 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
856 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
857 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
858 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
859 Words.push_back(DAG.getBitcast(MVT::i32, W));
860 }
861 } else {
862 for (SDValue V : Values)
863 Words.push_back(DAG.getBitcast(MVT::i32, V));
864 }
// Detect a splat: all non-undef values identical (all-undef counts too).
865 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
866 unsigned NumValues = Values.size();
867 assert(NumValues > 0);
868 bool IsUndef = true;
869 for (unsigned i = 0; i != NumValues; ++i) {
870 if (Values[i].isUndef())
871 continue;
872 IsUndef = false;
873 if (!SplatV.getNode())
874 SplatV = Values[i];
875 else if (SplatV != Values[i])
876 return false;
877 }
878 if (IsUndef)
879 SplatV = Values[0];
880 return true;
881 };
882
883 unsigned NumWords = Words.size();
884 SDValue SplatV;
885 bool IsSplat = isSplat(Words, SplatV);
886 if (IsSplat && isUndef(SplatV))
887 return DAG.getUNDEF(VecTy);
888 if (IsSplat) {
889 assert(SplatV.getNode());
890 if (isNullConstant(SplatV))
891 return getZero(dl, VecTy, DAG);
892 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
893 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
894 return DAG.getBitcast(VecTy, S);
895 }
896
897 // Delay recognizing constant vectors until here, so that we can generate
898 // a vsplat.
899 SmallVector<ConstantInt*, 128> Consts(VecLen);
900 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
901 if (AllConst) {
902 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
903 (Constant**)Consts.end());
904 Constant *CV = ConstantVector::get(Tmp);
905 Align Alignment(HwLen);
906 SDValue CP =
907 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
908 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
910 }
911
912 // A special case is a situation where the vector is built entirely from
913 // elements extracted from another vector. This could be done via a shuffle
914 // more efficiently, but typically, the size of the source vector will not
915 // match the size of the vector being built (which precludes the use of a
916 // shuffle directly).
917 // This only handles a single source vector, and the vector being built
918 // should be of a sub-vector type of the source vector type.
919 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
920 SmallVectorImpl<int> &SrcIdx) {
921 SDValue Vec;
922 for (SDValue V : Values) {
923 if (isUndef(V)) {
924 SrcIdx.push_back(-1);
925 continue;
926 }
927 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
928 return false;
929 // All extracts should come from the same vector.
930 SDValue T = V.getOperand(0);
931 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
932 return false;
933 Vec = T;
// Only constant extract indices can be turned into a shuffle mask.
934 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
935 if (C == nullptr)
936 return false;
937 int I = C->getSExtValue();
938 assert(I >= 0 && "Negative element index");
939 SrcIdx.push_back(I);
940 }
941 SrcVec = Vec;
942 return true;
943 };
944
945 SmallVector<int,128> ExtIdx;
946 SDValue ExtVec;
947 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
948 MVT ExtTy = ty(ExtVec);
949 unsigned ExtLen = ExtTy.getVectorNumElements();
950 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
951 // Construct a new shuffle mask that will produce a vector with the same
952 // number of elements as the input vector, and such that the vector we
953 // want will be the initial subvector of it.
954 SmallVector<int,128> Mask;
955 BitVector Used(ExtLen);
956
957 for (int M : ExtIdx) {
958 Mask.push_back(M);
959 if (M >= 0)
960 Used.set(M);
961 }
962 // Fill the rest of the mask with the unused elements of ExtVec in hopes
963 // that it will result in a permutation of ExtVec's elements. It's still
964 // fine if it doesn't (e.g. if undefs are present, or elements are
965 // repeated), but permutations can always be done efficiently via vdelta
966 // and vrdelta.
967 for (unsigned I = 0; I != ExtLen; ++I) {
968 if (Mask.size() == ExtLen)
969 break;
970 if (!Used.test(I))
971 Mask.push_back(I);
972 }
973
974 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
975 DAG.getUNDEF(ExtTy), Mask);
976 return ExtLen == VecLen ? S : LoHalf(S, DAG);
977 }
978 }
979
980 // Find most common element to initialize vector with. This is to avoid
981 // unnecessary vinsert/valign for cases where the same value is present
982 // many times. Creates a histogram of the vector's elements to find the
983 // most common element n.
984 assert(4*Words.size() == Subtarget.getVectorLength());
985 int VecHist[32];
986 int n = 0;
987 for (unsigned i = 0; i != NumWords; ++i) {
988 VecHist[i] = 0;
989 if (Words[i].isUndef())
990 continue;
991 for (unsigned j = i; j != NumWords; ++j)
992 if (Words[i] == Words[j])
993 VecHist[i]++;
994
995 if (VecHist[i] > VecHist[n])
996 n = i;
997 }
998
// Pre-fill with the most common word (via splat+valign) when it repeats,
// so the insertion loop below can skip those positions.
999 SDValue HalfV = getZero(dl, VecTy, DAG);
1000 if (VecHist[n] > 1) {
1001 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
1002 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
1003 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
1004 }
1005 SDValue HalfV0 = HalfV;
1006 SDValue HalfV1 = HalfV;
1007
1008 // Construct two halves in parallel, then or them together. Rn and Rm count
1009 // number of rotations needed before the next element. One last rotation is
1010 // performed post-loop to position the last element.
1011 int Rn = 0, Rm = 0;
1012 SDValue Sn, Sm;
1013 SDValue N = HalfV0;
1014 SDValue M = HalfV1;
1015 for (unsigned i = 0; i != NumWords/2; ++i) {
1016 // Rotate by element count since last insertion.
1017 if (Words[i] != Words[n] || VecHist[n] <= 1) {
1018 Sn = DAG.getConstant(Rn, dl, MVT::i32);
1019 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
1020 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
1021 {HalfV0, Words[i]});
1022 Rn = 0;
1023 }
1024 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
1025 Sm = DAG.getConstant(Rm, dl, MVT::i32);
1026 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1027 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
1028 {HalfV1, Words[i+NumWords/2]});
1029 Rm = 0;
1030 }
// Each word is 4 bytes; accumulate the rotation distance.
1031 Rn += 4;
1032 Rm += 4;
1033 }
1034 // Perform last rotation.
1035 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
1036 Sm = DAG.getConstant(Rm, dl, MVT::i32);
1037 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
1038 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1039
// Combine the two halves with a bitwise OR on i32 vectors, then cast the
// result back to the requested element type.
1040 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
1041 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
1042
1043 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
1044
1045 SDValue OutV =
1046 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
1047 return OutV;
1048}
1049
// Create a "prefix" representation of the predicate PredV in a full HVX
// byte vector: each i1 element of PredV is represented by BitBytes
// consecutive bytes, and the whole block of meaningful bytes is placed at
// the front of the result. If ZeroFill is set, the bytes past that block
// are zeroed; otherwise they are unspecified.
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  MVT PredTy = ty(PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);

  if (Subtarget.isHVXVectorType(PredTy, true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Build a shuffle mask that moves every Scale-th byte into the first
    // BlockLen result positions; the remaining source bytes are scattered
    // into the later BlockLen-sized sections of the mask.
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
    SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                         {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
    // Mask off (AND with 0) everything past the first BlockLen bytes.
    SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
    return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Bytes per predicate lane in the 64-bit value produced by P2D below.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Two word lists used in a ping-pong fashion: one holds the current set
  // of 32-bit words, the other receives the next resizing step. IdxW
  // selects the "current" list.
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  // Transfer the scalar predicate into a 64-bit scalar register.
  SDValue W0 = isUndef(PredV)
                  ? DAG.getUNDEF(MVT::i64)
                  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
  if (Bytes < BitBytes) {
    // Will need to expand: start from the two 32-bit halves.
    Words[IdxW].push_back(HiHalf(W0, DAG));
    Words[IdxW].push_back(LoHalf(W0, DAG));
  } else
    Words[IdxW].push_back(W0);

  // Grow the per-lane byte count until it reaches BitBytes.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      // Double each lane's width via expandPredicate, then split the
      // 64-bit result back into 32-bit words.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(W, dl, DAG);
        Words[IdxW].push_back(HiHalf(T, DAG));
        Words[IdxW].push_back(LoHalf(T, DAG));
      }
    } else {
      // Each lane already occupies at least a full word: duplicate the
      // words to double the byte count per lane.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
        Words[IdxW].push_back(W);
      }
    }
    Bytes *= 2;
  }

  // Shrink the per-lane byte count when it exceeds BitBytes.
  while (Bytes > BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes <= 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = contractPredicate(W, dl, DAG);
        Words[IdxW].push_back(T);
      }
    } else {
      // NOTE(review): this branch copies the words unchanged while Bytes
      // is halved; Bytes starts at <= 4 here, so this path appears
      // unreachable in practice — confirm.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
      }
    }
    Bytes /= 2;
  }

  assert(Bytes == BitBytes);
  // NOTE(review): for a v2i1 source at one byte per bit the container type
  // is widened to i16 elements — confirm that callers expect this type.
  if (BitBytes == 1 && PredTy == MVT::v2i1)
    ByteTy = MVT::getVectorVT(MVT::i16, HwLen);

  // Insert the words at position 0, rotating by HwLen-4 before each
  // insertion so that previously inserted words shift towards the end.
  SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
  SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
    Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
  }

  return Vec;
}
1156
1157SDValue
1158HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1159 const SDLoc &dl, MVT VecTy,
1160 SelectionDAG &DAG) const {
1161 // Construct a vector V of bytes, such that a comparison V >u 0 would
1162 // produce the required vector predicate.
1163 unsigned VecLen = Values.size();
1164 unsigned HwLen = Subtarget.getVectorLength();
1165 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1167 bool AllT = true, AllF = true;
1168
1169 auto IsTrue = [] (SDValue V) {
1170 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1171 return !N->isZero();
1172 return false;
1173 };
1174 auto IsFalse = [] (SDValue V) {
1175 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1176 return N->isZero();
1177 return false;
1178 };
1179
1180 if (VecLen <= HwLen) {
1181 // In the hardware, each bit of a vector predicate corresponds to a byte
1182 // of a vector register. Calculate how many bytes does a bit of VecTy
1183 // correspond to.
1184 assert(HwLen % VecLen == 0);
1185 unsigned BitBytes = HwLen / VecLen;
1186 for (SDValue V : Values) {
1187 AllT &= IsTrue(V);
1188 AllF &= IsFalse(V);
1189
1190 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1191 : DAG.getUNDEF(MVT::i8);
1192 for (unsigned B = 0; B != BitBytes; ++B)
1193 Bytes.push_back(Ext);
1194 }
1195 } else {
1196 // There are as many i1 values, as there are bits in a vector register.
1197 // Divide the values into groups of 8 and check that each group consists
1198 // of the same value (ignoring undefs).
1199 for (unsigned I = 0; I != VecLen; I += 8) {
1200 unsigned B = 0;
1201 // Find the first non-undef value in this group.
1202 for (; B != 8; ++B) {
1203 if (!Values[I+B].isUndef())
1204 break;
1205 }
1206 SDValue F = Values[I+B];
1207 AllT &= IsTrue(F);
1208 AllF &= IsFalse(F);
1209
1210 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1211 : DAG.getUNDEF(MVT::i8);
1212 Bytes.push_back(Ext);
1213 // Verify that the rest of values in the group are the same as the
1214 // first.
1215 for (; B != 8; ++B)
1216 assert(Values[I+B].isUndef() || Values[I+B] == F);
1217 }
1218 }
1219
1220 if (AllT)
1221 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1222 if (AllF)
1223 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1224
1225 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1226 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1227 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1228}
1229
1230SDValue
1231HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1232 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1233 MVT ElemTy = ty(VecV).getVectorElementType();
1234
1235 unsigned ElemWidth = ElemTy.getSizeInBits();
1236 assert(ElemWidth >= 8 && ElemWidth <= 32);
1237 (void)ElemWidth;
1238
1239 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1240 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1241 {VecV, ByteIdx});
1242 if (ElemTy == MVT::i32)
1243 return ExWord;
1244
1245 // Have an extracted word, need to extract the smaller element out of it.
1246 // 1. Extract the bits of (the original) IdxV that correspond to the index
1247 // of the desired element in the 32-bit word.
1248 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1249 // 2. Extract the element from the word.
1250 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1251 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1252}
1253
1254SDValue
1255HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1256 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1257 // Implement other return types if necessary.
1258 assert(ResTy == MVT::i1);
1259
1260 unsigned HwLen = Subtarget.getVectorLength();
1261 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1262 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1263
1264 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1265 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1266 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1267
1268 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1269 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1270 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1271}
1272
1273SDValue
1274HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1275 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1276 MVT ElemTy = ty(VecV).getVectorElementType();
1277
1278 unsigned ElemWidth = ElemTy.getSizeInBits();
1279 assert(ElemWidth >= 8 && ElemWidth <= 32);
1280 (void)ElemWidth;
1281
1282 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1283 SDValue ByteIdxV) {
1284 MVT VecTy = ty(VecV);
1285 unsigned HwLen = Subtarget.getVectorLength();
1286 SDValue MaskV =
1287 DAG.getNode(ISD::AND, dl, MVT::i32,
1288 {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
1289 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1290 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1291 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1292 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1293 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1294 return TorV;
1295 };
1296
1297 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1298 if (ElemTy == MVT::i32)
1299 return InsertWord(VecV, ValV, ByteIdx);
1300
1301 // If this is not inserting a 32-bit word, convert it into such a thing.
1302 // 1. Extract the existing word from the target vector.
1303 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1304 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1305 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1306 dl, MVT::i32, DAG);
1307
1308 // 2. Treating the extracted word as a 32-bit vector, insert the given
1309 // value into it.
1310 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1311 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1312 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1313 ValV, SubIdx, dl, ElemTy, DAG);
1314
1315 // 3. Insert the 32-bit word back into the original vector.
1316 return InsertWord(VecV, Ins, ByteIdx);
1317}
1318
1319SDValue
1320HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1321 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1322 unsigned HwLen = Subtarget.getVectorLength();
1323 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1324 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1325
1326 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1327 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1328 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1329 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1330
1331 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1332 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1333}
1334
// Extract the subvector of type ResTy starting at the constant index IdxV
// from the HVX vector (or vector pair) VecV. ResTy must fit in a 32- or
// 64-bit scalar register unless it is itself a full single vector.
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
      SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  // IdxV is required to be a constant here.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
  if (isHvxPairTy(VecTy)) {
    unsigned SubIdx = Hexagon::vsub_lo;
    // Pick the high subregister when the bit offset reaches the second
    // single vector, and rebase Idx relative to that vector.
    if (Idx * ElemWidth >= 8 * HwLen) {
      SubIdx = Hexagon::vsub_hi;
      Idx -= VecTy.getVectorNumElements() / 2;
    }

    VecTy = typeSplit(VecTy).first;
    VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
    // A full single vector was requested: done.
    if (VecTy == ResTy)
      return VecV;
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);

  // View the vector as words and locate the first word of the subvector.
  MVT WordTy = tyVector(VecTy, MVT::i32);
  SDValue WordVec = DAG.getBitcast(WordTy, VecV);
  unsigned WordIdx = (Idx*ElemWidth) / 32;

  SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
  SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
  // A 32-bit result needs just the one word.
  if (ResTy.getSizeInBits() == 32)
    return DAG.getBitcast(ResTy, W0);

  // A 64-bit result: extract the next word too and combine the pair.
  SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
  SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
  SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
  return DAG.getBitcast(ResTy, WW);
}
1378
// Extract a subvector of type ResTy starting at constant index IdxV from
// the HVX vector predicate VecV. The result is either a shorter HVX
// predicate or a scalar predicate (v2i1/v4i1/v8i1).
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  // Work on the byte-vector image of the predicate.
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes of ByteVec covered by a single i1 element of VecTy.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  // Byte offset of the first element of the requested subvector.
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
  SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
                  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
}
1442
// Insert the subvector SubV into the HVX vector (or vector pair) VecV at
// element index IdxV. SubV is either a full single vector (pair case) or
// a 32/64-bit subvector; IdxV need not be a constant.
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(VecTy);
  // Type of one single vector with VecTy's element type.
  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(VecV, DAG);
    V1 = HiHalf(VecV, DAG);

    SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
                                    dl, MVT::i32);
    // NOTE(review): SETUGT selects the low half when IdxV equals exactly
    // the single-vector element count — confirm callers never place a
    // subvector starting exactly at the high half through this path.
    PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
    if (isHvxSingleTy(SubTy)) {
      // Inserting a whole single vector: this is a subregister insert.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
      SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
      return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
    IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
    SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                       DAG.getConstant(ElemWidth/8, dl, MVT::i32));
    // Rotate the insertion point down to position 0.
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    // Single word: insert it at position 0.
    SDValue V = DAG.getBitcast(MVT::i32, SubV);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
  } else {
    // Two words: insert low word, rotate by 4 bytes, insert high word.
    SDValue V = DAG.getBitcast(MVT::i64, SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
                          DAG.getConstant(4, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
  }

  if (IsPair) {
    // Recombine the updated single vector with the untouched half.
    SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
    SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
    return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
  }
  return SingleV;
}
1529
// Insert the predicate subvector SubV into the HVX vector predicate VecV
// at element index IdxV. SubV may be an HVX predicate or a scalar
// predicate (v2i1/v4i1/v8i1).
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Ratio of the container length to the subvector length.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // Bytes of the byte vector that correspond to one i1 element of VecTy.
  unsigned BitBytes = HwLen / VecLen;
  // Number of bytes occupied by the subvector's image.
  unsigned BlockLen = HwLen / Scale;

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // Byte-vector image of SubV, placed at the front (bytes past BlockLen
  // are left unspecified; they get masked below by vmux).
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point to byte position 0.
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1575
1576SDValue
1577HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1578 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1579 // Sign- and any-extending of a vector predicate to a vector register is
1580 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1581 // a vector of 1s (where the 1s are of type matching the vector type).
1582 assert(Subtarget.isHVXVectorType(ResTy));
1583 if (!ZeroExt)
1584 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1585
1586 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1587 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1588 DAG.getConstant(1, dl, MVT::i32));
1589 SDValue False = getZero(dl, ResTy, DAG);
1590 return DAG.getSelect(dl, ResTy, VecV, True, False);
1591}
1592
1593SDValue
1594HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1595 MVT ResTy, SelectionDAG &DAG) const {
1596 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1597 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1598 // vector register. The remaining bits of the vector register are
1599 // unspecified.
1600
1601 MachineFunction &MF = DAG.getMachineFunction();
1602 unsigned HwLen = Subtarget.getVectorLength();
1603 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1604 MVT PredTy = ty(VecQ);
1605 unsigned PredLen = PredTy.getVectorNumElements();
1606 assert(HwLen % PredLen == 0);
1607 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1608
1609 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1611 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1612 // These are bytes with the LSB rotated left with respect to their index.
1613 for (unsigned i = 0; i != HwLen/8; ++i) {
1614 for (unsigned j = 0; j != 8; ++j)
1615 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1616 }
1617 Constant *CV = ConstantVector::get(Tmp);
1618 Align Alignment(HwLen);
1619 SDValue CP =
1620 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1621 SDValue Bytes =
1622 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1624
1625 // Select the bytes that correspond to true bits in the vector predicate.
1626 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1627 getZero(dl, VecTy, DAG));
1628 // Calculate the OR of all bytes in each group of 8. That will compress
1629 // all the individual bits into a single byte.
1630 // First, OR groups of 4, via vrmpy with 0x01010101.
1631 SDValue All1 =
1632 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1633 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1634 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1635 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1636 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1637 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1638
1639 // Pick every 8th byte and coalesce them at the beginning of the output.
1640 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1641 // byte and so on.
1642 SmallVector<int,128> Mask;
1643 for (unsigned i = 0; i != HwLen; ++i)
1644 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1645 SDValue Collect =
1646 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1647 return DAG.getBitcast(ResTy, Collect);
1648}
1649
1650SDValue
1651HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1652 const SDLoc &dl, SelectionDAG &DAG) const {
1653 // Take a vector and resize the element type to match the given type.
1654 MVT InpTy = ty(VecV);
1655 if (InpTy == ResTy)
1656 return VecV;
1657
1658 unsigned InpWidth = InpTy.getSizeInBits();
1659 unsigned ResWidth = ResTy.getSizeInBits();
1660
1661 if (InpTy.isFloatingPoint()) {
1662 return InpWidth < ResWidth
1663 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1664 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1665 DAG.getTargetConstant(0, dl, MVT::i32));
1666 }
1667
1668 assert(InpTy.isInteger());
1669
1670 if (InpWidth < ResWidth) {
1671 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1672 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1673 } else {
1674 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1675 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1676 }
1677}
1678
1679SDValue
1680HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1681 SelectionDAG &DAG) const {
1682 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1683
1684 const SDLoc &dl(Vec);
1685 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1686 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1687 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1688}
1689
1690SDValue
1691HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1692 const {
1693 const SDLoc &dl(Op);
1694 MVT VecTy = ty(Op);
1695
1696 unsigned Size = Op.getNumOperands();
1698 for (unsigned i = 0; i != Size; ++i)
1699 Ops.push_back(Op.getOperand(i));
1700
1701 if (VecTy.getVectorElementType() == MVT::i1)
1702 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1703
1704 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1705 // not a legal type, just bitcast the node to use i16
1706 // types and bitcast the result back to f16
1707 if (VecTy.getVectorElementType() == MVT::f16 ||
1708 VecTy.getVectorElementType() == MVT::bf16) {
1710 for (unsigned i = 0; i != Size; i++)
1711 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1712
1713 SDValue T0 =
1714 DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps);
1715 return DAG.getBitcast(tyVector(VecTy, VecTy.getVectorElementType()), T0);
1716 }
1717
1718 // First, split the BUILD_VECTOR for vector pairs. We could generate
1719 // some pairs directly (via splat), but splats should be generated
1720 // by the combiner prior to getting here.
1721 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1723 MVT SingleTy = typeSplit(VecTy).first;
1724 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1725 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1726 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1727 }
1728
1729 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1730}
1731
1732SDValue
1733HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1734 const {
1735 const SDLoc &dl(Op);
1736 MVT VecTy = ty(Op);
1737 MVT ArgTy = ty(Op.getOperand(0));
1738
1739 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1740 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1741 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1742 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1743 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1744 return DAG.getBitcast(VecTy, Splat);
1745 }
1746
1747 return SDValue();
1748}
1749
1750SDValue
1751HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1752 const {
1753 // Vector concatenation of two integer (non-bool) vectors does not need
1754 // special lowering. Custom-lower concats of bool vectors and expand
1755 // concats of more than 2 vectors.
1756 MVT VecTy = ty(Op);
1757 const SDLoc &dl(Op);
1758 unsigned NumOp = Op.getNumOperands();
1759 if (VecTy.getVectorElementType() != MVT::i1) {
1760 if (NumOp == 2)
1761 return Op;
1762 // Expand the other cases into a build-vector.
1764 for (SDValue V : Op.getNode()->ops())
1765 DAG.ExtractVectorElements(V, Elems);
1766 // A vector of i16 will be broken up into a build_vector of i16's.
1767 // This is a problem, since at the time of operation legalization,
1768 // all operations are expected to be type-legalized, and i16 is not
1769 // a legal type. If any of the extracted elements is not of a valid
1770 // type, sign-extend it to a valid one.
1771 for (SDValue &V : Elems) {
1772 MVT Ty = ty(V);
1773 if (!isTypeLegal(Ty)) {
1774 MVT NTy = typeLegalize(Ty, DAG);
1775 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1776 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1777 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1778 V.getOperand(0), V.getOperand(1)),
1779 DAG.getValueType(Ty));
1780 continue;
1781 }
1782 // A few less complicated cases.
1783 switch (V.getOpcode()) {
1784 case ISD::Constant:
1785 V = DAG.getSExtOrTrunc(V, dl, NTy);
1786 break;
1787 case ISD::UNDEF:
1788 V = DAG.getUNDEF(NTy);
1789 break;
1790 case ISD::TRUNCATE:
1791 V = V.getOperand(0);
1792 break;
1793 default:
1794 llvm_unreachable("Unexpected vector element");
1795 }
1796 }
1797 }
1798 return DAG.getBuildVector(VecTy, dl, Elems);
1799 }
1800
1801 assert(VecTy.getVectorElementType() == MVT::i1);
1802 unsigned HwLen = Subtarget.getVectorLength();
1803 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1804
1805 SDValue Op0 = Op.getOperand(0);
1806
1807 // If the operands are HVX types (i.e. not scalar predicates), then
1808 // defer the concatenation, and create QCAT instead.
1809 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1810 if (NumOp == 2)
1811 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1812
1813 ArrayRef<SDUse> U(Op.getNode()->ops());
1816
1817 MVT HalfTy = typeSplit(VecTy).first;
1818 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1819 Ops.take_front(NumOp/2));
1820 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1821 Ops.take_back(NumOp/2));
1822 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1823 }
1824
1825 // Count how many bytes (in a vector register) each bit in VecTy
1826 // corresponds to.
1827 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1828
1829 SmallVector<SDValue,8> Prefixes;
1830 for (SDValue V : Op.getNode()->op_values()) {
1831 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1832 Prefixes.push_back(P);
1833 }
1834
1835 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1836 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1837 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1838 SDValue Res = getZero(dl, ByteTy, DAG);
1839 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1840 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1841 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1842 }
1843 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1844}
1845
1846SDValue
1847HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1848 const {
1849 // Change the type of the extracted element to i32.
1850 SDValue VecV = Op.getOperand(0);
1851 MVT ElemTy = ty(VecV).getVectorElementType();
1852 const SDLoc &dl(Op);
1853 SDValue IdxV = Op.getOperand(1);
1854 if (ElemTy == MVT::i1)
1855 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1856
1857 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1858}
1859
1860SDValue
1861HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1862 const {
1863 const SDLoc &dl(Op);
1864 MVT VecTy = ty(Op);
1865 SDValue VecV = Op.getOperand(0);
1866 SDValue ValV = Op.getOperand(1);
1867 SDValue IdxV = Op.getOperand(2);
1868 MVT ElemTy = ty(VecV).getVectorElementType();
1869 if (ElemTy == MVT::i1)
1870 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1871
1872 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1874 tyVector(VecTy, MVT::i16),
1875 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1876 DAG.getBitcast(MVT::i16, ValV), IdxV);
1877 return DAG.getBitcast(tyVector(VecTy, ElemTy), T0);
1878 }
1879
1880 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1881}
1882
1883SDValue
1884HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1885 const {
1886 SDValue SrcV = Op.getOperand(0);
1887 MVT SrcTy = ty(SrcV);
1888 MVT DstTy = ty(Op);
1889 SDValue IdxV = Op.getOperand(1);
1890 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1891 assert(Idx % DstTy.getVectorNumElements() == 0);
1892 (void)Idx;
1893 const SDLoc &dl(Op);
1894
1895 MVT ElemTy = SrcTy.getVectorElementType();
1896 if (ElemTy == MVT::i1)
1897 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1898
1899 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1900}
1901
1902SDValue
1903HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1904 const {
1905 // Idx does not need to be a constant.
1906 SDValue VecV = Op.getOperand(0);
1907 SDValue ValV = Op.getOperand(1);
1908 SDValue IdxV = Op.getOperand(2);
1909
1910 const SDLoc &dl(Op);
1911 MVT VecTy = ty(VecV);
1912 MVT ElemTy = VecTy.getVectorElementType();
1913 if (ElemTy == MVT::i1)
1914 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1915
1916 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1917}
1918
1919SDValue
1920HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1921 // Lower any-extends of boolean vectors to sign-extends, since they
1922 // translate directly to Q2V. Zero-extending could also be done equally
1923 // fast, but Q2V is used/recognized in more places.
1924 // For all other vectors, use zero-extend.
1925 MVT ResTy = ty(Op);
1926 SDValue InpV = Op.getOperand(0);
1927 MVT ElemTy = ty(InpV).getVectorElementType();
1928 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1929 return LowerHvxSignExt(Op, DAG);
1930 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1931}
1932
1933SDValue
1934HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1935 MVT ResTy = ty(Op);
1936 SDValue InpV = Op.getOperand(0);
1937 MVT ElemTy = ty(InpV).getVectorElementType();
1938 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1939 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1940 return Op;
1941}
1942
1943SDValue
1944HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1945 MVT ResTy = ty(Op);
1946 SDValue InpV = Op.getOperand(0);
1947 MVT ElemTy = ty(InpV).getVectorElementType();
1948 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1949 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1950 return Op;
1951}
1952
1953SDValue
1954HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1955 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1956 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1957 const SDLoc &dl(Op);
1958 MVT ResTy = ty(Op);
1959 SDValue InpV = Op.getOperand(0);
1960 assert(ResTy == ty(InpV));
1961
1962 // Calculate the vectors of 1 and bitwidth(x).
1963 MVT ElemTy = ty(InpV).getVectorElementType();
1964 unsigned ElemWidth = ElemTy.getSizeInBits();
1965
1966 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1967 DAG.getConstant(1, dl, MVT::i32));
1968 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1969 DAG.getConstant(ElemWidth, dl, MVT::i32));
1970 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1971 DAG.getAllOnesConstant(dl, MVT::i32));
1972
1973 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1974 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1975 // it separately in custom combine or selection).
1976 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1977 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1978 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1979 return DAG.getNode(ISD::SUB, dl, ResTy,
1980 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1981}
1982
1983SDValue
1984HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1985 const SDLoc &dl(Op);
1986 MVT ResTy = ty(Op);
1987 assert(ResTy.getVectorElementType() == MVT::i32);
1988
1989 SDValue Vs = Op.getOperand(0);
1990 SDValue Vt = Op.getOperand(1);
1991
1992 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1993 unsigned Opc = Op.getOpcode();
1994
1995 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1996 if (Opc == ISD::MULHU)
1997 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1998 if (Opc == ISD::MULHS)
1999 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
2000
2001#ifndef NDEBUG
2002 Op.dump(&DAG);
2003#endif
2004 llvm_unreachable("Unexpected mulh operation");
2005}
2006
2007SDValue
2008HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
2009 const SDLoc &dl(Op);
2010 unsigned Opc = Op.getOpcode();
2011 SDValue Vu = Op.getOperand(0);
2012 SDValue Vv = Op.getOperand(1);
2013
2014 // If the HI part is not used, convert it to a regular MUL.
2015 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
2016 // Need to preserve the types and the number of values.
2017 SDValue Hi = DAG.getUNDEF(ty(HiVal));
2018 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
2019 return DAG.getMergeValues({Lo, Hi}, dl);
2020 }
2021
2022 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
2023 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
2024
2025 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
2026 // valued nodes.
2027 if (Subtarget.useHVXV62Ops())
2028 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
2029
2030 if (Opc == HexagonISD::SMUL_LOHI) {
2031 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
2032 // for other signedness LOHI is cheaper.
2033 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
2034 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
2035 SDValue Lo = DAG.getUNDEF(ty(LoVal));
2036 return DAG.getMergeValues({Lo, Hi}, dl);
2037 }
2038 }
2039
2040 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
2041}
2042
2043SDValue
2044HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
2045 SDValue Val = Op.getOperand(0);
2046 MVT ResTy = ty(Op);
2047 MVT ValTy = ty(Val);
2048 const SDLoc &dl(Op);
2049
2050 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
2051 unsigned HwLen = Subtarget.getVectorLength();
2052 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
2053 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2054 unsigned BitWidth = ResTy.getSizeInBits();
2055
2056 if (BitWidth < 64) {
2057 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
2058 dl, MVT::i32, DAG);
2059 if (BitWidth == 32)
2060 return W0;
2061 assert(BitWidth < 32u);
2062 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2063 }
2064
2065 // The result is >= 64 bits. The only options are 64 or 128.
2066 assert(BitWidth == 64 || BitWidth == 128);
2068 for (unsigned i = 0; i != BitWidth/32; ++i) {
2069 SDValue W = extractHvxElementReg(
2070 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2071 Words.push_back(W);
2072 }
2073 SmallVector<SDValue,2> Combines;
2074 assert(Words.size() % 2 == 0);
2075 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2076 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2077 Combines.push_back(C);
2078 }
2079
2080 if (BitWidth == 64)
2081 return Combines[0];
2082
2083 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2084 }
2085
2086 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2087 // Splat the input into a 32-element i32 vector, then AND each element
2088 // with a unique bitmask to isolate individual bits.
2089 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2090 assert(Val32.getValueType().getSizeInBits() == 32 &&
2091 "Input must be 32 bits");
2092 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2093 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2095 for (unsigned i = 0; i < 32; ++i)
2096 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2097
2098 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2099 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2100 return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2101 };
2102 // === Case: v32i1 ===
2103 if (ResTy == MVT::v32i1 &&
2104 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2105 Subtarget.useHVX128BOps()) {
2106 SDValue Val32 = Val;
2107 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2108 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2109 return bitcastI32ToV32I1(Val32);
2110 }
2111 // === Case: v64i1 ===
2112 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2113 // Split i64 into lo/hi 32-bit halves.
2114 SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2115 SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2116 DAG.getConstant(32, dl, MVT::i64));
2117 SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2118
2119 // Reuse the same 32-bit logic twice.
2120 SDValue LoRes = bitcastI32ToV32I1(Lo);
2121 SDValue HiRes = bitcastI32ToV32I1(Hi);
2122
2123 // Concatenate into a v64i1 predicate.
2124 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
2125 }
2126
2127 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2128 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2129 unsigned BitWidth = ValTy.getSizeInBits();
2130 unsigned HwLen = Subtarget.getVectorLength();
2131 assert(BitWidth == HwLen);
2132
2133 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2134 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2135 // Splat each byte of Val 8 times.
2136 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2137 // where b0, b1,..., b15 are least to most significant bytes of I.
2139 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2140 // These are bytes with the LSB rotated left with respect to their index.
2142 for (unsigned I = 0; I != HwLen / 8; ++I) {
2143 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2144 SDValue Byte =
2145 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2146 for (unsigned J = 0; J != 8; ++J) {
2147 Bytes.push_back(Byte);
2148 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2149 }
2150 }
2151
2152 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2153 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2154 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2155
2156 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2157 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2158 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2159 }
2160
2161 return Op;
2162}
2163
2164SDValue
2165HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2166 // Sign- and zero-extends are legal.
2167 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2168 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2169 Op.getOperand(0));
2170}
2171
2172SDValue
2173HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2174 MVT ResTy = ty(Op);
2175 if (ResTy.getVectorElementType() != MVT::i1)
2176 return Op;
2177
2178 const SDLoc &dl(Op);
2179 unsigned HwLen = Subtarget.getVectorLength();
2180 unsigned VecLen = ResTy.getVectorNumElements();
2181 assert(HwLen % VecLen == 0);
2182 unsigned ElemSize = HwLen / VecLen;
2183
2184 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2185 SDValue S =
2186 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2187 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2188 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2189 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2190}
2191
2192SDValue
2193HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2194 if (SDValue S = getVectorShiftByInt(Op, DAG))
2195 return S;
2196 return Op;
2197}
2198
2199SDValue
2200HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2201 SelectionDAG &DAG) const {
2202 unsigned Opc = Op.getOpcode();
2203 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2204
2205 // Make sure the shift amount is within the range of the bitwidth
2206 // of the element type.
2207 SDValue A = Op.getOperand(0);
2208 SDValue B = Op.getOperand(1);
2209 SDValue S = Op.getOperand(2);
2210
2211 MVT InpTy = ty(A);
2212 MVT ElemTy = InpTy.getVectorElementType();
2213
2214 const SDLoc &dl(Op);
2215 unsigned ElemWidth = ElemTy.getSizeInBits();
2216 bool IsLeft = Opc == ISD::FSHL;
2217
2218 // The expansion into regular shifts produces worse code for i8 and for
2219 // right shift of i32 on v65+.
2220 bool UseShifts = ElemTy != MVT::i8;
2221 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2222 UseShifts = false;
2223
2224 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2225 // If this is a funnel shift by a scalar, lower it into regular shifts.
2226 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2227 SDValue ModS =
2228 DAG.getNode(ISD::AND, dl, MVT::i32,
2229 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2230 SDValue NegS =
2231 DAG.getNode(ISD::SUB, dl, MVT::i32,
2232 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2233 SDValue IsZero =
2234 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2235 // FSHL A, B => A << | B >>n
2236 // FSHR A, B => A <<n | B >>
2237 SDValue Part1 =
2238 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2239 SDValue Part2 =
2240 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2241 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2242 // If the shift amount was 0, pick A or B, depending on the direction.
2243 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2244 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2245 }
2246
2248 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2249
2250 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2251 return DAG.getNode(MOpc, dl, ty(Op),
2252 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2253}
2254
2255SDValue
2256HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2257 const SDLoc &dl(Op);
2258 unsigned IntNo = Op.getConstantOperandVal(0);
2259 SmallVector<SDValue> Ops(Op->ops());
2260
2261 auto Swap = [&](SDValue P) {
2262 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2263 };
2264
2265 switch (IntNo) {
2266 case Intrinsic::hexagon_V6_pred_typecast:
2267 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2268 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2269 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2270 if (ResTy == InpTy)
2271 return Ops[1];
2272 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2273 }
2274 break;
2275 }
2276 case Intrinsic::hexagon_V6_vmpyss_parts:
2277 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2278 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2279 {Ops[1], Ops[2]}));
2280 case Intrinsic::hexagon_V6_vmpyuu_parts:
2281 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2282 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2283 {Ops[1], Ops[2]}));
2284 case Intrinsic::hexagon_V6_vmpyus_parts:
2285 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2286 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2287 {Ops[1], Ops[2]}));
2288 }
2289 } // switch
2290
2291 return Op;
2292}
2293
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower MLOAD/MSTORE. Masked loads become an unmasked load plus a
  // VSELECT with the pass-through value; masked stores use the predicated
  // HVX store, splitting unaligned stores into two aligned ones.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  // Memory operand covering one full vector register at the base address.
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    // With an undef pass-through the plain load is already the answer.
    if (isUndef(Thru))
      return Load;
    // Blend loaded lanes with the pass-through under the mask, and keep the
    // load's output chain as the second result.
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  // Fully aligned store: a single predicated store instruction.
  if (MaskN->getAlign().value() % HwLen == 0) {
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case.
  // Split V into the two register-aligned vectors that straddle Base.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  // Align the mask the same way as the value: expand it to bytes, shift,
  // and convert back to a predicate for each half.
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  // Two predicated stores: one at Base, one a full vector later.
  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
2363
2364SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2365 SelectionDAG &DAG) const {
2366 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2367 // is legal (done via a pattern).
2368 assert(Subtarget.useHVXQFloatOps());
2369
2370 assert(Op->getOpcode() == ISD::FP_EXTEND);
2371
2372 MVT VecTy = ty(Op);
2373 MVT ArgTy = ty(Op.getOperand(0));
2374 const SDLoc &dl(Op);
2375
2376 if (ArgTy == MVT::v64bf16) {
2377 MVT HalfTy = typeSplit(VecTy).first;
2378 SDValue BF16Vec = Op.getOperand(0);
2379 SDValue Zeroes =
2380 getInstr(Hexagon::V6_vxor, dl, HalfTy, {BF16Vec, BF16Vec}, DAG);
2381 // Interleave zero vector with the bf16 vector, with zeroes in the lower
2382 // half of each 32 bit lane, effectively extending the bf16 values to fp32
2383 // values.
2384 SDValue ShuffVec =
2385 getInstr(Hexagon::V6_vshufoeh, dl, VecTy, {BF16Vec, Zeroes}, DAG);
2386 VectorPair VecPair = opSplit(ShuffVec, dl, DAG);
2387 SDValue Result = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2388 {VecPair.second, VecPair.first,
2389 DAG.getSignedConstant(-4, dl, MVT::i32)},
2390 DAG);
2391 return Result;
2392 }
2393
2394 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2395
2396 SDValue F16Vec = Op.getOperand(0);
2397
2398 APFloat FloatVal = APFloat(1.0f);
2399 bool Ignored;
2401 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2402 SDValue VmpyVec =
2403 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2404
2405 MVT HalfTy = typeSplit(VecTy).first;
2406 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2407 SDValue LoVec =
2408 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2409 SDValue HiVec =
2410 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2411
2412 SDValue ShuffVec =
2413 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2414 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2415
2416 return ShuffVec;
2417}
2418
2419SDValue
2420HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2421 // Catch invalid conversion ops (just in case).
2422 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2423 Op.getOpcode() == ISD::FP_TO_UINT);
2424
2425 MVT ResTy = ty(Op);
2426 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2427 MVT IntTy = ResTy.getVectorElementType();
2428
2429 if (Subtarget.useHVXIEEEFPOps()) {
2430 // There are only conversions from f16.
2431 if (FpTy == MVT::f16) {
2432 // Other int types aren't legal in HVX, so we shouldn't see them here.
2433 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2434 // Conversions to i8 and i16 are legal.
2435 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2436 return Op;
2437 }
2438 }
2439
2440 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2441 return EqualizeFpIntConversion(Op, DAG);
2442
2443 return ExpandHvxFpToInt(Op, DAG);
2444}
2445
2446// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
2447// R1 = #1, R2 holds the v32i1 param
2448// V1 = vsplat(R1)
2449// V2 = vsplat(R2)
2450// Q0 = vand(V1,R1)
2451// V0.w=prefixsum(Q0)
2452// V0.w=vsub(V0.w,V1.w)
2453// V2.w = vlsr(V2.w,V0.w)
2454// V2 = vand(V2,V1)
2455// V2.sf = V2.w
2456SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
2457 SelectionDAG &DAG) const {
2458
2459 MVT ResTy = ty(PredOp);
2460 const SDLoc &dl(PredOp);
2461
2462 SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
2463 SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
2464 SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2465 SDValue(RegConst, 0));
2466 SDNode *PredTransfer =
2467 DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
2468 SDValue(SplatConst, 0), SDValue(RegConst, 0));
2469 SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
2470 SDValue(PredTransfer, 0));
2471 SDNode *SplatParam = DAG.getMachineNode(
2472 Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2473 DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
2474 SDNode *Vsub =
2475 DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
2476 SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
2477 SDNode *IndexShift =
2478 DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2479 SDValue(SplatParam, 0), SDValue(Vsub, 0));
2480 SDNode *MaskOff =
2481 DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2482 SDValue(IndexShift, 0), SDValue(SplatConst, 0));
2483 SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
2484 SDValue(MaskOff, 0));
2485 return SDValue(Convert, 0);
2486}
2487
// For vector type v64i1 uint_to_fp to v64f16:
2489// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
2490// R3 = subreg_high (R32)
2491// R2 = subreg_low (R32)
2492// R1 = #1
2493// V1 = vsplat(R1)
2494// V2 = vsplat(R2)
2495// V3 = vsplat(R3)
2496// Q0 = vand(V1,R1)
2497// V0.w=prefixsum(Q0)
2498// V0.w=vsub(V0.w,V1.w)
2499// V2.w = vlsr(V2.w,V0.w)
2500// V3.w = vlsr(V3.w,V0.w)
2501// V2 = vand(V2,V1)
2502// V3 = vand(V3,V1)
2503// V2.h = vpacke(V3.w,V2.w)
2504// V2.hf = V2.h
2505SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
2506 SelectionDAG &DAG) const {
2507
2508 MVT ResTy = ty(PredOp);
2509 const SDLoc &dl(PredOp);
2510
2511 SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
2512 // Get the hi and lo regs
2513 SDValue HiReg =
2514 DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
2515 SDValue LoReg =
2516 DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
2517 // Get constant #1 and splat into vector V1
2518 SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
2519 SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
2520 SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2521 SDValue(RegConst, 0));
2522 // Splat the hi and lo args
2523 SDNode *SplatHi =
2524 DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2525 DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
2526 SDNode *SplatLo =
2527 DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2528 DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
2529 // vand between splatted const and const
2530 SDNode *PredTransfer =
2531 DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
2532 SDValue(SplatConst, 0), SDValue(RegConst, 0));
2533 // Get the prefixsum
2534 SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
2535 SDValue(PredTransfer, 0));
2536 // Get the vsub
2537 SDNode *Vsub =
2538 DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
2539 SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
2540 // Get vlsr for hi and lo
2541 SDNode *IndexShift_hi =
2542 DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2543 SDValue(SplatHi, 0), SDValue(Vsub, 0));
2544 SDNode *IndexShift_lo =
2545 DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2546 SDValue(SplatLo, 0), SDValue(Vsub, 0));
2547 // Get vand of hi and lo
2548 SDNode *MaskOff_hi =
2549 DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2550 SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
2551 SDNode *MaskOff_lo =
2552 DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2553 SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
2554 // Pack them
2555 SDNode *Pack =
2556 DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
2557 SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
2558 SDNode *Convert =
2559 DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
2560 return SDValue(Convert, 0);
2561}
2562
2563SDValue
2564HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2565 // Catch invalid conversion ops (just in case).
2566 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2567 Op.getOpcode() == ISD::UINT_TO_FP);
2568
2569 MVT ResTy = ty(Op);
2570 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2571 MVT FpTy = ResTy.getVectorElementType();
2572
2573 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2574 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2575 return LowerHvxPred32ToFp(Op, DAG);
2576 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2577 return LowerHvxPred64ToFp(Op, DAG);
2578 }
2579
2580 if (Subtarget.useHVXIEEEFPOps()) {
2581 // There are only conversions to f16.
2582 if (FpTy == MVT::f16) {
2583 // Other int types aren't legal in HVX, so we shouldn't see them here.
2584 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2585 // i8, i16 -> f16 is legal.
2586 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2587 return Op;
2588 }
2589 }
2590
2591 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2592 return EqualizeFpIntConversion(Op, DAG);
2593
2594 return ExpandHvxIntToFp(Op, DAG);
2595}
2596
2597HexagonTargetLowering::TypePair
2598HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2599 // Compare the widths of elements of the two types, and extend the narrower
2600 // type to match the with of the wider type. For vector types, apply this
2601 // to the element type.
2602 assert(Ty0.isVector() == Ty1.isVector());
2603
2604 MVT ElemTy0 = Ty0.getScalarType();
2605 MVT ElemTy1 = Ty1.getScalarType();
2606
2607 unsigned Width0 = ElemTy0.getSizeInBits();
2608 unsigned Width1 = ElemTy1.getSizeInBits();
2609 unsigned MaxWidth = std::max(Width0, Width1);
2610
2611 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2612 if (ScalarTy.isInteger())
2613 return MVT::getIntegerVT(Width);
2614 assert(ScalarTy.isFloatingPoint());
2615 return MVT::getFloatingPointVT(Width);
2616 };
2617
2618 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2619 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2620
2621 if (!Ty0.isVector()) {
2622 // Both types are scalars.
2623 return {WideETy0, WideETy1};
2624 }
2625
2626 // Vector types.
2627 unsigned NumElem = Ty0.getVectorNumElements();
2628 assert(NumElem == Ty1.getVectorNumElements());
2629
2630 return {MVT::getVectorVT(WideETy0, NumElem),
2631 MVT::getVectorVT(WideETy1, NumElem)};
2632}
2633
2634HexagonTargetLowering::TypePair
2635HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2636 // Compare the numbers of elements of two vector types, and widen the
2637 // narrower one to match the number of elements in the wider one.
2638 assert(Ty0.isVector() && Ty1.isVector());
2639
2640 unsigned Len0 = Ty0.getVectorNumElements();
2641 unsigned Len1 = Ty1.getVectorNumElements();
2642 if (Len0 == Len1)
2643 return {Ty0, Ty1};
2644
2645 unsigned MaxLen = std::max(Len0, Len1);
2646 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2647 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2648}
2649
2650MVT
2651HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2652 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2653 assert(LegalTy.isSimple());
2654 return LegalTy.getSimpleVT();
2655}
2656
2657MVT
2658HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2659 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2660 assert(Ty.getSizeInBits() <= HwWidth);
2661 if (Ty.getSizeInBits() == HwWidth)
2662 return Ty;
2663
2664 MVT ElemTy = Ty.getScalarType();
2665 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2666}
2667
// Emit an elementwise A+B and a vector predicate flagging the lanes in
// which the addition overflowed. A and B must have the same vector type.
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
      const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
  // Compute A+B, return {A+B, O}, where O = vector predicate indicating
  // whether an overflow has occurred.
  MVT ResTy = ty(A);
  assert(ResTy == ty(B));
  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());

  if (!Signed) {
    // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
    // save any instructions.
    SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
    // Unsigned overflow iff the sum wrapped, i.e. A+B <u A.
    SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
    return {Add, Ovf};
  }

  // Signed overflow has happened, if:
  // (A, B have the same sign) and (A+B has a different sign from either)
  // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
  SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
  SDValue NotA =
      DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
  SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
  SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
  // The overflow indicator is the sign bit of each lane; comparing the
  // lane against zero (signed less-than) extracts exactly that bit.
  SDValue MSB =
      DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
  return {Add, MSB};
}
2698
// Shift Val right by Amt bits with round-to-nearest, ties-to-even, and also
// return the overflow predicate of the internal rounding addition (used by
// int-to-fp to detect that rounding carried out of the fraction).
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Val);

  // This should also work for signed integers.
  //
  //   uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  //   bool ovf = (inp > tmp0);
  //   uint rup = inp & (1 << Amt);   // bit that decides ties-to-even
  //   // NOTE(review): an earlier comment said (1 << (Amt+1)); the code
  //   // below actually masks with (1 << Amt) -- kept in sync with code.
  //
  //   uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
  //   uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  //   uint tmp3 = tmp2 + rup;
  //   uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(ElemWidth);
  MVT IntTy = tyVector(ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  SDValue Inp = DAG.getBitcast(IntTy, Val);
  SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);

  // Rup = 1 if bit Amt of the input is set (the even/odd tie-break bit).
  SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
  SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
  SDValue Zero = getZero(dl, IntTy, DAG);
  SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);

  SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
  SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
  SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);

  // Select the rounded value per lane, per the pseudocode above.
  SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
  SDValue One = DAG.getConstant(1, dl, IntTy);
  SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
  SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2746
// Emit the high half of a signed 32x32 multiply (mulhs) for HVXv60, which
// has no full 32x32 vector multiply: decompose into 16-bit half products
// and recombine with carry-correct shifted additions.
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // mulhs(A,B) =
  //   = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //      + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //                Hi(B) Lo(B)
  //                Hi(A) Lo(A)
  //               --------------
  //                Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //         Hi(B)*Lo(A)         | + dropping the low 16 bits
  //         Hi(A)*Lo(B)         | T2
  //  Hi(B)*Hi(A)

  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
  SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
  SDValue T5 = LoHalf(P3, DAG);
  // Add:
  SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
  return T6;
}
2803
// Emit a full 32x32->64 multiply {Lo, Hi} for HVXv60: do halfword-wise
// unsigned multiplications, then apply sign corrections when one or both
// inputs are signed. Returns the merged {Lo, Hi} value pair.
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);

  // Swap low/high halves in B
  SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
                        {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
                {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
  Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Signed*signed correction: subtract (A if B<0) + (B if A<0) from Hi.
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
    SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2879
// Emit a full 32x32->64 multiply {Lo, Hi} for HVXv62+, which has the 64-bit
// multiply instructions: compute the signed*signed product first, then add
// corrections for unsigned operands. Returns the merged {Lo, Hi} pair.
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
  SDValue P1 =
      getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
  SDValue Lo = LoHalf(P1, DAG);
  SDValue Hi = HiHalf(P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2933
// NOTE(review): this rendering appears to have dropped a couple of source
// lines (including the definition of 'Signed' used below, presumably
// derived from Opc) -- verify against the upstream file.
SDValue
HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
      const {
  // Rewrite conversion between integer and floating-point in such a way that
  // the integer type is extended/narrowed to match the bitwidth of the
  // floating-point type, combined with additional integer-integer extensions
  // or narrowings to match the original input/result types.
  // E.g.  f32 -> i8  ==>  f32 -> i32 -> i8
  //
  // The input/result types are not required to be legal, but if they are
  // legal, this function should not introduce illegal types.

  unsigned Opc = Op.getOpcode();

  SDValue Inp = Op.getOperand(0);
  MVT InpTy = ty(Inp);
  MVT ResTy = ty(Op);

  // Nothing to equalize if the types already match.
  if (InpTy == ResTy)
    return Op;

  const SDLoc &dl(Op);

  // Widen both types to a common width, convert there, then resize back.
  auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
  SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
  SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
  SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
  return Res;
}
2966
// Expand an HVX FP_TO_SINT/FP_TO_UINT by manipulating the IEEE bit pattern
// with integer vector ops (no hardware fp-to-int conversion is used unless
// the fast-convert instructions are available/enabled).
// NOTE(review): this rendering appears to have dropped a line or two (e.g.
// an assert after Opc) -- verify against the upstream file.
SDValue
HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0);
  MVT InpTy = ty(Op0);
  MVT ResTy = ty(Op);
  assert(InpTy.changeTypeToInteger() == ResTy);

  // At this point this is an experiment under a flag.
  // In arch before V81 the rounding mode is towards nearest value.
  // The C/C++ standard requires rounding towards zero:
  // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
  // finite value of real floating type is converted to an integer type, the
  // fractional part is discarded (i.e., the value is truncated toward zero)."
  // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
  // floating-point type can be converted to a prvalue of an integer type. The
  // conversion truncates; that is, the fractional part is discarded."
  if (InpTy == MVT::v64f16) {
    if (Subtarget.useHVXV81Ops()) {
      // This is c/c++ compliant
      SDValue ConvVec =
          getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG);
      return ConvVec;
    } else if (EnableFpFastConvert) {
      // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
      SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG);
      return ConvVec;
    }
  } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
    // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
    SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG);
    return ConvVec;
  }

  // Scalar model of the expansion below:
  // int32_t conv_f32_to_i32(uint32_t inp) {
  //   // s | exp8 | frac23
  //
  //   int neg = (int32_t)inp < 0;
  //
  //   // "expm1" is the actual exponent minus 1: instead of "bias", subtract
  //   // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
  //   // produce a large positive "expm1", which will result in max u/int.
  //   // In all IEEE formats, bias is the largest positive number that can be
  //   // represented in bias-width bits (i.e. 011..1).
  //   int32_t expm1 = (inp << 1) - 0x80000000;
  //   expm1 >>= 24;
  //
  //   // Always insert the "implicit 1". Subnormal numbers will become 0
  //   // regardless.
  //   uint32_t frac = (inp << 8) | 0x80000000;
  //
  //   // "frac" is the fraction part represented as Q1.31. If it was
  //   // interpreted as uint32_t, it would be the fraction part multiplied
  //   // by 2^31.
  //
  //   // Calculate the amount of right shift, since shifting further to the
  //   // left would lose significant bits. Limit it to 32, because we want
  //   // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
  //   // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
  //   // left by 31). "rsh" can be negative.
  //   int32_t rsh = min(31 - (expm1 + 1), 32);
  //
  //   frac >>= rsh;   // rsh == 32 will produce 0
  //
  //   // Everything up to this point is the same for conversion to signed
  //   // unsigned integer.
  //
  //   if (neg)                 // Only for signed int
  //     frac = -frac;          //
  //   if (rsh <= 0 && neg)     //   bound = neg ? 0x80000000 : 0x7fffffff
  //     frac = 0x80000000;     //   frac = rsh <= 0 ? bound : frac
  //   if (rsh <= 0 && !neg)    //
  //     frac = 0x7fffffff;     //
  //
  //   if (neg)                 // Only for unsigned int
  //     frac = 0;              //
  //   if (rsh < 0 && !neg)     //   frac = rsh < 0 ? 0x7fffffff : frac;
  //     frac = 0x7fffffff;     //   frac = neg ? 0 : frac;
  //
  //   return frac;
  // }

  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());

  // Instruction-level sketch of the DAG built below:
  // Zero = V6_vd0();
  // Neg = V6_vgtw(Zero, Inp);
  // One = V6_lvsplatw(1);
  // M80 = V6_lvsplatw(0x80000000);
  // Exp00 = V6_vaslwv(Inp, One);
  // Exp01 = V6_vsubw(Exp00, M80);
  // ExpM1 = V6_vasrw(Exp01, 24);
  // Frc00 = V6_vaslw(Inp, 8);
  // Frc01 = V6_vor(Frc00, M80);
  // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
  // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
  // Frc02 = V6_vlsrwv(Frc01, Rsh01);

  // if signed int:
  // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
  // Pos = V6_vgtw(Rsh01, Zero);
  // Frc13 = V6_vsubw(Zero, Frc02);
  // Frc14 = V6_vmux(Neg, Frc13, Frc02);
  // Int = V6_vmux(Pos, Frc14, Bnd);
  //
  // if unsigned int:
  // Rsn = V6_vgtw(Zero, Rsh01)
  // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
  // Int = V6_vmux(Neg, Zero, Frc23)

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;
  assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));

  // Work on the raw bits: reinterpret the fp vector as same-width integers.
  SDValue Inp = DAG.getBitcast(ResTy, Op0);
  SDValue Zero = getZero(dl, ResTy, DAG);
  SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
  SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
  SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
  SDValue One = DAG.getConstant(1, dl, ResTy);
  SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
  SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
  SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
  SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});

  SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
  SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
  SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});

  SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
  SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
  SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
  SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
  SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});

  SDValue Int;

  if (Opc == ISD::FP_TO_SINT) {
    SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
    SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
    SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
    SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
    Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
  } else {
    // Unsigned result: negative inputs clamp to 0, overflow clamps to max.
    SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
    SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
    Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
  }

  return Int;
}
3121
// Expand an HVX SINT_TO_FP/UINT_TO_FP by constructing the IEEE bit pattern
// (sign | exponent | rounded fraction) with integer vector operations.
// NOTE(review): this rendering appears to have dropped a line (e.g. an
// assert after Opc) -- verify against the upstream file.
SDValue
HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0);
  MVT InpTy = ty(Op0);
  MVT ResTy = ty(Op);
  assert(ResTy.changeTypeToInteger() == InpTy);

  // Scalar model of the expansion below (for i32 -> f32):
  // uint32_t vnoc1_rnd(int32_t w) {
  //   int32_t iszero = w == 0;
  //   int32_t isneg = w < 0;
  //   uint32_t u = __builtin_HEXAGON_A2_abs(w);
  //
  //   uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
  //   uint32_t frac0 = (uint64_t)u << norm_left;
  //
  //   // Rounding:
  //   uint32_t frac1 = frac0 + ((1 << 8) - 1);
  //   uint32_t renorm = (frac0 > frac1);
  //   uint32_t rup = (int)(frac0 << 22) < 0;
  //
  //   uint32_t frac2 = frac0 >> 8;
  //   uint32_t frac3 = frac1 >> 8;
  //   uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
  //
  //   int32_t exp = 32 - norm_left + renorm + 127;
  //   exp <<= 23;
  //
  //   uint32_t sign = 0x80000000 * isneg;
  //   uint32_t f = sign | exp | frac;
  //   return iszero ? 0 : f;
  // }

  MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
  bool Signed = Opc == ISD::SINT_TO_FP;

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;

  // Normalize |input| so the leading 1 is shifted out: Frac0 holds the
  // fraction bits left-aligned in the lane.
  SDValue Zero = getZero(dl, InpTy, DAG);
  SDValue One = DAG.getConstant(1, dl, InpTy);
  SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
  SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
  SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
  SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
  SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});

  // Round the fraction down to FracWidth bits (ties to even); Ovf signals
  // that rounding carried out, requiring an exponent bump (renorm).
  auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
  if (Signed) {
    SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
    SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
    SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
    Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
  }

  // Assemble the biased exponent and OR all fields together; zero input
  // maps to the all-zero encoding.
  SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
  SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
  SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
  SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
  SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
                             {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
  SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
  SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
  SDValue Flt = DAG.getBitcast(ResTy, Flt1);

  return Flt;
}
3192
// Wrap an extend/truncate in a TL_EXTEND/TL_TRUNCATE node, preserving the
// original opcode as a constant operand so RemoveTLWrapper can restore it.
// NOTE(review): this rendering appears to have dropped lines in the switch
// (the 'TLOpc = HexagonISD::TL_TRUNCATE;' assignment and a 'default:' label
// before the unreachable) -- verify against the upstream file.
SDValue
HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  unsigned TLOpc;
  switch (Opc) {
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
      TLOpc = HexagonISD::TL_EXTEND;
      break;
    case ISD::TRUNCATE:
      break;
#ifndef NDEBUG
      Op.dump(&DAG);
#endif
      llvm_unreachable("Unexpected operator");
  }

  const SDLoc &dl(Op);
  return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
                     DAG.getUNDEF(MVT::i128), // illegal type
                     DAG.getConstant(Opc, dl, MVT::i32));
}
3217
3218SDValue
3219HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3220 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3221 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3222 unsigned Opc = Op.getConstantOperandVal(2);
3223 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3224}
3225
// Split a vector operation into two half-width operations: split every
// vector operand (and any VT operand for saturation/extend-in-reg nodes)
// and emit the same opcode twice on the halves. Returns {Lo, Hi}.
HexagonTargetLowering::VectorPair
HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
  assert(!Op.isMachineOpcode());
  SmallVector<SDValue, 2> OpsL, OpsH;
  const SDLoc &dl(Op);

  // A VT operand (e.g. the type in SIGN_EXTEND_INREG) is "split" by
  // producing the half type, used identically for both halves.
  auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
    MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
    SDValue TV = DAG.getValueType(Ty);
    return std::make_pair(TV, TV);
  };

  for (SDValue A : Op.getNode()->ops()) {
    // Non-vector operands are duplicated into both halves unchanged.
    auto [Lo, Hi] =
        ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
    // Special case for type operand.
    switch (Op.getOpcode()) {
    case ISD::SIGN_EXTEND_INREG:
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
      if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
        std::tie(Lo, Hi) = SplitVTNode(N);
      break;
    }
    OpsL.push_back(Lo);
    OpsH.push_back(Hi);
  }

  MVT ResTy = ty(Op);
  MVT HalfTy = typeSplit(ResTy).first;
  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
  return {L, H};
}
3260
// Split a memory operation on an HVX vector pair into two single-vector
// memory operations at Base and Base+HwLen. Handles LOAD, STORE, MLOAD and
// MSTORE; returns Op unchanged if the memory type is not an HVX pair.
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  auto *MemN = cast<MemSDNode>(Op.getNode());

  if (!MemN->getMemoryVT().isSimple())
    return Op;

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  // The second half lives one full HVX vector past the first.
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  // Derive per-half memory operands from the original MMO. For masked
  // ops the accessed size is unknown (depends on the mask).
  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    // Reassemble the pair value and merge the two output chains.
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}
3345
// Widen a short vector load to a full HVX register: perform a masked load
// of one full vector with only the first ResLen bytes enabled.
// NOTE(review): this rendering appears to have dropped the trailing
// arguments of the getMaskedLoad call -- verify against the upstream file.
SDValue
HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  auto *LoadN = cast<LoadSDNode>(Op.getNode());
  assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
  assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening loads of i1 yet");

  SDValue Chain = LoadN->getChain();
  SDValue Base = LoadN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  MVT ResTy = ty(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned ResLen = ResTy.getStoreSize();
  assert(ResLen < HwLen && "vsetq(v1) prerequisite");

  // Mask enabling only the first ResLen byte lanes.
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);

  MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);

  SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
                                   DAG.getUNDEF(LoadTy), LoadTy, MemOp,
  // Recast the byte vector to the requested element type.
  SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
  return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
}
3377
// Widen a short vector store to a full HVX register: pad the value with
// undef up to one full vector, then emit a masked store with only the
// original ValueLen byte lanes enabled.
SDValue
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  auto *StoreN = cast<StoreSDNode>(Op.getNode());
  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening stores of i1 yet");

  SDValue Chain = StoreN->getChain();
  SDValue Base = StoreN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  // Work on bytes; the mask below is a per-byte mask.
  SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
  MVT ValueTy = ty(Value);
  unsigned ValueLen = ValueTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(ValueLen));

  // Repeatedly double the vector with undef halves until it fills one
  // HVX register.
  for (unsigned Len = ValueLen; Len < HwLen; ) {
    Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
    Len = ty(Value).getVectorNumElements(); // This is Len *= 2
  }
  assert(ty(Value).getVectorNumElements() == HwLen);  // Paranoia

  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
  // Mask enabling only the first ValueLen byte lanes.
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
  return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
                            MemOp, ISD::UNINDEXED, false, false);
}
3411
// Widen a short-vector SETCC: pad both operands with undef to a full HVX
// vector type, compare there, and extract the original-length prefix of
// the result. Returns an empty SDValue if the widened type is not HVX.
SDValue
HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
  MVT ElemTy = ty(Op0).getVectorElementType();
  unsigned HwLen = Subtarget.getVectorLength();

  // Number of elements of this type in one full HVX register.
  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
  MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
  if (!Subtarget.isHVXVectorType(WideOpTy, true))
    return SDValue();

  SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
  SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
  EVT ResTy =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
  SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
                              {WideOp0, WideOp1, Op.getOperand(2)});

  // Take the leading subvector matching the legalized original type.
  EVT RetTy = typeLegalize(ty(Op), DAG);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
                     {SetCC, getZero(dl, MVT::i32, DAG)});
}
3436
// Widen the vector operands of selected Hexagon intrinsics to full HVX
// width, invoke the intrinsic on the widened vectors, and extract the
// original-length result (deinterleaving first for pair-producing
// multiply/add/sub intrinsics). Returns an empty SDValue when the
// intrinsic or its types are not handled here.
// NOTE(review): this rendering appears to have dropped the condition line
// before 'WideOp2 = Op2;' (likely an IID check deciding whether operand 2
// is a vector to widen) -- verify against the upstream file.
SDValue HexagonTargetLowering::WidenHvxIntrinsic(SDValue Op,
                                                SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwWidth = 8 * Subtarget.getVectorLength();
  bool IsResInterleaved = false;

  SDValue WideRes = SDValue();
  SDValue Op1 = Op.getOperand(1);
  MVT ResTy = ty(Op);
  MVT OpTy = ty(Op1);
  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
    return SDValue();

  // How many copies of Ty fit in one HVX register.
  auto getFactor = [HwWidth](MVT Ty) {
    unsigned Width = Ty.getSizeInBits();
    assert(HwWidth % Width == 0);
    return HwWidth / Width;
  };

  // Ty widened to one full HVX register.
  auto getWideTy = [getFactor](MVT Ty) {
    unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
    return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
  };

  unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDValue Op2 = Op.getOperand(2);
  SDValue WideOp1 = appendUndef(Op1, getWideTy(OpTy), DAG);
  SDValue WideOp2;
    WideOp2 = Op2;
  } else {
    WideOp2 = appendUndef(Op2, getWideTy(OpTy), DAG);
  }
  unsigned WidenFactor = getFactor(OpTy);
  unsigned WideLen = ResTy.getVectorNumElements() * WidenFactor;
  MVT WideResTy = MVT::getVectorVT(ResTy.getVectorElementType(), WideLen);

  switch (IID) {
  default:
    // Not a widenable intrinsic.
    return SDValue();
  case Intrinsic::hexagon_vasrsat_su:
  case Intrinsic::hexagon_vasrsat_uu:
  case Intrinsic::hexagon_vasrsat_ss:
    WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy,
                          DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2,
                          Op.getOperand(3));
    break;
  case Intrinsic::hexagon_vadd_su:
  case Intrinsic::hexagon_vadd_uu:
  case Intrinsic::hexagon_vadd_ss:
  case Intrinsic::hexagon_vadd_us:

  case Intrinsic::hexagon_vsub_su:
  case Intrinsic::hexagon_vsub_uu:
  case Intrinsic::hexagon_vsub_ss:
  case Intrinsic::hexagon_vsub_us:

  case Intrinsic::hexagon_vmpy_su:
  case Intrinsic::hexagon_vmpy_uu:
  case Intrinsic::hexagon_vmpy_ss:
  case Intrinsic::hexagon_vmpy_us:
  case Intrinsic::hexagon_vmpy_ub_ub:
  case Intrinsic::hexagon_vmpy_ub_b:
  case Intrinsic::hexagon_vmpy_uh_uh:
  case Intrinsic::hexagon_vmpy_h_h:
    // These produce an interleaved vector pair; the valid elements must
    // be collected back together below.
    IsResInterleaved = true;
    WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy,
                          DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2);
    break;
  case Intrinsic::hexagon_vavgu:
  case Intrinsic::hexagon_vavgs:
    WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy,
                          DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2);
    break;
  }
  unsigned OrigLen = ResTy.getVectorNumElements();
  assert(OrigLen % 2 == 0);
  unsigned HalfOrigLen = OrigLen / 2;
  unsigned SplitLen = WideLen / 2;
  if (IsResInterleaved) {
    // Get the valid odd and even elements from the widened vector-pair while
    // maintaining their deinterleaved order. The following shuffle_vector will
    // produce a vector-pair with all the valid elements (even followed by odd)
    // accumulated together followed by undefs.
    SmallVector<int, 128> ShuffV;
    for (unsigned j = 0; j < WidenFactor; j++) {
      for (unsigned i = 0; i < HalfOrigLen; i++)
        ShuffV.push_back(j * HalfOrigLen + i);
      for (unsigned i = 0; i < HalfOrigLen; i++)
        ShuffV.push_back(SplitLen + j * HalfOrigLen + i);
    }
    WideRes = DAG.getVectorShuffle(WideResTy, dl, WideRes,
                                   DAG.getUNDEF(WideResTy), ShuffV);
  }
  // Extract the original-length prefix from the widened result.
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
                     {WideRes, getZero(dl, MVT::i32, DAG)});
}
3534
// Main custom-lowering dispatch for HVX operations. Operations on vector
// pairs are split into single-register halves first where that is legal;
// the remaining opcodes are routed to per-opcode LowerHvx* helpers.
// Reaching the end of the function is a lowering bug (unreachable).
3535 SDValue
3536 HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3537 unsigned Opc = Op.getOpcode();
// An op is a "pair op" if its result or any operand is an HVX vector pair.
3538 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3539 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3540 return isHvxPairTy(ty(V));
3541 });
3542
3543 if (IsPairOp) {
3544 switch (Opc) {
3545 default:
3546 break;
3547 case ISD::LOAD:
3548 case ISD::STORE:
3549 case ISD::MLOAD:
3550 case ISD::MSTORE:
3551 return SplitHvxMemOp(Op, DAG);
// FP<->int conversions can only be split when source and result have
// the same total width (element-wise split stays consistent).
3552 case ISD::SINT_TO_FP:
3553 case ISD::UINT_TO_FP:
3554 case ISD::FP_TO_SINT:
3555 case ISD::FP_TO_UINT:
3556 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3557 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3558 break;
// Element-wise operations: split into halves, lower each, rejoin.
3559 case ISD::ABS:
3560 case ISD::CTPOP:
3561 case ISD::CTLZ:
3562 case ISD::CTTZ:
3563 case ISD::MUL:
3564 case ISD::FADD:
3565 case ISD::FSUB:
3566 case ISD::FMUL:
3567 case ISD::FMINIMUMNUM:
3568 case ISD::FMAXIMUMNUM:
3569 case ISD::MULHS:
3570 case ISD::MULHU:
3571 case ISD::AND:
3572 case ISD::OR:
3573 case ISD::XOR:
3574 case ISD::SRA:
3575 case ISD::SHL:
3576 case ISD::SRL:
3577 case ISD::FSHL:
3578 case ISD::FSHR:
3579 case ISD::SMIN:
3580 case ISD::SMAX:
3581 case ISD::UMIN:
3582 case ISD::UMAX:
3583 case ISD::SETCC:
3584 case ISD::VSELECT:
// NOTE(review): one case label (original line 3585) was dropped by the
// extraction between VSELECT and SPLAT_VECTOR -- confirm upstream.
3586 case ISD::SPLAT_VECTOR:
3587 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3588 case ISD::SIGN_EXTEND:
3589 case ISD::ZERO_EXTEND:
3590 // In general, sign- and zero-extends can't be split and still
3591 // be legal. The only exception is extending bool vectors.
3592 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3593 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3594 break;
3595 }
3596 }
3597
// Single-register (or otherwise unsplit) operations.
3598 switch (Opc) {
3599 default:
3600 break;
3601 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3602 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3603 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3604 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3605 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3606 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3607 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3608 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3609 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3610 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3611 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3612 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3613 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3614 case ISD::SRA:
3615 case ISD::SHL:
3616 case ISD::SRL: return LowerHvxShift(Op, DAG);
3617 case ISD::FSHL:
3618 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3619 case ISD::MULHS:
3620 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3621 case ISD::SMUL_LOHI:
3622 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3623 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
// SETCC/INTRINSIC_VOID are returned unchanged (already legal here).
3624 case ISD::SETCC:
3625 case ISD::INTRINSIC_VOID: return Op;
3626 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3627 case ISD::MLOAD:
3628 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3629 // Unaligned loads will be handled by the default lowering.
3630 case ISD::LOAD: return SDValue();
3631 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3632 case ISD::FP_TO_SINT:
3633 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3634 case ISD::SINT_TO_FP:
3635 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3636
3637 // Special nodes:
// NOTE(review): two case labels (original lines 3638-3639, likely the
// HexagonISD signed/unsigned MUL_LOHI variants) were dropped by the
// extraction -- confirm upstream.
3640 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3641 }
3642#ifndef NDEBUG
3643 Op.dumpr(&DAG);
3644#endif
3645 llvm_unreachable("Unhandled HVX operation");
3646}
3647
// Decompose a resize-like node (saturation / wrapped extend-truncate) whose
// element widths differ by more than a factor of 2 into a chain of nodes,
// each changing the element width by exactly 2x. A single-step (or
// identity-factor) op is returned unchanged.
3648 SDValue
3649 HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3650 const {
3651 // Rewrite the extension/truncation/saturation op into steps where each
3652 // step changes the type widths by a factor of 2.
3653 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3654 //
3655 // Some of the vector types in Op may not be legal.
3656
3657 unsigned Opc = Op.getOpcode();
3658 switch (Opc) {
3659 case HexagonISD::SSAT:
3660 case HexagonISD::USAT:
// NOTE(review): two case labels (original lines 3661-3662, presumably the
// HexagonISD TL-wrapper extend/truncate nodes) were dropped by the
// extraction -- confirm upstream.
3663 break;
3664 case ISD::ANY_EXTEND:
3665 case ISD::ZERO_EXTEND:
3666 case ISD::SIGN_EXTEND:
3667 case ISD::TRUNCATE:
3668 llvm_unreachable("ISD:: ops will be auto-folded");
3669 break;
3670#ifndef NDEBUG
3671 Op.dump(&DAG);
3672#endif
3673 llvm_unreachable("Unexpected operation");
3674 }
3675
3676 SDValue Inp = Op.getOperand(0);
3677 MVT InpTy = ty(Inp);
3678 MVT ResTy = ty(Op);
3679
3680 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3681 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3682 assert(InpWidth != ResWidth);
3683
// Already a single 2x step: nothing to expand.
3684 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3685 return Op;
3686
3687 const SDLoc &dl(Op);
// Element count is preserved by every step.
3688 unsigned NumElems = InpTy.getVectorNumElements();
3689 assert(NumElems == ResTy.getVectorNumElements());
3690
// Re-emit Op with the same opcode but a new element width.
3691 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3692 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3693 switch (Opc) {
3694 case HexagonISD::SSAT:
3695 case HexagonISD::USAT:
3696 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
// NOTE(review): two case labels (original lines 3697-3698) were dropped
// by the extraction before this getNode call -- confirm upstream.
3699 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3700 default:
3701 llvm_unreachable("Unexpected opcode");
3702 }
3703 };
3704
3705 SDValue S = Inp;
3706 if (InpWidth < ResWidth) {
// Widening: double the element width until the result width is reached.
3707 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3708 while (InpWidth * 2 <= ResWidth)
3709 S = repeatOp(InpWidth *= 2, S);
3710 } else {
3711 // InpWidth > ResWidth
// Narrowing: halve the element width until the result width is reached.
3712 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3713 while (InpWidth / 2 >= ResWidth)
3714 S = repeatOp(InpWidth /= 2, S);
3715 }
3716 return S;
3717}
3718
// Legalize a resize-like node (SSAT/USAT/TL-wrapped extend/truncate) whose
// input or result type is not HVX-legal: widen to HVX, split into an HVX
// pair, or (if both types are already legal) just strip the TL wrapper.
3719 SDValue
3720 HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3721 SDValue Inp0 = Op.getOperand(0);
3722 MVT InpTy = ty(Inp0);
3723 MVT ResTy = ty(Op);
3724 unsigned InpWidth = InpTy.getSizeInBits();
3725 unsigned ResWidth = ResTy.getSizeInBits();
3726 unsigned Opc = Op.getOpcode();
3727
3728 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3729 // First, make sure that the narrower type is widened to HVX.
3730 // This may cause the result to be wider than what the legalizer
3731 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3732 // desired type.
3733 auto [WInpTy, WResTy] =
3734 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3735 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
// Pad the input with undef elements up to the widened input type.
3736 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3737 SDValue S;
// NOTE(review): the `if (...) {` guard (original line 3738) selecting the
// three-operand form below was dropped by the extraction -- it most likely
// distinguishes the TL-wrapped opcodes from SSAT/USAT. Confirm upstream.
3739 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3740 Op.getOperand(2));
3741 } else {
3742 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3743 }
// Break the widened resize into 2x steps, then trim to the legal type.
3744 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3745 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3746 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3747 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3748 } else {
3749 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3750 return RemoveTLWrapper(Op, DAG);
3751 }
3752 llvm_unreachable("Unexpected situation");
3753}
3754
// Custom OPERATION legalization hook for HVX: for each handled opcode,
// push replacement value(s) into Results; leaving Results empty tells the
// legalizer to fall back to its default handling.
3755 void
3756 HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
// NOTE(review): the remainder of the parameter list (original line 3757,
// the SmallVectorImpl<SDValue> &Results / SelectionDAG &DAG parameters
// used below) was dropped by the extraction -- confirm upstream.
3758 unsigned Opc = N->getOpcode();
3759 SDValue Op(N, 0);
3760 SDValue Inp0; // Optional first argument.
3761 if (N->getNumOperands() > 0)
3762 Inp0 = Op.getOperand(0);
3763
3764 switch (Opc) {
// Extends/truncates between HVX element types are kept alive through
// legalization via the TL wrapper nodes.
3765 case ISD::ANY_EXTEND:
3766 case ISD::SIGN_EXTEND:
3767 case ISD::ZERO_EXTEND:
3768 case ISD::TRUNCATE:
3769 if (Subtarget.isHVXElementType(ty(Op)) &&
3770 Subtarget.isHVXElementType(ty(Inp0))) {
3771 Results.push_back(CreateTLWrapper(Op, DAG));
3772 }
3773 break;
3774 case ISD::SETCC:
3775 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3776 if (SDValue T = WidenHvxSetCC(Op, DAG))
3777 Results.push_back(T);
3778 }
3779 break;
3780 case ISD::STORE: {
3781 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3782 SDValue Store = WidenHvxStore(Op, DAG);
3783 Results.push_back(Store);
3784 }
3785 break;
3786 }
3787 case ISD::MLOAD:
3788 if (isHvxPairTy(ty(Op))) {
3789 SDValue S = SplitHvxMemOp(Op, DAG);
// NOTE(review): one line (original 3790, presumably an assert that S is a
// MERGE_VALUES of the two halves) was dropped by the extraction.
3791 Results.push_back(S.getOperand(0));
3792 Results.push_back(S.getOperand(1));
3793 }
3794 break;
3795 case ISD::MSTORE:
3796 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3797 SDValue S = SplitHvxMemOp(Op, DAG);
3798 Results.push_back(S);
3799 }
3800 break;
// NOTE(review): the case label for this arm (original line 3801, likely
// ISD::INTRINSIC_WO_CHAIN given the WidenHvxIntrinsic call) was dropped
// by the extraction -- confirm upstream.
3802 if (shouldWidenToHvx(ty(Op.getOperand(1)), DAG)) {
3803 if (SDValue T = WidenHvxIntrinsic(Op, DAG))
3804 Results.push_back(T);
3805 }
3806 break;
// FP<->int conversions with mismatched total widths are first equalized.
3807 case ISD::SINT_TO_FP:
3808 case ISD::UINT_TO_FP:
3809 case ISD::FP_TO_SINT:
3810 case ISD::FP_TO_UINT:
3811 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3812 SDValue T = EqualizeFpIntConversion(Op, DAG);
3813 Results.push_back(T);
3814 }
3815 break;
3816 case HexagonISD::SSAT:
3817 case HexagonISD::USAT:
// NOTE(review): two case labels (original lines 3818-3819, presumably the
// TL-wrapper extend/truncate nodes) were dropped by the extraction.
3820 Results.push_back(LegalizeHvxResize(Op, DAG));
3821 break;
3822 default:
3823 break;
3824 }
3825}
3826
// Custom RESULT-TYPE legalization hook for HVX: replaces the results of
// nodes whose result type is illegal. Handled opcodes push replacement
// value(s) into Results; an empty Results means default legalization.
3827 void
3828 HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
// NOTE(review): the remainder of the parameter list (original line 3829,
// the SmallVectorImpl<SDValue> &Results / SelectionDAG &DAG parameters
// used below) was dropped by the extraction -- confirm upstream.
3830 unsigned Opc = N->getOpcode();
3831 SDValue Op(N, 0);
3832 SDValue Inp0; // Optional first argument.
3833 if (N->getNumOperands() > 0)
3834 Inp0 = Op.getOperand(0);
3835
3836 switch (Opc) {
// Keep extends/truncates between HVX element types alive via TL wrappers.
3837 case ISD::ANY_EXTEND:
3838 case ISD::SIGN_EXTEND:
3839 case ISD::ZERO_EXTEND:
3840 case ISD::TRUNCATE:
3841 if (Subtarget.isHVXElementType(ty(Op)) &&
3842 Subtarget.isHVXElementType(ty(Inp0))) {
3843 Results.push_back(CreateTLWrapper(Op, DAG));
3844 }
3845 break;
3846 case ISD::SETCC:
3847 if (shouldWidenToHvx(ty(Op), DAG)) {
3848 if (SDValue T = WidenHvxSetCC(Op, DAG))
3849 Results.push_back(T);
3850 }
3851 break;
3852 case ISD::LOAD: {
3853 if (shouldWidenToHvx(ty(Op), DAG)) {
// Widened load yields MERGE_VALUES(value, chain); both are replaced.
3854 SDValue Load = WidenHvxLoad(Op, DAG);
3855 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3856 Results.push_back(Load.getOperand(0));
3857 Results.push_back(Load.getOperand(1));
3858 }
3859 break;
3860 }
3861 case ISD::BITCAST:
3862 if (isHvxBoolTy(ty(Inp0))) {
3863 SDValue C = LowerHvxBitcast(Op, DAG);
3864 Results.push_back(C);
3865 }
3866 break;
// NOTE(review): the case label for this arm (original line 3867, likely
// ISD::INTRINSIC_WO_CHAIN given the WidenHvxIntrinsic call) was dropped
// by the extraction -- confirm upstream.
3868 assert(shouldWidenToHvx(ty(N->getOperand(1)), DAG) && "Not widening?");
3869 if (SDValue T = WidenHvxIntrinsic(Op, DAG))
3870 Results.push_back(T);
3871 break;
3872 case ISD::FP_TO_SINT:
3873 case ISD::FP_TO_UINT:
3874 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3875 SDValue T = EqualizeFpIntConversion(Op, DAG);
3876 Results.push_back(T);
3877 }
3878 break;
3879 case HexagonISD::SSAT:
3880 case HexagonISD::USAT:
// NOTE(review): two case labels (original lines 3881-3882, presumably the
// TL-wrapper extend/truncate nodes) were dropped by the extraction.
3883 Results.push_back(LegalizeHvxResize(Op, DAG));
3884 break;
3885 default:
3886 break;
3887 }
3888}
3889
3890SDValue
3891HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3892 DAGCombinerInfo &DCI) const {
3893 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3894 // to extract-subvector (shuffle V, pick even, pick odd)
3895
3896 assert(Op.getOpcode() == ISD::TRUNCATE);
3897 SelectionDAG &DAG = DCI.DAG;
3898 const SDLoc &dl(Op);
3899
3900 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3901 return SDValue();
3902 SDValue Cast = Op.getOperand(0);
3903 SDValue Src = Cast.getOperand(0);
3904
3905 EVT TruncTy = Op.getValueType();
3906 EVT CastTy = Cast.getValueType();
3907 EVT SrcTy = Src.getValueType();
3908 if (SrcTy.isSimple())
3909 return SDValue();
3910 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3911 return SDValue();
3912 unsigned SrcLen = SrcTy.getVectorNumElements();
3913 unsigned CastLen = CastTy.getVectorNumElements();
3914 if (2 * CastLen != SrcLen)
3915 return SDValue();
3916
3917 SmallVector<int, 128> Mask(SrcLen);
3918 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3919 Mask[i] = 2 * i;
3920 Mask[i + CastLen] = 2 * i + 1;
3921 }
3922 SDValue Deal =
3923 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3924 return opSplit(Deal, dl, DAG).first;
3925}
3926
3927SDValue
3928HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3929 SDValue Op, DAGCombinerInfo &DCI) const {
3930 // Fold
3931 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3932 // into
3933 // shuffle (concat x, y), undef, m3
3934 if (Op.getNumOperands() != 2)
3935 return SDValue();
3936
3937 SelectionDAG &DAG = DCI.DAG;
3938 const SDLoc &dl(Op);
3939 SDValue V0 = Op.getOperand(0);
3940 SDValue V1 = Op.getOperand(1);
3941
3942 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3943 return SDValue();
3944 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3945 return SDValue();
3946
3947 SetVector<SDValue> Order;
3948 Order.insert(V0.getOperand(0));
3949 Order.insert(V0.getOperand(1));
3950 Order.insert(V1.getOperand(0));
3951 Order.insert(V1.getOperand(1));
3952
3953 if (Order.size() > 2)
3954 return SDValue();
3955
3956 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3957 // result must be the same.
3958 EVT InpTy = V0.getValueType();
3959 assert(InpTy.isVector());
3960 unsigned InpLen = InpTy.getVectorNumElements();
3961
3962 SmallVector<int, 128> LongMask;
3963 auto AppendToMask = [&](SDValue Shuffle) {
3964 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3965 ArrayRef<int> Mask = SV->getMask();
3966 SDValue X = Shuffle.getOperand(0);
3967 SDValue Y = Shuffle.getOperand(1);
3968 for (int M : Mask) {
3969 if (M == -1) {
3970 LongMask.push_back(M);
3971 continue;
3972 }
3973 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3974 if (static_cast<unsigned>(M) >= InpLen)
3975 M -= InpLen;
3976
3977 int OutOffset = Order[0] == Src ? 0 : InpLen;
3978 LongMask.push_back(M + OutOffset);
3979 }
3980 };
3981
3982 AppendToMask(V0);
3983 AppendToMask(V1);
3984
3985 SDValue C0 = Order.front();
3986 SDValue C1 = Order.back(); // Can be same as front
3987 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3988
3989 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3990 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3991}
3992
// Target DAG-combine entry point for HVX nodes. Truncate/concat combines
// run unconditionally (before legalization); the remaining folds run only
// after operation legalization.
3993 SDValue
3994 HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3995 const {
3996 const SDLoc &dl(N);
3997 SelectionDAG &DAG = DCI.DAG;
3998 SDValue Op(N, 0);
3999 unsigned Opc = Op.getOpcode();
4000
// NOTE(review): the declaration of `Ops` (original line 4001, presumably a
// SmallVector copy of N's operands, used throughout the switch below) was
// dropped by the extraction -- confirm upstream.
4002
4003 if (Opc == ISD::TRUNCATE)
4004 return combineTruncateBeforeLegal(Op, DCI);
4005 if (Opc == ISD::CONCAT_VECTORS)
4006 return combineConcatVectorsBeforeLegal(Op, DCI);
4007
4008 if (DCI.isBeforeLegalizeOps())
4009 return SDValue();
4010
4011 switch (Opc) {
4012 case ISD::VSELECT: {
4013 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
4014 SDValue Cond = Ops[0];
4015 if (Cond->getOpcode() == ISD::XOR) {
4016 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
4017 if (C1->getOpcode() == HexagonISD::QTRUE)
4018 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
4019 }
4020 break;
4021 }
// V2Q of a constant splat folds directly to QTRUE/QFALSE.
4022 case HexagonISD::V2Q:
4023 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
4024 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
4025 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
4026 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
4027 }
4028 break;
// Q2V of a known predicate folds to an all-ones/all-zeros splat.
4029 case HexagonISD::Q2V:
4030 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
4031 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
4032 DAG.getAllOnesConstant(dl, MVT::i32))
4033 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
4034 return getZero(dl, ty(Op), DAG);
4035 break;
// NOTE(review): the case label for this arm (original line 4036, likely a
// HexagonISD insert-style node: inserting undef is a no-op) was dropped
// by the extraction -- confirm upstream.
4037 if (isUndef(Ops[1]))
4038 return Ops[0];
4039 break;
// Nested rotates combine by adding their rotate amounts.
4040 case HexagonISD::VROR: {
4041 if (Ops[0].getOpcode() == HexagonISD::VROR) {
4042 SDValue Vec = Ops[0].getOperand(0);
4043 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
4044 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
4045 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
4046 }
4047 break;
4048 }
4049 }
4050
4051 return SDValue();
4052}
4053
// Returns true when Ty is not already HVX-legal but type legalization
// would split it into HVX-legal halves.
4054 bool
4055 HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
4056 if (Subtarget.isHVXVectorType(Ty, true))
4057 return false;
4058 auto Action = getPreferredHvxVectorAction(Ty);
// NOTE(review): the `if (Action == ...)` guard (original line 4059,
// presumably testing for the split-vector action) was dropped by the
// extraction -- confirm upstream.
4060 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
4061 return false;
4062}
4063
// Returns true when Ty is not already HVX-legal but type legalization
// would widen it to an HVX-legal type.
4064 bool
4065 HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
4066 if (Subtarget.isHVXVectorType(Ty, true))
4067 return false;
4068 auto Action = getPreferredHvxVectorAction(Ty);
// NOTE(review): the `if (Action == ...)` guard (original line 4069,
// presumably testing for the widen-vector action) was dropped by the
// extraction -- confirm upstream.
4070 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
4071 return false;
4072}
4073
4074bool
4075HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
4076 if (!Subtarget.useHVXOps())
4077 return false;
4078 // If the type of any result, or any operand type are HVX vector types,
4079 // this is an HVX operation.
4080 auto IsHvxTy = [this](EVT Ty) {
4081 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
4082 };
4083 auto IsHvxOp = [this](SDValue Op) {
4084 return Op.getValueType().isSimple() &&
4085 Subtarget.isHVXVectorType(ty(Op), true);
4086 };
4087 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
4088 return true;
4089
4090 // Check if this could be an HVX operation after type widening.
4091 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
4092 if (!Op.getValueType().isSimple())
4093 return false;
4094 MVT ValTy = ty(Op);
4095 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
4096 };
4097
4098 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
4099 if (IsWidenedToHvx(SDValue(N, i)))
4100 return true;
4101 }
4102 return llvm::any_of(N->ops(), IsWidenedToHvx);
4103}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
static cl::opt< bool > EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false), cl::desc("Enable FP fast conversion routine."))
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:132
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:138
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate what to do about it.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/fp until it can find one that works.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to do about it.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2148
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.