LLVM 22.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
// Command-line knob: vectors whose size (in bytes) reaches this threshold
// are widened to full HVX vectors (see getPreferredHvxVectorAction).
// NOTE(review): the extraction elides embedded line 31 (cl::init/flags),
// so the option's default value is not visible here -- confirm in-tree.
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
 32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
// Legal single-register ("V") and register-pair ("W") HVX vector types,
// for the 64-byte and 128-byte vector-length modes respectively.
34static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(5, 15, 10);
45 case MVT::f32:
46 return std::make_tuple(8, 127, 23);
47 case MVT::f64:
48 return std::make_tuple(11, 1023, 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
54
// Configure HVX lowering: register classes for the selected vector length
// (64B vs 128B mode), operation actions, condition-code expansions, and
// load/store widening for short vectors.
// NOTE(review): this is a doxygen extraction with elided lines (gaps in the
// embedded numbering) -- several statement bodies below are not visible, so
// the surviving text is kept byte-identical.
55void
56HexagonTargetLowering::initializeHVXLowering() {
57 if (Subtarget.useHVX64BOps()) {
58 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
59 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
60 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
61 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
62 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
63 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
71 // as possible.
72 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
73 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
74 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
75 } else if (Subtarget.useHVX128BOps()) {
76 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
77 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
78 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
79 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
80 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
81 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
82 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
85 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90 }
91 }
92
93 // Set up operation actions.
94
95 bool Use64b = Subtarget.useHVX64BOps();
96 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
99 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101
// Helper: mark Opc on FromTy as Promote and record the promotion target.
// NOTE(review): the setOperationAction(...Promote) line (embedded 103) is
// elided by the extraction.
102 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
104 AddPromotedToType(Opc, FromTy, ToTy);
105 };
106
107 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108 // Note: v16i1 -> i16 is handled in type legalization instead of op
109 // legalization.
110 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
111 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
112 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
113 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
114 setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
115 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
119
120 if (Subtarget.useHVX128BOps())
121 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
// Floating-point HVX (128B mode, v68+): actions for single FP vectors
// (FloatV) and FP vector pairs (FloatW).
122 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
123 Subtarget.useHVXFloatingPoint()) {
124
125 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
126 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
127
128 for (MVT T : FloatV) {
132 setOperationAction(ISD::FMINIMUMNUM, T, Legal);
133 setOperationAction(ISD::FMAXIMUMNUM, T, Legal);
134
137
140
141 setOperationAction(ISD::MLOAD, T, Custom);
142 setOperationAction(ISD::MSTORE, T, Custom);
143 // Custom-lower BUILD_VECTOR. The standard (target-independent)
144 // handling of it would convert it to a load, which is not always
145 // the optimal choice.
147 }
148
149
150 // BUILD_VECTOR with f16 operands cannot be promoted without
151 // promoting the result, so lower the node to vsplat or constant pool
155
156 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
157 // generated.
158 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
159 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
160 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
161 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
162
163 for (MVT P : FloatW) {
164 setOperationAction(ISD::LOAD, P, Custom);
165 setOperationAction(ISD::STORE, P, Custom);
169 setOperationAction(ISD::FMINIMUMNUM, P, Custom);
170 setOperationAction(ISD::FMAXIMUMNUM, P, Custom);
173
174 // Custom-lower BUILD_VECTOR. The standard (target-independent)
175 // handling of it would convert it to a load, which is not always
176 // the optimal choice.
178 // Make concat-vectors custom to handle concats of more than 2 vectors.
180
181 setOperationAction(ISD::MLOAD, P, Custom);
182 setOperationAction(ISD::MSTORE, P, Custom);
183 }
184
185 if (Subtarget.useHVXQFloatOps()) {
186 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
188 } else if (Subtarget.useHVXIEEEFPOps()) {
189 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
191 }
192 }
193
// Per-type actions for single ("V") HVX vectors. Many of the action lines
// inside this loop are elided by the extraction.
194 for (MVT T : LegalV) {
197
213 if (T != ByteV) {
217 }
218
221 if (T.getScalarType() != MVT::i32) {
224 }
225
227 setOperationAction(ISD::LOAD, T, Custom);
228 setOperationAction(ISD::MLOAD, T, Custom);
229 setOperationAction(ISD::MSTORE, T, Custom);
230 if (T.getScalarType() != MVT::i32) {
233 }
234
236 // Make concat-vectors custom to handle concats of more than 2 vectors.
247 if (T != ByteV) {
249 // HVX only has shifts of words and halfwords.
253
254 // Promote all shuffles to operate on vectors of bytes.
255 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
256 }
257
258 if (Subtarget.useHVXFloatingPoint()) {
259 // Same action for both QFloat and IEEE.
264 }
265
273 }
274
// Per-type actions for vector pairs ("W" register pairs).
275 for (MVT T : LegalW) {
276 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
277 // independent) handling of it would convert it to a load, which is
278 // not always the optimal choice.
280 // Make concat-vectors custom to handle concats of more than 2 vectors.
282
283 // Custom-lower these operations for pairs. Expand them into a concat
284 // of the corresponding operations on individual vectors.
293
294 setOperationAction(ISD::LOAD, T, Custom);
295 setOperationAction(ISD::STORE, T, Custom);
296 setOperationAction(ISD::MLOAD, T, Custom);
297 setOperationAction(ISD::MSTORE, T, Custom);
302
317 if (T != ByteW) {
321
322 // Promote all shuffles to operate on vectors of bytes.
323 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
324 }
327
330 if (T.getScalarType() != MVT::i32) {
333 }
334
335 if (Subtarget.useHVXFloatingPoint()) {
336 // Same action for both QFloat and IEEE.
341 }
342 }
343
344 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
345 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
346 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
349
// FP vector compares: only a subset of condition codes is supported
// natively; expand the rest.
350 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
351 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
352 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
353 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
354 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
355 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
356 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
357 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
358 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
359 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
360 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
361 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
362
363 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
364 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
365 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
366 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
367 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
368 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
369 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
370 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
371 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
372 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
373 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
374 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
375
376 // Boolean vectors.
377
378 for (MVT T : LegalW) {
379 // Boolean types for vector pairs will overlap with the boolean
380 // types for single vectors, e.g.
381 // v64i8 -> v64i1 (single)
382 // v64i16 -> v64i1 (pair)
383 // Set these actions first, and allow the single actions to overwrite
384 // any duplicates.
385 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
390 // Masked load/store takes a mask that may need splitting.
391 setOperationAction(ISD::MLOAD, BoolW, Custom);
392 setOperationAction(ISD::MSTORE, BoolW, Custom);
393 }
394
395 for (MVT T : LegalV) {
396 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
407 }
408
409 if (Use64b) {
410 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
412 } else {
413 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
415 }
416
417 // Handle store widening for short vectors.
418 unsigned HwLen = Subtarget.getVectorLength();
419 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
420 if (ElemTy == MVT::i1)
421 continue;
422 int ElemWidth = ElemTy.getFixedSizeInBits();
423 int MaxElems = (8*HwLen) / ElemWidth;
424 for (int N = 2; N < MaxElems; N *= 2) {
425 MVT VecTy = MVT::getVectorVT(ElemTy, N);
426 auto Action = getPreferredVectorAction(VecTy);
428 setOperationAction(ISD::LOAD, VecTy, Custom);
429 setOperationAction(ISD::STORE, VecTy, Custom);
435 if (Subtarget.useHVXFloatingPoint()) {
440 }
441
442 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
443 if (!isTypeLegal(BoolTy))
445 }
446 }
447 }
448
449 // Include cases which are not handled earlier
452
454}
455
// Decide how the type legalizer should treat the (not natively legal) HVX
// vector type VecTy: split, widen, or defer to the default (~0u).
// NOTE(review): several return statements below (embedded lines 464, 486,
// 490, 492) were elided by this extraction; the surviving text is kept
// byte-identical.
456unsigned
457HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
458 MVT ElemTy = VecTy.getVectorElementType();
459 unsigned VecLen = VecTy.getVectorNumElements();
460 unsigned HwLen = Subtarget.getVectorLength();
461
462 // Split vectors of i1 that exceed byte vector length.
463 if (ElemTy == MVT::i1 && VecLen > HwLen)
465
466 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
467 // For shorter vectors of i1, widen them if any of the corresponding
468 // vectors of integers needs to be widened.
469 if (ElemTy == MVT::i1) {
470 for (MVT T : Tys) {
471 assert(T != MVT::i1);
// Recurse on the equivalent integer vector; ~0u means "no preference".
472 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
473 if (A != ~0u)
474 return A;
475 }
476 return ~0u;
477 }
478
479 // If the size of VecTy is at least half of the vector length,
480 // widen the vector. Note: the threshold was not selected in
481 // any scientific way.
482 if (llvm::is_contained(Tys, ElemTy)) {
483 unsigned VecWidth = VecTy.getSizeInBits();
484 unsigned HwWidth = 8*HwLen;
485 if (VecWidth > 2*HwWidth)
487
// The -hexagon-hvx-widen option overrides the default widening threshold.
488 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
489 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
491 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
493 }
494
495 // Defer to default.
496 return ~0u;
497}
498
// Pick the legalization action for a Custom-marked HVX operation based on
// the specific node. NOTE(review): the switch cases (embedded lines
// 503-506) and the final return (line 508) were elided by this extraction.
499unsigned
500HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
501 unsigned Opc = Op.getOpcode();
502 switch (Opc) {
507 }
509}
510
// Build an INTRINSIC_WO_CHAIN node that invokes intrinsic IntId with the
// given operands, producing a value of type ResTy.
// NOTE(review): the return type (embedded line 511) and the declaration of
// IntOps (line 514) were elided by this extraction.
512HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
513 const SDLoc &dl, SelectionDAG &DAG) const {
// The intrinsic ID is always the first operand of INTRINSIC_WO_CHAIN.
515 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
516 append_range(IntOps, Ops);
517 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
518}
519
520MVT
521HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
522 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
523
524 MVT ElemTy = Tys.first.getVectorElementType();
525 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
526 Tys.second.getVectorNumElements());
527}
528
529HexagonTargetLowering::TypePair
530HexagonTargetLowering::typeSplit(MVT VecTy) const {
531 assert(VecTy.isVector());
532 unsigned NumElem = VecTy.getVectorNumElements();
533 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
534 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
535 return { HalfTy, HalfTy };
536}
537
538MVT
539HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
540 MVT ElemTy = VecTy.getVectorElementType();
541 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
542 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
543}
544
545MVT
546HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
547 MVT ElemTy = VecTy.getVectorElementType();
548 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
549 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
550}
551
// Bitcast Vec to a vector with the same total width but element type
// ElemTy; returns Vec unchanged when the element type already matches.
// NOTE(review): the return type line (embedded 552) was elided by this
// extraction; the surviving text is kept byte-identical.
553HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
554 SelectionDAG &DAG) const {
555 if (ty(Vec).getVectorElementType() == ElemTy)
556 return Vec;
557 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
558 return DAG.getBitcast(CastTy, Vec);
559}
560
// Concatenate the pair of vectors Ops into a single vector whose type is
// the join of the two operand types.
// NOTE(review): the return type line (embedded 561) was elided by this
// extraction.
562HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
563 SelectionDAG &DAG) const {
564 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
565 Ops.first, Ops.second);
566}
567
568HexagonTargetLowering::VectorPair
569HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
570 SelectionDAG &DAG) const {
571 TypePair Tys = typeSplit(ty(Vec));
572 if (Vec.getOpcode() == HexagonISD::QCAT)
573 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
574 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
575}
576
577bool
578HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
579 return Subtarget.isHVXVectorType(Ty) &&
580 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
581}
582
583bool
584HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
585 return Subtarget.isHVXVectorType(Ty) &&
586 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
587}
588
589bool
590HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
591 return Subtarget.isHVXVectorType(Ty, true) &&
592 Ty.getVectorElementType() == MVT::i1;
593}
594
595bool HexagonTargetLowering::allowsHvxMemoryAccess(
596 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
597 // Bool vectors are excluded by default, but make it explicit to
598 // emphasize that bool vectors cannot be loaded or stored.
599 // Also, disallow double vector stores (to prevent unnecessary
600 // store widening in DAG combiner).
601 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
602 return false;
603 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
604 return false;
605 if (Fast)
606 *Fast = 1;
607 return true;
608}
609
610bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
611 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
612 if (!Subtarget.isHVXVectorType(VecTy))
613 return false;
614 // XXX Should this be false? vmemu are a bit slower than vmem.
615 if (Fast)
616 *Fast = 1;
617 return true;
618}
619
// Expand the PS_vsplat* pseudo instructions into real splat instructions
// after instruction selection. On HVX v62+, byte/half splats exist
// natively (V6_lvsplatb/V6_lvsplath); on older targets the value is first
// replicated into a full 32-bit scalar and splatted with V6_lvsplatw.
// (The SDNode *Node parameter is not used by these expansions.)
620void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
621 MachineInstr &MI, SDNode *Node) const {
622 unsigned Opc = MI.getOpcode();
623 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
624 MachineBasicBlock &MB = *MI.getParent();
625 MachineFunction &MF = *MB.getParent();
626 MachineRegisterInfo &MRI = MF.getRegInfo();
627 DebugLoc DL = MI.getDebugLoc();
628 auto At = MI.getIterator();
629
630 switch (Opc) {
// Splat of an immediate byte.
631 case Hexagon::PS_vsplatib:
632 if (Subtarget.useHVXV62Ops()) {
633 // SplatV = A2_tfrsi #imm
634 // OutV = V6_lvsplatb SplatV
635 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
636 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
637 .add(MI.getOperand(1));
638 Register OutV = MI.getOperand(0).getReg();
639 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
640 .addReg(SplatV);
641 } else {
642 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
643 // OutV = V6_lvsplatw SplatV
644 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
645 const MachineOperand &InpOp = MI.getOperand(1);
646 assert(InpOp.isImm());
// Replicate the byte into all four byte lanes of a 32-bit immediate.
647 uint32_t V = InpOp.getImm() & 0xFF;
648 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
649 .addImm(V << 24 | V << 16 | V << 8 | V);
650 Register OutV = MI.getOperand(0).getReg();
651 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
652 }
// The pseudo itself is fully replaced; remove it.
653 MB.erase(At);
654 break;
// Splat of a byte held in a scalar register.
655 case Hexagon::PS_vsplatrb:
656 if (Subtarget.useHVXV62Ops()) {
657 // OutV = V6_lvsplatb Inp
658 Register OutV = MI.getOperand(0).getReg();
659 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
660 .add(MI.getOperand(1));
661 } else {
662 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
663 const MachineOperand &InpOp = MI.getOperand(1);
664 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
665 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
666 Register OutV = MI.getOperand(0).getReg();
667 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
668 .addReg(SplatV);
669 }
670 MB.erase(At);
671 break;
// Splat of an immediate halfword.
672 case Hexagon::PS_vsplatih:
673 if (Subtarget.useHVXV62Ops()) {
674 // SplatV = A2_tfrsi #imm
675 // OutV = V6_lvsplath SplatV
676 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
677 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
678 .add(MI.getOperand(1));
679 Register OutV = MI.getOperand(0).getReg();
680 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
681 .addReg(SplatV);
682 } else {
683 // SplatV = A2_tfrsi #imm:#imm
684 // OutV = V6_lvsplatw SplatV
685 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
686 const MachineOperand &InpOp = MI.getOperand(1);
687 assert(InpOp.isImm());
// Replicate the halfword into both half lanes of a 32-bit immediate.
688 uint32_t V = InpOp.getImm() & 0xFFFF;
689 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
690 .addImm(V << 16 | V);
691 Register OutV = MI.getOperand(0).getReg();
692 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
693 }
694 MB.erase(At);
695 break;
// Splat of a halfword held in a scalar register.
696 case Hexagon::PS_vsplatrh:
697 if (Subtarget.useHVXV62Ops()) {
698 // OutV = V6_lvsplath Inp
699 Register OutV = MI.getOperand(0).getReg();
700 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
701 .add(MI.getOperand(1));
702 } else {
703 // SplatV = A2_combine_ll Inp, Inp
704 // OutV = V6_lvsplatw SplatV
705 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
706 const MachineOperand &InpOp = MI.getOperand(1);
707 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
708 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
709 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
710 Register OutV = MI.getOperand(0).getReg();
711 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
712 }
713 MB.erase(At);
714 break;
// Word splats map directly onto V6_lvsplatw on every HVX version; the
// pseudo is retargeted in place rather than erased.
715 case Hexagon::PS_vsplatiw:
716 case Hexagon::PS_vsplatrw:
717 if (Opc == Hexagon::PS_vsplatiw) {
718 // SplatV = A2_tfrsi #imm
719 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
720 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
721 .add(MI.getOperand(1));
722 MI.getOperand(1).ChangeToRegister(SplatV, false);
723 }
724 // OutV = V6_lvsplatw SplatV/Inp
725 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
726 break;
727 }
728}
729
// Convert an element index into the corresponding byte index by shifting
// left by log2(element size in bytes).
// NOTE(review): the return type line (embedded 730) was elided by this
// extraction; the surviving text is kept byte-identical.
731HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
732 SelectionDAG &DAG) const {
733 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
734 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
735
// Byte elements need no scaling.
736 unsigned ElemWidth = ElemTy.getSizeInBits();
737 if (ElemWidth == 8)
738 return ElemIdx;
739
740 unsigned L = Log2_32(ElemWidth/8);
741 const SDLoc &dl(ElemIdx);
742 return DAG.getNode(ISD::SHL, dl, MVT::i32,
743 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
744}
745
// Reduce an element index to the sub-index of that element within its
// containing 32-bit word, i.e. Idx mod (32/ElemWidth).
// NOTE(review): the return type line (embedded 746) was elided by this
// extraction; the surviving text is kept byte-identical.
747HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
748 SelectionDAG &DAG) const {
749 unsigned ElemWidth = ElemTy.getSizeInBits();
750 assert(ElemWidth >= 8 && ElemWidth <= 32);
// A full word has exactly one element per word: the index is unchanged.
751 if (ElemWidth == 32)
752 return Idx;
753
754 if (ty(Idx) != MVT::i32)
755 Idx = DAG.getBitcast(MVT::i32, Idx);
756 const SDLoc &dl(Idx);
// (32/ElemWidth - 1) is the mask for the elements-per-word count, which
// is always a power of two here.
757 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
758 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
759 return SubIdx;
760}
761
// Perform the shuffle described by Mask (in units of OpTy elements) as a
// byte shuffle: each element-level mask entry is expanded into ElemSize
// consecutive byte-level entries, and the operands are bitcast to byte
// vectors. The result has byte element type.
// NOTE(review): the return type line (embedded 762) was elided by this
// extraction; the surviving text is kept byte-identical.
763HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
764 SDValue Op1, ArrayRef<int> Mask,
765 SelectionDAG &DAG) const {
766 MVT OpTy = ty(Op0);
767 assert(OpTy == ty(Op1));
768
// Already a byte vector: shuffle directly.
769 MVT ElemTy = OpTy.getVectorElementType();
770 if (ElemTy == MVT::i8)
771 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
772 assert(ElemTy.getSizeInBits() >= 8);
773
774 MVT ResTy = tyVector(OpTy, MVT::i8);
775 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
776
// Expand each element index into ElemSize consecutive byte indices;
// -1 (undef) entries expand to ElemSize undefs.
777 SmallVector<int,128> ByteMask;
778 for (int M : Mask) {
779 if (M < 0) {
780 for (unsigned I = 0; I != ElemSize; ++I)
781 ByteMask.push_back(-1);
782 } else {
783 int NewM = M*ElemSize;
784 for (unsigned I = 0; I != ElemSize; ++I)
785 ByteMask.push_back(NewM+I);
786 }
787 }
788 assert(ResTy.getVectorNumElements() == ByteMask.size());
789 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
790 opCastElem(Op1, MVT::i8, DAG), ByteMask);
791}
792
// Materialize the scalar Values as a single HVX vector register of type
// VecTy, trying progressively cheaper strategies: undef, zero, splat,
// constant pool load, shuffle of an existing vector, and finally pairwise
// word insertion.
// NOTE(review): this extraction elides the return type (embedded line
// 793), the declaration of `Words` (line 805), and the trailing load
// operands (line 865); the surviving text is kept byte-identical.
794HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
795 const SDLoc &dl, MVT VecTy,
796 SelectionDAG &DAG) const {
797 unsigned VecLen = Values.size();
798 MachineFunction &MF = DAG.getMachineFunction();
799 MVT ElemTy = VecTy.getVectorElementType();
800 unsigned ElemWidth = ElemTy.getSizeInBits();
801 unsigned HwLen = Subtarget.getVectorLength();
802
803 unsigned ElemSize = ElemWidth / 8;
804 assert(ElemSize*VecLen == HwLen);
806
// Pack sub-word elements into 32-bit words; word-sized elements (i32, or
// f32 when HVX FP is available) are bitcast to i32 directly.
807 if (VecTy.getVectorElementType() != MVT::i32 &&
808 !(Subtarget.useHVXFloatingPoint() &&
809 VecTy.getVectorElementType() == MVT::f32)) {
810 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
811 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
812 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
813 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
814 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
815 Words.push_back(DAG.getBitcast(MVT::i32, W));
816 }
817 } else {
818 for (SDValue V : Values)
819 Words.push_back(DAG.getBitcast(MVT::i32, V));
820 }
// True when all defined entries are the same value; SplatV receives that
// value (or Values[0] when everything is undef).
821 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
822 unsigned NumValues = Values.size();
823 assert(NumValues > 0);
824 bool IsUndef = true;
825 for (unsigned i = 0; i != NumValues; ++i) {
826 if (Values[i].isUndef())
827 continue;
828 IsUndef = false;
829 if (!SplatV.getNode())
830 SplatV = Values[i];
831 else if (SplatV != Values[i])
832 return false;
833 }
834 if (IsUndef)
835 SplatV = Values[0];
836 return true;
837 };
838
839 unsigned NumWords = Words.size();
840 SDValue SplatV;
841 bool IsSplat = isSplat(Words, SplatV);
842 if (IsSplat && isUndef(SplatV))
843 return DAG.getUNDEF(VecTy);
844 if (IsSplat) {
845 assert(SplatV.getNode());
846 if (isNullConstant(SplatV))
847 return getZero(dl, VecTy, DAG);
// Splat at word granularity, then bitcast back to the requested type.
848 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
849 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
850 return DAG.getBitcast(VecTy, S);
851 }
852
853 // Delay recognizing constant vectors until here, so that we can generate
854 // a vsplat.
855 SmallVector<ConstantInt*, 128> Consts(VecLen);
856 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
857 if (AllConst) {
858 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
859 (Constant**)Consts.end());
860 Constant *CV = ConstantVector::get(Tmp);
861 Align Alignment(HwLen);
862 SDValue CP =
863 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
864 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
866 }
867
868 // A special case is a situation where the vector is built entirely from
869 // elements extracted from another vector. This could be done via a shuffle
870 // more efficiently, but typically, the size of the source vector will not
871 // match the size of the vector being built (which precludes the use of a
872 // shuffle directly).
873 // This only handles a single source vector, and the vector being built
874 // should be of a sub-vector type of the source vector type.
875 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
876 SmallVectorImpl<int> &SrcIdx) {
877 SDValue Vec;
878 for (SDValue V : Values) {
879 if (isUndef(V)) {
880 SrcIdx.push_back(-1);
881 continue;
882 }
883 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
884 return false;
885 // All extracts should come from the same vector.
886 SDValue T = V.getOperand(0);
887 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
888 return false;
889 Vec = T;
// Only constant extract indices can be turned into a shuffle mask.
890 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
891 if (C == nullptr)
892 return false;
893 int I = C->getSExtValue();
894 assert(I >= 0 && "Negative element index");
895 SrcIdx.push_back(I);
896 }
897 SrcVec = Vec;
898 return true;
899 };
900
901 SmallVector<int,128> ExtIdx;
902 SDValue ExtVec;
903 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
904 MVT ExtTy = ty(ExtVec);
905 unsigned ExtLen = ExtTy.getVectorNumElements();
906 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
907 // Construct a new shuffle mask that will produce a vector with the same
908 // number of elements as the input vector, and such that the vector we
909 // want will be the initial subvector of it.
910 SmallVector<int,128> Mask;
911 BitVector Used(ExtLen);
912
913 for (int M : ExtIdx) {
914 Mask.push_back(M);
915 if (M >= 0)
916 Used.set(M);
917 }
918 // Fill the rest of the mask with the unused elements of ExtVec in hopes
919 // that it will result in a permutation of ExtVec's elements. It's still
920 // fine if it doesn't (e.g. if undefs are present, or elements are
921 // repeated), but permutations can always be done efficiently via vdelta
922 // and vrdelta.
923 for (unsigned I = 0; I != ExtLen; ++I) {
924 if (Mask.size() == ExtLen)
925 break;
926 if (!Used.test(I))
927 Mask.push_back(I);
928 }
929
930 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
931 DAG.getUNDEF(ExtTy), Mask);
932 return ExtLen == VecLen ? S : LoHalf(S, DAG);
933 }
934 }
935
936 // Find most common element to initialize vector with. This is to avoid
937 // unnecessary vinsert/valign for cases where the same value is present
938 // many times. Creates a histogram of the vector's elements to find the
939 // most common element n.
940 assert(4*Words.size() == Subtarget.getVectorLength());
941 int VecHist[32];
942 int n = 0;
943 for (unsigned i = 0; i != NumWords; ++i) {
944 VecHist[i] = 0;
945 if (Words[i].isUndef())
946 continue;
947 for (unsigned j = i; j != NumWords; ++j)
948 if (Words[i] == Words[j])
949 VecHist[i]++;
950
951 if (VecHist[i] > VecHist[n])
952 n = i;
953 }
954
// Seed both halves with the most common word pre-splatted (when it occurs
// more than once), so those positions need no explicit insertion below.
955 SDValue HalfV = getZero(dl, VecTy, DAG);
956 if (VecHist[n] > 1) {
957 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
958 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
959 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
960 }
961 SDValue HalfV0 = HalfV;
962 SDValue HalfV1 = HalfV;
963
964 // Construct two halves in parallel, then or them together. Rn and Rm count
965 // number of rotations needed before the next element. One last rotation is
966 // performed post-loop to position the last element.
967 int Rn = 0, Rm = 0;
968 SDValue Sn, Sm;
969 SDValue N = HalfV0;
970 SDValue M = HalfV1;
971 for (unsigned i = 0; i != NumWords/2; ++i) {
972 // Rotate by element count since last insertion.
973 if (Words[i] != Words[n] || VecHist[n] <= 1) {
974 Sn = DAG.getConstant(Rn, dl, MVT::i32);
975 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
976 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
977 {HalfV0, Words[i]});
978 Rn = 0;
979 }
980 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
981 Sm = DAG.getConstant(Rm, dl, MVT::i32);
982 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
983 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
984 {HalfV1, Words[i+NumWords/2]});
985 Rm = 0;
986 }
987 Rn += 4;
988 Rm += 4;
989 }
990 // Perform last rotation.
991 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
992 Sm = DAG.getConstant(Rm, dl, MVT::i32);
993 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
994 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
995
// OR the two disjoint halves together (as i32 vectors), then bitcast the
// result back to the requested element type.
996 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
997 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
998
999 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
1000
1001 SDValue OutV =
1002 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
1003 return OutV;
1004}
1005
// Produce a byte vector whose first BlockLen bytes encode the predicate
// PredV, with BitBytes bytes per predicate element (presumably -- confirm
// with callers); when ZeroFill is set, the bytes beyond that prefix are
// zeroed, otherwise they are unspecified.
1006SDValue
1007HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
1008 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
1009 MVT PredTy = ty(PredV);
1010 unsigned HwLen = Subtarget.getVectorLength();
1011 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1012
1013 if (Subtarget.isHVXVectorType(PredTy, true)) {
1014 // Move the vector predicate SubV to a vector register, and scale it
1015 // down to match the representation (bytes per type element) that VecV
1016 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1017 // in general) element and put them at the front of the resulting
1018 // vector. This subvector will then be inserted into the Q2V of VecV.
1019 // To avoid having an operation that generates an illegal type (short
1020 // vector), generate a full size vector.
1021 //
1022 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1023 SmallVector<int,128> Mask(HwLen);
1024 // Scale = BitBytes(PredV) / Given BitBytes.
1025 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1026 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1027
// Gather every Scale-th byte to the front: source byte i lands at
// position (BlockLen * (i % Scale) + i / Scale).
1028 for (unsigned i = 0; i != HwLen; ++i) {
1029 unsigned Num = i % Scale;
1030 unsigned Off = i / Scale;
1031 Mask[BlockLen*Num + Off] = i;
1032 }
1033 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1034 if (!ZeroFill)
1035 return S;
1036 // Fill the bytes beyond BlockLen with 0s.
1037 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1038 // when BlockLen < HwLen.
1039 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1040 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1041 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1042 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1043 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
1044 return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1045 }
1046
1047 // Make sure that this is a valid scalar predicate.
1048 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1049
// Bytes = byte stride per predicate element in the 64-bit P2D expansion.
1050 unsigned Bytes = 8 / PredTy.getVectorNumElements();
1051 SmallVector<SDValue,4> Words[2];
1052 unsigned IdxW = 0;
1053
// Expand the scalar predicate into a 64-bit value (one byte per bit),
// then process it as two 32-bit words.
1054 SDValue W0 = isUndef(PredV)
1055 ? DAG.getUNDEF(MVT::i64)
1056 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1057 Words[IdxW].push_back(HiHalf(W0, DAG));
1058 Words[IdxW].push_back(LoHalf(W0, DAG));
1059
// Repeatedly double the per-element byte width until it reaches BitBytes,
// ping-ponging between the two word lists.
1060 while (Bytes < BitBytes) {
1061 IdxW ^= 1;
1062 Words[IdxW].clear();
1063
1064 if (Bytes < 4) {
1065 for (const SDValue &W : Words[IdxW ^ 1]) {
1066 SDValue T = expandPredicate(W, dl, DAG);
1067 Words[IdxW].push_back(HiHalf(T, DAG));
1068 Words[IdxW].push_back(LoHalf(T, DAG));
1069 }
1070 } else {
// At word granularity, widening just duplicates each word.
1071 for (const SDValue &W : Words[IdxW ^ 1]) {
1072 Words[IdxW].push_back(W);
1073 Words[IdxW].push_back(W);
1074 }
1075 }
1076 Bytes *= 2;
1077 }
1078
1079 assert(Bytes == BitBytes);
1080
// Insert the words into the vector front-to-back via rotate + insert-w0.
1081 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1082 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1083 for (const SDValue &W : Words[IdxW]) {
1084 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1085 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1086 }
1087
1088 return Vec;
1089}
1090
1091SDValue
1092HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1093 const SDLoc &dl, MVT VecTy,
1094 SelectionDAG &DAG) const {
1095 // Construct a vector V of bytes, such that a comparison V >u 0 would
1096 // produce the required vector predicate.
1097 unsigned VecLen = Values.size();
1098 unsigned HwLen = Subtarget.getVectorLength();
1099 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1101 bool AllT = true, AllF = true;
1102
1103 auto IsTrue = [] (SDValue V) {
1104 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1105 return !N->isZero();
1106 return false;
1107 };
1108 auto IsFalse = [] (SDValue V) {
1109 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1110 return N->isZero();
1111 return false;
1112 };
1113
1114 if (VecLen <= HwLen) {
1115 // In the hardware, each bit of a vector predicate corresponds to a byte
1116 // of a vector register. Calculate how many bytes does a bit of VecTy
1117 // correspond to.
1118 assert(HwLen % VecLen == 0);
1119 unsigned BitBytes = HwLen / VecLen;
1120 for (SDValue V : Values) {
1121 AllT &= IsTrue(V);
1122 AllF &= IsFalse(V);
1123
1124 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1125 : DAG.getUNDEF(MVT::i8);
1126 for (unsigned B = 0; B != BitBytes; ++B)
1127 Bytes.push_back(Ext);
1128 }
1129 } else {
1130 // There are as many i1 values, as there are bits in a vector register.
1131 // Divide the values into groups of 8 and check that each group consists
1132 // of the same value (ignoring undefs).
1133 for (unsigned I = 0; I != VecLen; I += 8) {
1134 unsigned B = 0;
1135 // Find the first non-undef value in this group.
1136 for (; B != 8; ++B) {
1137 if (!Values[I+B].isUndef())
1138 break;
1139 }
1140 SDValue F = Values[I+B];
1141 AllT &= IsTrue(F);
1142 AllF &= IsFalse(F);
1143
1144 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1145 : DAG.getUNDEF(MVT::i8);
1146 Bytes.push_back(Ext);
1147 // Verify that the rest of values in the group are the same as the
1148 // first.
1149 for (; B != 8; ++B)
1150 assert(Values[I+B].isUndef() || Values[I+B] == F);
1151 }
1152 }
1153
1154 if (AllT)
1155 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1156 if (AllF)
1157 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1158
1159 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1160 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1161 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1162}
1163
1164SDValue
1165HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1166 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1167 MVT ElemTy = ty(VecV).getVectorElementType();
1168
1169 unsigned ElemWidth = ElemTy.getSizeInBits();
1170 assert(ElemWidth >= 8 && ElemWidth <= 32);
1171 (void)ElemWidth;
1172
1173 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1174 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1175 {VecV, ByteIdx});
1176 if (ElemTy == MVT::i32)
1177 return ExWord;
1178
1179 // Have an extracted word, need to extract the smaller element out of it.
1180 // 1. Extract the bits of (the original) IdxV that correspond to the index
1181 // of the desired element in the 32-bit word.
1182 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1183 // 2. Extract the element from the word.
1184 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1185 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1186}
1187
1188SDValue
1189HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1190 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1191 // Implement other return types if necessary.
1192 assert(ResTy == MVT::i1);
1193
1194 unsigned HwLen = Subtarget.getVectorLength();
1195 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1196 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1197
1198 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1199 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1200 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1201
1202 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1203 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1204 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1205}
1206
// Insert the scalar ValV into the HVX vector register VecV at element
// position IdxV. Elements of 8, 16, and 32 bits are supported.
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert the 32-bit word ValV at the word containing byte ByteIdxV:
  // rotate that word down to position 0, insert via VINSERTW0, then
  // rotate back.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Align the byte index down to a word boundary (clear low 2 bits).
    SDValue MaskV =
        DAG.getNode(ISD::AND, dl, MVT::i32,
                    {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
    SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
    SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
    // Rotate by HwLen-offset to undo the initial rotation.
    SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
                                     dl, MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(ty(Ext), ElemTy);
  SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
                             ValV, SubIdx, dl, ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}
1252
1253SDValue
1254HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1255 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1256 unsigned HwLen = Subtarget.getVectorLength();
1257 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1258 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1259
1260 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1261 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1262 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1263 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1264
1265 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1266 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1267}
1268
// Extract a subvector (of type ResTy) from an HVX vector or vector pair
// VecV, starting at the constant element index IdxV. Subvectors of a
// single HVX vector must fit in a scalar register (32 or 64 bits).
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
      SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
  if (isHvxPairTy(VecTy)) {
    unsigned SubIdx = Hexagon::vsub_lo;
    // The high half starts at bit 8*HwLen of the pair.
    if (Idx * ElemWidth >= 8 * HwLen) {
      SubIdx = Hexagon::vsub_hi;
      Idx -= VecTy.getVectorNumElements() / 2;
    }

    VecTy = typeSplit(VecTy).first;
    VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
    if (VecTy == ResTy)
      return VecV;
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);

  // View the vector as a vector of i32 words and locate the word(s) that
  // cover the requested subvector.
  MVT WordTy = tyVector(VecTy, MVT::i32);
  SDValue WordVec = DAG.getBitcast(WordTy, VecV);
  unsigned WordIdx = (Idx*ElemWidth) / 32;

  SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
  SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
  if (ResTy.getSizeInBits() == 32)
    return DAG.getBitcast(ResTy, W0);

  // 64-bit result: extract the following word as well and combine the two
  // into a register pair (W1 is the high word).
  SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
  SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
  SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
  return DAG.getBitcast(ResTy, WW);
}
1312
// Extract a subvector of the HVX predicate VecV starting at the constant
// element index IdxV. ResTy is either a shorter HVX predicate type, or a
// scalar predicate type (produced via a byte comparison at the end).
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  // Work on the byte image of the predicate.
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes of the byte image per single bit of the source predicate.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  // Byte offset of the first bit of interest.
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      // Replicate each selected source byte Rep times.
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
  SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
                  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
}
1376
// Insert the subvector SubV into the vector (or vector pair) VecV at
// element index IdxV (not necessarily constant). SubV is either a single
// HVX vector inserted into a pair, or a 32-/64-bit subvector inserted
// into a single vector.
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(VecTy);
  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(VecV, DAG);
    V1 = HiHalf(VecV, DAG);

    SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
                                    dl, MVT::i32);
    // NOTE(review): with SETUGT a (non-constant) index equal to HalfV
    // selects the low half, while the constant path below maps
    // Idx == NumElems/2 to vsub_hi — confirm the strict comparison is
    // intended here.
    PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
    if (isHvxSingleTy(SubTy)) {
      // Inserting a whole single vector into a pair: it replaces either
      // the low or the high half.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
      SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
      return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
    IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
    SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion position down to byte 0 of SingleV.
    IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                       DAG.getConstant(ElemWidth/8, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    // 32-bit subvector: a single word inserted at byte 0.
    SDValue V = DAG.getBitcast(MVT::i32, SubV);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
  } else {
    // 64-bit subvector: insert the low word, rotate by 4 bytes, then
    // insert the high word.
    SDValue V = DAG.getBitcast(MVT::i64, SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
                          DAG.getConstant(4, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  // NOTE(review): RolBase is either HwLen or HwLen-4, so "RolBase != 4" is
  // always true and the rotation below is always emitted; the guard looks
  // like it was meant to skip a no-op rotation — confirm intended condition.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
  }

  if (IsPair) {
    // Reassemble the pair with the updated half in the right position.
    SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
    SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
    return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
  }
  return SingleV;
}
1463
// Insert the predicate SubV (an HVX predicate or a scalar predicate) into
// the HVX predicate VecV at element index IdxV (not necessarily constant).
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale: how many times SubTy fits in VecTy.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // Bytes of the byte image per bit of VecV.
  unsigned BitBytes = HwLen / VecLen;
  // Bytes of the byte image covered by the inserted subvector.
  unsigned BlockLen = HwLen / Scale;

  // Perform the insertion on the byte images. ByteSub has the subvector's
  // bytes at the front of a full-size vector.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to byte 0.
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1509
1510SDValue
1511HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1512 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1513 // Sign- and any-extending of a vector predicate to a vector register is
1514 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1515 // a vector of 1s (where the 1s are of type matching the vector type).
1516 assert(Subtarget.isHVXVectorType(ResTy));
1517 if (!ZeroExt)
1518 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1519
1520 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1521 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1522 DAG.getConstant(1, dl, MVT::i32));
1523 SDValue False = getZero(dl, ResTy, DAG);
1524 return DAG.getSelect(dl, ResTy, VecV, True, False);
1525}
1526
1527SDValue
1528HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1529 MVT ResTy, SelectionDAG &DAG) const {
1530 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1531 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1532 // vector register. The remaining bits of the vector register are
1533 // unspecified.
1534
1535 MachineFunction &MF = DAG.getMachineFunction();
1536 unsigned HwLen = Subtarget.getVectorLength();
1537 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1538 MVT PredTy = ty(VecQ);
1539 unsigned PredLen = PredTy.getVectorNumElements();
1540 assert(HwLen % PredLen == 0);
1541 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1542
1543 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1545 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1546 // These are bytes with the LSB rotated left with respect to their index.
1547 for (unsigned i = 0; i != HwLen/8; ++i) {
1548 for (unsigned j = 0; j != 8; ++j)
1549 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1550 }
1551 Constant *CV = ConstantVector::get(Tmp);
1552 Align Alignment(HwLen);
1553 SDValue CP =
1554 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1555 SDValue Bytes =
1556 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1558
1559 // Select the bytes that correspond to true bits in the vector predicate.
1560 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1561 getZero(dl, VecTy, DAG));
1562 // Calculate the OR of all bytes in each group of 8. That will compress
1563 // all the individual bits into a single byte.
1564 // First, OR groups of 4, via vrmpy with 0x01010101.
1565 SDValue All1 =
1566 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1567 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1568 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1569 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1570 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1571 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1572
1573 // Pick every 8th byte and coalesce them at the beginning of the output.
1574 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1575 // byte and so on.
1576 SmallVector<int,128> Mask;
1577 for (unsigned i = 0; i != HwLen; ++i)
1578 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1579 SDValue Collect =
1580 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1581 return DAG.getBitcast(ResTy, Collect);
1582}
1583
1584SDValue
1585HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1586 const SDLoc &dl, SelectionDAG &DAG) const {
1587 // Take a vector and resize the element type to match the given type.
1588 MVT InpTy = ty(VecV);
1589 if (InpTy == ResTy)
1590 return VecV;
1591
1592 unsigned InpWidth = InpTy.getSizeInBits();
1593 unsigned ResWidth = ResTy.getSizeInBits();
1594
1595 if (InpTy.isFloatingPoint()) {
1596 return InpWidth < ResWidth
1597 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1598 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1599 DAG.getTargetConstant(0, dl, MVT::i32));
1600 }
1601
1602 assert(InpTy.isInteger());
1603
1604 if (InpWidth < ResWidth) {
1605 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1606 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1607 } else {
1608 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1609 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1610 }
1611}
1612
1613SDValue
1614HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1615 SelectionDAG &DAG) const {
1616 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1617
1618 const SDLoc &dl(Vec);
1619 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1620 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1621 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1622}
1623
1624SDValue
1625HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1626 const {
1627 const SDLoc &dl(Op);
1628 MVT VecTy = ty(Op);
1629
1630 unsigned Size = Op.getNumOperands();
1632 for (unsigned i = 0; i != Size; ++i)
1633 Ops.push_back(Op.getOperand(i));
1634
1635 if (VecTy.getVectorElementType() == MVT::i1)
1636 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1637
1638 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1639 // not a legal type, just bitcast the node to use i16
1640 // types and bitcast the result back to f16
1641 if (VecTy.getVectorElementType() == MVT::f16) {
1643 for (unsigned i = 0; i != Size; i++)
1644 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1645
1646 SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1647 tyVector(VecTy, MVT::i16), NewOps);
1648 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1649 }
1650
1651 // First, split the BUILD_VECTOR for vector pairs. We could generate
1652 // some pairs directly (via splat), but splats should be generated
1653 // by the combiner prior to getting here.
1654 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1656 MVT SingleTy = typeSplit(VecTy).first;
1657 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1658 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1659 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1660 }
1661
1662 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1663}
1664
1665SDValue
1666HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1667 const {
1668 const SDLoc &dl(Op);
1669 MVT VecTy = ty(Op);
1670 MVT ArgTy = ty(Op.getOperand(0));
1671
1672 if (ArgTy == MVT::f16) {
1673 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1674 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1675 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1676 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1677 return DAG.getBitcast(VecTy, Splat);
1678 }
1679
1680 return SDValue();
1681}
1682
1683SDValue
1684HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1685 const {
1686 // Vector concatenation of two integer (non-bool) vectors does not need
1687 // special lowering. Custom-lower concats of bool vectors and expand
1688 // concats of more than 2 vectors.
1689 MVT VecTy = ty(Op);
1690 const SDLoc &dl(Op);
1691 unsigned NumOp = Op.getNumOperands();
1692 if (VecTy.getVectorElementType() != MVT::i1) {
1693 if (NumOp == 2)
1694 return Op;
1695 // Expand the other cases into a build-vector.
1697 for (SDValue V : Op.getNode()->ops())
1698 DAG.ExtractVectorElements(V, Elems);
1699 // A vector of i16 will be broken up into a build_vector of i16's.
1700 // This is a problem, since at the time of operation legalization,
1701 // all operations are expected to be type-legalized, and i16 is not
1702 // a legal type. If any of the extracted elements is not of a valid
1703 // type, sign-extend it to a valid one.
1704 for (SDValue &V : Elems) {
1705 MVT Ty = ty(V);
1706 if (!isTypeLegal(Ty)) {
1707 MVT NTy = typeLegalize(Ty, DAG);
1708 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1709 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1710 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1711 V.getOperand(0), V.getOperand(1)),
1712 DAG.getValueType(Ty));
1713 continue;
1714 }
1715 // A few less complicated cases.
1716 switch (V.getOpcode()) {
1717 case ISD::Constant:
1718 V = DAG.getSExtOrTrunc(V, dl, NTy);
1719 break;
1720 case ISD::UNDEF:
1721 V = DAG.getUNDEF(NTy);
1722 break;
1723 case ISD::TRUNCATE:
1724 V = V.getOperand(0);
1725 break;
1726 default:
1727 llvm_unreachable("Unexpected vector element");
1728 }
1729 }
1730 }
1731 return DAG.getBuildVector(VecTy, dl, Elems);
1732 }
1733
1734 assert(VecTy.getVectorElementType() == MVT::i1);
1735 unsigned HwLen = Subtarget.getVectorLength();
1736 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1737
1738 SDValue Op0 = Op.getOperand(0);
1739
1740 // If the operands are HVX types (i.e. not scalar predicates), then
1741 // defer the concatenation, and create QCAT instead.
1742 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1743 if (NumOp == 2)
1744 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1745
1746 ArrayRef<SDUse> U(Op.getNode()->ops());
1749
1750 MVT HalfTy = typeSplit(VecTy).first;
1751 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1752 Ops.take_front(NumOp/2));
1753 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1754 Ops.take_back(NumOp/2));
1755 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1756 }
1757
1758 // Count how many bytes (in a vector register) each bit in VecTy
1759 // corresponds to.
1760 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1761
1762 SmallVector<SDValue,8> Prefixes;
1763 for (SDValue V : Op.getNode()->op_values()) {
1764 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1765 Prefixes.push_back(P);
1766 }
1767
1768 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1769 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1770 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1771 SDValue Res = getZero(dl, ByteTy, DAG);
1772 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1773 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1774 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1775 }
1776 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1777}
1778
1779SDValue
1780HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1781 const {
1782 // Change the type of the extracted element to i32.
1783 SDValue VecV = Op.getOperand(0);
1784 MVT ElemTy = ty(VecV).getVectorElementType();
1785 const SDLoc &dl(Op);
1786 SDValue IdxV = Op.getOperand(1);
1787 if (ElemTy == MVT::i1)
1788 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1789
1790 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1791}
1792
1793SDValue
1794HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1795 const {
1796 const SDLoc &dl(Op);
1797 MVT VecTy = ty(Op);
1798 SDValue VecV = Op.getOperand(0);
1799 SDValue ValV = Op.getOperand(1);
1800 SDValue IdxV = Op.getOperand(2);
1801 MVT ElemTy = ty(VecV).getVectorElementType();
1802 if (ElemTy == MVT::i1)
1803 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1804
1805 if (ElemTy == MVT::f16) {
1807 tyVector(VecTy, MVT::i16),
1808 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1809 DAG.getBitcast(MVT::i16, ValV), IdxV);
1810 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1811 }
1812
1813 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1814}
1815
1816SDValue
1817HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1818 const {
1819 SDValue SrcV = Op.getOperand(0);
1820 MVT SrcTy = ty(SrcV);
1821 MVT DstTy = ty(Op);
1822 SDValue IdxV = Op.getOperand(1);
1823 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1824 assert(Idx % DstTy.getVectorNumElements() == 0);
1825 (void)Idx;
1826 const SDLoc &dl(Op);
1827
1828 MVT ElemTy = SrcTy.getVectorElementType();
1829 if (ElemTy == MVT::i1)
1830 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1831
1832 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1833}
1834
1835SDValue
1836HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1837 const {
1838 // Idx does not need to be a constant.
1839 SDValue VecV = Op.getOperand(0);
1840 SDValue ValV = Op.getOperand(1);
1841 SDValue IdxV = Op.getOperand(2);
1842
1843 const SDLoc &dl(Op);
1844 MVT VecTy = ty(VecV);
1845 MVT ElemTy = VecTy.getVectorElementType();
1846 if (ElemTy == MVT::i1)
1847 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1848
1849 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1850}
1851
1852SDValue
1853HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1854 // Lower any-extends of boolean vectors to sign-extends, since they
1855 // translate directly to Q2V. Zero-extending could also be done equally
1856 // fast, but Q2V is used/recognized in more places.
1857 // For all other vectors, use zero-extend.
1858 MVT ResTy = ty(Op);
1859 SDValue InpV = Op.getOperand(0);
1860 MVT ElemTy = ty(InpV).getVectorElementType();
1861 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1862 return LowerHvxSignExt(Op, DAG);
1863 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1864}
1865
1866SDValue
1867HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1868 MVT ResTy = ty(Op);
1869 SDValue InpV = Op.getOperand(0);
1870 MVT ElemTy = ty(InpV).getVectorElementType();
1871 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1872 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1873 return Op;
1874}
1875
1876SDValue
1877HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1878 MVT ResTy = ty(Op);
1879 SDValue InpV = Op.getOperand(0);
1880 MVT ElemTy = ty(InpV).getVectorElementType();
1881 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1882 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1883 return Op;
1884}
1885
1886SDValue
1887HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1888 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1889 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1890 const SDLoc &dl(Op);
1891 MVT ResTy = ty(Op);
1892 SDValue InpV = Op.getOperand(0);
1893 assert(ResTy == ty(InpV));
1894
1895 // Calculate the vectors of 1 and bitwidth(x).
1896 MVT ElemTy = ty(InpV).getVectorElementType();
1897 unsigned ElemWidth = ElemTy.getSizeInBits();
1898
1899 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1900 DAG.getConstant(1, dl, MVT::i32));
1901 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1902 DAG.getConstant(ElemWidth, dl, MVT::i32));
1903 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1904 DAG.getAllOnesConstant(dl, MVT::i32));
1905
1906 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1907 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1908 // it separately in custom combine or selection).
1909 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1910 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1911 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1912 return DAG.getNode(ISD::SUB, dl, ResTy,
1913 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1914}
1915
1916SDValue
1917HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1918 const SDLoc &dl(Op);
1919 MVT ResTy = ty(Op);
1920 assert(ResTy.getVectorElementType() == MVT::i32);
1921
1922 SDValue Vs = Op.getOperand(0);
1923 SDValue Vt = Op.getOperand(1);
1924
1925 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1926 unsigned Opc = Op.getOpcode();
1927
1928 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1929 if (Opc == ISD::MULHU)
1930 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1931 if (Opc == ISD::MULHS)
1932 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1933
1934#ifndef NDEBUG
1935 Op.dump(&DAG);
1936#endif
1937 llvm_unreachable("Unexpected mulh operation");
1938}
1939
1940SDValue
1941HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
1942 const SDLoc &dl(Op);
1943 unsigned Opc = Op.getOpcode();
1944 SDValue Vu = Op.getOperand(0);
1945 SDValue Vv = Op.getOperand(1);
1946
1947 // If the HI part is not used, convert it to a regular MUL.
1948 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
1949 // Need to preserve the types and the number of values.
1950 SDValue Hi = DAG.getUNDEF(ty(HiVal));
1951 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
1952 return DAG.getMergeValues({Lo, Hi}, dl);
1953 }
1954
1955 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
1956 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
1957
1958 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
1959 // valued nodes.
1960 if (Subtarget.useHVXV62Ops())
1961 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1962
1963 if (Opc == HexagonISD::SMUL_LOHI) {
1964 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
1965 // for other signedness LOHI is cheaper.
1966 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
1967 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
1968 SDValue Lo = DAG.getUNDEF(ty(LoVal));
1969 return DAG.getMergeValues({Lo, Hi}, dl);
1970 }
1971 }
1972
1973 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1974}
1975
1976SDValue
1977HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1978 SDValue Val = Op.getOperand(0);
1979 MVT ResTy = ty(Op);
1980 MVT ValTy = ty(Val);
1981 const SDLoc &dl(Op);
1982
1983 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
1984 unsigned HwLen = Subtarget.getVectorLength();
1985 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
1986 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
1987 unsigned BitWidth = ResTy.getSizeInBits();
1988
1989 if (BitWidth < 64) {
1990 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
1991 dl, MVT::i32, DAG);
1992 if (BitWidth == 32)
1993 return W0;
1994 assert(BitWidth < 32u);
1995 return DAG.getZExtOrTrunc(W0, dl, ResTy);
1996 }
1997
1998 // The result is >= 64 bits. The only options are 64 or 128.
1999 assert(BitWidth == 64 || BitWidth == 128);
2001 for (unsigned i = 0; i != BitWidth/32; ++i) {
2002 SDValue W = extractHvxElementReg(
2003 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2004 Words.push_back(W);
2005 }
2006 SmallVector<SDValue,2> Combines;
2007 assert(Words.size() % 2 == 0);
2008 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2009 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2010 Combines.push_back(C);
2011 }
2012
2013 if (BitWidth == 64)
2014 return Combines[0];
2015
2016 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2017 }
2018
2019 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2020 // Splat the input into a 32-element i32 vector, then AND each element
2021 // with a unique bitmask to isolate individual bits.
2022 if (ResTy == MVT::v32i1 &&
2023 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2024 Subtarget.useHVX128BOps()) {
2025 SDValue Val32 = Val;
2026 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2027 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2028
2029 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2030 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2032 for (unsigned i = 0; i < 32; ++i)
2033 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2034
2035 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2036 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2037 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
2038 }
2039
2040 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2041 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2042 unsigned BitWidth = ValTy.getSizeInBits();
2043 unsigned HwLen = Subtarget.getVectorLength();
2044 assert(BitWidth == HwLen);
2045
2046 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2047 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2048 // Splat each byte of Val 8 times.
2049 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2050 // where b0, b1,..., b15 are least to most significant bytes of I.
2052 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2053 // These are bytes with the LSB rotated left with respect to their index.
2055 for (unsigned I = 0; I != HwLen / 8; ++I) {
2056 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2057 SDValue Byte =
2058 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2059 for (unsigned J = 0; J != 8; ++J) {
2060 Bytes.push_back(Byte);
2061 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2062 }
2063 }
2064
2065 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2066 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2067 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2068
2069 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2070 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2071 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2072 }
2073
2074 return Op;
2075}
2076
2077SDValue
2078HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2079 // Sign- and zero-extends are legal.
2080 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2081 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2082 Op.getOperand(0));
2083}
2084
2085SDValue
2086HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2087 MVT ResTy = ty(Op);
2088 if (ResTy.getVectorElementType() != MVT::i1)
2089 return Op;
2090
2091 const SDLoc &dl(Op);
2092 unsigned HwLen = Subtarget.getVectorLength();
2093 unsigned VecLen = ResTy.getVectorNumElements();
2094 assert(HwLen % VecLen == 0);
2095 unsigned ElemSize = HwLen / VecLen;
2096
2097 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2098 SDValue S =
2099 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2100 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2101 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2102 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2103}
2104
2105SDValue
2106HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2107 if (SDValue S = getVectorShiftByInt(Op, DAG))
2108 return S;
2109 return Op;
2110}
2111
2112SDValue
2113HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2114 SelectionDAG &DAG) const {
2115 unsigned Opc = Op.getOpcode();
2116 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2117
2118 // Make sure the shift amount is within the range of the bitwidth
2119 // of the element type.
2120 SDValue A = Op.getOperand(0);
2121 SDValue B = Op.getOperand(1);
2122 SDValue S = Op.getOperand(2);
2123
2124 MVT InpTy = ty(A);
2125 MVT ElemTy = InpTy.getVectorElementType();
2126
2127 const SDLoc &dl(Op);
2128 unsigned ElemWidth = ElemTy.getSizeInBits();
2129 bool IsLeft = Opc == ISD::FSHL;
2130
2131 // The expansion into regular shifts produces worse code for i8 and for
2132 // right shift of i32 on v65+.
2133 bool UseShifts = ElemTy != MVT::i8;
2134 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2135 UseShifts = false;
2136
2137 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2138 // If this is a funnel shift by a scalar, lower it into regular shifts.
2139 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2140 SDValue ModS =
2141 DAG.getNode(ISD::AND, dl, MVT::i32,
2142 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2143 SDValue NegS =
2144 DAG.getNode(ISD::SUB, dl, MVT::i32,
2145 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2146 SDValue IsZero =
2147 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2148 // FSHL A, B => A << | B >>n
2149 // FSHR A, B => A <<n | B >>
2150 SDValue Part1 =
2151 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2152 SDValue Part2 =
2153 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2154 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2155 // If the shift amount was 0, pick A or B, depending on the direction.
2156 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2157 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2158 }
2159
2161 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2162
2163 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2164 return DAG.getNode(MOpc, dl, ty(Op),
2165 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2166}
2167
2168SDValue
2169HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2170 const SDLoc &dl(Op);
2171 unsigned IntNo = Op.getConstantOperandVal(0);
2172 SmallVector<SDValue> Ops(Op->ops());
2173
2174 auto Swap = [&](SDValue P) {
2175 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2176 };
2177
2178 switch (IntNo) {
2179 case Intrinsic::hexagon_V6_pred_typecast:
2180 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2181 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2182 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2183 if (ResTy == InpTy)
2184 return Ops[1];
2185 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2186 }
2187 break;
2188 }
2189 case Intrinsic::hexagon_V6_vmpyss_parts:
2190 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2191 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2192 {Ops[1], Ops[2]}));
2193 case Intrinsic::hexagon_V6_vmpyuu_parts:
2194 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2195 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2196 {Ops[1], Ops[2]}));
2197 case Intrinsic::hexagon_V6_vmpyus_parts:
2198 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2199 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2200 {Ops[1], Ops[2]}));
2201 }
2202 } // switch
2203
2204 return Op;
2205}
2206
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower masked loads (MLOAD) and masked stores (MSTORE) of HVX vectors.
  // A masked load becomes an unmasked full-vector load followed by a
  // VSELECT with the pass-through value; a masked store uses the predicated
  // store instruction, with an aligning sequence for under-aligned cases.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  // Memory operand covering one full HVX vector at the base address.
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    // With an undef pass-through the plain load is already the answer.
    if (isUndef(Thru))
      return Load;
    // Blend loaded lanes with the pass-through under the mask; keep the
    // load's chain as the second result.
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  // Fully vector-aligned: a single predicated store suffices.
  if (MaskN->getAlign().value() % HwLen == 0) {
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case.
  // StoreAlign rotates V (paired with zeros) by the low bits of the
  // address A, producing the two aligned vectors the unaligned value
  // straddles.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  // Rotate both the mask (as a byte vector, via Q2V/V2Q) and the value,
  // then emit two predicated stores at offsets 0 and HwLen.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  // Join the chains of the two stores.
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
2276
2277SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2278 SelectionDAG &DAG) const {
2279 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2280 // is legal (done via a pattern).
2281 assert(Subtarget.useHVXQFloatOps());
2282
2283 assert(Op->getOpcode() == ISD::FP_EXTEND);
2284
2285 MVT VecTy = ty(Op);
2286 MVT ArgTy = ty(Op.getOperand(0));
2287 const SDLoc &dl(Op);
2288 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2289
2290 SDValue F16Vec = Op.getOperand(0);
2291
2292 APFloat FloatVal = APFloat(1.0f);
2293 bool Ignored;
2295 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2296 SDValue VmpyVec =
2297 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2298
2299 MVT HalfTy = typeSplit(VecTy).first;
2300 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2301 SDValue LoVec =
2302 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2303 SDValue HiVec =
2304 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2305
2306 SDValue ShuffVec =
2307 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2308 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2309
2310 return ShuffVec;
2311}
2312
2313SDValue
2314HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2315 // Catch invalid conversion ops (just in case).
2316 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2317 Op.getOpcode() == ISD::FP_TO_UINT);
2318
2319 MVT ResTy = ty(Op);
2320 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2321 MVT IntTy = ResTy.getVectorElementType();
2322
2323 if (Subtarget.useHVXIEEEFPOps()) {
2324 // There are only conversions from f16.
2325 if (FpTy == MVT::f16) {
2326 // Other int types aren't legal in HVX, so we shouldn't see them here.
2327 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2328 // Conversions to i8 and i16 are legal.
2329 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2330 return Op;
2331 }
2332 }
2333
2334 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2335 return EqualizeFpIntConversion(Op, DAG);
2336
2337 return ExpandHvxFpToInt(Op, DAG);
2338}
2339
// For vector type v32i1 uint_to_fp to v32f32:
// R1 = #1, R2 holds the v32i1 param
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V2 = vand(V2,V1)
// V2.sf = V2.w
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // Splat the constant 1 into a word vector (V1 above).
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Q0 = vand(V1,R1): an all-true predicate used to drive the prefix sum.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Prefix sum yields 1,2,3,... per lane.
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Splat the 32-bit predicate word into every lane (V2 above).
  SDNode *SplatParam = DAG.getMachineNode(
      Hexagon::V6_lvsplatw, dl, MVT::v32i32,
      DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
  // Subtract 1 so each lane holds its own index 0,1,2,...
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Shift lane i's copy of the word right by i, bringing bit i to the LSB.
  SDNode *IndexShift =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatParam, 0), SDValue(Vsub, 0));
  // Mask to the LSB, leaving 0 or 1 per lane.
  SDNode *MaskOff =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift, 0), SDValue(SplatConst, 0));
  // Convert the 0/1 words to single-precision float.
  SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
                                       SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2381
// For vector type v64i1 uint_to_fp to v64f16:
// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
// R3 = subreg_high (R32)
// R2 = subreg_low (R32)
// R1 = #1
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// V3 = vsplat(R3)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V3.w = vlsr(V3.w,V0.w)
// V2 = vand(V2,V1)
// V3 = vand(V3,V1)
// V2.h = vpacke(V3.w,V2.w)
// V2.hf = V2.h
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // View the 64-bit predicate as an i64 and split it into two 32-bit
  // halves; each half is processed like in LowerHvxPred32ToFp.
  SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
  // vand between splatted const and const: an all-true predicate to drive
  // the prefix sum.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Get the prefixsum (1,2,3,... per lane)
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Get the vsub: turn the prefix sum into per-lane indices 0,1,2,...
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Get vlsr for hi and lo: bring each lane's bit to the LSB
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatHi, 0), SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatLo, 0), SDValue(Vsub, 0));
  // Get vand of hi and lo: isolate the LSB, leaving 0 or 1 per lane
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
  // Pack the two word vectors into one halfword vector
  SDNode *Pack =
      DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
                         SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
  // Convert the 0/1 halfwords to half-precision float
  SDNode *Convert =
      DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2456
2457SDValue
2458HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2459 // Catch invalid conversion ops (just in case).
2460 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2461 Op.getOpcode() == ISD::UINT_TO_FP);
2462
2463 MVT ResTy = ty(Op);
2464 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2465 MVT FpTy = ResTy.getVectorElementType();
2466
2467 if (Op.getOpcode() == ISD::UINT_TO_FP) {
2468 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2469 return LowerHvxPred32ToFp(Op, DAG);
2470 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2471 return LowerHvxPred64ToFp(Op, DAG);
2472 }
2473
2474 if (Subtarget.useHVXIEEEFPOps()) {
2475 // There are only conversions to f16.
2476 if (FpTy == MVT::f16) {
2477 // Other int types aren't legal in HVX, so we shouldn't see them here.
2478 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2479 // i8, i16 -> f16 is legal.
2480 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2481 return Op;
2482 }
2483 }
2484
2485 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2486 return EqualizeFpIntConversion(Op, DAG);
2487
2488 return ExpandHvxIntToFp(Op, DAG);
2489}
2490
2491HexagonTargetLowering::TypePair
2492HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2493 // Compare the widths of elements of the two types, and extend the narrower
2494 // type to match the with of the wider type. For vector types, apply this
2495 // to the element type.
2496 assert(Ty0.isVector() == Ty1.isVector());
2497
2498 MVT ElemTy0 = Ty0.getScalarType();
2499 MVT ElemTy1 = Ty1.getScalarType();
2500
2501 unsigned Width0 = ElemTy0.getSizeInBits();
2502 unsigned Width1 = ElemTy1.getSizeInBits();
2503 unsigned MaxWidth = std::max(Width0, Width1);
2504
2505 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2506 if (ScalarTy.isInteger())
2507 return MVT::getIntegerVT(Width);
2508 assert(ScalarTy.isFloatingPoint());
2509 return MVT::getFloatingPointVT(Width);
2510 };
2511
2512 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2513 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2514
2515 if (!Ty0.isVector()) {
2516 // Both types are scalars.
2517 return {WideETy0, WideETy1};
2518 }
2519
2520 // Vector types.
2521 unsigned NumElem = Ty0.getVectorNumElements();
2522 assert(NumElem == Ty1.getVectorNumElements());
2523
2524 return {MVT::getVectorVT(WideETy0, NumElem),
2525 MVT::getVectorVT(WideETy1, NumElem)};
2526}
2527
2528HexagonTargetLowering::TypePair
2529HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2530 // Compare the numbers of elements of two vector types, and widen the
2531 // narrower one to match the number of elements in the wider one.
2532 assert(Ty0.isVector() && Ty1.isVector());
2533
2534 unsigned Len0 = Ty0.getVectorNumElements();
2535 unsigned Len1 = Ty1.getVectorNumElements();
2536 if (Len0 == Len1)
2537 return {Ty0, Ty1};
2538
2539 unsigned MaxLen = std::max(Len0, Len1);
2540 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2541 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2542}
2543
2544MVT
2545HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2546 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2547 assert(LegalTy.isSimple());
2548 return LegalTy.getSimpleVT();
2549}
2550
2551MVT
2552HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2553 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2554 assert(Ty.getSizeInBits() <= HwWidth);
2555 if (Ty.getSizeInBits() == HwWidth)
2556 return Ty;
2557
2558 MVT ElemTy = Ty.getScalarType();
2559 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2560}
2561
2562HexagonTargetLowering::VectorPair
2563HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2564 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2565 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2566 // whether an overflow has occurred.
2567 MVT ResTy = ty(A);
2568 assert(ResTy == ty(B));
2569 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2570
2571 if (!Signed) {
2572 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2573 // save any instructions.
2574 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2575 SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2576 return {Add, Ovf};
2577 }
2578
2579 // Signed overflow has happened, if:
2580 // (A, B have the same sign) and (A+B has a different sign from either)
2581 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2582 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2583 SDValue NotA =
2584 DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
2585 SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2586 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2587 SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
2588 SDValue MSB =
2589 DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2590 return {Add, MSB};
2591}
2592
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {rounded result, overflow predicate from the rounding add}.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  // bool ovf = (inp > tmp0);
  // uint rup = inp & (1 << (Amt+1));
  //
  // uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(ElemWidth);
  // All arithmetic is done on the integer view of Val.
  MVT IntTy = tyVector(ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  SDValue Inp = DAG.getBitcast(IntTy, Val);
  // LowBits = (1 << (Amt-1)) - 1: the rounding bias.
  SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);

  // Rup = the "round up" bit used for the ties-to-even tie-break.
  SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
  SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
  SDValue Zero = getZero(dl, IntTy, DAG);
  SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
  // Tmp0 = Inp + bias; Ovf marks lanes where this addition overflowed.
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);

  SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
  SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
  SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);

  // Select between the biased and bias+rup results per lane, then do the
  // final shift by one to complete the Amt-bit shift.
  SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
  SDValue One = DAG.getConstant(1, dl, IntTy);
  SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
  SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2640
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Emit a v60 instruction sequence computing the high half of the signed
  // 32x32->64 product for each lane of A and B (i.e. mulhs), built from
  // 16x16 halfword multiplies with explicit carry handling.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // mulhs(A,B) =
  //   = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //      + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //        Hi(B) Lo(B)
  //        Hi(A) Lo(A)
  //       --------------
  //        Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //        Hi(B)*Lo(A)  | + dropping the low 16 bits
  //        Hi(A)*Lo(B)  | T2
  //  Hi(B)*Hi(A)

  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
  // T4 = get Hi(B) into low halves.
  SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
  SDValue T5 = LoHalf(P3, DAG);
  // Add the Hi*Hi contribution to the accumulated middle terms.
  SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
  return T6;
}
2697
2698SDValue
2699HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
2700 bool SignedB, const SDLoc &dl,
2701 SelectionDAG &DAG) const {
2702 MVT VecTy = ty(A);
2703 MVT PairTy = typeJoin({VecTy, VecTy});
2704 assert(VecTy.getVectorElementType() == MVT::i32);
2705
2706 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2707
2708 if (SignedA && !SignedB) {
2709 // Make A:unsigned, B:signed.
2710 std::swap(A, B);
2711 std::swap(SignedA, SignedB);
2712 }
2713
2714 // Do halfword-wise multiplications for unsigned*unsigned product, then
2715 // add corrections for signed and unsigned*signed.
2716
2717 SDValue Lo, Hi;
2718
2719 // P0:lo = (uu) products of low halves of A and B,
2720 // P0:hi = (uu) products of high halves.
2721 SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);
2722
2723 // Swap low/high halves in B
2724 SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
2725 {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
2726 SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
2727 // P1 = products of even/odd halfwords.
2728 // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
2729 // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
2730 SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);
2731
2732 // P2:lo = low halves of P1:lo + P1:hi,
2733 // P2:hi = high halves of P1:lo + P1:hi.
2734 SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
2735 {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
2736 // Still need to add the high halves of P0:lo to P2:lo
2737 SDValue T2 =
2738 getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
2739 SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});
2740
2741 // The high halves of T3 will contribute to the HI part of LOHI.
2742 SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2743 {HiHalf(P2, DAG), T3, S16}, DAG);
2744
2745 // The low halves of P2 need to be added to high halves of the LO part.
2746 Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
2747 {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
2748 Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});
2749
2750 if (SignedA) {
2751 assert(SignedB && "Signed A and unsigned B should have been inverted");
2752
2753 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2754 SDValue Zero = getZero(dl, VecTy, DAG);
2755 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2756 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2757 SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
2758 SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
2759 Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
2760 } else if (SignedB) {
2761 // Same correction as for mulhus:
2762 // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
2763 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2764 SDValue Zero = getZero(dl, VecTy, DAG);
2765 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2766 Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
2767 } else {
2768 assert(!SignedA && !SignedB);
2769 }
2770
2771 return DAG.getMergeValues({Lo, Hi}, dl);
2772}
2773
2774SDValue
2775HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
2776 SDValue B, bool SignedB,
2777 const SDLoc &dl,
2778 SelectionDAG &DAG) const {
2779 MVT VecTy = ty(A);
2780 MVT PairTy = typeJoin({VecTy, VecTy});
2781 assert(VecTy.getVectorElementType() == MVT::i32);
2782
2783 if (SignedA && !SignedB) {
2784 // Make A:unsigned, B:signed.
2785 std::swap(A, B);
2786 std::swap(SignedA, SignedB);
2787 }
2788
2789 // Do S*S first, then make corrections for U*S or U*U if needed.
2790 SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
2791 SDValue P1 =
2792 getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
2793 SDValue Lo = LoHalf(P1, DAG);
2794 SDValue Hi = HiHalf(P1, DAG);
2795
2796 if (!SignedB) {
2797 assert(!SignedA && "Signed A and unsigned B should have been inverted");
2798 SDValue Zero = getZero(dl, VecTy, DAG);
2799 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2800
2801 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2802 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2803 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
2804 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2805 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2806 // $A))>;
2807 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2808 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2809 SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
2810 SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
2811 Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
2812 } else if (!SignedA) {
2813 SDValue Zero = getZero(dl, VecTy, DAG);
2814 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2815
2816 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2817 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2818 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2819 // (HiHalf (Muls64O $A, $B)),
2820 // $B)>;
2821 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2822 Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
2823 }
2824
2825 return DAG.getMergeValues({Lo, Hi}, dl);
2826}
2827
2828SDValue
2829HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2830 const {
2831 // Rewrite conversion between integer and floating-point in such a way that
2832 // the integer type is extended/narrowed to match the bitwidth of the
2833 // floating-point type, combined with additional integer-integer extensions
2834 // or narrowings to match the original input/result types.
2835 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2836 //
2837 // The input/result types are not required to be legal, but if they are
2838 // legal, this function should not introduce illegal types.
2839
2840 unsigned Opc = Op.getOpcode();
 // NOTE(review): this excerpt elides the statements originally between here
 // and 'SDValue Inp' below (likely an assert on Opc) — confirm in the full
 // source.
2843
2844 SDValue Inp = Op.getOperand(0);
2845 MVT InpTy = ty(Inp);
2846 MVT ResTy = ty(Op);
2847
 // Nothing to do if the integer side already has the desired width.
2848 if (InpTy == ResTy)
2849 return Op;
2850
2851 const SDLoc &dl(Op);
 // NOTE(review): 'Signed' used below is defined on a line elided from this
 // excerpt — presumably derived from Opc (FP_TO_SINT/SINT_TO_FP imply a
 // signed resize); confirm against the full source.
2853
 // Widen both sides to a common width, convert there, then resize the
 // result back to the requested type.
2854 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2855 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2856 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2857 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2858 return Res;
2859}
2860
// Expand a vector FP-to-integer conversion into integer bit manipulation on
// the IEEE encoding of the input (sign/exponent/fraction extraction plus
// shifts and selects); see the pseudo-code below for the scalar model.
2861SDValue
2862HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2863 unsigned Opc = Op.getOpcode();
 // NOTE(review): a line is elided here in this excerpt — presumably an
 // assert that Opc is FP_TO_SINT or FP_TO_UINT; confirm in the full source.
2865
2866 const SDLoc &dl(Op);
2867 SDValue Op0 = Op.getOperand(0);
2868 MVT InpTy = ty(Op0);
2869 MVT ResTy = ty(Op);
2870 assert(InpTy.changeTypeToInteger() == ResTy);
2871
2872 // int32_t conv_f32_to_i32(uint32_t inp) {
2873 // // s | exp8 | frac23
2874 //
2875 // int neg = (int32_t)inp < 0;
2876 //
2877 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2878 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2879 // // produce a large positive "expm1", which will result in max u/int.
2880 // // In all IEEE formats, bias is the largest positive number that can be
2881 // // represented in bias-width bits (i.e. 011..1).
2882 // int32_t expm1 = (inp << 1) - 0x80000000;
2883 // expm1 >>= 24;
2884 //
2885 // // Always insert the "implicit 1". Subnormal numbers will become 0
2886 // // regardless.
2887 // uint32_t frac = (inp << 8) | 0x80000000;
2888 //
2889 // // "frac" is the fraction part represented as Q1.31. If it was
2890 // // interpreted as uint32_t, it would be the fraction part multiplied
2891 // // by 2^31.
2892 //
2893 // // Calculate the amount of right shift, since shifting further to the
2894 // // left would lose significant bits. Limit it to 32, because we want
2895 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2896 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2897 // // left by 31). "rsh" can be negative.
2898 // int32_t rsh = min(31 - (expm1 + 1), 32);
2899 //
2900 // frac >>= rsh; // rsh == 32 will produce 0
2901 //
2902 // // Everything up to this point is the same for conversion to signed
2903 // // unsigned integer.
2904 //
2905 // if (neg) // Only for signed int
2906 // frac = -frac; //
2907 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
2908 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
2909 // if (rsh <= 0 && !neg) //
2910 // frac = 0x7fffffff; //
2911 //
2912 // if (neg) // Only for unsigned int
2913 // frac = 0; //
2914 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
2915 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
2916 //
2917 // return frac;
2918 // }
2919
2920 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
2921
2922 // Zero = V6_vd0();
2923 // Neg = V6_vgtw(Zero, Inp);
2924 // One = V6_lvsplatw(1);
2925 // M80 = V6_lvsplatw(0x80000000);
2926 // Exp00 = V6_vaslwv(Inp, One);
2927 // Exp01 = V6_vsubw(Exp00, M80);
2928 // ExpM1 = V6_vasrw(Exp01, 24);
2929 // Frc00 = V6_vaslw(Inp, 8);
2930 // Frc01 = V6_vor(Frc00, M80);
2931 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2932 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2933 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
2934
2935 // if signed int:
2936 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2937 // Pos = V6_vgtw(Rsh01, Zero);
2938 // Frc13 = V6_vsubw(Zero, Frc02);
2939 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
2940 // Int = V6_vmux(Pos, Frc14, Bnd);
2941 //
2942 // if unsigned int:
2943 // Rsn = V6_vgtw(Zero, Rsh01)
2944 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2945 // Int = V6_vmux(Neg, Zero, Frc23)
2946
 // The pseudo-code above is written for f32/i32; the widths generalize via
 // the IEEE field sizes of the input element type.
2947 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
2948 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2949 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
2950
 // Treat the FP input as an integer vector of the same width.
2951 SDValue Inp = DAG.getBitcast(ResTy, Op0);
2952 SDValue Zero = getZero(dl, ResTy, DAG);
2953 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
2954 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
2955 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
2956 SDValue One = DAG.getConstant(1, dl, ResTy);
2957 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
2958 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
2959 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
2960 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
2961
2962 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
2963 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
2964 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
2965
2966 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
2967 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
2968 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
2969 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
2970 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
2971
2972 SDValue Int;
2973
2974 if (Opc == ISD::FP_TO_SINT) {
2975 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
2976 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
2977 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
2978 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
2979 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
2980 } else {
 // NOTE(review): a line is elided here in this excerpt — presumably
 // assert(Opc == ISD::FP_TO_UINT); confirm in the full source.
2982 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
2983 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
2984 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
2985 }
2986
2987 return Int;
2988}
2989
// Expand a vector integer-to-FP conversion into integer bit manipulation
// (normalize, round, assemble sign/exponent/fraction); the scalar model is
// in the pseudo-code below.
2990SDValue
2991HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2992 unsigned Opc = Op.getOpcode();
 // NOTE(review): a line is elided here in this excerpt — presumably an
 // assert that Opc is SINT_TO_FP or UINT_TO_FP; confirm in the full source.
2994
2995 const SDLoc &dl(Op);
2996 SDValue Op0 = Op.getOperand(0);
2997 MVT InpTy = ty(Op0);
2998 MVT ResTy = ty(Op);
2999 assert(ResTy.changeTypeToInteger() == InpTy);
3000
3001 // uint32_t vnoc1_rnd(int32_t w) {
3002 // int32_t iszero = w == 0;
3003 // int32_t isneg = w < 0;
3004 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
3005 //
3006 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
3007 // uint32_t frac0 = (uint64_t)u << norm_left;
3008 //
3009 // // Rounding:
3010 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
3011 // uint32_t renorm = (frac0 > frac1);
3012 // uint32_t rup = (int)(frac0 << 22) < 0;
3013 //
3014 // uint32_t frac2 = frac0 >> 8;
3015 // uint32_t frac3 = frac1 >> 8;
3016 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
3017 //
3018 // int32_t exp = 32 - norm_left + renorm + 127;
3019 // exp <<= 23;
3020 //
3021 // uint32_t sign = 0x80000000 * isneg;
3022 // uint32_t f = sign | exp | frac;
3023 // return iszero ? 0 : f;
3024 // }
3025
3026 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
3027 bool Signed = Opc == ISD::SINT_TO_FP;
3028
 // Widths generalize the f32 pseudo-code via the IEEE field sizes of the
 // result element type.
3029 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
3030 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3031
3032 SDValue Zero = getZero(dl, InpTy, DAG);
3033 SDValue One = DAG.getConstant(1, dl, InpTy);
3034 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
 // Normalize |x| so the leading 1 is shifted out: frac0 = |x| << (clz+1).
3035 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
3036 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
3037 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
3038 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
3039
 // Round the fraction to FracWidth bits; Ovf reports renormalization.
3040 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
3041 if (Signed) {
3042 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
3043 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
3044 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
3045 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
3046 }
3047
 // exp = ElemWidth - NLeft + renorm + ExpBias, then shifted into position.
3048 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
3049 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
3050 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
3051 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
3052 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
3053 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
3054 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
3055 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
3056 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
3057
3058 return Flt;
3059}
3060
// Wrap an extend/truncate in a target-specific TL_EXTEND/TL_TRUNCATE node.
// The original ISD opcode is stashed in operand #2 so RemoveTLWrapper can
// later recreate the exact original operation.
3061SDValue
3062HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3063 unsigned Opc = Op.getOpcode();
3064 unsigned TLOpc;
3065 switch (Opc) {
3066 case ISD::ANY_EXTEND:
3067 case ISD::SIGN_EXTEND:
3068 case ISD::ZERO_EXTEND:
3069 TLOpc = HexagonISD::TL_EXTEND;
3070 break;
3071 case ISD::TRUNCATE:
 // NOTE(review): a line is elided here in this excerpt — presumably
 // 'TLOpc = HexagonISD::TL_TRUNCATE;' (and a default label may also be
 // elided before the debug dump below); confirm in the full source.
3073 break;
3074#ifndef NDEBUG
3075 Op.dump(&DAG);
3076#endif
3077 llvm_unreachable("Unexpected operator");
3078 }
3079
3080 const SDLoc &dl(Op);
 // The i128 undef operand deliberately carries an illegal type so the
 // wrapper is never considered legal as-is.
3081 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
3082 DAG.getUNDEF(MVT::i128), // illegal type
3083 DAG.getConstant(Opc, dl, MVT::i32));
3084}
3085
3086SDValue
3087HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3088 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3089 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3090 unsigned Opc = Op.getConstantOperandVal(2);
3091 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3092}
3093
3094HexagonTargetLowering::VectorPair
3095HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
3096 assert(!Op.isMachineOpcode());
3097 SmallVector<SDValue, 2> OpsL, OpsH;
3098 const SDLoc &dl(Op);
3099
3100 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
3101 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
3102 SDValue TV = DAG.getValueType(Ty);
3103 return std::make_pair(TV, TV);
3104 };
3105
3106 for (SDValue A : Op.getNode()->ops()) {
3107 auto [Lo, Hi] =
3108 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
3109 // Special case for type operand.
3110 switch (Op.getOpcode()) {
3111 case ISD::SIGN_EXTEND_INREG:
3112 case HexagonISD::SSAT:
3113 case HexagonISD::USAT:
3114 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
3115 std::tie(Lo, Hi) = SplitVTNode(N);
3116 break;
3117 }
3118 OpsL.push_back(Lo);
3119 OpsH.push_back(Hi);
3120 }
3121
3122 MVT ResTy = ty(Op);
3123 MVT HalfTy = typeSplit(ResTy).first;
3124 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
3125 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
3126 return {L, H};
3127}
3128
3129SDValue
3130HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
3131 auto *MemN = cast<MemSDNode>(Op.getNode());
3132
3133 MVT MemTy = MemN->getMemoryVT().getSimpleVT();
3134 if (!isHvxPairTy(MemTy))
3135 return Op;
3136
3137 const SDLoc &dl(Op);
3138 unsigned HwLen = Subtarget.getVectorLength();
3139 MVT SingleTy = typeSplit(MemTy).first;
3140 SDValue Chain = MemN->getChain();
3141 SDValue Base0 = MemN->getBasePtr();
3142 SDValue Base1 =
3143 DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
3144 unsigned MemOpc = MemN->getOpcode();
3145
3146 MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
3147 if (MachineMemOperand *MMO = MemN->getMemOperand()) {
3148 MachineFunction &MF = DAG.getMachineFunction();
3149 uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
3150 ? (uint64_t)MemoryLocation::UnknownSize
3151 : HwLen;
3152 MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
3153 MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
3154 }
3155
3156 if (MemOpc == ISD::LOAD) {
3157 assert(cast<LoadSDNode>(Op)->isUnindexed());
3158 SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
3159 SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
3160 return DAG.getMergeValues(
3161 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
3162 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3163 Load0.getValue(1), Load1.getValue(1)) }, dl);
3164 }
3165 if (MemOpc == ISD::STORE) {
3166 assert(cast<StoreSDNode>(Op)->isUnindexed());
3167 VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
3168 SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
3169 SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
3170 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
3171 }
3172
3173 assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
3174
3175 auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
3176 assert(MaskN->isUnindexed());
3177 VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
3178 SDValue Offset = DAG.getUNDEF(MVT::i32);
3179
3180 if (MemOpc == ISD::MLOAD) {
3181 VectorPair Thru =
3182 opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
3183 SDValue MLoad0 =
3184 DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
3185 Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
3186 ISD::NON_EXTLOAD, false);
3187 SDValue MLoad1 =
3188 DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
3189 Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
3190 ISD::NON_EXTLOAD, false);
3191 return DAG.getMergeValues(
3192 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
3193 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3194 MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
3195 }
3196 if (MemOpc == ISD::MSTORE) {
3197 VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
3198 SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
3199 Masks.first, SingleTy, MOp0,
3200 ISD::UNINDEXED, false, false);
3201 SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
3202 Masks.second, SingleTy, MOp1,
3203 ISD::UNINDEXED, false, false);
3204 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
3205 }
3206
3207 std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
3208 llvm_unreachable(Name.c_str());
3209}
3210
// Widen a load of a short vector to a full HVX masked load: read a whole
// byte vector with a lane mask covering only the original bytes, then cast
// back to the requested element type.
3211SDValue
3212HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3213 const SDLoc &dl(Op);
3214 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3215 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3216 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3217 "Not widening loads of i1 yet");
3218
3219 SDValue Chain = LoadN->getChain();
3220 SDValue Base = LoadN->getBasePtr();
3221 SDValue Offset = DAG.getUNDEF(MVT::i32);
3222
3223 MVT ResTy = ty(Op);
3224 unsigned HwLen = Subtarget.getVectorLength();
3225 unsigned ResLen = ResTy.getStoreSize();
3226 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3227
 // Predicate enabling only the first ResLen byte lanes.
3228 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3229 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3230 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3231
3232 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3233 MachineFunction &MF = DAG.getMachineFunction();
3234 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3235
3236 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3237 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
 // NOTE(review): a continuation line of this call is elided in this
 // excerpt — presumably the trailing addressing/extension arguments;
 // confirm in the full source.
3239 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3240 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3241}
3242
3243SDValue
3244HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3245 const SDLoc &dl(Op);
3246 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3247 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3248 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3249 "Not widening stores of i1 yet");
3250
3251 SDValue Chain = StoreN->getChain();
3252 SDValue Base = StoreN->getBasePtr();
3253 SDValue Offset = DAG.getUNDEF(MVT::i32);
3254
3255 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3256 MVT ValueTy = ty(Value);
3257 unsigned ValueLen = ValueTy.getVectorNumElements();
3258 unsigned HwLen = Subtarget.getVectorLength();
3259 assert(isPowerOf2_32(ValueLen));
3260
3261 for (unsigned Len = ValueLen; Len < HwLen; ) {
3262 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3263 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3264 }
3265 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3266
3267 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3268 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3269 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3270 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3271 MachineFunction &MF = DAG.getMachineFunction();
3272 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3273 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3274 MemOp, ISD::UNINDEXED, false, false);
3275}
3276
3277SDValue
3278HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3279 const SDLoc &dl(Op);
3280 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3281 MVT ElemTy = ty(Op0).getVectorElementType();
3282 unsigned HwLen = Subtarget.getVectorLength();
3283
3284 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3285 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3286 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3287 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3288 return SDValue();
3289
3290 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3291 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3292 EVT ResTy =
3293 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3294 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3295 {WideOp0, WideOp1, Op.getOperand(2)});
3296
3297 EVT RetTy = typeLegalize(ty(Op), DAG);
3298 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3299 {SetCC, getZero(dl, MVT::i32, DAG)});
3300}
3301
// Main dispatch for custom-lowered HVX operations: pair-typed operations
// are split into single-vector halves first, then the remaining opcodes
// are routed to their specific lowering helpers.
3302SDValue
3303HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3304 unsigned Opc = Op.getOpcode();
 // An operation involves a vector pair if either its result or any of its
 // operands has a pair type.
3305 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3306 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3307 return isHvxPairTy(ty(V));
3308 });
3309
3310 if (IsPairOp) {
3311 switch (Opc) {
3312 default:
3313 break;
3314 case ISD::LOAD:
3315 case ISD::STORE:
3316 case ISD::MLOAD:
3317 case ISD::MSTORE:
3318 return SplitHvxMemOp(Op, DAG);
3319 case ISD::SINT_TO_FP:
3320 case ISD::UINT_TO_FP:
3321 case ISD::FP_TO_SINT:
3322 case ISD::FP_TO_UINT:
 // Only split same-width conversions; width-changing ones are handled
 // elsewhere.
3323 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3324 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3325 break;
3326 case ISD::ABS:
3327 case ISD::CTPOP:
3328 case ISD::CTLZ:
3329 case ISD::CTTZ:
3330 case ISD::MUL:
3331 case ISD::FADD:
3332 case ISD::FSUB:
3333 case ISD::FMUL:
3334 case ISD::FMINIMUMNUM:
3335 case ISD::FMAXIMUMNUM:
3336 case ISD::MULHS:
3337 case ISD::MULHU:
3338 case ISD::AND:
3339 case ISD::OR:
3340 case ISD::XOR:
3341 case ISD::SRA:
3342 case ISD::SHL:
3343 case ISD::SRL:
3344 case ISD::FSHL:
3345 case ISD::FSHR:
3346 case ISD::SMIN:
3347 case ISD::SMAX:
3348 case ISD::UMIN:
3349 case ISD::UMAX:
3350 case ISD::SETCC:
3351 case ISD::VSELECT:
 // NOTE(review): one case label is elided here in this excerpt
 // (doxygen line 3352); confirm in the full source.
3353 case ISD::SPLAT_VECTOR:
3354 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3355 case ISD::SIGN_EXTEND:
3356 case ISD::ZERO_EXTEND:
3357 // In general, sign- and zero-extends can't be split and still
3358 // be legal. The only exception is extending bool vectors.
3359 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3360 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3361 break;
3362 }
3363 }
3364
3365 switch (Opc) {
3366 default:
3367 break;
3368 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3369 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3370 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3371 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3372 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3373 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3374 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3375 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3376 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3377 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3378 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3379 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3380 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3381 case ISD::SRA:
3382 case ISD::SHL:
3383 case ISD::SRL: return LowerHvxShift(Op, DAG);
3384 case ISD::FSHL:
3385 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3386 case ISD::MULHS:
3387 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3388 case ISD::SMUL_LOHI:
3389 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3390 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3391 case ISD::SETCC:
3392 case ISD::INTRINSIC_VOID: return Op;
3393 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3394 case ISD::MLOAD:
3395 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3396 // Unaligned loads will be handled by the default lowering.
3397 case ISD::LOAD: return SDValue();
3398 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3399 case ISD::FP_TO_SINT:
3400 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3401 case ISD::SINT_TO_FP:
3402 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3403
3404 // Special nodes:
 // NOTE(review): two case labels are elided here in this excerpt
 // (doxygen lines 3405-3406, presumably HexagonISD::SMUL_LOHI and
 // HexagonISD::UMUL_LOHI); confirm in the full source.
3407 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3408 }
3409#ifndef NDEBUG
3410 Op.dumpr(&DAG);
3411#endif
3412 llvm_unreachable("Unhandled HVX operation");
3413}
3414
3415SDValue
3416HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3417 const {
3418 // Rewrite the extension/truncation/saturation op into steps where each
3419 // step changes the type widths by a factor of 2.
3420 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3421 //
3422 // Some of the vector types in Op may not be legal.
3423
3424 unsigned Opc = Op.getOpcode();
3425 switch (Opc) {
3426 case HexagonISD::SSAT:
3427 case HexagonISD::USAT:
 // NOTE(review): two case labels are elided here in this excerpt
 // (doxygen lines 3428-3429, presumably HexagonISD::TL_EXTEND and
 // HexagonISD::TL_TRUNCATE); confirm in the full source.
3430 break;
3431 case ISD::ANY_EXTEND:
3432 case ISD::ZERO_EXTEND:
3433 case ISD::SIGN_EXTEND:
3434 case ISD::TRUNCATE:
3435 llvm_unreachable("ISD:: ops will be auto-folded");
3436 break;
3437#ifndef NDEBUG
3438 Op.dump(&DAG);
3439#endif
3440 llvm_unreachable("Unexpected operation");
3441 }
3442
3443 SDValue Inp = Op.getOperand(0);
3444 MVT InpTy = ty(Inp);
3445 MVT ResTy = ty(Op);
3446
3447 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3448 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3449 assert(InpWidth != ResWidth);
3450
 // Already a single factor-of-2 step: nothing to expand.
3451 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3452 return Op;
3453
3454 const SDLoc &dl(Op);
3455 unsigned NumElems = InpTy.getVectorNumElements();
3456 assert(NumElems == ResTy.getVectorNumElements());
3457
 // Re-emit Op with the same opcode but a new element width.
3458 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3459 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3460 switch (Opc) {
3461 case HexagonISD::SSAT:
3462 case HexagonISD::USAT:
3463 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
 // NOTE(review): two case labels are elided here in this excerpt
 // (doxygen lines 3464-3465, presumably the TL_EXTEND/TL_TRUNCATE
 // wrappers, which carry two extra operands); confirm in full source.
3466 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3467 default:
3468 llvm_unreachable("Unexpected opcode");
3469 }
3470 };
3471
 // Walk the width towards ResWidth one factor of 2 at a time.
3472 SDValue S = Inp;
3473 if (InpWidth < ResWidth) {
3474 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3475 while (InpWidth * 2 <= ResWidth)
3476 S = repeatOp(InpWidth *= 2, S);
3477 } else {
3478 // InpWidth > ResWidth
3479 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3480 while (InpWidth / 2 >= ResWidth)
3481 S = repeatOp(InpWidth /= 2, S);
3482 }
3483 return S;
3484}
3485
// Legalize a resize-like operation (saturation or a TL_* wrapper) whose
// input or result type is not HVX-legal: widen to HVX, split into pair
// halves, or simply unwrap, depending on the types involved.
3486SDValue
3487HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3488 SDValue Inp0 = Op.getOperand(0);
3489 MVT InpTy = ty(Inp0);
3490 MVT ResTy = ty(Op);
3491 unsigned InpWidth = InpTy.getSizeInBits();
3492 unsigned ResWidth = ResTy.getSizeInBits();
3493 unsigned Opc = Op.getOpcode();
3494
3495 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3496 // First, make sure that the narrower type is widened to HVX.
3497 // This may cause the result to be wider than what the legalizer
3498 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3499 // desired type.
3500 auto [WInpTy, WResTy] =
3501 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3502 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3503 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3504 SDValue S;
 // NOTE(review): the condition line of this if-statement is elided in
 // this excerpt (doxygen line 3505) — presumably it tests for the TL_*
 // wrapper opcodes, which carry two extra operands; confirm in the full
 // source.
3506 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3507 Op.getOperand(2));
3508 } else {
3509 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3510 }
 // Expand the widened op into factor-of-2 steps, then trim the result
 // back to the legalized expected type.
3511 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3512 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3513 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3514 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3515 } else {
3516 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3517 return RemoveTLWrapper(Op, DAG);
3518 }
3519 llvm_unreachable("Unexpected situation");
3520}
3521
3522void
3523HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3525 unsigned Opc = N->getOpcode();
3526 SDValue Op(N, 0);
3527 SDValue Inp0; // Optional first argument.
3528 if (N->getNumOperands() > 0)
3529 Inp0 = Op.getOperand(0);
3530
3531 switch (Opc) {
3532 case ISD::ANY_EXTEND:
3533 case ISD::SIGN_EXTEND:
3534 case ISD::ZERO_EXTEND:
3535 case ISD::TRUNCATE:
3536 if (Subtarget.isHVXElementType(ty(Op)) &&
3537 Subtarget.isHVXElementType(ty(Inp0))) {
3538 Results.push_back(CreateTLWrapper(Op, DAG));
3539 }
3540 break;
3541 case ISD::SETCC:
3542 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3543 if (SDValue T = WidenHvxSetCC(Op, DAG))
3544 Results.push_back(T);
3545 }
3546 break;
3547 case ISD::STORE: {
3548 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3549 SDValue Store = WidenHvxStore(Op, DAG);
3550 Results.push_back(Store);
3551 }
3552 break;
3553 }
3554 case ISD::MLOAD:
3555 if (isHvxPairTy(ty(Op))) {
3556 SDValue S = SplitHvxMemOp(Op, DAG);
3558 Results.push_back(S.getOperand(0));
3559 Results.push_back(S.getOperand(1));
3560 }
3561 break;
3562 case ISD::MSTORE:
3563 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3564 SDValue S = SplitHvxMemOp(Op, DAG);
3565 Results.push_back(S);
3566 }
3567 break;
3568 case ISD::SINT_TO_FP:
3569 case ISD::UINT_TO_FP:
3570 case ISD::FP_TO_SINT:
3571 case ISD::FP_TO_UINT:
3572 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3573 SDValue T = EqualizeFpIntConversion(Op, DAG);
3574 Results.push_back(T);
3575 }
3576 break;
3577 case HexagonISD::SSAT:
3578 case HexagonISD::USAT:
3581 Results.push_back(LegalizeHvxResize(Op, DAG));
3582 break;
3583 default:
3584 break;
3585 }
3586}
3587
3588void
3589HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3591 unsigned Opc = N->getOpcode();
3592 SDValue Op(N, 0);
3593 SDValue Inp0; // Optional first argument.
3594 if (N->getNumOperands() > 0)
3595 Inp0 = Op.getOperand(0);
3596
3597 switch (Opc) {
3598 case ISD::ANY_EXTEND:
3599 case ISD::SIGN_EXTEND:
3600 case ISD::ZERO_EXTEND:
3601 case ISD::TRUNCATE:
3602 if (Subtarget.isHVXElementType(ty(Op)) &&
3603 Subtarget.isHVXElementType(ty(Inp0))) {
3604 Results.push_back(CreateTLWrapper(Op, DAG));
3605 }
3606 break;
3607 case ISD::SETCC:
3608 if (shouldWidenToHvx(ty(Op), DAG)) {
3609 if (SDValue T = WidenHvxSetCC(Op, DAG))
3610 Results.push_back(T);
3611 }
3612 break;
3613 case ISD::LOAD: {
3614 if (shouldWidenToHvx(ty(Op), DAG)) {
3615 SDValue Load = WidenHvxLoad(Op, DAG);
3616 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3617 Results.push_back(Load.getOperand(0));
3618 Results.push_back(Load.getOperand(1));
3619 }
3620 break;
3621 }
3622 case ISD::BITCAST:
3623 if (isHvxBoolTy(ty(Inp0))) {
3624 SDValue C = LowerHvxBitcast(Op, DAG);
3625 Results.push_back(C);
3626 }
3627 break;
3628 case ISD::FP_TO_SINT:
3629 case ISD::FP_TO_UINT:
3630 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3631 SDValue T = EqualizeFpIntConversion(Op, DAG);
3632 Results.push_back(T);
3633 }
3634 break;
3635 case HexagonISD::SSAT:
3636 case HexagonISD::USAT:
3639 Results.push_back(LegalizeHvxResize(Op, DAG));
3640 break;
3641 default:
3642 break;
3643 }
3644}
3645
3646SDValue
3647HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3648 DAGCombinerInfo &DCI) const {
3649 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3650 // to extract-subvector (shuffle V, pick even, pick odd)
3651
3652 assert(Op.getOpcode() == ISD::TRUNCATE);
3653 SelectionDAG &DAG = DCI.DAG;
3654 const SDLoc &dl(Op);
3655
3656 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3657 return SDValue();
3658 SDValue Cast = Op.getOperand(0);
3659 SDValue Src = Cast.getOperand(0);
3660
3661 EVT TruncTy = Op.getValueType();
3662 EVT CastTy = Cast.getValueType();
3663 EVT SrcTy = Src.getValueType();
3664 if (SrcTy.isSimple())
3665 return SDValue();
3666 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3667 return SDValue();
3668 unsigned SrcLen = SrcTy.getVectorNumElements();
3669 unsigned CastLen = CastTy.getVectorNumElements();
3670 if (2 * CastLen != SrcLen)
3671 return SDValue();
3672
3673 SmallVector<int, 128> Mask(SrcLen);
3674 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3675 Mask[i] = 2 * i;
3676 Mask[i + CastLen] = 2 * i + 1;
3677 }
3678 SDValue Deal =
3679 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3680 return opSplit(Deal, dl, DAG).first;
3681}
3682
3683SDValue
3684HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3685 SDValue Op, DAGCombinerInfo &DCI) const {
3686 // Fold
3687 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3688 // into
3689 // shuffle (concat x, y), undef, m3
3690 if (Op.getNumOperands() != 2)
3691 return SDValue();
3692
3693 SelectionDAG &DAG = DCI.DAG;
3694 const SDLoc &dl(Op);
3695 SDValue V0 = Op.getOperand(0);
3696 SDValue V1 = Op.getOperand(1);
3697
3698 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3699 return SDValue();
3700 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3701 return SDValue();
3702
3703 SetVector<SDValue> Order;
3704 Order.insert(V0.getOperand(0));
3705 Order.insert(V0.getOperand(1));
3706 Order.insert(V1.getOperand(0));
3707 Order.insert(V1.getOperand(1));
3708
3709 if (Order.size() > 2)
3710 return SDValue();
3711
3712 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3713 // result must be the same.
3714 EVT InpTy = V0.getValueType();
3715 assert(InpTy.isVector());
3716 unsigned InpLen = InpTy.getVectorNumElements();
3717
3718 SmallVector<int, 128> LongMask;
3719 auto AppendToMask = [&](SDValue Shuffle) {
3720 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3721 ArrayRef<int> Mask = SV->getMask();
3722 SDValue X = Shuffle.getOperand(0);
3723 SDValue Y = Shuffle.getOperand(1);
3724 for (int M : Mask) {
3725 if (M == -1) {
3726 LongMask.push_back(M);
3727 continue;
3728 }
3729 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3730 if (static_cast<unsigned>(M) >= InpLen)
3731 M -= InpLen;
3732
3733 int OutOffset = Order[0] == Src ? 0 : InpLen;
3734 LongMask.push_back(M + OutOffset);
3735 }
3736 };
3737
3738 AppendToMask(V0);
3739 AppendToMask(V1);
3740
3741 SDValue C0 = Order.front();
3742 SDValue C1 = Order.back(); // Can be same as front
3743 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3744
3745 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3746 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3747}
3748
3749SDValue
3750HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3751 const {
3752 const SDLoc &dl(N);
3753 SelectionDAG &DAG = DCI.DAG;
3754 SDValue Op(N, 0);
3755 unsigned Opc = Op.getOpcode();
3756
3758
3759 if (Opc == ISD::TRUNCATE)
3760 return combineTruncateBeforeLegal(Op, DCI);
3761 if (Opc == ISD::CONCAT_VECTORS)
3762 return combineConcatVectorsBeforeLegal(Op, DCI);
3763
3764 if (DCI.isBeforeLegalizeOps())
3765 return SDValue();
3766
3767 switch (Opc) {
3768 case ISD::VSELECT: {
3769 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3770 SDValue Cond = Ops[0];
3771 if (Cond->getOpcode() == ISD::XOR) {
3772 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3773 if (C1->getOpcode() == HexagonISD::QTRUE)
3774 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3775 }
3776 break;
3777 }
3778 case HexagonISD::V2Q:
3779 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3780 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3781 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3782 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3783 }
3784 break;
3785 case HexagonISD::Q2V:
3786 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3787 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3788 DAG.getAllOnesConstant(dl, MVT::i32));
3789 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3790 return getZero(dl, ty(Op), DAG);
3791 break;
3793 if (isUndef(Ops[1]))
3794 return Ops[0];
3795 break;
3796 case HexagonISD::VROR: {
3797 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3798 SDValue Vec = Ops[0].getOperand(0);
3799 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3800 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3801 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3802 }
3803 break;
3804 }
3805 }
3806
3807 return SDValue();
3808}
3809
3810bool
3811HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3812 if (Subtarget.isHVXVectorType(Ty, true))
3813 return false;
3814 auto Action = getPreferredHvxVectorAction(Ty);
3816 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3817 return false;
3818}
3819
3820bool
3821HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3822 if (Subtarget.isHVXVectorType(Ty, true))
3823 return false;
3824 auto Action = getPreferredHvxVectorAction(Ty);
3826 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3827 return false;
3828}
3829
3830bool
3831HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3832 if (!Subtarget.useHVXOps())
3833 return false;
3834 // If the type of any result, or any operand type are HVX vector types,
3835 // this is an HVX operation.
3836 auto IsHvxTy = [this](EVT Ty) {
3837 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3838 };
3839 auto IsHvxOp = [this](SDValue Op) {
3840 return Op.getValueType().isSimple() &&
3841 Subtarget.isHVXVectorType(ty(Op), true);
3842 };
3843 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3844 return true;
3845
3846 // Check if this could be an HVX operation after type widening.
3847 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3848 if (!Op.getValueType().isSimple())
3849 return false;
3850 MVT ValTy = ty(Op);
3851 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3852 };
3853
3854 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3855 if (IsWidenedToHvx(SDValue(N, i)))
3856 return true;
3857 }
3858 return llvm::any_of(N->ops(), IsWidenedToHvx);
3859}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define H(x, y, z)
Definition MD5.cpp:57
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:191
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:104
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:149
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:155
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:892
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:903
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.