LLVM 22.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
// Command-line override (in bytes) for the widening threshold consulted by
// getPreferredHvxVectorAction when deciding whether a short vector should
// be widened to a full HVX vector.
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
// Legal single-register (V) and register-pair (W) integer vector types for
// the 64-byte and 128-byte HVX configurations, indexed in parallel
// (i8, i16, i32 element types).
34static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(5, 15, 10);
45 case MVT::f32:
46 return std::make_tuple(8, 127, 23);
47 case MVT::f64:
48 return std::make_tuple(11, 1023, 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
54
// Configure everything HVX-specific for instruction lowering: register
// classes for the legal vector and predicate types, per-opcode legalization
// actions (Legal/Custom/Promote/Expand), shuffle promotions to byte
// vectors, and condition-code expansions for the HVX float compare types.
55void
56HexagonTargetLowering::initializeHVXLowering() {
57 if (Subtarget.useHVX64BOps()) {
58 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
59 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
60 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
61 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
62 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
63 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
71 // as possible.
72 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
73 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
74 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
75 } else if (Subtarget.useHVX128BOps()) {
76 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
77 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
78 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
79 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
80 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
81 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
82 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
// HVX floating point (v68+ with the FP feature) adds f16/f32 vector types.
85 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90 }
91 }
92
93 // Set up operation actions.
94
// Pick the legal-type tables and the canonical byte/word vector types for
// the active (64B or 128B) HVX configuration.
95 bool Use64b = Subtarget.useHVX64BOps();
96 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
99 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101
// Helper: mark Opc on FromTy as Promote and record the promotion target.
102 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
104 AddPromotedToType(Opc, FromTy, ToTy);
105 };
106
107 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108 // Note: v16i1 -> i16 is handled in type legalization instead of op
109 // legalization.
110 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
111 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
112 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
113 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
114 setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
115 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
119
120 if (Subtarget.useHVX128BOps())
121 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
// Actions for the HVX floating-point types (128B mode, v68+ with FP).
122 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
123 Subtarget.useHVXFloatingPoint()) {
124
125 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
126 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
127
// Single-register float vectors.
128 for (MVT T : FloatV) {
132 setOperationAction(ISD::FMINIMUMNUM, T, Legal);
133 setOperationAction(ISD::FMAXIMUMNUM, T, Legal);
134
137
140
141 setOperationAction(ISD::MLOAD, T, Custom);
142 setOperationAction(ISD::MSTORE, T, Custom);
143 // Custom-lower BUILD_VECTOR. The standard (target-independent)
144 // handling of it would convert it to a load, which is not always
145 // the optimal choice.
147 }
148
149
150 // BUILD_VECTOR with f16 operands cannot be promoted without
151 // promoting the result, so lower the node to vsplat or constant pool
155
156 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
157 // generated.
158 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
159 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
160 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
161 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
162
// Register-pair float vectors: most operations are custom-split into the
// corresponding single-vector operations.
163 for (MVT P : FloatW) {
164 setOperationAction(ISD::LOAD, P, Custom);
165 setOperationAction(ISD::STORE, P, Custom);
169 setOperationAction(ISD::FMINIMUMNUM, P, Custom);
170 setOperationAction(ISD::FMAXIMUMNUM, P, Custom);
173
174 // Custom-lower BUILD_VECTOR. The standard (target-independent)
175 // handling of it would convert it to a load, which is not always
176 // the optimal choice.
178 // Make concat-vectors custom to handle concats of more than 2 vectors.
180
181 setOperationAction(ISD::MLOAD, P, Custom);
182 setOperationAction(ISD::MSTORE, P, Custom);
183 }
184
// FP_EXTEND handling differs between the QFloat and IEEE FP flavors.
185 if (Subtarget.useHVXQFloatOps()) {
186 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
188 } else if (Subtarget.useHVXIEEEFPOps()) {
189 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
191 }
192 }
193
// Actions for legal single-register integer vector types.
194 for (MVT T : LegalV) {
197
213 if (T != ByteV) {
217 }
218
221 if (T.getScalarType() != MVT::i32) {
224 }
225
227 setOperationAction(ISD::LOAD, T, Custom);
228 setOperationAction(ISD::MLOAD, T, Custom);
229 setOperationAction(ISD::MSTORE, T, Custom);
230 if (T.getScalarType() != MVT::i32) {
233 }
234
236 // Make concat-vectors custom to handle concats of more than 2 vectors.
247 if (T != ByteV) {
249 // HVX only has shifts of words and halfwords.
253
254 // Promote all shuffles to operate on vectors of bytes.
255 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
256 }
257
258 if (Subtarget.useHVXFloatingPoint()) {
259 // Same action for both QFloat and IEEE.
264 }
265
273 }
274
// Actions for legal register-pair integer vector types.
275 for (MVT T : LegalW) {
276 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
277 // independent) handling of it would convert it to a load, which is
278 // not always the optimal choice.
280 // Make concat-vectors custom to handle concats of more than 2 vectors.
282
283 // Custom-lower these operations for pairs. Expand them into a concat
284 // of the corresponding operations on individual vectors.
293
294 setOperationAction(ISD::LOAD, T, Custom);
295 setOperationAction(ISD::STORE, T, Custom);
296 setOperationAction(ISD::MLOAD, T, Custom);
297 setOperationAction(ISD::MSTORE, T, Custom);
302
317 if (T != ByteW) {
321
322 // Promote all shuffles to operate on vectors of bytes.
323 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
324 }
327
330 if (T.getScalarType() != MVT::i32) {
333 }
334
335 if (Subtarget.useHVXFloatingPoint()) {
336 // Same action for both QFloat and IEEE.
341 }
342 }
343
344 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
345 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
346 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
349
// Only ordered/unordered EQ/GT-style compares have direct HVX support;
// expand every other FP condition code for the f16 vector type...
350 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
351 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
352 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
353 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
354 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
355 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
356 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
357 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
358 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
359 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
360 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
361 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
362 setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
363 setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
364
// ...and likewise for the f32 vector type.
365 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
366 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
367 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
368 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
369 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
370 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
371 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
372 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
373 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
374 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
375 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
376 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
377 setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
378 setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
379
380 // Boolean vectors.
381
382 for (MVT T : LegalW) {
383 // Boolean types for vector pairs will overlap with the boolean
384 // types for single vectors, e.g.
385 // v64i8 -> v64i1 (single)
386 // v64i16 -> v64i1 (pair)
387 // Set these actions first, and allow the single actions to overwrite
388 // any duplicates.
389 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
394 // Masked load/store takes a mask that may need splitting.
395 setOperationAction(ISD::MLOAD, BoolW, Custom);
396 setOperationAction(ISD::MSTORE, BoolW, Custom);
397 }
398
399 for (MVT T : LegalV) {
400 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
411 }
412
// Sub-vector-length integer types that still get an action registered.
413 if (Use64b) {
414 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
416 } else {
417 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
419 }
420
421 // Handle store widening for short vectors.
422 unsigned HwLen = Subtarget.getVectorLength();
423 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
424 if (ElemTy == MVT::i1)
425 continue;
426 int ElemWidth = ElemTy.getFixedSizeInBits();
427 int MaxElems = (8*HwLen) / ElemWidth;
// Walk power-of-two element counts below a full vector's worth.
428 for (int N = 2; N < MaxElems; N *= 2) {
429 MVT VecTy = MVT::getVectorVT(ElemTy, N);
430 auto Action = getPreferredVectorAction(VecTy);
432 setOperationAction(ISD::LOAD, VecTy, Custom);
433 setOperationAction(ISD::STORE, VecTy, Custom);
439 if (Subtarget.useHVXFloatingPoint()) {
444 }
445
446 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
447 if (!isTypeLegal(BoolTy))
449 }
450 }
451 }
452
453 // Include cases which are not handled earlier
457
459}
460
// Return the preferred type-legalization action (as an unsigned encoding of
// TargetLoweringBase::LegalizeTypeAction) for an HVX-related vector type,
// or ~0u to defer to the default handling.
461unsigned
462HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
463 MVT ElemTy = VecTy.getVectorElementType();
464 unsigned VecLen = VecTy.getVectorNumElements();
465 unsigned HwLen = Subtarget.getVectorLength();
466
467 // Split vectors of i1 that exceed byte vector length.
468 if (ElemTy == MVT::i1 && VecLen > HwLen)
470
471 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
472 // For shorter vectors of i1, widen them if any of the corresponding
473 // vectors of integers needs to be widened.
474 if (ElemTy == MVT::i1) {
475 for (MVT T : Tys) {
476 assert(T != MVT::i1);
// Recurse on the same-length integer vector; any non-default answer wins.
477 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
478 if (A != ~0u)
479 return A;
480 }
481 return ~0u;
482 }
483
484 // If the size of VecTy is at least half of the vector length,
485 // widen the vector. Note: the threshold was not selected in
486 // any scientific way.
487 if (llvm::is_contained(Tys, ElemTy)) {
488 unsigned VecWidth = VecTy.getSizeInBits();
489 unsigned HwWidth = 8*HwLen;
// Wider than a register pair: don't try to widen further.
490 if (VecWidth > 2*HwWidth)
492
// The -hexagon-hvx-widen flag, when given, lowers the widening cutoff.
493 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
494 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
496 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
498 }
499
500 // Defer to default.
501 return ~0u;
502}
503
// Refine the action for HVX nodes marked Custom. Dispatches on the node's
// opcode; NOTE(review): the switch cases and the final return are not
// visible in this view -- confirm specific opcode handling against the
// full source.
504unsigned
505HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
506 unsigned Opc = Op.getOpcode();
507 switch (Opc) {
512 }
514}
515
// Build an INTRINSIC_WO_CHAIN node for the Hexagon intrinsic IntId with
// result type ResTy: the intrinsic id (as an i32 constant) is prepended to
// the given operand list.
517HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
518 const SDLoc &dl, SelectionDAG &DAG) const {
520 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
521 append_range(IntOps, Ops);
522 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
523}
524
525MVT
526HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
527 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
528
529 MVT ElemTy = Tys.first.getVectorElementType();
530 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
531 Tys.second.getVectorNumElements());
532}
533
534HexagonTargetLowering::TypePair
535HexagonTargetLowering::typeSplit(MVT VecTy) const {
536 assert(VecTy.isVector());
537 unsigned NumElem = VecTy.getVectorNumElements();
538 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
539 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
540 return { HalfTy, HalfTy };
541}
542
543MVT
544HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
545 MVT ElemTy = VecTy.getVectorElementType();
546 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
547 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
548}
549
550MVT
551HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
552 MVT ElemTy = VecTy.getVectorElementType();
553 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
554 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
555}
556
// Bitcast Vec to a vector with element type ElemTy (same total bit width);
// returns Vec unchanged if the element type already matches.
558HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
559 SelectionDAG &DAG) const {
560 if (ty(Vec).getVectorElementType() == ElemTy)
561 return Vec;
562 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
563 return DAG.getBitcast(CastTy, Vec);
564}
565
// Concatenate a pair of vector values into one CONCAT_VECTORS node whose
// type is the join of the operands' types.
567HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
568 SelectionDAG &DAG) const {
569 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
570 Ops.first, Ops.second);
571}
572
573HexagonTargetLowering::VectorPair
574HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
575 SelectionDAG &DAG) const {
576 TypePair Tys = typeSplit(ty(Vec));
577 if (Vec.getOpcode() == HexagonISD::QCAT)
578 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
579 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
580}
581
582bool
583HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
584 return Subtarget.isHVXVectorType(Ty) &&
585 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
586}
587
588bool
589HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
590 return Subtarget.isHVXVectorType(Ty) &&
591 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
592}
593
594bool
595HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
596 return Subtarget.isHVXVectorType(Ty, true) &&
597 Ty.getVectorElementType() == MVT::i1;
598}
599
600bool HexagonTargetLowering::allowsHvxMemoryAccess(
601 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
602 // Bool vectors are excluded by default, but make it explicit to
603 // emphasize that bool vectors cannot be loaded or stored.
604 // Also, disallow double vector stores (to prevent unnecessary
605 // store widening in DAG combiner).
606 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
607 return false;
608 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
609 return false;
610 if (Fast)
611 *Fast = 1;
612 return true;
613}
614
615bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
616 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
617 if (!Subtarget.isHVXVectorType(VecTy))
618 return false;
619 // XXX Should this be false? vmemu are a bit slower than vmem.
620 if (Fast)
621 *Fast = 1;
622 return true;
623}
624
// Expand the PS_vsplat* pseudo-instructions produced during instruction
// selection into real splat instructions. On HVX v62+ the native byte and
// halfword splats (V6_lvsplatb/V6_lvsplath) are used; on older targets the
// splat value is first replicated into a full 32-bit scalar and
// V6_lvsplatw is used instead. The expanded instructions are inserted
// before the pseudo, which is then erased (except for the vsplat[ir]w
// case, where the pseudo is rewritten in place).
625void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
626 MachineInstr &MI, SDNode *Node) const {
627 unsigned Opc = MI.getOpcode();
628 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
629 MachineBasicBlock &MB = *MI.getParent();
630 MachineFunction &MF = *MB.getParent();
631 MachineRegisterInfo &MRI = MF.getRegInfo();
632 DebugLoc DL = MI.getDebugLoc();
// Insertion point: new instructions go right before the pseudo.
633 auto At = MI.getIterator();
634
635 switch (Opc) {
636 case Hexagon::PS_vsplatib:
637 if (Subtarget.useHVXV62Ops()) {
638 // SplatV = A2_tfrsi #imm
639 // OutV = V6_lvsplatb SplatV
640 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
641 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
642 .add(MI.getOperand(1));
643 Register OutV = MI.getOperand(0).getReg();
644 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
645 .addReg(SplatV);
646 } else {
647 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
648 // OutV = V6_lvsplatw SplatV
649 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
650 const MachineOperand &InpOp = MI.getOperand(1);
651 assert(InpOp.isImm());
// Replicate the low byte into all four bytes of a word.
652 uint32_t V = InpOp.getImm() & 0xFF;
653 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
654 .addImm(V << 24 | V << 16 | V << 8 | V);
655 Register OutV = MI.getOperand(0).getReg();
656 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
657 }
// Remove the pseudo now that it has been expanded.
658 MB.erase(At);
659 break;
660 case Hexagon::PS_vsplatrb:
661 if (Subtarget.useHVXV62Ops()) {
662 // OutV = V6_lvsplatb Inp
663 Register OutV = MI.getOperand(0).getReg();
664 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
665 .add(MI.getOperand(1));
666 } else {
// Pre-v62: replicate the register's low byte across a word first.
667 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
668 const MachineOperand &InpOp = MI.getOperand(1);
669 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
670 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
671 Register OutV = MI.getOperand(0).getReg();
672 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
673 .addReg(SplatV);
674 }
675 MB.erase(At);
676 break;
677 case Hexagon::PS_vsplatih:
678 if (Subtarget.useHVXV62Ops()) {
679 // SplatV = A2_tfrsi #imm
680 // OutV = V6_lvsplath SplatV
681 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
682 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
683 .add(MI.getOperand(1));
684 Register OutV = MI.getOperand(0).getReg();
685 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
686 .addReg(SplatV);
687 } else {
688 // SplatV = A2_tfrsi #imm:#imm
689 // OutV = V6_lvsplatw SplatV
690 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
691 const MachineOperand &InpOp = MI.getOperand(1);
692 assert(InpOp.isImm());
// Replicate the low halfword into both halves of a word.
693 uint32_t V = InpOp.getImm() & 0xFFFF;
694 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
695 .addImm(V << 16 | V);
696 Register OutV = MI.getOperand(0).getReg();
697 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
698 }
699 MB.erase(At);
700 break;
701 case Hexagon::PS_vsplatrh:
702 if (Subtarget.useHVXV62Ops()) {
703 // OutV = V6_lvsplath Inp
704 Register OutV = MI.getOperand(0).getReg();
705 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
706 .add(MI.getOperand(1));
707 } else {
708 // SplatV = A2_combine_ll Inp, Inp
709 // OutV = V6_lvsplatw SplatV
710 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
711 const MachineOperand &InpOp = MI.getOperand(1);
712 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
713 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
714 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
715 Register OutV = MI.getOperand(0).getReg();
716 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
717 }
718 MB.erase(At);
719 break;
720 case Hexagon::PS_vsplatiw:
721 case Hexagon::PS_vsplatrw:
722 if (Opc == Hexagon::PS_vsplatiw) {
723 // SplatV = A2_tfrsi #imm
724 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
725 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
726 .add(MI.getOperand(1));
// Rewrite the pseudo's input operand to the materialized register.
727 MI.getOperand(1).ChangeToRegister(SplatV, false);
728 }
729 // OutV = V6_lvsplatw SplatV/Inp
// The pseudo itself is retargeted in place rather than erased.
730 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
731 break;
732 }
733}
734
// Convert an element index into a byte index by scaling it by the element
// size in bytes (implemented as a left shift). The index is first brought
// to i32 if needed; a byte-element index is returned unchanged.
736HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
737 SelectionDAG &DAG) const {
738 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
739 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
740
741 unsigned ElemWidth = ElemTy.getSizeInBits();
742 if (ElemWidth == 8)
743 return ElemIdx;
744
// Shift amount = log2(bytes per element).
745 unsigned L = Log2_32(ElemWidth/8);
746 const SDLoc &dl(ElemIdx);
747 return DAG.getNode(ISD::SHL, dl, MVT::i32,
748 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
749}
750
// For an element of width 8..32 bits, compute the element's position within
// its containing 32-bit word, i.e. Idx modulo (32 / element width).
// A 32-bit element always has sub-index 0, so Idx is returned as-is.
752HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
753 SelectionDAG &DAG) const {
754 unsigned ElemWidth = ElemTy.getSizeInBits();
755 assert(ElemWidth >= 8 && ElemWidth <= 32);
756 if (ElemWidth == 32)
757 return Idx;
758
759 if (ty(Idx) != MVT::i32)
760 Idx = DAG.getBitcast(MVT::i32, Idx);
761 const SDLoc &dl(Idx);
// Elements-per-word is a power of two, so modulo reduces to a mask.
762 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
763 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
764 return SubIdx;
765}
766
// Re-express a vector shuffle of Op0/Op1 as a shuffle of byte vectors:
// each mask entry for an N-byte element expands into N consecutive byte
// indices (or N entries of -1 for an undef lane). The operands are bitcast
// to i8 vectors and the result has the corresponding i8 vector type.
768HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
769 SDValue Op1, ArrayRef<int> Mask,
770 SelectionDAG &DAG) const {
771 MVT OpTy = ty(Op0);
772 assert(OpTy == ty(Op1));
773
774 MVT ElemTy = OpTy.getVectorElementType();
// Already a byte shuffle: nothing to expand.
775 if (ElemTy == MVT::i8)
776 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
777 assert(ElemTy.getSizeInBits() >= 8);
778
779 MVT ResTy = tyVector(OpTy, MVT::i8);
780 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
781
782 SmallVector<int,128> ByteMask;
783 for (int M : Mask) {
784 if (M < 0) {
// Undef lane: ElemSize undef byte lanes.
785 for (unsigned I = 0; I != ElemSize; ++I)
786 ByteMask.push_back(-1);
787 } else {
// Element M starts at byte M*ElemSize; take its ElemSize bytes in order.
788 int NewM = M*ElemSize;
789 for (unsigned I = 0; I != ElemSize; ++I)
790 ByteMask.push_back(NewM+I);
791 }
792 }
793 assert(ResTy.getVectorNumElements() == ByteMask.size());
794 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
795 opCastElem(Op1, MVT::i8, DAG), ByteMask);
796}
797
// Materialize a single HVX vector register from the scalar Values of a
// BUILD_VECTOR. Strategies are tried in order of decreasing efficiency:
// (1) splat of a single repeated value, (2) a constant-pool load when all
// values are constants, (3) a shuffle when all values are extracts from
// one source vector, and finally (4) element-by-element insertion using
// two interleaved rotate/insert chains that are OR'ed together.
799HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
800 const SDLoc &dl, MVT VecTy,
801 SelectionDAG &DAG) const {
802 unsigned VecLen = Values.size();
803 MachineFunction &MF = DAG.getMachineFunction();
804 MVT ElemTy = VecTy.getVectorElementType();
805 unsigned ElemWidth = ElemTy.getSizeInBits();
806 unsigned HwLen = Subtarget.getVectorLength();
807
808 unsigned ElemSize = ElemWidth / 8;
809 assert(ElemSize*VecLen == HwLen);
811
// Collect the vector contents as a list of 32-bit words ('Words', declared
// in a line elided from this view). Sub-word elements are first packed into
// words via buildVector32.
812 if (VecTy.getVectorElementType() != MVT::i32 &&
813 !(Subtarget.useHVXFloatingPoint() &&
814 VecTy.getVectorElementType() == MVT::f32)) {
815 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
816 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
817 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
818 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
819 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
820 Words.push_back(DAG.getBitcast(MVT::i32, W));
821 }
822 } else {
823 for (SDValue V : Values)
824 Words.push_back(DAG.getBitcast(MVT::i32, V));
825 }
// Detect a splat: all non-undef values identical. SplatV receives the
// common value (Values[0] when everything is undef).
826 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
827 unsigned NumValues = Values.size();
828 assert(NumValues > 0);
829 bool IsUndef = true;
830 for (unsigned i = 0; i != NumValues; ++i) {
831 if (Values[i].isUndef())
832 continue;
833 IsUndef = false;
834 if (!SplatV.getNode())
835 SplatV = Values[i];
836 else if (SplatV != Values[i])
837 return false;
838 }
839 if (IsUndef)
840 SplatV = Values[0];
841 return true;
842 };
843
844 unsigned NumWords = Words.size();
845 SDValue SplatV;
846 bool IsSplat = isSplat(Words, SplatV);
847 if (IsSplat && isUndef(SplatV))
848 return DAG.getUNDEF(VecTy);
849 if (IsSplat) {
850 assert(SplatV.getNode());
851 if (isNullConstant(SplatV))
852 return getZero(dl, VecTy, DAG);
// Splat as words and bitcast to the requested type.
853 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
854 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
855 return DAG.getBitcast(VecTy, S);
856 }
857
858 // Delay recognizing constant vectors until here, so that we can generate
859 // a vsplat.
860 SmallVector<ConstantInt*, 128> Consts(VecLen);
861 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
862 if (AllConst) {
863 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
864 (Constant**)Consts.end());
865 Constant *CV = ConstantVector::get(Tmp);
866 Align Alignment(HwLen);
867 SDValue CP =
868 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
869 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
871 }
872
873 // A special case is a situation where the vector is built entirely from
874 // elements extracted from another vector. This could be done via a shuffle
875 // more efficiently, but typically, the size of the source vector will not
876 // match the size of the vector being built (which precludes the use of a
877 // shuffle directly).
878 // This only handles a single source vector, and the vector being built
879 // should be of a sub-vector type of the source vector type.
880 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
881 SmallVectorImpl<int> &SrcIdx) {
882 SDValue Vec;
883 for (SDValue V : Values) {
884 if (isUndef(V)) {
885 SrcIdx.push_back(-1);
886 continue;
887 }
888 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
889 return false;
890 // All extracts should come from the same vector.
891 SDValue T = V.getOperand(0);
892 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
893 return false;
894 Vec = T;
// Only constant extract indices can be turned into a shuffle mask.
895 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
896 if (C == nullptr)
897 return false;
898 int I = C->getSExtValue();
899 assert(I >= 0 && "Negative element index");
900 SrcIdx.push_back(I);
901 }
902 SrcVec = Vec;
903 return true;
904 };
905
906 SmallVector<int,128> ExtIdx;
907 SDValue ExtVec;
908 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
909 MVT ExtTy = ty(ExtVec);
910 unsigned ExtLen = ExtTy.getVectorNumElements();
911 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
912 // Construct a new shuffle mask that will produce a vector with the same
913 // number of elements as the input vector, and such that the vector we
914 // want will be the initial subvector of it.
915 SmallVector<int,128> Mask;
916 BitVector Used(ExtLen);
917
918 for (int M : ExtIdx) {
919 Mask.push_back(M);
920 if (M >= 0)
921 Used.set(M);
922 }
923 // Fill the rest of the mask with the unused elements of ExtVec in hopes
924 // that it will result in a permutation of ExtVec's elements. It's still
925 // fine if it doesn't (e.g. if undefs are present, or elements are
926 // repeated), but permutations can always be done efficiently via vdelta
927 // and vrdelta.
928 for (unsigned I = 0; I != ExtLen; ++I) {
929 if (Mask.size() == ExtLen)
930 break;
931 if (!Used.test(I))
932 Mask.push_back(I);
933 }
934
935 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
936 DAG.getUNDEF(ExtTy), Mask);
// When the source is a pair, the desired vector is its low half.
937 return ExtLen == VecLen ? S : LoHalf(S, DAG);
938 }
939 }
940
941 // Find most common element to initialize vector with. This is to avoid
942 // unnecessary vinsert/valign for cases where the same value is present
943 // many times. Creates a histogram of the vector's elements to find the
944 // most common element n.
945 assert(4*Words.size() == Subtarget.getVectorLength());
946 int VecHist[32];
947 int n = 0;
948 for (unsigned i = 0; i != NumWords; ++i) {
949 VecHist[i] = 0;
950 if (Words[i].isUndef())
951 continue;
// Count occurrences of Words[i] from position i onward.
952 for (unsigned j = i; j != NumWords; ++j)
953 if (Words[i] == Words[j])
954 VecHist[i]++;
955
956 if (VecHist[i] > VecHist[n])
957 n = i;
958 }
959
960 SDValue HalfV = getZero(dl, VecTy, DAG);
// Pre-fill with a splat of the most common word when it repeats.
961 if (VecHist[n] > 1) {
962 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
963 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
964 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
965 }
966 SDValue HalfV0 = HalfV;
967 SDValue HalfV1 = HalfV;
968
969 // Construct two halves in parallel, then or them together. Rn and Rm count
970 // number of rotations needed before the next element. One last rotation is
971 // performed post-loop to position the last element.
972 int Rn = 0, Rm = 0;
973 SDValue Sn, Sm;
974 SDValue N = HalfV0;
975 SDValue M = HalfV1;
976 for (unsigned i = 0; i != NumWords/2; ++i) {
977 // Rotate by element count since last insertion.
978 if (Words[i] != Words[n] || VecHist[n] <= 1) {
979 Sn = DAG.getConstant(Rn, dl, MVT::i32);
980 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
981 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
982 {HalfV0, Words[i]});
983 Rn = 0;
984 }
985 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
986 Sm = DAG.getConstant(Rm, dl, MVT::i32);
987 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
988 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
989 {HalfV1, Words[i+NumWords/2]});
990 Rm = 0;
991 }
// Each word is 4 bytes; accumulate rotation for skipped positions.
992 Rn += 4;
993 Rm += 4;
994 }
995 // Perform last rotation.
996 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
997 Sm = DAG.getConstant(Rm, dl, MVT::i32);
998 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
999 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1000
// Combine the two halves with a bitwise OR over i32 lanes.
1001 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
1002 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
1003
1004 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
1005
1006 SDValue OutV =
1007 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
1008 return OutV;
1009}
1010
// Create a full-size byte vector whose initial bytes encode the predicate
// PredV, with BitBytes bytes per predicate bit; when ZeroFill is set, the
// bytes beyond that prefix are zeroed. Handles both HVX predicate types
// (via Q2V and a byte shuffle) and short scalar predicates v2i1/v4i1/v8i1
// (via P2D and repeated halving/expansion).
1011SDValue
1012HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
1013 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
1014 MVT PredTy = ty(PredV);
1015 unsigned HwLen = Subtarget.getVectorLength();
1016 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1017
1018 if (Subtarget.isHVXVectorType(PredTy, true)) {
1019 // Move the vector predicate SubV to a vector register, and scale it
1020 // down to match the representation (bytes per type element) that VecV
1021 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1022 // in general) element and put them at the front of the resulting
1023 // vector. This subvector will then be inserted into the Q2V of VecV.
1024 // To avoid having an operation that generates an illegal type (short
1025 // vector), generate a full size vector.
1026 //
1027 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1028 SmallVector<int,128> Mask(HwLen);
1029 // Scale = BitBytes(PredV) / Given BitBytes.
1030 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1031 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1032
// Gather every Scale-th byte into the first BlockLen positions.
1033 for (unsigned i = 0; i != HwLen; ++i) {
1034 unsigned Num = i % Scale;
1035 unsigned Off = i / Scale;
1036 Mask[BlockLen*Num + Off] = i;
1037 }
1038 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1039 if (!ZeroFill)
1040 return S;
1041 // Fill the bytes beyond BlockLen with 0s.
1042 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1043 // when BlockLen < HwLen.
1044 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1045 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1046 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1047 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1048 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
// Mask off everything past the prefix.
1049 return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1050 }
1051
1052 // Make sure that this is a valid scalar predicate.
1053 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1054
// Bytes currently represented per predicate bit (P2D yields 8 bytes total).
1055 unsigned Bytes = 8 / PredTy.getVectorNumElements();
// Double-buffered word lists; IdxW selects the current generation.
1056 SmallVector<SDValue,4> Words[2];
1057 unsigned IdxW = 0;
1058
1059 SDValue W0 = isUndef(PredV)
1060 ? DAG.getUNDEF(MVT::i64)
1061 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1062 Words[IdxW].push_back(HiHalf(W0, DAG));
1063 Words[IdxW].push_back(LoHalf(W0, DAG));
1064
// Keep doubling the bytes-per-bit until it reaches BitBytes.
1065 while (Bytes < BitBytes) {
1066 IdxW ^= 1;
1067 Words[IdxW].clear();
1068
1069 if (Bytes < 4) {
// Below a word per bit: widen each word via expandPredicate.
1070 for (const SDValue &W : Words[IdxW ^ 1]) {
1071 SDValue T = expandPredicate(W, dl, DAG);
1072 Words[IdxW].push_back(HiHalf(T, DAG));
1073 Words[IdxW].push_back(LoHalf(T, DAG));
1074 }
1075 } else {
// At a word per bit or more: duplicate each word instead.
1076 for (const SDValue &W : Words[IdxW ^ 1]) {
1077 Words[IdxW].push_back(W);
1078 Words[IdxW].push_back(W);
1079 }
1080 }
1081 Bytes *= 2;
1082 }
1083
1084 assert(Bytes == BitBytes);
1085
// Insert the words front-to-back with rotate-by-(HwLen-4) between inserts,
// so the first word ends up at the start of the vector.
1086 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1087 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1088 for (const SDValue &W : Words[IdxW]) {
1089 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1090 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1091 }
1092
1093 return Vec;
1094}
1095
1096SDValue
1097HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1098 const SDLoc &dl, MVT VecTy,
1099 SelectionDAG &DAG) const {
1100 // Construct a vector V of bytes, such that a comparison V >u 0 would
1101 // produce the required vector predicate.
1102 unsigned VecLen = Values.size();
1103 unsigned HwLen = Subtarget.getVectorLength();
1104 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1106 bool AllT = true, AllF = true;
1107
1108 auto IsTrue = [] (SDValue V) {
1109 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1110 return !N->isZero();
1111 return false;
1112 };
1113 auto IsFalse = [] (SDValue V) {
1114 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1115 return N->isZero();
1116 return false;
1117 };
1118
1119 if (VecLen <= HwLen) {
1120 // In the hardware, each bit of a vector predicate corresponds to a byte
1121 // of a vector register. Calculate how many bytes does a bit of VecTy
1122 // correspond to.
1123 assert(HwLen % VecLen == 0);
1124 unsigned BitBytes = HwLen / VecLen;
1125 for (SDValue V : Values) {
1126 AllT &= IsTrue(V);
1127 AllF &= IsFalse(V);
1128
1129 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1130 : DAG.getUNDEF(MVT::i8);
1131 for (unsigned B = 0; B != BitBytes; ++B)
1132 Bytes.push_back(Ext);
1133 }
1134 } else {
1135 // There are as many i1 values, as there are bits in a vector register.
1136 // Divide the values into groups of 8 and check that each group consists
1137 // of the same value (ignoring undefs).
1138 for (unsigned I = 0; I != VecLen; I += 8) {
1139 unsigned B = 0;
1140 // Find the first non-undef value in this group.
1141 for (; B != 8; ++B) {
1142 if (!Values[I+B].isUndef())
1143 break;
1144 }
1145 SDValue F = Values[I+B];
1146 AllT &= IsTrue(F);
1147 AllF &= IsFalse(F);
1148
1149 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1150 : DAG.getUNDEF(MVT::i8);
1151 Bytes.push_back(Ext);
1152 // Verify that the rest of values in the group are the same as the
1153 // first.
1154 for (; B != 8; ++B)
1155 assert(Values[I+B].isUndef() || Values[I+B] == F);
1156 }
1157 }
1158
1159 if (AllT)
1160 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1161 if (AllF)
1162 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1163
1164 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1165 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1166 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1167}
1168
1169SDValue
1170HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1171 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1172 MVT ElemTy = ty(VecV).getVectorElementType();
1173
1174 unsigned ElemWidth = ElemTy.getSizeInBits();
1175 assert(ElemWidth >= 8 && ElemWidth <= 32);
1176 (void)ElemWidth;
1177
1178 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1179 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1180 {VecV, ByteIdx});
1181 if (ElemTy == MVT::i32)
1182 return ExWord;
1183
1184 // Have an extracted word, need to extract the smaller element out of it.
1185 // 1. Extract the bits of (the original) IdxV that correspond to the index
1186 // of the desired element in the 32-bit word.
1187 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1188 // 2. Extract the element from the word.
1189 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1190 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1191}
1192
1193SDValue
1194HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1195 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1196 // Implement other return types if necessary.
1197 assert(ResTy == MVT::i1);
1198
1199 unsigned HwLen = Subtarget.getVectorLength();
1200 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1201 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1202
1203 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1204 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1205 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1206
1207 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1208 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1209 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1210}
1211
// Insert ValV at element index IdxV of VecV. Elements are 8, 16, or 32 bits
// wide; narrower elements are handled by read-modify-write of the word that
// contains them.
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert the 32-bit word ValV at the (word-aligned) byte index ByteIdxV:
  // rotate the target word down to position 0, overwrite it (VINSERTW0),
  // then rotate back.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Align the byte index down to a multiple of 4.
    SDValue MaskV =
        DAG.getNode(ISD::AND, dl, MVT::i32,
                    {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
    SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
    SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
    // Undo the rotation: rotate by HwLen - aligned index.
    SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
                                     dl, MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(ty(Ext), ElemTy);
  SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
                             ValV, SubIdx, dl, ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}
1257
1258SDValue
1259HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1260 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1261 unsigned HwLen = Subtarget.getVectorLength();
1262 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1263 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1264
1265 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1266 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1267 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1268 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1269
1270 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1271 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1272}
1273
// Extract a subvector of ResTy from VecV at the constant element index IdxV.
// ResTy is either a full single HVX vector (when VecV is a pair) or a type
// that fits in a 32- or 64-bit scalar register.
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
      SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  // IdxV is required to be a constant here.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
  if (isHvxPairTy(VecTy)) {
    unsigned SubIdx = Hexagon::vsub_lo;
    if (Idx * ElemWidth >= 8 * HwLen) {
      // Subvector lies in the high half: rebase Idx to that half.
      SubIdx = Hexagon::vsub_hi;
      Idx -= VecTy.getVectorNumElements() / 2;
    }

    VecTy = typeSplit(VecTy).first;
    VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
    if (VecTy == ResTy)
      return VecV;
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);

  // View the vector as i32 words and extract the word(s) that cover the
  // requested subvector.
  MVT WordTy = tyVector(VecTy, MVT::i32);
  SDValue WordVec = DAG.getBitcast(WordTy, VecV);
  unsigned WordIdx = (Idx*ElemWidth) / 32;

  SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
  SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
  if (ResTy.getSizeInBits() == 32)
    return DAG.getBitcast(ResTy, W0);

  // 64-bit result: combine two consecutive words.
  SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
  SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
  SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
  return DAG.getBitcast(ResTy, WW);
}
1317
// Extract a subvector of the vector predicate VecV at the constant element
// index IdxV. The result is either a shorter HVX vector predicate, or a
// scalar predicate type (v2i1/v4i1/v8i1).
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes of the byte vector per i1 element of VecTy.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
  SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
                  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
}
1381
// Insert the subvector SubV into VecV at element index IdxV (which need not
// be a constant). SubV is either a single HVX vector being inserted into a
// pair, or a type that fits in a 32- or 64-bit scalar register.
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(VecTy);
  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(VecV, DAG);
    V1 = HiHalf(VecV, DAG);

    SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
                                    dl, MVT::i32);
    PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
    if (isHvxSingleTy(SubTy)) {
      // Inserting a whole single vector: with a constant index this is
      // just a subregister insert into the pair.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
      SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
      return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
    IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
    SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to word 0 for VINSERTW0.
    IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                       DAG.getConstant(ElemWidth/8, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    SDValue V = DAG.getBitcast(MVT::i32, SubV);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
  } else {
    // 64-bit subvector: insert the low word, rotate by 4 bytes, then
    // insert the high word.
    SDValue V = DAG.getBitcast(MVT::i64, SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
                          DAG.getConstant(4, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
  }

  if (IsPair) {
    // Reassemble the pair with the updated half in the right position.
    SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
    SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
    return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
  }
  return SingleV;
}
1468
// Insert the predicate SubV (an HVX vector predicate or a scalar predicate)
// into the vector predicate VecV at element index IdxV.
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // Bytes of the byte-vector form per i1 element of VecTy.
  unsigned BitBytes = HwLen / VecLen;
  // Bytes occupied by the subvector in the byte-vector form.
  unsigned BlockLen = HwLen / Scale;

  // Do the insertion on the byte-vector forms of both predicates.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to position 0.
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1514
1515SDValue
1516HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1517 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1518 // Sign- and any-extending of a vector predicate to a vector register is
1519 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1520 // a vector of 1s (where the 1s are of type matching the vector type).
1521 assert(Subtarget.isHVXVectorType(ResTy));
1522 if (!ZeroExt)
1523 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1524
1525 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1526 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1527 DAG.getConstant(1, dl, MVT::i32));
1528 SDValue False = getZero(dl, ResTy, DAG);
1529 return DAG.getSelect(dl, ResTy, VecV, True, False);
1530}
1531
SDValue
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
      MVT ResTy, SelectionDAG &DAG) const {
  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
  // vector register. The remaining bits of the vector register are
  // unspecified.

  MachineFunction &MF = DAG.getMachineFunction();
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT PredTy = ty(VecQ);
  unsigned PredLen = PredTy.getVectorNumElements();
  assert(HwLen % PredLen == 0);
  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);

  Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
  SmallVector<Constant*, 128> Tmp;
  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
  // These are bytes with the LSB rotated left with respect to their index.
  for (unsigned i = 0; i != HwLen/8; ++i) {
    for (unsigned j = 0; j != 8; ++j)
      Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
  }
  // Materialize the table via a constant-pool load.
  Constant *CV = ConstantVector::get(Tmp);
  Align Alignment(HwLen);
  SDValue CP =
      LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
  SDValue Bytes =
      DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
                  MachinePointerInfo::getConstantPool(MF), Alignment);

  // Select the bytes that correspond to true bits in the vector predicate.
  SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
                              getZero(dl, VecTy, DAG));
  // Calculate the OR of all bytes in each group of 8. That will compress
  // all the individual bits into a single byte.
  // First, OR groups of 4, via vrmpy with 0x01010101.
  SDValue All1 =
      DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
  SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
  SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
      {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
  SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});

  // Pick every 8th byte and coalesce them at the beginning of the output.
  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
  // byte and so on.
  SmallVector<int,128> Mask;
  for (unsigned i = 0; i != HwLen; ++i)
    Mask.push_back((8*i) % HwLen + i/(HwLen/8));
  SDValue Collect =
      DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
  return DAG.getBitcast(ResTy, Collect);
}
1588
1589SDValue
1590HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1591 const SDLoc &dl, SelectionDAG &DAG) const {
1592 // Take a vector and resize the element type to match the given type.
1593 MVT InpTy = ty(VecV);
1594 if (InpTy == ResTy)
1595 return VecV;
1596
1597 unsigned InpWidth = InpTy.getSizeInBits();
1598 unsigned ResWidth = ResTy.getSizeInBits();
1599
1600 if (InpTy.isFloatingPoint()) {
1601 return InpWidth < ResWidth
1602 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1603 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1604 DAG.getTargetConstant(0, dl, MVT::i32));
1605 }
1606
1607 assert(InpTy.isInteger());
1608
1609 if (InpWidth < ResWidth) {
1610 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1611 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1612 } else {
1613 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1614 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1615 }
1616}
1617
1618SDValue
1619HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1620 SelectionDAG &DAG) const {
1621 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1622
1623 const SDLoc &dl(Vec);
1624 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1625 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1626 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1627}
1628
// Custom lowering for BUILD_VECTOR on HVX types: dispatches to the
// predicate builder for i1 vectors, rewrites f16 builds via i16, splits
// pair-sized builds into two single-vector builds.
SDValue
HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
      const {
  const SDLoc &dl(Op);
  MVT VecTy = ty(Op);

  unsigned Size = Op.getNumOperands();
  SmallVector<SDValue,128> Ops;
  for (unsigned i = 0; i != Size; ++i)
    Ops.push_back(Op.getOperand(i));

  if (VecTy.getVectorElementType() == MVT::i1)
    return buildHvxVectorPred(Ops, dl, VecTy, DAG);

  // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
  // not a legal type, just bitcast the node to use i16
  // types and bitcast the result back to f16
  if (VecTy.getVectorElementType() == MVT::f16) {
    SmallVector<SDValue,64> NewOps;
    for (unsigned i = 0; i != Size; i++)
      NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));

    SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
                             tyVector(VecTy, MVT::i16), NewOps);
    return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
  }

  // First, split the BUILD_VECTOR for vector pairs. We could generate
  // some pairs directly (via splat), but splats should be generated
  // by the combiner prior to getting here.
  if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
    ArrayRef<SDValue> A(Ops);
    MVT SingleTy = typeSplit(VecTy).first;
    SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
    SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
  }

  return buildHvxVectorReg(Ops, dl, VecTy, DAG);
}
1669
1670SDValue
1671HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1672 const {
1673 const SDLoc &dl(Op);
1674 MVT VecTy = ty(Op);
1675 MVT ArgTy = ty(Op.getOperand(0));
1676
1677 if (ArgTy == MVT::f16) {
1678 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1679 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1680 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1681 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1682 return DAG.getBitcast(VecTy, Splat);
1683 }
1684
1685 return SDValue();
1686}
1687
SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
      const {
  // Vector concatenation of two integer (non-bool) vectors does not need
  // special lowering. Custom-lower concats of bool vectors and expand
  // concats of more than 2 vectors.
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  unsigned NumOp = Op.getNumOperands();
  if (VecTy.getVectorElementType() != MVT::i1) {
    if (NumOp == 2)
      return Op;
    // Expand the other cases into a build-vector.
    SmallVector<SDValue,8> Elems;
    for (SDValue V : Op.getNode()->ops())
      DAG.ExtractVectorElements(V, Elems);
    // A vector of i16 will be broken up into a build_vector of i16's.
    // This is a problem, since at the time of operation legalization,
    // all operations are expected to be type-legalized, and i16 is not
    // a legal type. If any of the extracted elements is not of a valid
    // type, sign-extend it to a valid one.
    for (SDValue &V : Elems) {
      MVT Ty = ty(V);
      if (!isTypeLegal(Ty)) {
        MVT NTy = typeLegalize(Ty, DAG);
        if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
          // Re-extract in the legal type and sign-extend in-register to
          // preserve the original narrow value.
          V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
                                      V.getOperand(0), V.getOperand(1)),
                          DAG.getValueType(Ty));
          continue;
        }
        // A few less complicated cases.
        switch (V.getOpcode()) {
          case ISD::Constant:
            V = DAG.getSExtOrTrunc(V, dl, NTy);
            break;
          case ISD::UNDEF:
            V = DAG.getUNDEF(NTy);
            break;
          case ISD::TRUNCATE:
            V = V.getOperand(0);
            break;
          default:
            llvm_unreachable("Unexpected vector element");
        }
      }
    }
    return DAG.getBuildVector(VecTy, dl, Elems);
  }

  assert(VecTy.getVectorElementType() == MVT::i1);
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);

  SDValue Op0 = Op.getOperand(0);

  // If the operands are HVX types (i.e. not scalar predicates), then
  // defer the concatenation, and create QCAT instead.
  if (Subtarget.isHVXVectorType(ty(Op0), true)) {
    if (NumOp == 2)
      return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));

    ArrayRef<SDUse> U(Op.getNode()->ops());
    SmallVector<SDValue,4> SV(U.begin(), U.end());
    ArrayRef<SDValue> Ops(SV);

    MVT HalfTy = typeSplit(VecTy).first;
    SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_front(NumOp/2));
    SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_back(NumOp/2));
    return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();

  // Expand each scalar-predicate operand into its byte-prefix form.
  SmallVector<SDValue,8> Prefixes;
  for (SDValue V : Op.getNode()->op_values()) {
    SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
    Prefixes.push_back(P);
  }

  // Stitch the prefixes together, last operand first, rotating the
  // accumulated result before OR-ing in each prefix.
  unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
  SDValue Res = getZero(dl, ByteTy, DAG);
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
    Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
    Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
}
1783
1784SDValue
1785HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1786 const {
1787 // Change the type of the extracted element to i32.
1788 SDValue VecV = Op.getOperand(0);
1789 MVT ElemTy = ty(VecV).getVectorElementType();
1790 const SDLoc &dl(Op);
1791 SDValue IdxV = Op.getOperand(1);
1792 if (ElemTy == MVT::i1)
1793 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1794
1795 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1796}
1797
// Custom lowering for INSERT_VECTOR_ELT: dispatches to the predicate or
// register form of insertion; f16 insertions go through an i16 bitcast
// since f16 is not a legal scalar type.
SDValue
HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
      const {
  const SDLoc &dl(Op);
  MVT VecTy = ty(Op);
  SDValue VecV = Op.getOperand(0);
  SDValue ValV = Op.getOperand(1);
  SDValue IdxV = Op.getOperand(2);
  MVT ElemTy = ty(VecV).getVectorElementType();
  if (ElemTy == MVT::i1)
    return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);

  if (ElemTy == MVT::f16) {
    // Do the insertion on the i16 view, then cast back to f16.
    SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
        tyVector(VecTy, MVT::i16),
        DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
        DAG.getBitcast(MVT::i16, ValV), IdxV);
    return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
  }

  return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
}
1820
1821SDValue
1822HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1823 const {
1824 SDValue SrcV = Op.getOperand(0);
1825 MVT SrcTy = ty(SrcV);
1826 MVT DstTy = ty(Op);
1827 SDValue IdxV = Op.getOperand(1);
1828 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1829 assert(Idx % DstTy.getVectorNumElements() == 0);
1830 (void)Idx;
1831 const SDLoc &dl(Op);
1832
1833 MVT ElemTy = SrcTy.getVectorElementType();
1834 if (ElemTy == MVT::i1)
1835 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1836
1837 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1838}
1839
1840SDValue
1841HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1842 const {
1843 // Idx does not need to be a constant.
1844 SDValue VecV = Op.getOperand(0);
1845 SDValue ValV = Op.getOperand(1);
1846 SDValue IdxV = Op.getOperand(2);
1847
1848 const SDLoc &dl(Op);
1849 MVT VecTy = ty(VecV);
1850 MVT ElemTy = VecTy.getVectorElementType();
1851 if (ElemTy == MVT::i1)
1852 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1853
1854 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1855}
1856
1857SDValue
1858HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1859 // Lower any-extends of boolean vectors to sign-extends, since they
1860 // translate directly to Q2V. Zero-extending could also be done equally
1861 // fast, but Q2V is used/recognized in more places.
1862 // For all other vectors, use zero-extend.
1863 MVT ResTy = ty(Op);
1864 SDValue InpV = Op.getOperand(0);
1865 MVT ElemTy = ty(InpV).getVectorElementType();
1866 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1867 return LowerHvxSignExt(Op, DAG);
1868 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1869}
1870
1871SDValue
1872HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1873 MVT ResTy = ty(Op);
1874 SDValue InpV = Op.getOperand(0);
1875 MVT ElemTy = ty(InpV).getVectorElementType();
1876 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1877 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1878 return Op;
1879}
1880
1881SDValue
1882HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1883 MVT ResTy = ty(Op);
1884 SDValue InpV = Op.getOperand(0);
1885 MVT ElemTy = ty(InpV).getVectorElementType();
1886 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1887 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1888 return Op;
1889}
1890
1891SDValue
1892HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1893 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1894 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1895 const SDLoc &dl(Op);
1896 MVT ResTy = ty(Op);
1897 SDValue InpV = Op.getOperand(0);
1898 assert(ResTy == ty(InpV));
1899
1900 // Calculate the vectors of 1 and bitwidth(x).
1901 MVT ElemTy = ty(InpV).getVectorElementType();
1902 unsigned ElemWidth = ElemTy.getSizeInBits();
1903
1904 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1905 DAG.getConstant(1, dl, MVT::i32));
1906 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1907 DAG.getConstant(ElemWidth, dl, MVT::i32));
1908 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1909 DAG.getAllOnesConstant(dl, MVT::i32));
1910
1911 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1912 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1913 // it separately in custom combine or selection).
1914 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1915 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1916 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1917 return DAG.getNode(ISD::SUB, dl, ResTy,
1918 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1919}
1920
1921SDValue
1922HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1923 const SDLoc &dl(Op);
1924 MVT ResTy = ty(Op);
1925 assert(ResTy.getVectorElementType() == MVT::i32);
1926
1927 SDValue Vs = Op.getOperand(0);
1928 SDValue Vt = Op.getOperand(1);
1929
1930 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1931 unsigned Opc = Op.getOpcode();
1932
1933 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1934 if (Opc == ISD::MULHU)
1935 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1936 if (Opc == ISD::MULHS)
1937 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1938
1939#ifndef NDEBUG
1940 Op.dump(&DAG);
1941#endif
1942 llvm_unreachable("Unexpected mulh operation");
1943}
1944
1945SDValue
1946HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
1947 const SDLoc &dl(Op);
1948 unsigned Opc = Op.getOpcode();
1949 SDValue Vu = Op.getOperand(0);
1950 SDValue Vv = Op.getOperand(1);
1951
1952 // If the HI part is not used, convert it to a regular MUL.
1953 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
1954 // Need to preserve the types and the number of values.
1955 SDValue Hi = DAG.getUNDEF(ty(HiVal));
1956 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
1957 return DAG.getMergeValues({Lo, Hi}, dl);
1958 }
1959
1960 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
1961 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
1962
1963 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
1964 // valued nodes.
1965 if (Subtarget.useHVXV62Ops())
1966 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1967
1968 if (Opc == HexagonISD::SMUL_LOHI) {
1969 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
1970 // for other signedness LOHI is cheaper.
1971 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
1972 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
1973 SDValue Lo = DAG.getUNDEF(ty(LoVal));
1974 return DAG.getMergeValues({Lo, Hi}, dl);
1975 }
1976 }
1977
1978 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1979}
1980
1981SDValue
1982HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1983 SDValue Val = Op.getOperand(0);
1984 MVT ResTy = ty(Op);
1985 MVT ValTy = ty(Val);
1986 const SDLoc &dl(Op);
1987
1988 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
1989 unsigned HwLen = Subtarget.getVectorLength();
1990 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
1991 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
1992 unsigned BitWidth = ResTy.getSizeInBits();
1993
1994 if (BitWidth < 64) {
1995 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
1996 dl, MVT::i32, DAG);
1997 if (BitWidth == 32)
1998 return W0;
1999 assert(BitWidth < 32u);
2000 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2001 }
2002
2003 // The result is >= 64 bits. The only options are 64 or 128.
2004 assert(BitWidth == 64 || BitWidth == 128);
2006 for (unsigned i = 0; i != BitWidth/32; ++i) {
2007 SDValue W = extractHvxElementReg(
2008 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2009 Words.push_back(W);
2010 }
2011 SmallVector<SDValue,2> Combines;
2012 assert(Words.size() % 2 == 0);
2013 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2014 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2015 Combines.push_back(C);
2016 }
2017
2018 if (BitWidth == 64)
2019 return Combines[0];
2020
2021 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2022 }
2023
2024 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2025 // Splat the input into a 32-element i32 vector, then AND each element
2026 // with a unique bitmask to isolate individual bits.
2027 if (ResTy == MVT::v32i1 &&
2028 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2029 Subtarget.useHVX128BOps()) {
2030 SDValue Val32 = Val;
2031 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2032 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2033
2034 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2035 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2037 for (unsigned i = 0; i < 32; ++i)
2038 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2039
2040 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2041 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2042 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
2043 }
2044
2045 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2046 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2047 unsigned BitWidth = ValTy.getSizeInBits();
2048 unsigned HwLen = Subtarget.getVectorLength();
2049 assert(BitWidth == HwLen);
2050
2051 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2052 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2053 // Splat each byte of Val 8 times.
2054 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2055 // where b0, b1,..., b15 are least to most significant bytes of I.
2057 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2058 // These are bytes with the LSB rotated left with respect to their index.
2060 for (unsigned I = 0; I != HwLen / 8; ++I) {
2061 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2062 SDValue Byte =
2063 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2064 for (unsigned J = 0; J != 8; ++J) {
2065 Bytes.push_back(Byte);
2066 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2067 }
2068 }
2069
2070 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2071 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2072 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2073
2074 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2075 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2076 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2077 }
2078
2079 return Op;
2080}
2081
2082SDValue
2083HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2084 // Sign- and zero-extends are legal.
2085 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2086 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2087 Op.getOperand(0));
2088}
2089
2090SDValue
2091HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2092 MVT ResTy = ty(Op);
2093 if (ResTy.getVectorElementType() != MVT::i1)
2094 return Op;
2095
2096 const SDLoc &dl(Op);
2097 unsigned HwLen = Subtarget.getVectorLength();
2098 unsigned VecLen = ResTy.getVectorNumElements();
2099 assert(HwLen % VecLen == 0);
2100 unsigned ElemSize = HwLen / VecLen;
2101
2102 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2103 SDValue S =
2104 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2105 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2106 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2107 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2108}
2109
2110SDValue
2111HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2112 if (SDValue S = getVectorShiftByInt(Op, DAG))
2113 return S;
2114 return Op;
2115}
2116
2117SDValue
2118HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2119 SelectionDAG &DAG) const {
2120 unsigned Opc = Op.getOpcode();
2121 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2122
2123 // Make sure the shift amount is within the range of the bitwidth
2124 // of the element type.
2125 SDValue A = Op.getOperand(0);
2126 SDValue B = Op.getOperand(1);
2127 SDValue S = Op.getOperand(2);
2128
2129 MVT InpTy = ty(A);
2130 MVT ElemTy = InpTy.getVectorElementType();
2131
2132 const SDLoc &dl(Op);
2133 unsigned ElemWidth = ElemTy.getSizeInBits();
2134 bool IsLeft = Opc == ISD::FSHL;
2135
2136 // The expansion into regular shifts produces worse code for i8 and for
2137 // right shift of i32 on v65+.
2138 bool UseShifts = ElemTy != MVT::i8;
2139 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2140 UseShifts = false;
2141
2142 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2143 // If this is a funnel shift by a scalar, lower it into regular shifts.
2144 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2145 SDValue ModS =
2146 DAG.getNode(ISD::AND, dl, MVT::i32,
2147 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2148 SDValue NegS =
2149 DAG.getNode(ISD::SUB, dl, MVT::i32,
2150 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2151 SDValue IsZero =
2152 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2153 // FSHL A, B => A << | B >>n
2154 // FSHR A, B => A <<n | B >>
2155 SDValue Part1 =
2156 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2157 SDValue Part2 =
2158 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2159 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2160 // If the shift amount was 0, pick A or B, depending on the direction.
2161 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2162 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2163 }
2164
2166 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2167
2168 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2169 return DAG.getNode(MOpc, dl, ty(Op),
2170 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2171}
2172
2173SDValue
2174HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2175 const SDLoc &dl(Op);
2176 unsigned IntNo = Op.getConstantOperandVal(0);
2177 SmallVector<SDValue> Ops(Op->ops());
2178
2179 auto Swap = [&](SDValue P) {
2180 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2181 };
2182
2183 switch (IntNo) {
2184 case Intrinsic::hexagon_V6_pred_typecast:
2185 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2186 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2187 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2188 if (ResTy == InpTy)
2189 return Ops[1];
2190 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2191 }
2192 break;
2193 }
2194 case Intrinsic::hexagon_V6_vmpyss_parts:
2195 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2196 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2197 {Ops[1], Ops[2]}));
2198 case Intrinsic::hexagon_V6_vmpyuu_parts:
2199 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2200 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2201 {Ops[1], Ops[2]}));
2202 case Intrinsic::hexagon_V6_vmpyus_parts:
2203 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2204 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2205 {Ops[1], Ops[2]}));
2206 }
2207 } // switch
2208
2209 return Op;
2210}
2211
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower masked loads and stores (MLOAD/MSTORE) of HVX vectors.
  // Loads become a full (unmasked) load followed by a vselect against the
  // pass-through value; stores use the predicated store instruction.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  // Memory operand covering a full HVX vector at the base address.
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    // Load the whole vector, then blend in the pass-through value in the
    // lanes where the mask is false. If the pass-through is undef, the
    // plain load is already the answer.
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    if (isUndef(Thru))
      return Load;
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  // Fully aligned case: a single predicated store.
  if (MaskN->getAlign().value() % HwLen == 0) {
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case: rotate the value (and the mask, widened to bytes)
  // into two vector-aligned halves and emit two predicated stores.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  // The mask is realigned as a byte vector, then converted back to a
  // predicate pair for the two stores.
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  // Join the two store chains.
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
2281
2282SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2283 SelectionDAG &DAG) const {
2284 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2285 // is legal (done via a pattern).
2286 assert(Subtarget.useHVXQFloatOps());
2287
2288 assert(Op->getOpcode() == ISD::FP_EXTEND);
2289
2290 MVT VecTy = ty(Op);
2291 MVT ArgTy = ty(Op.getOperand(0));
2292 const SDLoc &dl(Op);
2293 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2294
2295 SDValue F16Vec = Op.getOperand(0);
2296
2297 APFloat FloatVal = APFloat(1.0f);
2298 bool Ignored;
2300 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2301 SDValue VmpyVec =
2302 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2303
2304 MVT HalfTy = typeSplit(VecTy).first;
2305 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2306 SDValue LoVec =
2307 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2308 SDValue HiVec =
2309 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2310
2311 SDValue ShuffVec =
2312 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2313 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2314
2315 return ShuffVec;
2316}
2317
2318SDValue
2319HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2320 // Catch invalid conversion ops (just in case).
2321 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2322 Op.getOpcode() == ISD::FP_TO_UINT);
2323
2324 MVT ResTy = ty(Op);
2325 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2326 MVT IntTy = ResTy.getVectorElementType();
2327
2328 if (Subtarget.useHVXIEEEFPOps()) {
2329 // There are only conversions from f16.
2330 if (FpTy == MVT::f16) {
2331 // Other int types aren't legal in HVX, so we shouldn't see them here.
2332 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2333 // Conversions to i8 and i16 are legal.
2334 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2335 return Op;
2336 }
2337 }
2338
2339 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2340 return EqualizeFpIntConversion(Op, DAG);
2341
2342 return ExpandHvxFpToInt(Op, DAG);
2343}
2344
// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
// R1 = #1, R2 holds the v32i1 param
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V2 = vand(V2,V1)
// V2.sf = V2.w
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // Splat the constant 1 into every lane (V1 in the scheme above).
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Q0 = vand(V1,R1): an all-true predicate used to seed the prefix sum.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Splat the 32-bit value holding the v32i1 input into every lane.
  SDNode *SplatParam = DAG.getMachineNode(
      Hexagon::V6_lvsplatw, dl, MVT::v32i32,
      DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
  // Turn the prefix sum [1,2,3,...] into per-lane bit indices [0,1,2,...].
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Shift each lane right by its own index, then mask to bit 0, leaving
  // each lane holding 0 or 1 according to its predicate bit.
  SDNode *IndexShift =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatParam, 0), SDValue(Vsub, 0));
  SDNode *MaskOff =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift, 0), SDValue(SplatConst, 0));
  // Convert the 0/1 words to single-precision float.
  SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
                                       SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2386
// For vector type v64i1 uint_to_fp to v64f16:
// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
// R3 = subreg_high (R32)
// R2 = subreg_low (R32)
// R1 = #1
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// V3 = vsplat(R3)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V3.w = vlsr(V3.w,V0.w)
// V2 = vand(V2,V1)
// V3 = vand(V3,V1)
// V2.h = vpacke(V3.w,V2.w)
// V2.hf = V2.h
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // View the v64i1 input as an i64, then split it into its two 32-bit
  // halves: each half supplies the predicate bits for 32 lanes.
  SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
  // vand between splatted const and const: an all-true predicate that
  // seeds the prefix sum below.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Get the prefixsum
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Get the vsub: turns [1,2,3,...] into per-lane bit indices [0,1,2,...]
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Get vlsr for hi and lo: shift each lane right by its own bit index
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatHi, 0), SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatLo, 0), SDValue(Vsub, 0));
  // Get vand of hi and lo: keep only bit 0, leaving 0 or 1 per lane
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
  // Pack them: both word vectors into one v64i16 vector
  SDNode *Pack =
      DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
                         SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
  // Convert the 0/1 halfwords to half-precision float.
  SDNode *Convert =
      DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2461
2462SDValue
2463HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2464 // Catch invalid conversion ops (just in case).
2465 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2466 Op.getOpcode() == ISD::UINT_TO_FP);
2467
2468 MVT ResTy = ty(Op);
2469 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2470 MVT FpTy = ResTy.getVectorElementType();
2471
2472 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2473 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2474 return LowerHvxPred32ToFp(Op, DAG);
2475 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2476 return LowerHvxPred64ToFp(Op, DAG);
2477 }
2478
2479 if (Subtarget.useHVXIEEEFPOps()) {
2480 // There are only conversions to f16.
2481 if (FpTy == MVT::f16) {
2482 // Other int types aren't legal in HVX, so we shouldn't see them here.
2483 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2484 // i8, i16 -> f16 is legal.
2485 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2486 return Op;
2487 }
2488 }
2489
2490 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2491 return EqualizeFpIntConversion(Op, DAG);
2492
2493 return ExpandHvxIntToFp(Op, DAG);
2494}
2495
2496HexagonTargetLowering::TypePair
2497HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2498 // Compare the widths of elements of the two types, and extend the narrower
2499 // type to match the with of the wider type. For vector types, apply this
2500 // to the element type.
2501 assert(Ty0.isVector() == Ty1.isVector());
2502
2503 MVT ElemTy0 = Ty0.getScalarType();
2504 MVT ElemTy1 = Ty1.getScalarType();
2505
2506 unsigned Width0 = ElemTy0.getSizeInBits();
2507 unsigned Width1 = ElemTy1.getSizeInBits();
2508 unsigned MaxWidth = std::max(Width0, Width1);
2509
2510 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2511 if (ScalarTy.isInteger())
2512 return MVT::getIntegerVT(Width);
2513 assert(ScalarTy.isFloatingPoint());
2514 return MVT::getFloatingPointVT(Width);
2515 };
2516
2517 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2518 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2519
2520 if (!Ty0.isVector()) {
2521 // Both types are scalars.
2522 return {WideETy0, WideETy1};
2523 }
2524
2525 // Vector types.
2526 unsigned NumElem = Ty0.getVectorNumElements();
2527 assert(NumElem == Ty1.getVectorNumElements());
2528
2529 return {MVT::getVectorVT(WideETy0, NumElem),
2530 MVT::getVectorVT(WideETy1, NumElem)};
2531}
2532
2533HexagonTargetLowering::TypePair
2534HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2535 // Compare the numbers of elements of two vector types, and widen the
2536 // narrower one to match the number of elements in the wider one.
2537 assert(Ty0.isVector() && Ty1.isVector());
2538
2539 unsigned Len0 = Ty0.getVectorNumElements();
2540 unsigned Len1 = Ty1.getVectorNumElements();
2541 if (Len0 == Len1)
2542 return {Ty0, Ty1};
2543
2544 unsigned MaxLen = std::max(Len0, Len1);
2545 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2546 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2547}
2548
2549MVT
2550HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2551 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2552 assert(LegalTy.isSimple());
2553 return LegalTy.getSimpleVT();
2554}
2555
2556MVT
2557HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2558 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2559 assert(Ty.getSizeInBits() <= HwWidth);
2560 if (Ty.getSizeInBits() == HwWidth)
2561 return Ty;
2562
2563 MVT ElemTy = Ty.getScalarType();
2564 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2565}
2566
2567HexagonTargetLowering::VectorPair
2568HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2569 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2570 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2571 // whether an overflow has occurred.
2572 MVT ResTy = ty(A);
2573 assert(ResTy == ty(B));
2574 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2575
2576 if (!Signed) {
2577 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2578 // save any instructions.
2579 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2580 SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2581 return {Add, Ovf};
2582 }
2583
2584 // Signed overflow has happened, if:
2585 // (A, B have the same sign) and (A+B has a different sign from either)
2586 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2587 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2588 SDValue NotA =
2589 DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
2590 SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2591 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2592 SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
2593 SDValue MSB =
2594 DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2595 return {Add, MSB};
2596}
2597
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {rounded result, overflow predicate}: the overflow flag marks
  // lanes where adding the rounding bias wrapped around.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Val);

  // This should also work for signed integers.
  //
  //   uint tmp0 = inp + ((1 << (Amt-1)) - 1);   // add rounding bias
  //   bool ovf  = (inp > tmp0);                 // bias addition overflowed
  //   uint rup  = (inp & (1 << Amt)) != 0;      // bit that becomes the LSB
  //                                             // (decides ties -> even)
  //   uint tmp1 = inp  >> (Amt-1);   // tmp1 == tmp2 iff
  //   uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  //   uint tmp3 = tmp2 + rup;
  //   uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(ElemWidth);
  MVT IntTy = tyVector(ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  // Work on the integer view of the input.
  SDValue Inp = DAG.getBitcast(IntTy, Val);
  SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);

  // Extract the "round up" bit: bit Amt of the input (0 or 1 per lane).
  SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
  SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
  SDValue Zero = getZero(dl, IntTy, DAG);
  SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
  // tmp0 = inp + bias, plus the per-lane overflow predicate.
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);

  // Shift by Amt-1 first; the final >>1 happens after tie resolution.
  SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
  SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
  SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);

  // Tmp1 == Tmp2 means the dropped bits were all zero (an exact half or
  // less): use the tie-to-even variant (Tmp3); otherwise plain rounding.
  SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
  SDValue One = DAG.getConstant(1, dl, IntTy);
  SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
  SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2645
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Expand signed i32 vector MULHS for HVX v60, which has no 32x32-bit
  // full multiply: build the high 32 bits of A*B from 16x16 halfword
  // multiplies, taking care to propagate carries between partial sums.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // mulhs(A,B) =
  // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //    + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //   Hi(B) Lo(B)
  //   Hi(A) Lo(A)
  //  --------------
  //   Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //   Hi(B)*Lo(A)  | + dropping the low 16 bits
  //   Hi(A)*Lo(B)  | T2
  //   Hi(B)*Hi(A)

  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
  // T4 = get Hi(B) into low halves.
  SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
  SDValue T5 = LoHalf(P3, DAG);
  // Add the Hi(A)*Hi(B) term to the shifted partial sum.
  SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
  return T6;
}
2702
// Emit a full 32x32 -> 64-bit vector multiply for HVX v60, returning the
// {Lo, Hi} words as merged values. The unsigned product is built from
// halfword multiplies; sign corrections for signed operands are applied to
// the high word afterwards. SignedA/SignedB select the signedness of the
// respective operand.
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);

  // Swap low/high halves in B
  SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
                        {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
                {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
  Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Signed*signed correction on the high word:
    // Hi -= (B if A < 0) + (A if B < 0), computed below as X1.
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
    SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2778
// Emit a full 32x32 -> 64-bit vector multiply for HVX v62+, returning the
// {Lo, Hi} words as merged values. v62 has a direct signed*signed 64-bit
// multiply sequence (vmpyewuh_64 + vmpyowh_64_acc); unsigned operands are
// handled by correcting the high word of the signed product.
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
  SDValue P1 =
      getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
  SDValue Lo = LoHalf(P1, DAG);
  SDValue Hi = HiHalf(P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2832
2833SDValue
2834HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2835 const {
2836 // Rewrite conversion between integer and floating-point in such a way that
2837 // the integer type is extended/narrowed to match the bitwidth of the
2838 // floating-point type, combined with additional integer-integer extensions
2839 // or narrowings to match the original input/result types.
2840 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2841 //
2842 // The input/result types are not required to be legal, but if they are
2843 // legal, this function should not introduce illegal types.
2844
2845 unsigned Opc = Op.getOpcode();
2848
2849 SDValue Inp = Op.getOperand(0);
2850 MVT InpTy = ty(Inp);
2851 MVT ResTy = ty(Op);
2852
2853 if (InpTy == ResTy)
2854 return Op;
2855
2856 const SDLoc &dl(Op);
2858
2859 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2860 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2861 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2862 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2863 return Res;
2864}
2865
2866SDValue
2867HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2868 unsigned Opc = Op.getOpcode();
2870
2871 const SDLoc &dl(Op);
2872 SDValue Op0 = Op.getOperand(0);
2873 MVT InpTy = ty(Op0);
2874 MVT ResTy = ty(Op);
2875 assert(InpTy.changeTypeToInteger() == ResTy);
2876
2877 // int32_t conv_f32_to_i32(uint32_t inp) {
2878 // // s | exp8 | frac23
2879 //
2880 // int neg = (int32_t)inp < 0;
2881 //
2882 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2883 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2884 // // produce a large positive "expm1", which will result in max u/int.
2885 // // In all IEEE formats, bias is the largest positive number that can be
2886 // // represented in bias-width bits (i.e. 011..1).
2887 // int32_t expm1 = (inp << 1) - 0x80000000;
2888 // expm1 >>= 24;
2889 //
2890 // // Always insert the "implicit 1". Subnormal numbers will become 0
2891 // // regardless.
2892 // uint32_t frac = (inp << 8) | 0x80000000;
2893 //
2894 // // "frac" is the fraction part represented as Q1.31. If it was
2895 // // interpreted as uint32_t, it would be the fraction part multiplied
2896 // // by 2^31.
2897 //
2898 // // Calculate the amount of right shift, since shifting further to the
2899 // // left would lose significant bits. Limit it to 32, because we want
2900 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2901 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2902 // // left by 31). "rsh" can be negative.
2903 // int32_t rsh = min(31 - (expm1 + 1), 32);
2904 //
2905 // frac >>= rsh; // rsh == 32 will produce 0
2906 //
2907 // // Everything up to this point is the same for conversion to signed
2908 // // unsigned integer.
2909 //
2910 // if (neg) // Only for signed int
2911 // frac = -frac; //
2912 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
2913 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
2914 // if (rsh <= 0 && !neg) //
2915 // frac = 0x7fffffff; //
2916 //
2917 // if (neg) // Only for unsigned int
2918 // frac = 0; //
2919 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
2920 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
2921 //
2922 // return frac;
2923 // }
2924
2925 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
2926
2927 // Zero = V6_vd0();
2928 // Neg = V6_vgtw(Zero, Inp);
2929 // One = V6_lvsplatw(1);
2930 // M80 = V6_lvsplatw(0x80000000);
2931 // Exp00 = V6_vaslwv(Inp, One);
2932 // Exp01 = V6_vsubw(Exp00, M80);
2933 // ExpM1 = V6_vasrw(Exp01, 24);
2934 // Frc00 = V6_vaslw(Inp, 8);
2935 // Frc01 = V6_vor(Frc00, M80);
2936 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2937 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2938 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
2939
2940 // if signed int:
2941 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2942 // Pos = V6_vgtw(Rsh01, Zero);
2943 // Frc13 = V6_vsubw(Zero, Frc02);
2944 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
2945 // Int = V6_vmux(Pos, Frc14, Bnd);
2946 //
2947 // if unsigned int:
2948 // Rsn = V6_vgtw(Zero, Rsh01)
2949 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2950 // Int = V6_vmux(Neg, Zero, Frc23)
2951
2952 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
2953 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2954 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
2955
2956 SDValue Inp = DAG.getBitcast(ResTy, Op0);
2957 SDValue Zero = getZero(dl, ResTy, DAG);
2958 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
2959 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
2960 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
2961 SDValue One = DAG.getConstant(1, dl, ResTy);
2962 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
2963 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
2964 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
2965 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
2966
2967 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
2968 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
2969 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
2970
2971 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
2972 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
2973 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
2974 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
2975 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
2976
2977 SDValue Int;
2978
2979 if (Opc == ISD::FP_TO_SINT) {
2980 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
2981 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
2982 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
2983 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
2984 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
2985 } else {
2987 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
2988 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
2989 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
2990 }
2991
2992 return Int;
2993}
2994
2995SDValue
2996HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2997 unsigned Opc = Op.getOpcode();
2999
3000 const SDLoc &dl(Op);
3001 SDValue Op0 = Op.getOperand(0);
3002 MVT InpTy = ty(Op0);
3003 MVT ResTy = ty(Op);
3004 assert(ResTy.changeTypeToInteger() == InpTy);
3005
3006 // uint32_t vnoc1_rnd(int32_t w) {
3007 // int32_t iszero = w == 0;
3008 // int32_t isneg = w < 0;
3009 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
3010 //
3011 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
3012 // uint32_t frac0 = (uint64_t)u << norm_left;
3013 //
3014 // // Rounding:
3015 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
3016 // uint32_t renorm = (frac0 > frac1);
3017 // uint32_t rup = (int)(frac0 << 22) < 0;
3018 //
3019 // uint32_t frac2 = frac0 >> 8;
3020 // uint32_t frac3 = frac1 >> 8;
3021 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
3022 //
3023 // int32_t exp = 32 - norm_left + renorm + 127;
3024 // exp <<= 23;
3025 //
3026 // uint32_t sign = 0x80000000 * isneg;
3027 // uint32_t f = sign | exp | frac;
3028 // return iszero ? 0 : f;
3029 // }
3030
3031 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
3032 bool Signed = Opc == ISD::SINT_TO_FP;
3033
3034 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
3035 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3036
3037 SDValue Zero = getZero(dl, InpTy, DAG);
3038 SDValue One = DAG.getConstant(1, dl, InpTy);
3039 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
3040 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
3041 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
3042 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
3043 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
3044
3045 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
3046 if (Signed) {
3047 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
3048 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
3049 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
3050 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
3051 }
3052
3053 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
3054 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
3055 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
3056 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
3057 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
3058 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
3059 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
3060 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
3061 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
3062
3063 return Flt;
3064}
3065
3066SDValue
3067HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3068 unsigned Opc = Op.getOpcode();
3069 unsigned TLOpc;
3070 switch (Opc) {
3071 case ISD::ANY_EXTEND:
3072 case ISD::SIGN_EXTEND:
3073 case ISD::ZERO_EXTEND:
3074 TLOpc = HexagonISD::TL_EXTEND;
3075 break;
3076 case ISD::TRUNCATE:
3078 break;
3079#ifndef NDEBUG
3080 Op.dump(&DAG);
3081#endif
3082 llvm_unreachable("Unexpected operator");
3083 }
3084
3085 const SDLoc &dl(Op);
3086 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
3087 DAG.getUNDEF(MVT::i128), // illegal type
3088 DAG.getConstant(Opc, dl, MVT::i32));
3089}
3090
3091SDValue
3092HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3093 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3094 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3095 unsigned Opc = Op.getConstantOperandVal(2);
3096 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3097}
3098
// Split a vector operation into two identical operations on the low and
// high halves of its vector operands, returning the {low, high} results.
// Non-vector operands are passed unchanged to both halves; VT operands
// (used by SIGN_EXTEND_INREG and the saturation nodes) are split into the
// corresponding half-width VT instead.
HexagonTargetLowering::VectorPair
HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
  assert(!Op.isMachineOpcode());
  SmallVector<SDValue, 2> OpsL, OpsH;
  const SDLoc &dl(Op);

  // Split a VT operand: both halves use the same (half-width) VT.
  auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
    MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
    SDValue TV = DAG.getValueType(Ty);
    return std::make_pair(TV, TV);
  };

  for (SDValue A : Op.getNode()->ops()) {
    auto [Lo, Hi] =
        ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
    // Special case for type operand.
    switch (Op.getOpcode()) {
    case ISD::SIGN_EXTEND_INREG:
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
      if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
        std::tie(Lo, Hi) = SplitVTNode(N);
      break;
    }
    OpsL.push_back(Lo);
    OpsH.push_back(Hi);
  }

  MVT ResTy = ty(Op);
  MVT HalfTy = typeSplit(ResTy).first;
  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
  return {L, H};
}
3133
// Split a memory operation (LOAD/STORE/MLOAD/MSTORE) on an HVX vector pair
// into two operations on single vectors, the second addressed HwLen bytes
// past the first, chained together with a TokenFactor. Ops whose memory
// type is not a pair are returned unchanged.
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  auto *MemN = cast<MemSDNode>(Op.getNode());

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  // Address of the second (high) half.
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    // For masked ops the number of bytes actually accessed depends on the
    // mask, so the size is recorded as unknown.
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    // Result 0: concatenated halves; result 1: combined chain.
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  // The mask (and for MLOAD the pass-through value) must be split as well.
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}
3215
3216SDValue
3217HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3218 const SDLoc &dl(Op);
3219 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3220 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3221 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3222 "Not widening loads of i1 yet");
3223
3224 SDValue Chain = LoadN->getChain();
3225 SDValue Base = LoadN->getBasePtr();
3226 SDValue Offset = DAG.getUNDEF(MVT::i32);
3227
3228 MVT ResTy = ty(Op);
3229 unsigned HwLen = Subtarget.getVectorLength();
3230 unsigned ResLen = ResTy.getStoreSize();
3231 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3232
3233 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3234 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3235 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3236
3237 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3238 MachineFunction &MF = DAG.getMachineFunction();
3239 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3240
3241 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3242 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3244 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3245 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3246}
3247
3248SDValue
3249HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3250 const SDLoc &dl(Op);
3251 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3252 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3253 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3254 "Not widening stores of i1 yet");
3255
3256 SDValue Chain = StoreN->getChain();
3257 SDValue Base = StoreN->getBasePtr();
3258 SDValue Offset = DAG.getUNDEF(MVT::i32);
3259
3260 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3261 MVT ValueTy = ty(Value);
3262 unsigned ValueLen = ValueTy.getVectorNumElements();
3263 unsigned HwLen = Subtarget.getVectorLength();
3264 assert(isPowerOf2_32(ValueLen));
3265
3266 for (unsigned Len = ValueLen; Len < HwLen; ) {
3267 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3268 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3269 }
3270 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3271
3272 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3273 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3274 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3275 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3276 MachineFunction &MF = DAG.getMachineFunction();
3277 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3278 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3279 MemOp, ISD::UNINDEXED, false, false);
3280}
3281
3282SDValue
3283HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3284 const SDLoc &dl(Op);
3285 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3286 MVT ElemTy = ty(Op0).getVectorElementType();
3287 unsigned HwLen = Subtarget.getVectorLength();
3288
3289 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3290 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3291 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3292 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3293 return SDValue();
3294
3295 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3296 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3297 EVT ResTy =
3298 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3299 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3300 {WideOp0, WideOp1, Op.getOperand(2)});
3301
3302 EVT RetTy = typeLegalize(ty(Op), DAG);
3303 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3304 {SetCC, getZero(dl, MVT::i32, DAG)});
3305}
3306
3307SDValue
3308HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3309 unsigned Opc = Op.getOpcode();
3310 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3311 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3312 return isHvxPairTy(ty(V));
3313 });
3314
3315 if (IsPairOp) {
3316 switch (Opc) {
3317 default:
3318 break;
3319 case ISD::LOAD:
3320 case ISD::STORE:
3321 case ISD::MLOAD:
3322 case ISD::MSTORE:
3323 return SplitHvxMemOp(Op, DAG);
3324 case ISD::SINT_TO_FP:
3325 case ISD::UINT_TO_FP:
3326 case ISD::FP_TO_SINT:
3327 case ISD::FP_TO_UINT:
3328 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3329 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3330 break;
3331 case ISD::ABS:
3332 case ISD::CTPOP:
3333 case ISD::CTLZ:
3334 case ISD::CTTZ:
3335 case ISD::MUL:
3336 case ISD::FADD:
3337 case ISD::FSUB:
3338 case ISD::FMUL:
3339 case ISD::FMINIMUMNUM:
3340 case ISD::FMAXIMUMNUM:
3341 case ISD::MULHS:
3342 case ISD::MULHU:
3343 case ISD::AND:
3344 case ISD::OR:
3345 case ISD::XOR:
3346 case ISD::SRA:
3347 case ISD::SHL:
3348 case ISD::SRL:
3349 case ISD::FSHL:
3350 case ISD::FSHR:
3351 case ISD::SMIN:
3352 case ISD::SMAX:
3353 case ISD::UMIN:
3354 case ISD::UMAX:
3355 case ISD::SETCC:
3356 case ISD::VSELECT:
3358 case ISD::SPLAT_VECTOR:
3359 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3360 case ISD::SIGN_EXTEND:
3361 case ISD::ZERO_EXTEND:
3362 // In general, sign- and zero-extends can't be split and still
3363 // be legal. The only exception is extending bool vectors.
3364 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3365 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3366 break;
3367 }
3368 }
3369
3370 switch (Opc) {
3371 default:
3372 break;
3373 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3374 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3375 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3376 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3377 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3378 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3379 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3380 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3381 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3382 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3383 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3384 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3385 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3386 case ISD::SRA:
3387 case ISD::SHL:
3388 case ISD::SRL: return LowerHvxShift(Op, DAG);
3389 case ISD::FSHL:
3390 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3391 case ISD::MULHS:
3392 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3393 case ISD::SMUL_LOHI:
3394 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3395 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3396 case ISD::SETCC:
3397 case ISD::INTRINSIC_VOID: return Op;
3398 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3399 case ISD::MLOAD:
3400 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3401 // Unaligned loads will be handled by the default lowering.
3402 case ISD::LOAD: return SDValue();
3403 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3404 case ISD::FP_TO_SINT:
3405 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3406 case ISD::SINT_TO_FP:
3407 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3408
3409 // Special nodes:
3412 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3413 }
3414#ifndef NDEBUG
3415 Op.dumpr(&DAG);
3416#endif
3417 llvm_unreachable("Unhandled HVX operation");
3418}
3419
3420SDValue
3421HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3422 const {
3423 // Rewrite the extension/truncation/saturation op into steps where each
3424 // step changes the type widths by a factor of 2.
3425 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3426 //
3427 // Some of the vector types in Op may not be legal.
3428
3429 unsigned Opc = Op.getOpcode();
3430 switch (Opc) {
3431 case HexagonISD::SSAT:
3432 case HexagonISD::USAT:
3435 break;
3436 case ISD::ANY_EXTEND:
3437 case ISD::ZERO_EXTEND:
3438 case ISD::SIGN_EXTEND:
3439 case ISD::TRUNCATE:
3440 llvm_unreachable("ISD:: ops will be auto-folded");
3441 break;
3442#ifndef NDEBUG
3443 Op.dump(&DAG);
3444#endif
3445 llvm_unreachable("Unexpected operation");
3446 }
3447
3448 SDValue Inp = Op.getOperand(0);
3449 MVT InpTy = ty(Inp);
3450 MVT ResTy = ty(Op);
3451
3452 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3453 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3454 assert(InpWidth != ResWidth);
3455
3456 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3457 return Op;
3458
3459 const SDLoc &dl(Op);
3460 unsigned NumElems = InpTy.getVectorNumElements();
3461 assert(NumElems == ResTy.getVectorNumElements());
3462
3463 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3464 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3465 switch (Opc) {
3466 case HexagonISD::SSAT:
3467 case HexagonISD::USAT:
3468 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3471 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3472 default:
3473 llvm_unreachable("Unexpected opcode");
3474 }
3475 };
3476
3477 SDValue S = Inp;
3478 if (InpWidth < ResWidth) {
3479 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3480 while (InpWidth * 2 <= ResWidth)
3481 S = repeatOp(InpWidth *= 2, S);
3482 } else {
3483 // InpWidth > ResWidth
3484 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3485 while (InpWidth / 2 >= ResWidth)
3486 S = repeatOp(InpWidth /= 2, S);
3487 }
3488 return S;
3489}
3490
3491SDValue
3492HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3493 SDValue Inp0 = Op.getOperand(0);
3494 MVT InpTy = ty(Inp0);
3495 MVT ResTy = ty(Op);
3496 unsigned InpWidth = InpTy.getSizeInBits();
3497 unsigned ResWidth = ResTy.getSizeInBits();
3498 unsigned Opc = Op.getOpcode();
3499
3500 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3501 // First, make sure that the narrower type is widened to HVX.
3502 // This may cause the result to be wider than what the legalizer
3503 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3504 // desired type.
3505 auto [WInpTy, WResTy] =
3506 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3507 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3508 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3509 SDValue S;
3511 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3512 Op.getOperand(2));
3513 } else {
3514 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3515 }
3516 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3517 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3518 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3519 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3520 } else {
3521 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3522 return RemoveTLWrapper(Op, DAG);
3523 }
3524 llvm_unreachable("Unexpected situation");
3525}
3526
3527void
3528HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3530 unsigned Opc = N->getOpcode();
3531 SDValue Op(N, 0);
3532 SDValue Inp0; // Optional first argument.
3533 if (N->getNumOperands() > 0)
3534 Inp0 = Op.getOperand(0);
3535
3536 switch (Opc) {
3537 case ISD::ANY_EXTEND:
3538 case ISD::SIGN_EXTEND:
3539 case ISD::ZERO_EXTEND:
3540 case ISD::TRUNCATE:
3541 if (Subtarget.isHVXElementType(ty(Op)) &&
3542 Subtarget.isHVXElementType(ty(Inp0))) {
3543 Results.push_back(CreateTLWrapper(Op, DAG));
3544 }
3545 break;
3546 case ISD::SETCC:
3547 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3548 if (SDValue T = WidenHvxSetCC(Op, DAG))
3549 Results.push_back(T);
3550 }
3551 break;
3552 case ISD::STORE: {
3553 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3554 SDValue Store = WidenHvxStore(Op, DAG);
3555 Results.push_back(Store);
3556 }
3557 break;
3558 }
3559 case ISD::MLOAD:
3560 if (isHvxPairTy(ty(Op))) {
3561 SDValue S = SplitHvxMemOp(Op, DAG);
3563 Results.push_back(S.getOperand(0));
3564 Results.push_back(S.getOperand(1));
3565 }
3566 break;
3567 case ISD::MSTORE:
3568 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3569 SDValue S = SplitHvxMemOp(Op, DAG);
3570 Results.push_back(S);
3571 }
3572 break;
3573 case ISD::SINT_TO_FP:
3574 case ISD::UINT_TO_FP:
3575 case ISD::FP_TO_SINT:
3576 case ISD::FP_TO_UINT:
3577 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3578 SDValue T = EqualizeFpIntConversion(Op, DAG);
3579 Results.push_back(T);
3580 }
3581 break;
3582 case HexagonISD::SSAT:
3583 case HexagonISD::USAT:
3586 Results.push_back(LegalizeHvxResize(Op, DAG));
3587 break;
3588 default:
3589 break;
3590 }
3591}
3592
3593void
3594HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3596 unsigned Opc = N->getOpcode();
3597 SDValue Op(N, 0);
3598 SDValue Inp0; // Optional first argument.
3599 if (N->getNumOperands() > 0)
3600 Inp0 = Op.getOperand(0);
3601
3602 switch (Opc) {
3603 case ISD::ANY_EXTEND:
3604 case ISD::SIGN_EXTEND:
3605 case ISD::ZERO_EXTEND:
3606 case ISD::TRUNCATE:
3607 if (Subtarget.isHVXElementType(ty(Op)) &&
3608 Subtarget.isHVXElementType(ty(Inp0))) {
3609 Results.push_back(CreateTLWrapper(Op, DAG));
3610 }
3611 break;
3612 case ISD::SETCC:
3613 if (shouldWidenToHvx(ty(Op), DAG)) {
3614 if (SDValue T = WidenHvxSetCC(Op, DAG))
3615 Results.push_back(T);
3616 }
3617 break;
3618 case ISD::LOAD: {
3619 if (shouldWidenToHvx(ty(Op), DAG)) {
3620 SDValue Load = WidenHvxLoad(Op, DAG);
3621 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3622 Results.push_back(Load.getOperand(0));
3623 Results.push_back(Load.getOperand(1));
3624 }
3625 break;
3626 }
3627 case ISD::BITCAST:
3628 if (isHvxBoolTy(ty(Inp0))) {
3629 SDValue C = LowerHvxBitcast(Op, DAG);
3630 Results.push_back(C);
3631 }
3632 break;
3633 case ISD::FP_TO_SINT:
3634 case ISD::FP_TO_UINT:
3635 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3636 SDValue T = EqualizeFpIntConversion(Op, DAG);
3637 Results.push_back(T);
3638 }
3639 break;
3640 case HexagonISD::SSAT:
3641 case HexagonISD::USAT:
3644 Results.push_back(LegalizeHvxResize(Op, DAG));
3645 break;
3646 default:
3647 break;
3648 }
3649}
3650
3651SDValue
3652HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3653 DAGCombinerInfo &DCI) const {
3654 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3655 // to extract-subvector (shuffle V, pick even, pick odd)
3656
3657 assert(Op.getOpcode() == ISD::TRUNCATE);
3658 SelectionDAG &DAG = DCI.DAG;
3659 const SDLoc &dl(Op);
3660
3661 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3662 return SDValue();
3663 SDValue Cast = Op.getOperand(0);
3664 SDValue Src = Cast.getOperand(0);
3665
3666 EVT TruncTy = Op.getValueType();
3667 EVT CastTy = Cast.getValueType();
3668 EVT SrcTy = Src.getValueType();
3669 if (SrcTy.isSimple())
3670 return SDValue();
3671 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3672 return SDValue();
3673 unsigned SrcLen = SrcTy.getVectorNumElements();
3674 unsigned CastLen = CastTy.getVectorNumElements();
3675 if (2 * CastLen != SrcLen)
3676 return SDValue();
3677
3678 SmallVector<int, 128> Mask(SrcLen);
3679 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3680 Mask[i] = 2 * i;
3681 Mask[i + CastLen] = 2 * i + 1;
3682 }
3683 SDValue Deal =
3684 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3685 return opSplit(Deal, dl, DAG).first;
3686}
3687
3688SDValue
3689HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3690 SDValue Op, DAGCombinerInfo &DCI) const {
3691 // Fold
3692 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3693 // into
3694 // shuffle (concat x, y), undef, m3
3695 if (Op.getNumOperands() != 2)
3696 return SDValue();
3697
3698 SelectionDAG &DAG = DCI.DAG;
3699 const SDLoc &dl(Op);
3700 SDValue V0 = Op.getOperand(0);
3701 SDValue V1 = Op.getOperand(1);
3702
3703 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3704 return SDValue();
3705 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3706 return SDValue();
3707
3708 SetVector<SDValue> Order;
3709 Order.insert(V0.getOperand(0));
3710 Order.insert(V0.getOperand(1));
3711 Order.insert(V1.getOperand(0));
3712 Order.insert(V1.getOperand(1));
3713
3714 if (Order.size() > 2)
3715 return SDValue();
3716
3717 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3718 // result must be the same.
3719 EVT InpTy = V0.getValueType();
3720 assert(InpTy.isVector());
3721 unsigned InpLen = InpTy.getVectorNumElements();
3722
3723 SmallVector<int, 128> LongMask;
3724 auto AppendToMask = [&](SDValue Shuffle) {
3725 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3726 ArrayRef<int> Mask = SV->getMask();
3727 SDValue X = Shuffle.getOperand(0);
3728 SDValue Y = Shuffle.getOperand(1);
3729 for (int M : Mask) {
3730 if (M == -1) {
3731 LongMask.push_back(M);
3732 continue;
3733 }
3734 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3735 if (static_cast<unsigned>(M) >= InpLen)
3736 M -= InpLen;
3737
3738 int OutOffset = Order[0] == Src ? 0 : InpLen;
3739 LongMask.push_back(M + OutOffset);
3740 }
3741 };
3742
3743 AppendToMask(V0);
3744 AppendToMask(V1);
3745
3746 SDValue C0 = Order.front();
3747 SDValue C1 = Order.back(); // Can be same as front
3748 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3749
3750 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3751 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3752}
3753
3754SDValue
3755HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3756 const {
3757 const SDLoc &dl(N);
3758 SelectionDAG &DAG = DCI.DAG;
3759 SDValue Op(N, 0);
3760 unsigned Opc = Op.getOpcode();
3761
3763
3764 if (Opc == ISD::TRUNCATE)
3765 return combineTruncateBeforeLegal(Op, DCI);
3766 if (Opc == ISD::CONCAT_VECTORS)
3767 return combineConcatVectorsBeforeLegal(Op, DCI);
3768
3769 if (DCI.isBeforeLegalizeOps())
3770 return SDValue();
3771
3772 switch (Opc) {
3773 case ISD::VSELECT: {
3774 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3775 SDValue Cond = Ops[0];
3776 if (Cond->getOpcode() == ISD::XOR) {
3777 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3778 if (C1->getOpcode() == HexagonISD::QTRUE)
3779 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3780 }
3781 break;
3782 }
3783 case HexagonISD::V2Q:
3784 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3785 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3786 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3787 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3788 }
3789 break;
3790 case HexagonISD::Q2V:
3791 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3792 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3793 DAG.getAllOnesConstant(dl, MVT::i32));
3794 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3795 return getZero(dl, ty(Op), DAG);
3796 break;
3798 if (isUndef(Ops[1]))
3799 return Ops[0];
3800 break;
3801 case HexagonISD::VROR: {
3802 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3803 SDValue Vec = Ops[0].getOperand(0);
3804 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3805 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3806 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3807 }
3808 break;
3809 }
3810 }
3811
3812 return SDValue();
3813}
3814
3815bool
3816HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3817 if (Subtarget.isHVXVectorType(Ty, true))
3818 return false;
3819 auto Action = getPreferredHvxVectorAction(Ty);
3821 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3822 return false;
3823}
3824
3825bool
3826HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3827 if (Subtarget.isHVXVectorType(Ty, true))
3828 return false;
3829 auto Action = getPreferredHvxVectorAction(Ty);
3831 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3832 return false;
3833}
3834
3835bool
3836HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3837 if (!Subtarget.useHVXOps())
3838 return false;
3839 // If the type of any result, or any operand type are HVX vector types,
3840 // this is an HVX operation.
3841 auto IsHvxTy = [this](EVT Ty) {
3842 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3843 };
3844 auto IsHvxOp = [this](SDValue Op) {
3845 return Op.getValueType().isSimple() &&
3846 Subtarget.isHVXVectorType(ty(Op), true);
3847 };
3848 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3849 return true;
3850
3851 // Check if this could be an HVX operation after type widening.
3852 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3853 if (!Op.getValueType().isSimple())
3854 return false;
3855 MVT ValTy = ty(Op);
3856 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3857 };
3858
3859 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3860 if (IsWidenedToHvx(SDValue(N, i)))
3861 return true;
3862 }
3863 return llvm::any_of(N->ops(), IsWidenedToHvx);
3864}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define H(x, y, z)
Definition MD5.cpp:57
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:191
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:131
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:137
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.