LLVM 22.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
34static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(5, 15, 10);
45 case MVT::f32:
46 return std::make_tuple(8, 127, 23);
47 case MVT::f64:
48 return std::make_tuple(11, 1023, 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
54
// Configure HVX lowering: register classes for single (VR), pair (WR) and
// predicate (QR) vectors, followed by per-type operation actions.
// NOTE(review): this listing is elided -- jumps in the embedded original
// line numbering show that many setOperationAction calls (and a few other
// statements) are missing from this excerpt; the visible code is kept
// unchanged.
void
HexagonTargetLowering::initializeHVXLowering() {
  if (Subtarget.useHVX64BOps()) {
    // 64-byte HVX mode.
    addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
    // These "short" boolean vector types should be legal because
    // they will appear as results of vector compares. If they were
    // not legal, type legalization would try to make them legal
    // and that would require using operations that do not use or
    // produce such types. That, in turn, would imply using custom
    // nodes, which would be unoptimizable by the DAG combiner.
    // The idea is to rely on target-independent operations as much
    // as possible.
    addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
  } else if (Subtarget.useHVX128BOps()) {
    // 128-byte HVX mode.
    addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
    if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
      // HVX floating point (v68+) shares the vector register classes.
      addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
      addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
    }
  }

  // Set up operation actions.

  bool Use64b = Subtarget.useHVX64BOps();
  ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
  ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
  MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
  MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
  MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;

  // Helper: carry out Opc on FromTy by promoting it to ToTy.
  // NOTE(review): the accompanying setOperationAction(..., Promote) call
  // was elided from this excerpt.
  auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
    AddPromotedToType(Opc, FromTy, ToTy);
  };

  // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
  // Note: v16i1 -> i16 is handled in type legalization instead of op
  // legalization.
  setOperationAction(ISD::BITCAST, MVT::i16, Custom);
  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
  setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
  setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
  setOperationAction(ISD::BITCAST, MVT::i128, Custom);

  if (Subtarget.useHVX128BOps()) {
    setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
    setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
  }
  // HVX floating-point actions (128-byte mode, v68+ only).
  if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
      Subtarget.useHVXFloatingPoint()) {

    static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
    static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };

    for (MVT T : FloatV) {
      setOperationAction(ISD::FMINIMUMNUM, T, Legal);
      setOperationAction(ISD::FMAXIMUMNUM, T, Legal);

      setOperationAction(ISD::MLOAD, T, Custom);
      setOperationAction(ISD::MSTORE, T, Custom);
      // Custom-lower BUILD_VECTOR. The standard (target-independent)
      // handling of it would convert it to a load, which is not always
      // the optimal choice.
    }


    // BUILD_VECTOR with f16 operands cannot be promoted without
    // promoting the result, so lower the node to vsplat or constant pool

    // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
    // generated.
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);

    // FP vector pairs.
    for (MVT P : FloatW) {
      setOperationAction(ISD::LOAD, P, Custom);
      setOperationAction(ISD::STORE, P, Custom);
      setOperationAction(ISD::FMINIMUMNUM, P, Custom);
      setOperationAction(ISD::FMAXIMUMNUM, P, Custom);

      // Custom-lower BUILD_VECTOR. The standard (target-independent)
      // handling of it would convert it to a load, which is not always
      // the optimal choice.
      // Make concat-vectors custom to handle concats of more than 2 vectors.

      setOperationAction(ISD::MLOAD, P, Custom);
      setOperationAction(ISD::MSTORE, P, Custom);
    }

    if (Subtarget.useHVXQFloatOps()) {
      setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
    } else if (Subtarget.useHVXIEEEFPOps()) {
      setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
    }
  }

  // Actions for single (one-register) HVX vectors.
  for (MVT T : LegalV) {

    if (T != ByteV) {
    }

    if (T.getScalarType() != MVT::i32) {
    }

    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::MLOAD, T, Custom);
    setOperationAction(ISD::MSTORE, T, Custom);
    if (T.getScalarType() != MVT::i32) {
    }

    // Make concat-vectors custom to handle concats of more than 2 vectors.
    if (T != ByteV) {
      // HVX only has shifts of words and halfwords.

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
    }

    if (Subtarget.useHVXFloatingPoint()) {
      // Same action for both QFloat and IEEE.
    }

  }

  // Actions for vector pairs (two-register values).
  for (MVT T : LegalW) {
    // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
    // independent) handling of it would convert it to a load, which is
    // not always the optimal choice.
    // Make concat-vectors custom to handle concats of more than 2 vectors.

    // Custom-lower these operations for pairs. Expand them into a concat
    // of the corresponding operations on individual vectors.

    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::STORE, T, Custom);
    setOperationAction(ISD::MLOAD, T, Custom);
    setOperationAction(ISD::MSTORE, T, Custom);

    if (T != ByteW) {

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
    }

    if (T.getScalarType() != MVT::i32) {
    }

    if (Subtarget.useHVXFloatingPoint()) {
      // Same action for both QFloat and IEEE.
    }
  }

  // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
  setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
  setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI

  // Expand the FP compare conditions that are not listed as directly
  // supported for the HVX float types.
  setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);

  setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);

  // Boolean vectors.

  for (MVT T : LegalW) {
    // Boolean types for vector pairs will overlap with the boolean
    // types for single vectors, e.g.
    //   v64i8  -> v64i1 (single)
    //   v64i16 -> v64i1 (pair)
    // Set these actions first, and allow the single actions to overwrite
    // any duplicates.
    MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    // Masked load/store takes a mask that may need splitting.
    setOperationAction(ISD::MLOAD, BoolW, Custom);
    setOperationAction(ISD::MSTORE, BoolW, Custom);
  }

  for (MVT T : LegalV) {
    MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
  }

  // Short (sub-HVX) integer vector types; the loop bodies were elided
  // from this excerpt.
  if (Use64b) {
    for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
  } else {
    for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
  }

  // Handle store widening for short vectors.
  unsigned HwLen = Subtarget.getVectorLength();
  for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
    if (ElemTy == MVT::i1)
      continue;
    int ElemWidth = ElemTy.getFixedSizeInBits();
    int MaxElems = (8*HwLen) / ElemWidth;
    // Visit each power-of-2 element count below a full HVX vector.
    for (int N = 2; N < MaxElems; N *= 2) {
      MVT VecTy = MVT::getVectorVT(ElemTy, N);
      auto Action = getPreferredVectorAction(VecTy);
      setOperationAction(ISD::LOAD, VecTy, Custom);
      setOperationAction(ISD::STORE, VecTy, Custom);
      if (Subtarget.useHVXFloatingPoint()) {
      }

      MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
      if (!isTypeLegal(BoolTy))
    }
  }
  }

  // Include cases which are not handled earlier

}
462
// Decide how type legalization should treat VecTy; the result is a
// TargetLoweringBase::LegalizeTypeAction encoded as unsigned, with ~0u
// meaning "defer to the default policy".
// NOTE(review): the return statements guarded by several of the ifs below
// were elided from this excerpt (original lines 471, 493, 497, 499), so
// the visible control flow is incomplete.
unsigned
HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();

  // Split vectors of i1 that exceed byte vector length.
  if (ElemTy == MVT::i1 && VecLen > HwLen)

  ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
  // For shorter vectors of i1, widen them if any of the corresponding
  // vectors of integers needs to be widened.
  if (ElemTy == MVT::i1) {
    for (MVT T : Tys) {
      assert(T != MVT::i1);
      // Recurse with each legal HVX element type at the same length;
      // take the first non-default answer.
      auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
      if (A != ~0u)
        return A;
    }
    return ~0u;
  }

  // If the size of VecTy is at least half of the vector length,
  // widen the vector. Note: the threshold was not selected in
  // any scientific way.
  if (llvm::is_contained(Tys, ElemTy)) {
    unsigned VecWidth = VecTy.getSizeInBits();
    unsigned HwWidth = 8*HwLen;
    if (VecWidth > 2*HwWidth)

    // The -hexagon-hvx-widen option overrides the built-in threshold.
    bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
    if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
    if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
  }

  // Defer to default.
  return ~0u;
}
505
// Map a custom-marked HVX node to the action to take for it.
// NOTE(review): the case labels of this switch and the final return
// statement were elided from this excerpt (original lines 510-513, 515).
unsigned
HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  }
}
517
519HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
520 const SDLoc &dl, SelectionDAG &DAG) const {
522 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
523 append_range(IntOps, Ops);
524 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
525}
526
527MVT
528HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
529 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
530
531 MVT ElemTy = Tys.first.getVectorElementType();
532 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
533 Tys.second.getVectorNumElements());
534}
535
536HexagonTargetLowering::TypePair
537HexagonTargetLowering::typeSplit(MVT VecTy) const {
538 assert(VecTy.isVector());
539 unsigned NumElem = VecTy.getVectorNumElements();
540 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
541 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
542 return { HalfTy, HalfTy };
543}
544
545MVT
546HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
547 MVT ElemTy = VecTy.getVectorElementType();
548 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
549 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
550}
551
552MVT
553HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
554 MVT ElemTy = VecTy.getVectorElementType();
555 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
556 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
557}
558
560HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
561 SelectionDAG &DAG) const {
562 if (ty(Vec).getVectorElementType() == ElemTy)
563 return Vec;
564 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
565 return DAG.getBitcast(CastTy, Vec);
566}
567
569HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
570 SelectionDAG &DAG) const {
571 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
572 Ops.first, Ops.second);
573}
574
575HexagonTargetLowering::VectorPair
576HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
577 SelectionDAG &DAG) const {
578 TypePair Tys = typeSplit(ty(Vec));
579 if (Vec.getOpcode() == HexagonISD::QCAT)
580 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
581 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
582}
583
584bool
585HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
586 return Subtarget.isHVXVectorType(Ty) &&
587 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
588}
589
590bool
591HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
592 return Subtarget.isHVXVectorType(Ty) &&
593 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
594}
595
596bool
597HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
598 return Subtarget.isHVXVectorType(Ty, true) &&
599 Ty.getVectorElementType() == MVT::i1;
600}
601
602bool HexagonTargetLowering::allowsHvxMemoryAccess(
603 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
604 // Bool vectors are excluded by default, but make it explicit to
605 // emphasize that bool vectors cannot be loaded or stored.
606 // Also, disallow double vector stores (to prevent unnecessary
607 // store widening in DAG combiner).
608 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
609 return false;
610 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
611 return false;
612 if (Fast)
613 *Fast = 1;
614 return true;
615}
616
617bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
618 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
619 if (!Subtarget.isHVXVectorType(VecTy))
620 return false;
621 // XXX Should this be false? vmemu are a bit slower than vmem.
622 if (Fast)
623 *Fast = 1;
624 return true;
625}
626
// Expand the PS_vsplat{i,r}{b,h,w} pseudo-instructions after instruction
// selection. Where the subtarget provides byte/half splats (HVX v62+),
// V6_lvsplatb/h are emitted directly; otherwise the value is replicated
// into a full 32-bit scalar register and V6_lvsplatw is used instead.
void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
    MachineInstr &MI, SDNode *Node) const {
  unsigned Opc = MI.getOpcode();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock &MB = *MI.getParent();
  MachineFunction &MF = *MB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  // New instructions are inserted before MI; MI itself is erased at the
  // end of each fully-expanded case.
  auto At = MI.getIterator();

  switch (Opc) {
  case Hexagon::PS_vsplatib:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplatb SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .add(MI.getOperand(1));
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
          .addReg(SplatV);
    } else {
      // Replicate the byte immediate into all four byte lanes of a word.
      // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      assert(InpOp.isImm());
      uint32_t V = InpOp.getImm() & 0xFF;
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .addImm(V << 24 | V << 16 | V << 8 | V);
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    // Remove the expanded pseudo.
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatrb:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplatb Inp
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
          .add(MI.getOperand(1));
    } else {
      // Replicate the low byte of the register across a word, then splat.
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
          .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
          .addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatih:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplath SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .add(MI.getOperand(1));
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
          .addReg(SplatV);
    } else {
      // Replicate the halfword immediate into both halves of a word.
      // SplatV = A2_tfrsi #imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      assert(InpOp.isImm());
      uint32_t V = InpOp.getImm() & 0xFFFF;
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .addImm(V << 16 | V);
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatrh:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplath Inp
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
          .add(MI.getOperand(1));
    } else {
      // SplatV = A2_combine_ll Inp, Inp
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
          .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
          .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatiw:
  case Hexagon::PS_vsplatrw:
    // Word splats mutate MI in place instead of erasing it: materialize
    // the immediate (if any) into a register, then retarget the opcode.
    if (Opc == Hexagon::PS_vsplatiw) {
      // SplatV = A2_tfrsi #imm
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .add(MI.getOperand(1));
      MI.getOperand(1).ChangeToRegister(SplatV, false);
    }
    // OutV = V6_lvsplatw SplatV/Inp
    MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
    break;
  }
}
736
738HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
739 SelectionDAG &DAG) const {
740 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
741 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
742
743 unsigned ElemWidth = ElemTy.getSizeInBits();
744 if (ElemWidth == 8)
745 return ElemIdx;
746
747 unsigned L = Log2_32(ElemWidth/8);
748 const SDLoc &dl(ElemIdx);
749 return DAG.getNode(ISD::SHL, dl, MVT::i32,
750 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
751}
752
754HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
755 SelectionDAG &DAG) const {
756 unsigned ElemWidth = ElemTy.getSizeInBits();
757 assert(ElemWidth >= 8 && ElemWidth <= 32);
758 if (ElemWidth == 32)
759 return Idx;
760
761 if (ty(Idx) != MVT::i32)
762 Idx = DAG.getBitcast(MVT::i32, Idx);
763 const SDLoc &dl(Idx);
764 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
765 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
766 return SubIdx;
767}
768
770HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
771 SDValue Op1, ArrayRef<int> Mask,
772 SelectionDAG &DAG) const {
773 MVT OpTy = ty(Op0);
774 assert(OpTy == ty(Op1));
775
776 MVT ElemTy = OpTy.getVectorElementType();
777 if (ElemTy == MVT::i8)
778 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
779 assert(ElemTy.getSizeInBits() >= 8);
780
781 MVT ResTy = tyVector(OpTy, MVT::i8);
782 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
783
784 SmallVector<int,128> ByteMask;
785 for (int M : Mask) {
786 if (M < 0) {
787 for (unsigned I = 0; I != ElemSize; ++I)
788 ByteMask.push_back(-1);
789 } else {
790 int NewM = M*ElemSize;
791 for (unsigned I = 0; I != ElemSize; ++I)
792 ByteMask.push_back(NewM+I);
793 }
794 }
795 assert(ResTy.getVectorNumElements() == ByteMask.size());
796 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
797 opCastElem(Op1, MVT::i8, DAG), ByteMask);
798}
799
// Build a single HVX vector register from the scalar operands of a
// BUILD_VECTOR, trying progressively cheaper strategies: undef, zero,
// splat, constant-pool load, shuffle of an existing vector, and finally
// a rotate/insert sequence built in two halves.
// NOTE(review): this listing is elided -- the 'SDValue' return-type line,
// the declaration of Words (original line 812, a word-sized SDValue
// vector), and the trailing operands of the DAG.getLoad call (original
// line 872) are missing from this excerpt.
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);

  if (VecTy.getVectorElementType() != MVT::i32 &&
      !(Subtarget.useHVXFloatingPoint() &&
        VecTy.getVectorElementType() == MVT::f32)) {
    // Sub-word (1- or 2-byte) elements: pack groups of them into 32-bit
    // words first, so the rest of the code deals only with words.
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
      Words.push_back(DAG.getBitcast(MVT::i32, W));
    }
  } else {
    // Word-sized elements are used as-is.
    for (SDValue V : Values)
      Words.push_back(DAG.getBitcast(MVT::i32, V));
  }
  // True (with SplatV set) if all defined values are identical; an
  // all-undef input counts as a splat of Values[0].
  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
    unsigned NumValues = Values.size();
    assert(NumValues > 0);
    bool IsUndef = true;
    for (unsigned i = 0; i != NumValues; ++i) {
      if (Values[i].isUndef())
        continue;
      IsUndef = false;
      if (!SplatV.getNode())
        SplatV = Values[i];
      else if (SplatV != Values[i])
        return false;
    }
    if (IsUndef)
      SplatV = Values[0];
    return true;
  };

  unsigned NumWords = Words.size();
  SDValue SplatV;
  bool IsSplat = isSplat(Words, SplatV);
  if (IsSplat && isUndef(SplatV))
    return DAG.getUNDEF(VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    if (isNullConstant(SplatV))
      return getZero(dl, VecTy, DAG);
    // Splat the word value and bitcast back to the requested type.
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
    return DAG.getBitcast(VecTy, S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a vsplat.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    // All-constant, non-splat: materialize through the constant pool.
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(Tmp);
    Align Alignment(HwLen);
    SDValue CP =
        LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
    return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(V)) {
        SrcIdx.push_back(-1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      // Only constant extract indices can be turned into a shuffle mask.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(I))
          Mask.push_back(I);
      }

      SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
                                       DAG.getUNDEF(ExtTy), Mask);
      // If the source was a pair, the result is its low half.
      return ExtLen == VecLen ? S : LoHalf(S, DAG);
    }
  }

  // Find most common element to initialize vector with. This is to avoid
  // unnecessary vinsert/valign for cases where the same value is present
  // many times. Creates a histogram of the vector's elements to find the
  // most common element n.
  assert(4*Words.size() == Subtarget.getVectorLength());
  int VecHist[32];
  int n = 0;
  for (unsigned i = 0; i != NumWords; ++i) {
    VecHist[i] = 0;
    if (Words[i].isUndef())
      continue;
    for (unsigned j = i; j != NumWords; ++j)
      if (Words[i] == Words[j])
        VecHist[i]++;

    if (VecHist[i] > VecHist[n])
      n = i;
  }

  SDValue HalfV = getZero(dl, VecTy, DAG);
  if (VecHist[n] > 1) {
    // Pre-fill both halves with the most common word.
    SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
    HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
                        {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
  }
  SDValue HalfV0 = HalfV;
  SDValue HalfV1 = HalfV;

  // Construct two halves in parallel, then or them together. Rn and Rm count
  // number of rotations needed before the next element. One last rotation is
  // performed post-loop to position the last element.
  int Rn = 0, Rm = 0;
  SDValue Sn, Sm;
  SDValue N = HalfV0;
  SDValue M = HalfV1;
  for (unsigned i = 0; i != NumWords/2; ++i) {
    // Rotate by element count since last insertion.
    if (Words[i] != Words[n] || VecHist[n] <= 1) {
      Sn = DAG.getConstant(Rn, dl, MVT::i32);
      HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
      N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV0, Words[i]});
      Rn = 0;
    }
    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
      Sm = DAG.getConstant(Rm, dl, MVT::i32);
      HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
      M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV1, Words[i+NumWords/2]});
      Rm = 0;
    }
    Rn += 4;
    Rm += 4;
  }
  // Perform last rotation.
  Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
  Sm = DAG.getConstant(Rm, dl, MVT::i32);
  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});

  // Combine the two halves with a bitwise OR over i32 vectors, then cast
  // back to the requested element type.
  SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
  SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);

  SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});

  SDValue OutV =
      DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
  return OutV;
}
1012
1013SDValue
1014HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
1015 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
1016 MVT PredTy = ty(PredV);
1017 unsigned HwLen = Subtarget.getVectorLength();
1018 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1019
1020 if (Subtarget.isHVXVectorType(PredTy, true)) {
1021 // Move the vector predicate SubV to a vector register, and scale it
1022 // down to match the representation (bytes per type element) that VecV
1023 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1024 // in general) element and put them at the front of the resulting
1025 // vector. This subvector will then be inserted into the Q2V of VecV.
1026 // To avoid having an operation that generates an illegal type (short
1027 // vector), generate a full size vector.
1028 //
1029 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1030 SmallVector<int,128> Mask(HwLen);
1031 // Scale = BitBytes(PredV) / Given BitBytes.
1032 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1033 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1034
1035 for (unsigned i = 0; i != HwLen; ++i) {
1036 unsigned Num = i % Scale;
1037 unsigned Off = i / Scale;
1038 Mask[BlockLen*Num + Off] = i;
1039 }
1040 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1041 if (!ZeroFill)
1042 return S;
1043 // Fill the bytes beyond BlockLen with 0s.
1044 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1045 // when BlockLen < HwLen.
1046 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1047 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1048 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1049 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1050 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
1051 return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1052 }
1053
1054 // Make sure that this is a valid scalar predicate.
1055 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1056
1057 unsigned Bytes = 8 / PredTy.getVectorNumElements();
1058 SmallVector<SDValue,4> Words[2];
1059 unsigned IdxW = 0;
1060
1061 SDValue W0 = isUndef(PredV)
1062 ? DAG.getUNDEF(MVT::i64)
1063 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1064 if (Bytes < BitBytes) {
1065 Words[IdxW].push_back(HiHalf(W0, DAG));
1066 Words[IdxW].push_back(LoHalf(W0, DAG));
1067 } else
1068 Words[IdxW].push_back(W0);
1069
1070 while (Bytes < BitBytes) {
1071 IdxW ^= 1;
1072 Words[IdxW].clear();
1073
1074 if (Bytes < 4) {
1075 for (const SDValue &W : Words[IdxW ^ 1]) {
1076 SDValue T = expandPredicate(W, dl, DAG);
1077 Words[IdxW].push_back(HiHalf(T, DAG));
1078 Words[IdxW].push_back(LoHalf(T, DAG));
1079 }
1080 } else {
1081 for (const SDValue &W : Words[IdxW ^ 1]) {
1082 Words[IdxW].push_back(W);
1083 Words[IdxW].push_back(W);
1084 }
1085 }
1086 Bytes *= 2;
1087 }
1088
1089 while (Bytes > BitBytes) {
1090 IdxW ^= 1;
1091 Words[IdxW].clear();
1092
1093 if (Bytes <= 4) {
1094 for (const SDValue &W : Words[IdxW ^ 1]) {
1095 SDValue T = contractPredicate(W, dl, DAG);
1096 Words[IdxW].push_back(T);
1097 }
1098 } else {
1099 for (const SDValue &W : Words[IdxW ^ 1]) {
1100 Words[IdxW].push_back(W);
1101 }
1102 }
1103 Bytes /= 2;
1104 }
1105
1106 assert(Bytes == BitBytes);
1107 if (BitBytes == 1 && PredTy == MVT::v2i1)
1108 ByteTy = MVT::getVectorVT(MVT::i16, HwLen);
1109
1110 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1111 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1112 for (const SDValue &W : Words[IdxW]) {
1113 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1114 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1115 }
1116
1117 return Vec;
1118}
1119
1120SDValue
1121HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1122 const SDLoc &dl, MVT VecTy,
1123 SelectionDAG &DAG) const {
1124 // Construct a vector V of bytes, such that a comparison V >u 0 would
1125 // produce the required vector predicate.
1126 unsigned VecLen = Values.size();
1127 unsigned HwLen = Subtarget.getVectorLength();
1128 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1130 bool AllT = true, AllF = true;
1131
1132 auto IsTrue = [] (SDValue V) {
1133 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1134 return !N->isZero();
1135 return false;
1136 };
1137 auto IsFalse = [] (SDValue V) {
1138 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1139 return N->isZero();
1140 return false;
1141 };
1142
1143 if (VecLen <= HwLen) {
1144 // In the hardware, each bit of a vector predicate corresponds to a byte
1145 // of a vector register. Calculate how many bytes does a bit of VecTy
1146 // correspond to.
1147 assert(HwLen % VecLen == 0);
1148 unsigned BitBytes = HwLen / VecLen;
1149 for (SDValue V : Values) {
1150 AllT &= IsTrue(V);
1151 AllF &= IsFalse(V);
1152
1153 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1154 : DAG.getUNDEF(MVT::i8);
1155 for (unsigned B = 0; B != BitBytes; ++B)
1156 Bytes.push_back(Ext);
1157 }
1158 } else {
1159 // There are as many i1 values, as there are bits in a vector register.
1160 // Divide the values into groups of 8 and check that each group consists
1161 // of the same value (ignoring undefs).
1162 for (unsigned I = 0; I != VecLen; I += 8) {
1163 unsigned B = 0;
1164 // Find the first non-undef value in this group.
1165 for (; B != 8; ++B) {
1166 if (!Values[I+B].isUndef())
1167 break;
1168 }
1169 SDValue F = Values[I+B];
1170 AllT &= IsTrue(F);
1171 AllF &= IsFalse(F);
1172
1173 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1174 : DAG.getUNDEF(MVT::i8);
1175 Bytes.push_back(Ext);
1176 // Verify that the rest of values in the group are the same as the
1177 // first.
1178 for (; B != 8; ++B)
1179 assert(Values[I+B].isUndef() || Values[I+B] == F);
1180 }
1181 }
1182
1183 if (AllT)
1184 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1185 if (AllF)
1186 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1187
1188 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1189 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1190 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1191}
1192
1193SDValue
1194HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1195 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1196 MVT ElemTy = ty(VecV).getVectorElementType();
1197
1198 unsigned ElemWidth = ElemTy.getSizeInBits();
1199 assert(ElemWidth >= 8 && ElemWidth <= 32);
1200 (void)ElemWidth;
1201
1202 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1203 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1204 {VecV, ByteIdx});
1205 if (ElemTy == MVT::i32)
1206 return ExWord;
1207
1208 // Have an extracted word, need to extract the smaller element out of it.
1209 // 1. Extract the bits of (the original) IdxV that correspond to the index
1210 // of the desired element in the 32-bit word.
1211 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1212 // 2. Extract the element from the word.
1213 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1214 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1215}
1216
1217SDValue
1218HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1219 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1220 // Implement other return types if necessary.
1221 assert(ResTy == MVT::i1);
1222
1223 unsigned HwLen = Subtarget.getVectorLength();
1224 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1225 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1226
1227 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1228 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1229 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1230
1231 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1232 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1233 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1234}
1235
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert a single 8-, 16-, or 32-bit element ValV into VecV at index IdxV.
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert a 32-bit word at a given byte offset: rotate the containing
  // (word-aligned) slot down to position 0, insert with VINSERTW0, then
  // rotate back by HwLen - offset.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Align the byte index down to a multiple of 4 (word boundary).
    SDValue MaskV =
        DAG.getNode(ISD::AND, dl, MVT::i32,
                    {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
    SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
    SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
    SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
                                     dl, MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(ty(Ext), ElemTy);
  SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
                             ValV, SubIdx, dl, ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}
1281
1282SDValue
1283HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1284 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1285 unsigned HwLen = Subtarget.getVectorLength();
1286 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1287 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1288
1289 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1290 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1291 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1292 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1293
1294 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1295 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1296}
1297
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
      SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Extract a subvector (of a scalar-register-sized type, or of a single
  // HVX vector out of a pair) starting at constant index IdxV.
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  // IdxV is required to be a constant here.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
  if (isHvxPairTy(VecTy)) {
    unsigned SubIdx = Hexagon::vsub_lo;
    // The subvector lies in the high half when its bit offset reaches the
    // size of one single vector (8*HwLen bits).
    if (Idx * ElemWidth >= 8 * HwLen) {
      SubIdx = Hexagon::vsub_hi;
      Idx -= VecTy.getVectorNumElements() / 2;
    }

    VecTy = typeSplit(VecTy).first;
    VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
    if (VecTy == ResTy)
      return VecV;
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);

  // View the vector as i32 words and extract the word(s) covering the
  // requested subvector with VEXTRACTW.
  MVT WordTy = tyVector(VecTy, MVT::i32);
  SDValue WordVec = DAG.getBitcast(WordTy, VecV);
  unsigned WordIdx = (Idx*ElemWidth) / 32;

  SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
  SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
  if (ResTy.getSizeInBits() == 32)
    return DAG.getBitcast(ResTy, W0);

  // 64-bit result: extract the adjacent word and combine the two into a
  // register pair (W1 = high, W0 = low).
  SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
  SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
  SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
  return DAG.getBitcast(ResTy, WW);
}
1341
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Extract a subvector of a vector predicate, starting at constant IdxV.
  // The result is either another (shorter) HVX predicate, or a scalar
  // predicate (v2i1/v4i1/v8i1).
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  // Work on the byte-vector image of the predicate.
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes in the register image per one i1 of the source predicate.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
  SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
                  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
}
1405
1406SDValue
1407HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1408 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1409 MVT VecTy = ty(VecV);
1410 MVT SubTy = ty(SubV);
1411 unsigned HwLen = Subtarget.getVectorLength();
1412 MVT ElemTy = VecTy.getVectorElementType();
1413 unsigned ElemWidth = ElemTy.getSizeInBits();
1414
1415 bool IsPair = isHvxPairTy(VecTy);
1416 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1417 // The two single vectors that VecV consists of, if it's a pair.
1418 SDValue V0, V1;
1419 SDValue SingleV = VecV;
1420 SDValue PickHi;
1421
1422 if (IsPair) {
1423 V0 = LoHalf(VecV, DAG);
1424 V1 = HiHalf(VecV, DAG);
1425
1426 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1427 dl, MVT::i32);
1428 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1429 if (isHvxSingleTy(SubTy)) {
1430 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1431 unsigned Idx = CN->getZExtValue();
1432 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1433 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1434 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1435 }
1436 // If IdxV is not a constant, generate the two variants: with the
1437 // SubV as the high and as the low subregister, and select the right
1438 // pair based on the IdxV.
1439 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1440 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1441 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1442 }
1443 // The subvector being inserted must be entirely contained in one of
1444 // the vectors V0 or V1. Set SingleV to the correct one, and update
1445 // IdxV to be the index relative to the beginning of that vector.
1446 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1447 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1448 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1449 }
1450
1451 // The only meaningful subvectors of a single HVX vector are those that
1452 // fit in a scalar register.
1453 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1454 // Convert IdxV to be index in bytes.
1455 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1456 if (!IdxN || !IdxN->isZero()) {
1457 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1458 DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1459 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1460 }
1461 // When inserting a single word, the rotation back to the original position
1462 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1463 // by (HwLen-4)-Idx.
1464 unsigned RolBase = HwLen;
1465 if (SubTy.getSizeInBits() == 32) {
1466 SDValue V = DAG.getBitcast(MVT::i32, SubV);
1467 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
1468 } else {
1469 SDValue V = DAG.getBitcast(MVT::i64, SubV);
1470 SDValue R0 = LoHalf(V, DAG);
1471 SDValue R1 = HiHalf(V, DAG);
1472 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1473 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1474 DAG.getConstant(4, dl, MVT::i32));
1475 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1476 RolBase = HwLen-4;
1477 }
1478 // If the vector wasn't ror'ed, don't ror it back.
1479 if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1480 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1481 DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1482 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1483 }
1484
1485 if (IsPair) {
1486 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1487 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1488 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1489 }
1490 return SingleV;
1491}
1492
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale: how many SubV-sized blocks fit in VecV.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // BitBytes: bytes of the register image per one i1 of VecV.
  unsigned BitBytes = HwLen / VecLen;
  // BlockLen: bytes of the register image covered by SubV.
  unsigned BlockLen = HwLen / Scale;

  // Work on the byte-vector images of both predicates.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  // Rotate the insertion point down to byte 0 (skip when the index is a
  // known zero).
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1538
1539SDValue
1540HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1541 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1542 // Sign- and any-extending of a vector predicate to a vector register is
1543 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1544 // a vector of 1s (where the 1s are of type matching the vector type).
1545 assert(Subtarget.isHVXVectorType(ResTy));
1546 if (!ZeroExt)
1547 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1548
1549 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1550 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1551 DAG.getConstant(1, dl, MVT::i32));
1552 SDValue False = getZero(dl, ResTy, DAG);
1553 return DAG.getSelect(dl, ResTy, VecV, True, False);
1554}
1555
1556SDValue
1557HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1558 MVT ResTy, SelectionDAG &DAG) const {
1559 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1560 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1561 // vector register. The remaining bits of the vector register are
1562 // unspecified.
1563
1564 MachineFunction &MF = DAG.getMachineFunction();
1565 unsigned HwLen = Subtarget.getVectorLength();
1566 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1567 MVT PredTy = ty(VecQ);
1568 unsigned PredLen = PredTy.getVectorNumElements();
1569 assert(HwLen % PredLen == 0);
1570 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1571
1572 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1574 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1575 // These are bytes with the LSB rotated left with respect to their index.
1576 for (unsigned i = 0; i != HwLen/8; ++i) {
1577 for (unsigned j = 0; j != 8; ++j)
1578 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1579 }
1580 Constant *CV = ConstantVector::get(Tmp);
1581 Align Alignment(HwLen);
1582 SDValue CP =
1583 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1584 SDValue Bytes =
1585 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1587
1588 // Select the bytes that correspond to true bits in the vector predicate.
1589 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1590 getZero(dl, VecTy, DAG));
1591 // Calculate the OR of all bytes in each group of 8. That will compress
1592 // all the individual bits into a single byte.
1593 // First, OR groups of 4, via vrmpy with 0x01010101.
1594 SDValue All1 =
1595 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1596 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1597 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1598 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1599 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1600 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1601
1602 // Pick every 8th byte and coalesce them at the beginning of the output.
1603 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1604 // byte and so on.
1605 SmallVector<int,128> Mask;
1606 for (unsigned i = 0; i != HwLen; ++i)
1607 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1608 SDValue Collect =
1609 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1610 return DAG.getBitcast(ResTy, Collect);
1611}
1612
1613SDValue
1614HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1615 const SDLoc &dl, SelectionDAG &DAG) const {
1616 // Take a vector and resize the element type to match the given type.
1617 MVT InpTy = ty(VecV);
1618 if (InpTy == ResTy)
1619 return VecV;
1620
1621 unsigned InpWidth = InpTy.getSizeInBits();
1622 unsigned ResWidth = ResTy.getSizeInBits();
1623
1624 if (InpTy.isFloatingPoint()) {
1625 return InpWidth < ResWidth
1626 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1627 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1628 DAG.getTargetConstant(0, dl, MVT::i32));
1629 }
1630
1631 assert(InpTy.isInteger());
1632
1633 if (InpWidth < ResWidth) {
1634 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1635 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1636 } else {
1637 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1638 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1639 }
1640}
1641
1642SDValue
1643HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1644 SelectionDAG &DAG) const {
1645 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1646
1647 const SDLoc &dl(Vec);
1648 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1649 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1650 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1651}
1652
1653SDValue
1654HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1655 const {
1656 const SDLoc &dl(Op);
1657 MVT VecTy = ty(Op);
1658
1659 unsigned Size = Op.getNumOperands();
1661 for (unsigned i = 0; i != Size; ++i)
1662 Ops.push_back(Op.getOperand(i));
1663
1664 if (VecTy.getVectorElementType() == MVT::i1)
1665 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1666
1667 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1668 // not a legal type, just bitcast the node to use i16
1669 // types and bitcast the result back to f16
1670 if (VecTy.getVectorElementType() == MVT::f16) {
1672 for (unsigned i = 0; i != Size; i++)
1673 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1674
1675 SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1676 tyVector(VecTy, MVT::i16), NewOps);
1677 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1678 }
1679
1680 // First, split the BUILD_VECTOR for vector pairs. We could generate
1681 // some pairs directly (via splat), but splats should be generated
1682 // by the combiner prior to getting here.
1683 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1685 MVT SingleTy = typeSplit(VecTy).first;
1686 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1687 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1688 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1689 }
1690
1691 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1692}
1693
1694SDValue
1695HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1696 const {
1697 const SDLoc &dl(Op);
1698 MVT VecTy = ty(Op);
1699 MVT ArgTy = ty(Op.getOperand(0));
1700
1701 if (ArgTy == MVT::f16) {
1702 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1703 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1704 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1705 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1706 return DAG.getBitcast(VecTy, Splat);
1707 }
1708
1709 return SDValue();
1710}
1711
1712SDValue
1713HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1714 const {
1715 // Vector concatenation of two integer (non-bool) vectors does not need
1716 // special lowering. Custom-lower concats of bool vectors and expand
1717 // concats of more than 2 vectors.
1718 MVT VecTy = ty(Op);
1719 const SDLoc &dl(Op);
1720 unsigned NumOp = Op.getNumOperands();
1721 if (VecTy.getVectorElementType() != MVT::i1) {
1722 if (NumOp == 2)
1723 return Op;
1724 // Expand the other cases into a build-vector.
1726 for (SDValue V : Op.getNode()->ops())
1727 DAG.ExtractVectorElements(V, Elems);
1728 // A vector of i16 will be broken up into a build_vector of i16's.
1729 // This is a problem, since at the time of operation legalization,
1730 // all operations are expected to be type-legalized, and i16 is not
1731 // a legal type. If any of the extracted elements is not of a valid
1732 // type, sign-extend it to a valid one.
1733 for (SDValue &V : Elems) {
1734 MVT Ty = ty(V);
1735 if (!isTypeLegal(Ty)) {
1736 MVT NTy = typeLegalize(Ty, DAG);
1737 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1738 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1739 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1740 V.getOperand(0), V.getOperand(1)),
1741 DAG.getValueType(Ty));
1742 continue;
1743 }
1744 // A few less complicated cases.
1745 switch (V.getOpcode()) {
1746 case ISD::Constant:
1747 V = DAG.getSExtOrTrunc(V, dl, NTy);
1748 break;
1749 case ISD::UNDEF:
1750 V = DAG.getUNDEF(NTy);
1751 break;
1752 case ISD::TRUNCATE:
1753 V = V.getOperand(0);
1754 break;
1755 default:
1756 llvm_unreachable("Unexpected vector element");
1757 }
1758 }
1759 }
1760 return DAG.getBuildVector(VecTy, dl, Elems);
1761 }
1762
1763 assert(VecTy.getVectorElementType() == MVT::i1);
1764 unsigned HwLen = Subtarget.getVectorLength();
1765 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1766
1767 SDValue Op0 = Op.getOperand(0);
1768
1769 // If the operands are HVX types (i.e. not scalar predicates), then
1770 // defer the concatenation, and create QCAT instead.
1771 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1772 if (NumOp == 2)
1773 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1774
1775 ArrayRef<SDUse> U(Op.getNode()->ops());
1778
1779 MVT HalfTy = typeSplit(VecTy).first;
1780 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1781 Ops.take_front(NumOp/2));
1782 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1783 Ops.take_back(NumOp/2));
1784 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1785 }
1786
1787 // Count how many bytes (in a vector register) each bit in VecTy
1788 // corresponds to.
1789 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1790
1791 SmallVector<SDValue,8> Prefixes;
1792 for (SDValue V : Op.getNode()->op_values()) {
1793 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1794 Prefixes.push_back(P);
1795 }
1796
1797 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1798 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1799 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1800 SDValue Res = getZero(dl, ByteTy, DAG);
1801 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1802 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1803 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1804 }
1805 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1806}
1807
1808SDValue
1809HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1810 const {
1811 // Change the type of the extracted element to i32.
1812 SDValue VecV = Op.getOperand(0);
1813 MVT ElemTy = ty(VecV).getVectorElementType();
1814 const SDLoc &dl(Op);
1815 SDValue IdxV = Op.getOperand(1);
1816 if (ElemTy == MVT::i1)
1817 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1818
1819 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1820}
1821
1822SDValue
1823HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1824 const {
1825 const SDLoc &dl(Op);
1826 MVT VecTy = ty(Op);
1827 SDValue VecV = Op.getOperand(0);
1828 SDValue ValV = Op.getOperand(1);
1829 SDValue IdxV = Op.getOperand(2);
1830 MVT ElemTy = ty(VecV).getVectorElementType();
1831 if (ElemTy == MVT::i1)
1832 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1833
1834 if (ElemTy == MVT::f16) {
1836 tyVector(VecTy, MVT::i16),
1837 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1838 DAG.getBitcast(MVT::i16, ValV), IdxV);
1839 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1840 }
1841
1842 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1843}
1844
1845SDValue
1846HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1847 const {
1848 SDValue SrcV = Op.getOperand(0);
1849 MVT SrcTy = ty(SrcV);
1850 MVT DstTy = ty(Op);
1851 SDValue IdxV = Op.getOperand(1);
1852 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1853 assert(Idx % DstTy.getVectorNumElements() == 0);
1854 (void)Idx;
1855 const SDLoc &dl(Op);
1856
1857 MVT ElemTy = SrcTy.getVectorElementType();
1858 if (ElemTy == MVT::i1)
1859 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1860
1861 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1862}
1863
1864SDValue
1865HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1866 const {
1867 // Idx does not need to be a constant.
1868 SDValue VecV = Op.getOperand(0);
1869 SDValue ValV = Op.getOperand(1);
1870 SDValue IdxV = Op.getOperand(2);
1871
1872 const SDLoc &dl(Op);
1873 MVT VecTy = ty(VecV);
1874 MVT ElemTy = VecTy.getVectorElementType();
1875 if (ElemTy == MVT::i1)
1876 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1877
1878 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1879}
1880
1881SDValue
1882HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1883 // Lower any-extends of boolean vectors to sign-extends, since they
1884 // translate directly to Q2V. Zero-extending could also be done equally
1885 // fast, but Q2V is used/recognized in more places.
1886 // For all other vectors, use zero-extend.
1887 MVT ResTy = ty(Op);
1888 SDValue InpV = Op.getOperand(0);
1889 MVT ElemTy = ty(InpV).getVectorElementType();
1890 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1891 return LowerHvxSignExt(Op, DAG);
1892 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1893}
1894
1895SDValue
1896HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1897 MVT ResTy = ty(Op);
1898 SDValue InpV = Op.getOperand(0);
1899 MVT ElemTy = ty(InpV).getVectorElementType();
1900 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1901 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1902 return Op;
1903}
1904
1905SDValue
1906HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1907 MVT ResTy = ty(Op);
1908 SDValue InpV = Op.getOperand(0);
1909 MVT ElemTy = ty(InpV).getVectorElementType();
1910 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1911 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1912 return Op;
1913}
1914
SDValue
HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
  // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
  // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
  const SDLoc &dl(Op);
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  assert(ResTy == ty(InpV));

  // Calculate the vectors of 1 and bitwidth(x).
  MVT ElemTy = ty(InpV).getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // Splats of 1, bitwidth, and all-ones (-1), used by the formula above.
  SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(1, dl, MVT::i32));
  SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(ElemWidth, dl, MVT::i32));
  SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                              DAG.getAllOnesConstant(dl, MVT::i32));

  // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
  // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
  // it separately in custom combine or selection).
  // A = ~x & (x-1): turns the trailing zeros of x into ones, everything
  // else into zeros.
  SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
                          {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
                           DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
  return DAG.getNode(ISD::SUB, dl, ResTy,
                     {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
}
1944
1945SDValue
1946HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1947 const SDLoc &dl(Op);
1948 MVT ResTy = ty(Op);
1949 assert(ResTy.getVectorElementType() == MVT::i32);
1950
1951 SDValue Vs = Op.getOperand(0);
1952 SDValue Vt = Op.getOperand(1);
1953
1954 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1955 unsigned Opc = Op.getOpcode();
1956
1957 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1958 if (Opc == ISD::MULHU)
1959 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1960 if (Opc == ISD::MULHS)
1961 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1962
1963#ifndef NDEBUG
1964 Op.dump(&DAG);
1965#endif
1966 llvm_unreachable("Unexpected mulh operation");
1967}
1968
SDValue
HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
  // Lower [US][SU]MUL_LOHI (two-result full multiply). Picks the cheapest
  // expansion based on which results are actually used and the HVX version.
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Vu = Op.getOperand(0);
  SDValue Vv = Op.getOperand(1);

  // If the HI part is not used, convert it to a regular MUL.
  if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
    // Need to preserve the types and the number of values.
    SDValue Hi = DAG.getUNDEF(ty(HiVal));
    SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
    return DAG.getMergeValues({Lo, Hi}, dl);
  }

  // Operand signedness: SMUL is signed*signed, USMUL is unsigned*signed.
  bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
  bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;

  // Legal on HVX v62+, but lower it here because patterns can't handle multi-
  // valued nodes.
  if (Subtarget.useHVXV62Ops())
    return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);

  if (Opc == HexagonISD::SMUL_LOHI) {
    // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
    // for other signedness LOHI is cheaper.
    if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
      SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
      SDValue Lo = DAG.getUNDEF(ty(LoVal));
      return DAG.getMergeValues({Lo, Hi}, dl);
    }
  }

  // General pre-v62 expansion: both halves are needed.
  return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
}
2004
2005SDValue
2006HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
2007 SDValue Val = Op.getOperand(0);
2008 MVT ResTy = ty(Op);
2009 MVT ValTy = ty(Val);
2010 const SDLoc &dl(Op);
2011
2012 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
2013 unsigned HwLen = Subtarget.getVectorLength();
2014 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
2015 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2016 unsigned BitWidth = ResTy.getSizeInBits();
2017
2018 if (BitWidth < 64) {
2019 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
2020 dl, MVT::i32, DAG);
2021 if (BitWidth == 32)
2022 return W0;
2023 assert(BitWidth < 32u);
2024 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2025 }
2026
2027 // The result is >= 64 bits. The only options are 64 or 128.
2028 assert(BitWidth == 64 || BitWidth == 128);
2030 for (unsigned i = 0; i != BitWidth/32; ++i) {
2031 SDValue W = extractHvxElementReg(
2032 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2033 Words.push_back(W);
2034 }
2035 SmallVector<SDValue,2> Combines;
2036 assert(Words.size() % 2 == 0);
2037 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2038 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2039 Combines.push_back(C);
2040 }
2041
2042 if (BitWidth == 64)
2043 return Combines[0];
2044
2045 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2046 }
2047
2048 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2049 // Splat the input into a 32-element i32 vector, then AND each element
2050 // with a unique bitmask to isolate individual bits.
2051 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2052 assert(Val32.getValueType().getSizeInBits() == 32 &&
2053 "Input must be 32 bits");
2054 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2055 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2057 for (unsigned i = 0; i < 32; ++i)
2058 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2059
2060 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2061 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2062 return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2063 };
2064 // === Case: v32i1 ===
2065 if (ResTy == MVT::v32i1 &&
2066 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2067 Subtarget.useHVX128BOps()) {
2068 SDValue Val32 = Val;
2069 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2070 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2071 return bitcastI32ToV32I1(Val32);
2072 }
2073 // === Case: v64i1 ===
2074 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2075 // Split i64 into lo/hi 32-bit halves.
2076 SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2077 SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2078 DAG.getConstant(32, dl, MVT::i64));
2079 SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2080
2081 // Reuse the same 32-bit logic twice.
2082 SDValue LoRes = bitcastI32ToV32I1(Lo);
2083 SDValue HiRes = bitcastI32ToV32I1(Hi);
2084
2085 // Concatenate into a v64i1 predicate.
2086 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
2087 }
2088
2089 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2090 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2091 unsigned BitWidth = ValTy.getSizeInBits();
2092 unsigned HwLen = Subtarget.getVectorLength();
2093 assert(BitWidth == HwLen);
2094
2095 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2096 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2097 // Splat each byte of Val 8 times.
2098 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2099 // where b0, b1,..., b15 are least to most significant bytes of I.
2101 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2102 // These are bytes with the LSB rotated left with respect to their index.
2104 for (unsigned I = 0; I != HwLen / 8; ++I) {
2105 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2106 SDValue Byte =
2107 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2108 for (unsigned J = 0; J != 8; ++J) {
2109 Bytes.push_back(Byte);
2110 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2111 }
2112 }
2113
2114 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2115 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2116 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2117
2118 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2119 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2120 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2121 }
2122
2123 return Op;
2124}
2125
2126SDValue
2127HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2128 // Sign- and zero-extends are legal.
2129 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2130 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2131 Op.getOperand(0));
2132}
2133
2134SDValue
2135HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2136 MVT ResTy = ty(Op);
2137 if (ResTy.getVectorElementType() != MVT::i1)
2138 return Op;
2139
2140 const SDLoc &dl(Op);
2141 unsigned HwLen = Subtarget.getVectorLength();
2142 unsigned VecLen = ResTy.getVectorNumElements();
2143 assert(HwLen % VecLen == 0);
2144 unsigned ElemSize = HwLen / VecLen;
2145
2146 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2147 SDValue S =
2148 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2149 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2150 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2151 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2152}
2153
2154SDValue
2155HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2156 if (SDValue S = getVectorShiftByInt(Op, DAG))
2157 return S;
2158 return Op;
2159}
2160
2161SDValue
2162HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2163 SelectionDAG &DAG) const {
2164 unsigned Opc = Op.getOpcode();
2165 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2166
2167 // Make sure the shift amount is within the range of the bitwidth
2168 // of the element type.
2169 SDValue A = Op.getOperand(0);
2170 SDValue B = Op.getOperand(1);
2171 SDValue S = Op.getOperand(2);
2172
2173 MVT InpTy = ty(A);
2174 MVT ElemTy = InpTy.getVectorElementType();
2175
2176 const SDLoc &dl(Op);
2177 unsigned ElemWidth = ElemTy.getSizeInBits();
2178 bool IsLeft = Opc == ISD::FSHL;
2179
2180 // The expansion into regular shifts produces worse code for i8 and for
2181 // right shift of i32 on v65+.
2182 bool UseShifts = ElemTy != MVT::i8;
2183 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2184 UseShifts = false;
2185
2186 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2187 // If this is a funnel shift by a scalar, lower it into regular shifts.
2188 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2189 SDValue ModS =
2190 DAG.getNode(ISD::AND, dl, MVT::i32,
2191 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2192 SDValue NegS =
2193 DAG.getNode(ISD::SUB, dl, MVT::i32,
2194 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2195 SDValue IsZero =
2196 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2197 // FSHL A, B => A << | B >>n
2198 // FSHR A, B => A <<n | B >>
2199 SDValue Part1 =
2200 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2201 SDValue Part2 =
2202 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2203 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2204 // If the shift amount was 0, pick A or B, depending on the direction.
2205 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2206 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2207 }
2208
2210 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2211
2212 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2213 return DAG.getNode(MOpc, dl, ty(Op),
2214 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2215}
2216
SDValue
HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
  // Lower a few HVX intrinsics to target-specific ISD nodes; all other
  // intrinsics are returned unchanged.
  const SDLoc &dl(Op);
  unsigned IntNo = Op.getConstantOperandVal(0);
  SmallVector<SDValue> Ops(Op->ops());

  // The *_parts intrinsics return (hi, lo), while the *MUL_LOHI nodes
  // produce (lo, hi) -- swap the two result values to compensate.
  auto Swap = [&](SDValue P) {
    return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
  };

  switch (IntNo) {
  case Intrinsic::hexagon_V6_pred_typecast:
  case Intrinsic::hexagon_V6_pred_typecast_128B: {
    MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
    if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
      // Casting a predicate to its own type is a no-op.
      if (ResTy == InpTy)
        return Ops[1];
      return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
    }
    break;
  }
  case Intrinsic::hexagon_V6_vmpyss_parts:
  case Intrinsic::hexagon_V6_vmpyss_parts_128B:
    return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
                            {Ops[1], Ops[2]}));
  case Intrinsic::hexagon_V6_vmpyuu_parts:
  case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
    return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
                            {Ops[1], Ops[2]}));
  case Intrinsic::hexagon_V6_vmpyus_parts:
  case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
    return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
                            {Ops[1], Ops[2]}));
  }
  } // switch

  return Op;
}
2255
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower masked loads and stores (MLOAD/MSTORE) of HVX vectors.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  // Memory operand covering one full HVX vector at the base address.
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    // Masked load: do a full load, then blend with the pass-through value
    // using the mask (unless the pass-through is undef).
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    if (isUndef(Thru))
      return Load;
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  // Fully aligned store: emit a single predicated store.
  if (MaskN->getAlign().value() % HwLen == 0) {
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  // Rotate both the mask (as a byte vector) and the value so they line up
  // with the two aligned vectors that straddle the store location, then
  // emit two predicated stores.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
2325
2326SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2327 SelectionDAG &DAG) const {
2328 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2329 // is legal (done via a pattern).
2330 assert(Subtarget.useHVXQFloatOps());
2331
2332 assert(Op->getOpcode() == ISD::FP_EXTEND);
2333
2334 MVT VecTy = ty(Op);
2335 MVT ArgTy = ty(Op.getOperand(0));
2336 const SDLoc &dl(Op);
2337 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2338
2339 SDValue F16Vec = Op.getOperand(0);
2340
2341 APFloat FloatVal = APFloat(1.0f);
2342 bool Ignored;
2344 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2345 SDValue VmpyVec =
2346 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2347
2348 MVT HalfTy = typeSplit(VecTy).first;
2349 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2350 SDValue LoVec =
2351 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2352 SDValue HiVec =
2353 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2354
2355 SDValue ShuffVec =
2356 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2357 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2358
2359 return ShuffVec;
2360}
2361
2362SDValue
2363HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2364 // Catch invalid conversion ops (just in case).
2365 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2366 Op.getOpcode() == ISD::FP_TO_UINT);
2367
2368 MVT ResTy = ty(Op);
2369 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2370 MVT IntTy = ResTy.getVectorElementType();
2371
2372 if (Subtarget.useHVXIEEEFPOps()) {
2373 // There are only conversions from f16.
2374 if (FpTy == MVT::f16) {
2375 // Other int types aren't legal in HVX, so we shouldn't see them here.
2376 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2377 // Conversions to i8 and i16 are legal.
2378 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2379 return Op;
2380 }
2381 }
2382
2383 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2384 return EqualizeFpIntConversion(Op, DAG);
2385
2386 return ExpandHvxFpToInt(Op, DAG);
2387}
2388
2389// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
2390// R1 = #1, R2 holds the v32i1 param
2391// V1 = vsplat(R1)
2392// V2 = vsplat(R2)
2393// Q0 = vand(V1,R1)
2394// V0.w=prefixsum(Q0)
2395// V0.w=vsub(V0.w,V1.w)
2396// V2.w = vlsr(V2.w,V0.w)
2397// V2 = vand(V2,V1)
2398// V2.sf = V2.w
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Convert a v32i1 predicate operand to v32f32, following the instruction
  // sequence described in the comment above this function.

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // Splat the constant 1 into a vector (V1 in the scheme above).
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Q0 = vand(V1,R1): an all-true word predicate used to seed the prefix sum.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // V0.w = prefixsum(Q0): element i gets the value i+1.
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // V2 = vsplat(R2): broadcast the 32-bit predicate value to every lane.
  SDNode *SplatParam = DAG.getMachineNode(
      Hexagon::V6_lvsplatw, dl, MVT::v32i32,
      DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
  // V0.w = vsub(V0.w,V1.w): per-lane shift amounts 0..31.
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // V2.w = vlsr(V2.w,V0.w): move lane i's predicate bit to bit 0.
  SDNode *IndexShift =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatParam, 0), SDValue(Vsub, 0));
  // V2 = vand(V2,V1): isolate bit 0, giving 0 or 1 per lane.
  SDNode *MaskOff =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift, 0), SDValue(SplatConst, 0));
  // V2.sf = V2.w: convert the 0/1 integers to float.
  SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
                                       SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2430
// For vector type v64i1 uint_to_fp to v64f16:
2432// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
2433// R3 = subreg_high (R32)
2434// R2 = subreg_low (R32)
2435// R1 = #1
2436// V1 = vsplat(R1)
2437// V2 = vsplat(R2)
2438// V3 = vsplat(R3)
2439// Q0 = vand(V1,R1)
2440// V0.w=prefixsum(Q0)
2441// V0.w=vsub(V0.w,V1.w)
2442// V2.w = vlsr(V2.w,V0.w)
2443// V3.w = vlsr(V3.w,V0.w)
2444// V2 = vand(V2,V1)
2445// V3 = vand(V3,V1)
2446// V2.h = vpacke(V3.w,V2.w)
2447// V2.hf = V2.h
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Convert a v64i1 predicate operand to v64f16, following the instruction
  // sequence described in the comment above this function.

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // View the v64i1 predicate as a 64-bit scalar.
  SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
  // vand between splatted const and const
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Get the prefixsum
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Get the vsub: per-lane shift amounts 0..31.
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Get vlsr for hi and lo: move lane i's predicate bit to bit 0.
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatHi, 0), SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatLo, 0), SDValue(Vsub, 0));
  // Get vand of hi and lo: isolate bit 0, giving 0 or 1 per lane.
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
  // Pack them: take the even halfwords of hi/lo into one v64i16 vector.
  SDNode *Pack =
      DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
                         SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
  // Convert the 0/1 halfwords to half-precision float.
  SDNode *Convert =
      DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2505
2506SDValue
2507HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2508 // Catch invalid conversion ops (just in case).
2509 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2510 Op.getOpcode() == ISD::UINT_TO_FP);
2511
2512 MVT ResTy = ty(Op);
2513 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2514 MVT FpTy = ResTy.getVectorElementType();
2515
2516 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2517 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2518 return LowerHvxPred32ToFp(Op, DAG);
2519 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2520 return LowerHvxPred64ToFp(Op, DAG);
2521 }
2522
2523 if (Subtarget.useHVXIEEEFPOps()) {
2524 // There are only conversions to f16.
2525 if (FpTy == MVT::f16) {
2526 // Other int types aren't legal in HVX, so we shouldn't see them here.
2527 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2528 // i8, i16 -> f16 is legal.
2529 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2530 return Op;
2531 }
2532 }
2533
2534 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2535 return EqualizeFpIntConversion(Op, DAG);
2536
2537 return ExpandHvxIntToFp(Op, DAG);
2538}
2539
2540HexagonTargetLowering::TypePair
2541HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2542 // Compare the widths of elements of the two types, and extend the narrower
2543 // type to match the with of the wider type. For vector types, apply this
2544 // to the element type.
2545 assert(Ty0.isVector() == Ty1.isVector());
2546
2547 MVT ElemTy0 = Ty0.getScalarType();
2548 MVT ElemTy1 = Ty1.getScalarType();
2549
2550 unsigned Width0 = ElemTy0.getSizeInBits();
2551 unsigned Width1 = ElemTy1.getSizeInBits();
2552 unsigned MaxWidth = std::max(Width0, Width1);
2553
2554 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2555 if (ScalarTy.isInteger())
2556 return MVT::getIntegerVT(Width);
2557 assert(ScalarTy.isFloatingPoint());
2558 return MVT::getFloatingPointVT(Width);
2559 };
2560
2561 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2562 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2563
2564 if (!Ty0.isVector()) {
2565 // Both types are scalars.
2566 return {WideETy0, WideETy1};
2567 }
2568
2569 // Vector types.
2570 unsigned NumElem = Ty0.getVectorNumElements();
2571 assert(NumElem == Ty1.getVectorNumElements());
2572
2573 return {MVT::getVectorVT(WideETy0, NumElem),
2574 MVT::getVectorVT(WideETy1, NumElem)};
2575}
2576
2577HexagonTargetLowering::TypePair
2578HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2579 // Compare the numbers of elements of two vector types, and widen the
2580 // narrower one to match the number of elements in the wider one.
2581 assert(Ty0.isVector() && Ty1.isVector());
2582
2583 unsigned Len0 = Ty0.getVectorNumElements();
2584 unsigned Len1 = Ty1.getVectorNumElements();
2585 if (Len0 == Len1)
2586 return {Ty0, Ty1};
2587
2588 unsigned MaxLen = std::max(Len0, Len1);
2589 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2590 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2591}
2592
2593MVT
2594HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2595 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2596 assert(LegalTy.isSimple());
2597 return LegalTy.getSimpleVT();
2598}
2599
2600MVT
2601HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2602 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2603 assert(Ty.getSizeInBits() <= HwWidth);
2604 if (Ty.getSizeInBits() == HwWidth)
2605 return Ty;
2606
2607 MVT ElemTy = Ty.getScalarType();
2608 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2609}
2610
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
      const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
  // Compute A+B, return {A+B, O}, where O = vector predicate indicating
  // whether an overflow has occurred.
  MVT ResTy = ty(A);
  assert(ResTy == ty(B));
  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());

  if (!Signed) {
    // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
    // save any instructions.
    // Unsigned overflow iff the sum wrapped, i.e. A+B < A.
    SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
    SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
    return {Add, Ovf};
  }

  // Signed overflow has happened, if:
  // (A, B have the same sign) and (A+B has a different sign from either)
  // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
  SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
  // ~A computed as A xor -1 (avoids DAG.getNOT; see LowerHvxCttz).
  SDValue NotA =
      DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
  SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
  SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
  // Sign bit set <=> the lane is negative <=> overflow occurred.
  SDValue MSB =
      DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
  return {Add, MSB};
}
2641
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  // bool ovf = (inp > tmp0);
  // uint rup = inp & (1 << (Amt+1));
  //
  // uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(ElemWidth);
  MVT IntTy = tyVector(ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
  // Use arithmetic shifts for signed inputs so the sign is preserved.
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  SDValue Inp = DAG.getBitcast(IntTy, Val);
  // LowBits = (1 << (Amt-1)) - 1: the rounding increment.
  SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);

  // Rup = the "round up" bit used for the ties-to-even adjustment.
  SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
  SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
  SDValue Zero = getZero(dl, IntTy, DAG);
  SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
  // Tmp0 = Inp + LowBits, tracking overflow in Ovf.
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);

  SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
  SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
  SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);

  // Select between the plain and the tie-adjusted result per the comment
  // block above, then do the final shift by 1.
  SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
  SDValue One = DAG.getConstant(1, dl, IntTy);
  SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
  SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2689
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Emit the high 32 bits of the signed product of v*i32 vectors A and B
  // (mulhs) using v60 HVX instructions, which have no full 32x32 multiply;
  // the product is assembled from 16x16 halfword multiplies.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // mulhs(A,B) =
  // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //    + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //      Hi(B) Lo(B)
  //      Hi(A) Lo(A)
  //     --------------
  //      Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //      Hi(B)*Lo(A)  |      + dropping the low 16 bits
  //      Hi(A)*Lo(B)  | T2
  //      Hi(B)*Hi(A)

  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
  SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
  SDValue T5 = LoHalf(P3, DAG);
  // Add: T3 (carry-corrected cross terms) + T5 (Hi(A)*Hi(B) term).
  SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
  return T6;
}
2746
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Emit the full 64-bit products of v*i32 vectors A and B on v60 HVX,
  // returning the low/high 32-bit halves as a merged pair {Lo, Hi}.
  // SignedA/SignedB select the signedness of each operand.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed, so only one mixed-signedness correction
    // is needed below.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);

  // Swap low/high halves in B
  // (0x02020202 is the vdelta control pattern for the halfword swap).
  SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
                        {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
                {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
  Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Signed*signed correction: subtract (B if A<0) + (A if B<0) from the
    // unsigned high half.
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
    SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2822
2823SDValue
2824HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
2825 SDValue B, bool SignedB,
2826 const SDLoc &dl,
2827 SelectionDAG &DAG) const {
2828 MVT VecTy = ty(A);
2829 MVT PairTy = typeJoin({VecTy, VecTy});
2830 assert(VecTy.getVectorElementType() == MVT::i32);
2831
2832 if (SignedA && !SignedB) {
2833 // Make A:unsigned, B:signed.
2834 std::swap(A, B);
2835 std::swap(SignedA, SignedB);
2836 }
2837
2838 // Do S*S first, then make corrections for U*S or U*U if needed.
2839 SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
2840 SDValue P1 =
2841 getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
2842 SDValue Lo = LoHalf(P1, DAG);
2843 SDValue Hi = HiHalf(P1, DAG);
2844
2845 if (!SignedB) {
2846 assert(!SignedA && "Signed A and unsigned B should have been inverted");
2847 SDValue Zero = getZero(dl, VecTy, DAG);
2848 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2849
2850 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2851 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2852 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
2853 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2854 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2855 // $A))>;
2856 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2857 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2858 SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
2859 SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
2860 Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
2861 } else if (!SignedA) {
2862 SDValue Zero = getZero(dl, VecTy, DAG);
2863 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2864
2865 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2866 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2867 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2868 // (HiHalf (Muls64O $A, $B)),
2869 // $B)>;
2870 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2871 Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
2872 }
2873
2874 return DAG.getMergeValues({Lo, Hi}, dl);
2875}
2876
2877SDValue
2878HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2879 const {
2880 // Rewrite conversion between integer and floating-point in such a way that
2881 // the integer type is extended/narrowed to match the bitwidth of the
2882 // floating-point type, combined with additional integer-integer extensions
2883 // or narrowings to match the original input/result types.
2884 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2885 //
2886 // The input/result types are not required to be legal, but if they are
2887 // legal, this function should not introduce illegal types.
2888
2889 unsigned Opc = Op.getOpcode();
2892
2893 SDValue Inp = Op.getOperand(0);
2894 MVT InpTy = ty(Inp);
2895 MVT ResTy = ty(Op);
2896
2897 if (InpTy == ResTy)
2898 return Op;
2899
2900 const SDLoc &dl(Op);
2902
2903 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2904 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2905 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2906 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2907 return Res;
2908}
2909
SDValue
HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
  // Expand FP_TO_SINT/FP_TO_UINT on HVX vectors by decoding the IEEE
  // encoding with integer vector arithmetic: extract sign/exponent/fraction,
  // shift the fraction into place, and clamp out-of-range values. The C
  // model in the comment below is the reference for the DAG built here.
  unsigned Opc = Op.getOpcode();

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0);
  MVT InpTy = ty(Op0);
  MVT ResTy = ty(Op);
  assert(InpTy.changeTypeToInteger() == ResTy);

  // int32_t conv_f32_to_i32(uint32_t inp) {
  //   // s | exp8 | frac23
  //
  //   int neg = (int32_t)inp < 0;
  //
  //   // "expm1" is the actual exponent minus 1: instead of "bias", subtract
  //   // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
  //   // produce a large positive "expm1", which will result in max u/int.
  //   // In all IEEE formats, bias is the largest positive number that can be
  //   // represented in bias-width bits (i.e. 011..1).
  //   int32_t expm1 = (inp << 1) - 0x80000000;
  //   expm1 >>= 24;
  //
  //   // Always insert the "implicit 1". Subnormal numbers will become 0
  //   // regardless.
  //   uint32_t frac = (inp << 8) | 0x80000000;
  //
  //   // "frac" is the fraction part represented as Q1.31. If it was
  //   // interpreted as uint32_t, it would be the fraction part multiplied
  //   // by 2^31.
  //
  //   // Calculate the amount of right shift, since shifting further to the
  //   // left would lose significant bits. Limit it to 32, because we want
  //   // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
  //   // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
  //   // left by 31). "rsh" can be negative.
  //   int32_t rsh = min(31 - (expm1 + 1), 32);
  //
  //   frac >>= rsh;   // rsh == 32 will produce 0
  //
  //   // Everything up to this point is the same for conversion to signed
  //   // unsigned integer.
  //
  //   if (neg)                 // Only for signed int
  //     frac = -frac;          //
  //   if (rsh <= 0 && neg)     //   bound = neg ? 0x80000000 : 0x7fffffff
  //     frac = 0x80000000;     //   frac = rsh <= 0 ? bound : frac
  //   if (rsh <= 0 && !neg)    //
  //     frac = 0x7fffffff;     //
  //
  //   if (neg)                 // Only for unsigned int
  //     frac = 0;              //
  //   if (rsh < 0 && !neg)     //   frac = rsh < 0 ? 0x7fffffff : frac;
  //     frac = 0x7fffffff;     //   frac = neg ? 0 : frac;
  //
  //   return frac;
  // }

  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());

  // Zero  = V6_vd0();
  // Neg   = V6_vgtw(Zero, Inp);
  // One   = V6_lvsplatw(1);
  // M80   = V6_lvsplatw(0x80000000);
  // Exp00 = V6_vaslwv(Inp, One);
  // Exp01 = V6_vsubw(Exp00, M80);
  // ExpM1 = V6_vasrw(Exp01, 24);
  // Frc00 = V6_vaslw(Inp, 8);
  // Frc01 = V6_vor(Frc00, M80);
  // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
  // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
  // Frc02 = V6_vlsrwv(Frc01, Rsh01);

  // if signed int:
  // Bnd   = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
  // Pos   = V6_vgtw(Rsh01, Zero);
  // Frc13 = V6_vsubw(Zero, Frc02);
  // Frc14 = V6_vmux(Neg, Frc13, Frc02);
  // Int   = V6_vmux(Pos, Frc14, Bnd);
  //
  // if unsigned int:
  // Rsn   = V6_vgtw(Zero, Rsh01)
  // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
  // Int   = V6_vmux(Neg, Zero, Frc23)

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;
  assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));

  // Work on the bit pattern of the input as an integer vector.
  SDValue Inp = DAG.getBitcast(ResTy, Op0);
  SDValue Zero = getZero(dl, ResTy, DAG);
  SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
  // M80 = 100..0 (sign-bit mask), M7F = 011..1 for this element width.
  SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
  SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
  SDValue One = DAG.getConstant(1, dl, ResTy);
  SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
  SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
  // Arithmetic shift brings the exponent field to the low bits ("expm1").
  SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
  SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});

  SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
  SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
  SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});

  SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
  SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
  SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
  SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
  SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});

  SDValue Int;

  if (Opc == ISD::FP_TO_SINT) {
    // Saturate to INT_MIN/INT_MAX depending on sign; negate when negative.
    SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
    SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
    SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
    SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
    Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
  } else {
    // FP_TO_UINT: negative inputs produce 0; see the model above.
    SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
    SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
    Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
  }

  return Int;
}
3038
SDValue
HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
  // Expand SINT_TO_FP/UINT_TO_FP on HVX vectors by constructing the IEEE
  // bit pattern with integer vector arithmetic: normalize with CTLZ, round
  // the fraction to nearest-even, then assemble sign | exponent | fraction.
  // The C model in the comment below is the reference for the DAG built.
  unsigned Opc = Op.getOpcode();

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0);
  MVT InpTy = ty(Op0);
  MVT ResTy = ty(Op);
  assert(ResTy.changeTypeToInteger() == InpTy);

  // uint32_t vnoc1_rnd(int32_t w) {
  //   int32_t iszero = w == 0;
  //   int32_t isneg = w < 0;
  //   uint32_t u = __builtin_HEXAGON_A2_abs(w);
  //
  //   uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
  //   uint32_t frac0 = (uint64_t)u << norm_left;
  //
  //   // Rounding:
  //   uint32_t frac1 = frac0 + ((1 << 8) - 1);
  //   uint32_t renorm = (frac0 > frac1);
  //   uint32_t rup = (int)(frac0 << 22) < 0;
  //
  //   uint32_t frac2 = frac0 >> 8;
  //   uint32_t frac3 = frac1 >> 8;
  //   uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
  //
  //   int32_t exp = 32 - norm_left + renorm + 127;
  //   exp <<= 23;
  //
  //   uint32_t sign = 0x80000000 * isneg;
  //   uint32_t f = sign | exp | frac;
  //   return iszero ? 0 : f;
  // }

  MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
  bool Signed = Opc == ISD::SINT_TO_FP;

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;

  // Normalize |input|: shift left so the leading 1 falls off the top,
  // leaving the fraction left-aligned.
  SDValue Zero = getZero(dl, InpTy, DAG);
  SDValue One = DAG.getConstant(1, dl, InpTy);
  SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
  SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
  SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
  SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
  SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});

  // Round the fraction to nearest-even; Ovf doubles as the renorm bit.
  auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
  if (Signed) {
    // Set the sign bit for negative inputs.
    SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
    SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
    SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
    Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
  }

  // exp = ElemWidth - norm_left + renorm + bias, shifted into position.
  SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
  SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
  SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
  SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
  SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
                             {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
  SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
  // Zero inputs map to the all-zero pattern (+0.0).
  SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
  SDValue Flt = DAG.getBitcast(ResTy, Flt1);

  return Flt;
}
3109
3110SDValue
3111HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3112 unsigned Opc = Op.getOpcode();
3113 unsigned TLOpc;
3114 switch (Opc) {
3115 case ISD::ANY_EXTEND:
3116 case ISD::SIGN_EXTEND:
3117 case ISD::ZERO_EXTEND:
3118 TLOpc = HexagonISD::TL_EXTEND;
3119 break;
3120 case ISD::TRUNCATE:
3122 break;
3123#ifndef NDEBUG
3124 Op.dump(&DAG);
3125#endif
3126 llvm_unreachable("Unexpected operator");
3127 }
3128
3129 const SDLoc &dl(Op);
3130 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
3131 DAG.getUNDEF(MVT::i128), // illegal type
3132 DAG.getConstant(Opc, dl, MVT::i32));
3133}
3134
3135SDValue
3136HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3137 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3138 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3139 unsigned Opc = Op.getConstantOperandVal(2);
3140 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3141}
3142
3143HexagonTargetLowering::VectorPair
3144HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
3145 assert(!Op.isMachineOpcode());
3146 SmallVector<SDValue, 2> OpsL, OpsH;
3147 const SDLoc &dl(Op);
3148
3149 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
3150 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
3151 SDValue TV = DAG.getValueType(Ty);
3152 return std::make_pair(TV, TV);
3153 };
3154
3155 for (SDValue A : Op.getNode()->ops()) {
3156 auto [Lo, Hi] =
3157 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
3158 // Special case for type operand.
3159 switch (Op.getOpcode()) {
3160 case ISD::SIGN_EXTEND_INREG:
3161 case HexagonISD::SSAT:
3162 case HexagonISD::USAT:
3163 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
3164 std::tie(Lo, Hi) = SplitVTNode(N);
3165 break;
3166 }
3167 OpsL.push_back(Lo);
3168 OpsH.push_back(Hi);
3169 }
3170
3171 MVT ResTy = ty(Op);
3172 MVT HalfTy = typeSplit(ResTy).first;
3173 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
3174 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
3175 return {L, H};
3176}
3177
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a memory operation on an HVX vector pair into two single-vector
  // memory operations: one at the original address and one at HwLen bytes
  // past it. Handles LOAD, STORE, MLOAD (masked load) and MSTORE (masked
  // store); any other memory opcode is a fatal error. Non-pair (or
  // non-simple) memory types are returned unchanged.
  auto *MemN = cast<MemSDNode>(Op.getNode());

  if (!MemN->getMemoryVT().isSimple())
    return Op;

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  // Address of the second (high) half: Base + one HW vector.
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  // Derive per-half memory operands from the original MMO. For masked ops
  // the accessed size is unknown (lanes may be disabled), so use
  // UnknownSize instead of HwLen.
  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    // Result 0: the concatenated halves; result 1: the combined chain.
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  // Masked load/store: split the mask (and pass-through/value) as well.
  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}
3262
3263SDValue
3264HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3265 const SDLoc &dl(Op);
3266 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3267 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3268 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3269 "Not widening loads of i1 yet");
3270
3271 SDValue Chain = LoadN->getChain();
3272 SDValue Base = LoadN->getBasePtr();
3273 SDValue Offset = DAG.getUNDEF(MVT::i32);
3274
3275 MVT ResTy = ty(Op);
3276 unsigned HwLen = Subtarget.getVectorLength();
3277 unsigned ResLen = ResTy.getStoreSize();
3278 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3279
3280 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3281 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3282 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3283
3284 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3285 MachineFunction &MF = DAG.getMachineFunction();
3286 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3287
3288 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3289 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3291 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3292 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3293}
3294
3295SDValue
3296HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3297 const SDLoc &dl(Op);
3298 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3299 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3300 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3301 "Not widening stores of i1 yet");
3302
3303 SDValue Chain = StoreN->getChain();
3304 SDValue Base = StoreN->getBasePtr();
3305 SDValue Offset = DAG.getUNDEF(MVT::i32);
3306
3307 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3308 MVT ValueTy = ty(Value);
3309 unsigned ValueLen = ValueTy.getVectorNumElements();
3310 unsigned HwLen = Subtarget.getVectorLength();
3311 assert(isPowerOf2_32(ValueLen));
3312
3313 for (unsigned Len = ValueLen; Len < HwLen; ) {
3314 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3315 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3316 }
3317 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3318
3319 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3320 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3321 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3322 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3323 MachineFunction &MF = DAG.getMachineFunction();
3324 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3325 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3326 MemOp, ISD::UNINDEXED, false, false);
3327}
3328
3329SDValue
3330HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3331 const SDLoc &dl(Op);
3332 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3333 MVT ElemTy = ty(Op0).getVectorElementType();
3334 unsigned HwLen = Subtarget.getVectorLength();
3335
3336 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3337 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3338 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3339 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3340 return SDValue();
3341
3342 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3343 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3344 EVT ResTy =
3345 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3346 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3347 {WideOp0, WideOp1, Op.getOperand(2)});
3348
3349 EVT RetTy = typeLegalize(ty(Op), DAG);
3350 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3351 {SetCC, getZero(dl, MVT::i32, DAG)});
3352}
3353
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  // Main dispatch for custom HVX lowering. Operations that involve vector
  // pairs are split into single-register halves first (when legal to do
  // so); everything else is routed to the per-opcode LowerHvx* handler.
  unsigned Opc = Op.getOpcode();
  bool IsPairOp = isHvxPairTy(ty(Op)) ||
                  llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
                    return isHvxPairTy(ty(V));
                  });

  if (IsPairOp) {
    switch (Opc) {
      default:
        break;
      case ISD::LOAD:
      case ISD::STORE:
      case ISD::MLOAD:
      case ISD::MSTORE:
        return SplitHvxMemOp(Op, DAG);
      case ISD::SINT_TO_FP:
      case ISD::UINT_TO_FP:
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT:
        // Only split same-width conversions; width-changing ones are
        // handled elsewhere.
        if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
          return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
        break;
      case ISD::ABS:
      case ISD::CTPOP:
      case ISD::CTLZ:
      case ISD::CTTZ:
      case ISD::MUL:
      case ISD::FADD:
      case ISD::FSUB:
      case ISD::FMUL:
      case ISD::FMINIMUMNUM:
      case ISD::FMAXIMUMNUM:
      case ISD::MULHS:
      case ISD::MULHU:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::SRA:
      case ISD::SHL:
      case ISD::SRL:
      case ISD::FSHL:
      case ISD::FSHR:
      case ISD::SMIN:
      case ISD::SMAX:
      case ISD::UMIN:
      case ISD::UMAX:
      case ISD::SETCC:
      case ISD::VSELECT:
      case ISD::SPLAT_VECTOR:
        // Elementwise ops: lower each half independently and rejoin.
        return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
      case ISD::SIGN_EXTEND:
      case ISD::ZERO_EXTEND:
        // In general, sign- and zero-extends can't be split and still
        // be legal. The only exception is extending bool vectors.
        if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
          return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
        break;
    }
  }

  switch (Opc) {
    default:
      break;
    case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
    case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
    case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
    case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
    case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
    case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
    case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
    case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
    case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
    case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                     return LowerHvxShift(Op, DAG);
    case ISD::FSHL:
    case ISD::FSHR:                    return LowerHvxFunnelShift(Op, DAG);
    case ISD::MULHS:
    case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI:               return LowerHvxMulLoHi(Op, DAG);
    case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
    case ISD::SETCC:
    case ISD::INTRINSIC_VOID:          return Op;
    case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
    case ISD::MLOAD:
    case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
    // Unaligned loads will be handled by the default lowering.
    case ISD::LOAD:                    return SDValue();
    case ISD::FP_EXTEND:               return LowerHvxFpExtend(Op, DAG);
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:              return LowerHvxFpToInt(Op, DAG);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:              return LowerHvxIntToFp(Op, DAG);

    // Special nodes:
    case HexagonISD::USMUL_LOHI:       return LowerHvxMulLoHi(Op, DAG);
  }
#ifndef NDEBUG
  Op.dumpr(&DAG);
#endif
  llvm_unreachable("Unhandled HVX operation");
}
3466
SDValue
HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
    const {
  // Rewrite the extension/truncation/saturation op into steps where each
  // step changes the type widths by a factor of 2.
  // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
  //
  // Some of the vector types in Op may not be legal.

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  case HexagonISD::SSAT:
  case HexagonISD::USAT:
  // NOTE(review): this listing appears to be missing the TL_EXTEND /
  // TL_TRUNCATE case labels here — confirm against the full source.
    break;
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::TRUNCATE:
    llvm_unreachable("ISD:: ops will be auto-folded");
    break;
#ifndef NDEBUG
  Op.dump(&DAG);
#endif
  llvm_unreachable("Unexpected operation");
  }

  SDValue Inp = Op.getOperand(0);
  MVT InpTy = ty(Inp);
  MVT ResTy = ty(Op);

  unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
  unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
  assert(InpWidth != ResWidth);

  // A single doubling/halving needs no intermediate steps.
  if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
    return Op;

  const SDLoc &dl(Op);
  unsigned NumElems = InpTy.getVectorNumElements();
  assert(NumElems == ResTy.getVectorNumElements());

  // Re-emit Op with the element width changed to NewWidth (element count
  // is preserved); the extra operands depend on the opcode.
  auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
    MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
    switch (Opc) {
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
      return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
    // NOTE(review): the case labels guarding the next return (apparently
    // the TL_EXTEND/TL_TRUNCATE forms) are elided in this view — confirm.
      return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
    default:
      llvm_unreachable("Unexpected opcode");
    }
  };

  // Walk the width towards ResWidth one doubling/halving at a time.
  SDValue S = Inp;
  if (InpWidth < ResWidth) {
    assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
    while (InpWidth * 2 <= ResWidth)
      S = repeatOp(InpWidth *= 2, S);
  } else {
    // InpWidth > ResWidth
    assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
    while (InpWidth / 2 >= ResWidth)
      S = repeatOp(InpWidth /= 2, S);
  }
  return S;
}
3537
// Legalize a resize-style node (extend/truncate/saturate wrapper) whose
// input or result type is not HVX-legal, by widening, splitting, or
// unwrapping it.
SDValue
HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
  SDValue Inp0 = Op.getOperand(0);
  MVT InpTy = ty(Inp0);
  MVT ResTy = ty(Op);
  unsigned InpWidth = InpTy.getSizeInBits();
  unsigned ResWidth = ResTy.getSizeInBits();
  unsigned Opc = Op.getOpcode();

  if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
    // First, make sure that the narrower type is widened to HVX.
    // This may cause the result to be wider than what the legalizer
    // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
    // desired type.
    auto [WInpTy, WResTy] =
        InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
                            : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
    SDValue W = appendUndef(Inp0, WInpTy, DAG);
    SDValue S;
    // NOTE(review): the extracted source is missing the opening `if` of
    // this if/else — presumably a check for opcodes that carry two extra
    // operands; verify against the original file.
      S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
                      Op.getOperand(2));
    } else {
      S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
    }
    // Break the widened resize into 2x steps, then trim to the type the
    // legalizer expects.
    SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
    return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
  } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
    // Split on the wider of the two types.
    return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
  } else {
    assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
    return RemoveTLWrapper(Op, DAG);
  }
  llvm_unreachable("Unexpected situation");
}
3573
// Operation-legalization hook for HVX nodes: pushes replacement values
// onto Results for nodes that need widening, splitting, or TL-wrapping.
void
HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
    // NOTE(review): the extraction dropped the continuation of this
    // parameter list (the body uses Results and DAG, so the remaining
    // parameters are presumably SmallVectorImpl<SDValue> &Results and
    // SelectionDAG &DAG) — verify against the original file.
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);
  SDValue Inp0; // Optional first argument.
  if (N->getNumOperands() > 0)
    Inp0 = Op.getOperand(0);

  switch (Opc) {
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::TRUNCATE:
    // Wrap element-type resizes in a target-specific (TL) node so later
    // stages can legalize them.
    if (Subtarget.isHVXElementType(ty(Op)) &&
        Subtarget.isHVXElementType(ty(Inp0))) {
      Results.push_back(CreateTLWrapper(Op, DAG));
    }
    break;
  case ISD::SETCC:
    // Widen the compare when the operand type should be widened to HVX.
    if (shouldWidenToHvx(ty(Inp0), DAG)) {
      if (SDValue T = WidenHvxSetCC(Op, DAG))
        Results.push_back(T);
    }
    break;
  case ISD::STORE: {
    // Widen stores whose stored-value type should be widened to HVX.
    if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
      SDValue Store = WidenHvxStore(Op, DAG);
      Results.push_back(Store);
    }
    break;
  }
  case ISD::MLOAD:
    // Split masked loads of HVX vector pairs into two single loads.
    if (isHvxPairTy(ty(Op))) {
      SDValue S = SplitHvxMemOp(Op, DAG);
      // NOTE(review): a line appears to be missing here in the extracted
      // source (likely an assert on the opcode of S) — verify against the
      // original file.
      Results.push_back(S.getOperand(0));
      Results.push_back(S.getOperand(1));
    }
    break;
  case ISD::MSTORE:
    if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
      SDValue S = SplitHvxMemOp(Op, DAG);
      Results.push_back(S);
    }
    break;
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // Make the int and fp vector types the same size before converting.
    if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
      SDValue T = EqualizeFpIntConversion(Op, DAG);
      Results.push_back(T);
    }
    break;
  case HexagonISD::SSAT:
  case HexagonISD::USAT:
    // NOTE(review): case labels appear to be missing here in the extracted
    // source — verify against the original file.
    Results.push_back(LegalizeHvxResize(Op, DAG));
    break;
  default:
    break;
  }
}
3639
// Type-legalization hook for HVX nodes with illegal result types: pushes
// the legalized replacement values onto Results.
void
HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
    // NOTE(review): the extraction dropped the continuation of this
    // parameter list (the body uses Results and DAG, so the remaining
    // parameters are presumably SmallVectorImpl<SDValue> &Results and
    // SelectionDAG &DAG) — verify against the original file.
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);
  SDValue Inp0; // Optional first argument.
  if (N->getNumOperands() > 0)
    Inp0 = Op.getOperand(0);

  switch (Opc) {
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::TRUNCATE:
    // Wrap element-type resizes in a target-specific (TL) node so later
    // stages can legalize them.
    if (Subtarget.isHVXElementType(ty(Op)) &&
        Subtarget.isHVXElementType(ty(Inp0))) {
      Results.push_back(CreateTLWrapper(Op, DAG));
    }
    break;
  case ISD::SETCC:
    // Here the widening test is on the result type (compare the operand
    // based test in LowerHvxOperationWrapper).
    if (shouldWidenToHvx(ty(Op), DAG)) {
      if (SDValue T = WidenHvxSetCC(Op, DAG))
        Results.push_back(T);
    }
    break;
  case ISD::LOAD: {
    if (shouldWidenToHvx(ty(Op), DAG)) {
      SDValue Load = WidenHvxLoad(Op, DAG);
      assert(Load->getOpcode() == ISD::MERGE_VALUES);
      // Push the value and the chain separately.
      Results.push_back(Load.getOperand(0));
      Results.push_back(Load.getOperand(1));
    }
    break;
  }
  case ISD::BITCAST:
    // Bitcasts from HVX predicate (bool vector) types need custom lowering.
    if (isHvxBoolTy(ty(Inp0))) {
      SDValue C = LowerHvxBitcast(Op, DAG);
      Results.push_back(C);
    }
    break;
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // Make the int and fp vector types the same size before converting.
    if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
      SDValue T = EqualizeFpIntConversion(Op, DAG);
      Results.push_back(T);
    }
    break;
  case HexagonISD::SSAT:
  case HexagonISD::USAT:
    // NOTE(review): case labels appear to be missing here in the extracted
    // source — verify against the original file.
    Results.push_back(LegalizeHvxResize(Op, DAG));
    break;
  default:
    break;
  }
}
3697
3698SDValue
3699HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3700 DAGCombinerInfo &DCI) const {
3701 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3702 // to extract-subvector (shuffle V, pick even, pick odd)
3703
3704 assert(Op.getOpcode() == ISD::TRUNCATE);
3705 SelectionDAG &DAG = DCI.DAG;
3706 const SDLoc &dl(Op);
3707
3708 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3709 return SDValue();
3710 SDValue Cast = Op.getOperand(0);
3711 SDValue Src = Cast.getOperand(0);
3712
3713 EVT TruncTy = Op.getValueType();
3714 EVT CastTy = Cast.getValueType();
3715 EVT SrcTy = Src.getValueType();
3716 if (SrcTy.isSimple())
3717 return SDValue();
3718 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3719 return SDValue();
3720 unsigned SrcLen = SrcTy.getVectorNumElements();
3721 unsigned CastLen = CastTy.getVectorNumElements();
3722 if (2 * CastLen != SrcLen)
3723 return SDValue();
3724
3725 SmallVector<int, 128> Mask(SrcLen);
3726 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3727 Mask[i] = 2 * i;
3728 Mask[i + CastLen] = 2 * i + 1;
3729 }
3730 SDValue Deal =
3731 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3732 return opSplit(Deal, dl, DAG).first;
3733}
3734
3735SDValue
3736HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3737 SDValue Op, DAGCombinerInfo &DCI) const {
3738 // Fold
3739 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3740 // into
3741 // shuffle (concat x, y), undef, m3
3742 if (Op.getNumOperands() != 2)
3743 return SDValue();
3744
3745 SelectionDAG &DAG = DCI.DAG;
3746 const SDLoc &dl(Op);
3747 SDValue V0 = Op.getOperand(0);
3748 SDValue V1 = Op.getOperand(1);
3749
3750 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3751 return SDValue();
3752 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3753 return SDValue();
3754
3755 SetVector<SDValue> Order;
3756 Order.insert(V0.getOperand(0));
3757 Order.insert(V0.getOperand(1));
3758 Order.insert(V1.getOperand(0));
3759 Order.insert(V1.getOperand(1));
3760
3761 if (Order.size() > 2)
3762 return SDValue();
3763
3764 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3765 // result must be the same.
3766 EVT InpTy = V0.getValueType();
3767 assert(InpTy.isVector());
3768 unsigned InpLen = InpTy.getVectorNumElements();
3769
3770 SmallVector<int, 128> LongMask;
3771 auto AppendToMask = [&](SDValue Shuffle) {
3772 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3773 ArrayRef<int> Mask = SV->getMask();
3774 SDValue X = Shuffle.getOperand(0);
3775 SDValue Y = Shuffle.getOperand(1);
3776 for (int M : Mask) {
3777 if (M == -1) {
3778 LongMask.push_back(M);
3779 continue;
3780 }
3781 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3782 if (static_cast<unsigned>(M) >= InpLen)
3783 M -= InpLen;
3784
3785 int OutOffset = Order[0] == Src ? 0 : InpLen;
3786 LongMask.push_back(M + OutOffset);
3787 }
3788 };
3789
3790 AppendToMask(V0);
3791 AppendToMask(V1);
3792
3793 SDValue C0 = Order.front();
3794 SDValue C1 = Order.back(); // Can be same as front
3795 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3796
3797 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3798 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3799}
3800
// Target DAG combine entry point for HVX nodes. Returns a simplified
// replacement value, or an empty SDValue if no combine applies.
SDValue
HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
    const {
  const SDLoc &dl(N);
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op(N, 0);
  unsigned Opc = Op.getOpcode();

  // NOTE(review): the extracted source is missing a declaration here —
  // the cases below use an `Ops` array of the node's operands (presumably
  // something like SmallVector<SDValue, 4> Ops(N->ops())); verify against
  // the original file.

  // These two combines run even before operation legalization.
  if (Opc == ISD::TRUNCATE)
    return combineTruncateBeforeLegal(Op, DCI);
  if (Opc == ISD::CONCAT_VECTORS)
    return combineConcatVectorsBeforeLegal(Op, DCI);

  // Everything below runs only after operation legalization.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  switch (Opc) {
  case ISD::VSELECT: {
    // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
    SDValue Cond = Ops[0];
    if (Cond->getOpcode() == ISD::XOR) {
      SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
      if (C1->getOpcode() == HexagonISD::QTRUE)
        return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
    }
    break;
  }
  case HexagonISD::V2Q:
    // V2Q of a constant splat folds directly to QFALSE/QTRUE.
    if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
      if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
        return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
                           : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
    }
    break;
  case HexagonISD::Q2V:
    // Q2V of a known predicate folds to an all-ones/all-zeros splat.
    if (Ops[0].getOpcode() == HexagonISD::QTRUE)
      return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
                         DAG.getAllOnesConstant(dl, MVT::i32));
    if (Ops[0].getOpcode() == HexagonISD::QFALSE)
      return getZero(dl, ty(Op), DAG);
    break;
  // NOTE(review): a case label is missing here in the extracted source
  // (inserting an undef value is a no-op, so this is likely an insert-type
  // node) — verify against the original file.
    if (isUndef(Ops[1]))
      return Ops[0];
    break;
  case HexagonISD::VROR: {
    // Collapse nested rotates: (vror (vror v, a), b) -> (vror v, a+b).
    if (Ops[0].getOpcode() == HexagonISD::VROR) {
      SDValue Vec = Ops[0].getOperand(0);
      SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
      SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
      return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
    }
    break;
  }
  }

  return SDValue();
}
3861
// Return true if Ty is not itself an HVX type, but type legalization would
// split it into legal HVX vectors.
bool
HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
  if (Subtarget.isHVXVectorType(Ty, true))
    return false;
  auto Action = getPreferredHvxVectorAction(Ty);
  // NOTE(review): the extracted source is missing the guard line here —
  // presumably a test that Action is the split-vector legalization action;
  // verify against the original file.
    return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
  return false;
}
3871
// Return true if Ty is not itself an HVX type, but type legalization would
// widen it into a legal HVX vector.
bool
HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
  if (Subtarget.isHVXVectorType(Ty, true))
    return false;
  auto Action = getPreferredHvxVectorAction(Ty);
  // NOTE(review): the extracted source is missing the guard line here —
  // presumably a test that Action is the widen-vector legalization action;
  // verify against the original file.
    return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
  return false;
}
3881
3882bool
3883HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3884 if (!Subtarget.useHVXOps())
3885 return false;
3886 // If the type of any result, or any operand type are HVX vector types,
3887 // this is an HVX operation.
3888 auto IsHvxTy = [this](EVT Ty) {
3889 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3890 };
3891 auto IsHvxOp = [this](SDValue Op) {
3892 return Op.getValueType().isSimple() &&
3893 Subtarget.isHVXVectorType(ty(Op), true);
3894 };
3895 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3896 return true;
3897
3898 // Check if this could be an HVX operation after type widening.
3899 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3900 if (!Op.getValueType().isSimple())
3901 return false;
3902 MVT ValTy = ty(Op);
3903 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3904 };
3905
3906 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3907 if (IsWidenedToHvx(SDValue(N, i)))
3908 return true;
3909 }
3910 return llvm::any_of(N->ops(), IsWidenedToHvx);
3911}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define H(x, y, z)
Definition MD5.cpp:57
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:187
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:131
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:137
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.