LLVM 23.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
// Threshold (in bytes) controlling when short vectors are widened to full
// HVX vectors; consulted in getPreferredHvxVectorAction below.
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
    cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));

// Off by default (cl::init(false)); gates an alternate FP conversion
// lowering path.
static cl::opt<bool>
    EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false),
                        cl::desc("Enable FP fast conversion routine."));

// Legal single-register (V) and register-pair (W) HVX vector types for the
// 64-byte and 128-byte vector-length modes; selected in
// initializeHVXLowering based on Subtarget.useHVX64BOps().
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };

// Cap on the combined concat/reduction expansion factor for partial MLA
// reductions (see the ConcatFactor/ReductionFactor loops in
// initializeHVXLowering).
static const unsigned MaxExpandMLA = 8;
44
45static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
46 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
47 MVT ElemTy = Ty.getScalarType();
48 switch (ElemTy.SimpleTy) {
49 case MVT::f16:
50 return std::make_tuple(5, 15, 10);
51 case MVT::f32:
52 return std::make_tuple(8, 127, 23);
53 case MVT::f64:
54 return std::make_tuple(11, 1023, 52);
55 default:
56 break;
57 }
58 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
59}
60
// Set up HVX lowering: register classes for the selected HVX mode (64-byte
// or 128-byte vectors, plus FP/bf16 classes where the subtarget supports
// them), followed by per-type operation actions, condition-code expansions,
// boolean-vector handling, store widening for short vectors, and partial
// MLA reduction actions.
void
HexagonTargetLowering::initializeHVXLowering() {
  if (Subtarget.useHVX64BOps()) {
    // 64-byte mode: single vectors in HvxVR, pairs in HvxWR.
    addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
    // These "short" boolean vector types should be legal because
    // they will appear as results of vector compares. If they were
    // not legal, type legalization would try to make them legal
    // and that would require using operations that do not use or
    // produce such types. That, in turn, would imply using custom
    // nodes, which would be unoptimizable by the DAG combiner.
    // The idea is to rely on target-independent operations as much
    // as possible.
    addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
  } else if (Subtarget.useHVX128BOps()) {
    // 128-byte mode: same structure, with double the element counts.
    addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
    // FP vector register classes require both v68 and HVX FP support.
    if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
      addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
      addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
    }
    // bf16 vectors are only available from HVX v81 onwards.
    if (Subtarget.useHVXV81Ops()) {
      addRegisterClass(MVT::v64bf16, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v128bf16, &Hexagon::HvxWRRegClass);
    }
  }

  // Set up operation actions.

  bool Use64b = Subtarget.useHVX64BOps();
  ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
  ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
  MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
  MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
  MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;

  // Helper: mark FromTy as promoted to ToTy for opcode Opc.
  auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
    AddPromotedToType(Opc, FromTy, ToTy);
  };

  // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
  // Note: v16i1 -> i16 is handled in type legalization instead of op
  // legalization.

  if (Subtarget.useHVX128BOps()) {
    setOperationAction(ISD::LOAD, MVT::v32i1, Custom);
    setOperationAction(ISD::LOAD, MVT::v64i1, Custom);
    setOperationAction(ISD::STORE, MVT::v128i1, Custom);
    setOperationAction(ISD::LOAD, MVT::v128i1, Custom);
  }
  if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
      Subtarget.useHVXFloatingPoint()) {

    static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
    static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };

    for (MVT T : FloatV) {

      // Custom-lower BUILD_VECTOR. The standard (target-independent)
      // handling of it would convert it to a load, which is not always
      // the optimal choice.
    }


    // BUILD_VECTOR with f16 operands cannot be promoted without
    // promoting the result, so lower the node to vsplat or constant pool

    // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
    // generated.
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);

    if (Subtarget.useHVXV81Ops()) {
      // bf16 arithmetic/compares are promoted to f32 vectors.
      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
      setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);

      setOperationAction(ISD::LOAD, MVT::v128bf16, Custom);
      setOperationAction(ISD::STORE, MVT::v128bf16, Custom);

      setOperationAction(ISD::MLOAD, MVT::v64bf16, Custom);
      setOperationAction(ISD::MSTORE, MVT::v64bf16, Custom);

      setOperationAction(ISD::MLOAD, MVT::v128bf16, Custom);
      setOperationAction(ISD::MSTORE, MVT::v128bf16, Custom);

    }

    for (MVT P : FloatW) {

      // Custom-lower BUILD_VECTOR. The standard (target-independent)
      // handling of it would convert it to a load, which is not always
      // the optimal choice.
      // Make concat-vectors custom to handle concats of more than 2 vectors.

    }

    if (Subtarget.useHVXQFloatOps()) {
    } else if (Subtarget.useHVXIEEEFPOps()) {
    }
  }

  // Actions for legal single-register vector types.
  for (MVT T : LegalV) {

    if (T != ByteV) {
    }

    if (T.getScalarType() != MVT::i32) {
    }

    if (T.getScalarType() != MVT::i32) {
    }

    // Make concat-vectors custom to handle concats of more than 2 vectors.
    if (T != ByteV) {
      // HVX only has shifts of words and halfwords.

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
    }

    if (Subtarget.useHVXFloatingPoint()) {
      // Same action for both QFloat and IEEE.
    }

  }

  // Actions for legal register-pair vector types.
  for (MVT T : LegalW) {
    // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
    // independent) handling of it would convert it to a load, which is
    // not always the optimal choice.
    // Make concat-vectors custom to handle concats of more than 2 vectors.

    // Custom-lower these operations for pairs. Expand them into a concat
    // of the corresponding operations on individual vectors.


    if (T != ByteW) {

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
    }

    if (T.getScalarType() != MVT::i32) {
    }

    if (Subtarget.useHVXFloatingPoint()) {
      // Same action for both QFloat and IEEE.
    }
  }

  // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
  setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
  setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI

  // Expand all FP condition codes that have no native HVX compare; only
  // the remaining (EQ/GT-style ordered) codes are kept.
  setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);

  setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);

  // Boolean vectors.

  for (MVT T : LegalW) {
    // Boolean types for vector pairs will overlap with the boolean
    // types for single vectors, e.g.
    //   v64i8  -> v64i1 (single)
    //   v64i16 -> v64i1 (pair)
    // Set these actions first, and allow the single actions to overwrite
    // any duplicates.
    MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    // Masked load/store takes a mask that may need splitting.
  }

  for (MVT T : LegalV) {
    MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
  }

  if (Use64b) {
    for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
  } else {
    for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
  }

  // Handle store widening for short vectors.
  unsigned HwLen = Subtarget.getVectorLength();
  for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
    if (ElemTy == MVT::i1)
      continue;
    int ElemWidth = ElemTy.getFixedSizeInBits();
    int MaxElems = (8*HwLen) / ElemWidth;
    // Walk power-of-two element counts below the full vector size.
    for (int N = 2; N < MaxElems; N *= 2) {
      MVT VecTy = MVT::getVectorVT(ElemTy, N);
      auto Action = getPreferredVectorAction(VecTy);
      if (Subtarget.useHVXFloatingPoint()) {
      }

      MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
      if (!isTypeLegal(BoolTy))
      }
    }
  }

  // Include cases which are not handled earlier



  // Partial MLA reductions.
  {
    static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,

    // Build an HVX vector type of ScalarT sized to Factor vector registers.
    auto HvxType = [=](MVT ScalarT, unsigned Factor = 1) {
      return MVT::getVectorVT(ScalarT, Subtarget.getVectorLength() * Factor *
                                           8 / ScalarT.getSizeInBits());
    };

    // Tuple of (Acc element type, input element type, vector pair).
    // The assumption is both the input and reduction result are of the same
    // size so the reduction ratio is the same as the ratio of element type
    // sizes. This may not hold for all available instructions.
    typedef std::tuple<MVT, MVT, bool> ReductionSignature;

    static const std::vector<ReductionSignature> NativeReductions = {
        {MVT::i32, MVT::i8, false},
    };

    for (const auto &R : NativeReductions) {

      MVT AccType = std::get<0>(R);
      MVT InputType = std::get<1>(R);
      unsigned Factor = std::get<2>(R) ? 2 : 1;

      // The native size is legal.
      setPartialReduceMLAAction(MLAOps, HvxType(AccType), HvxType(InputType),
                                Legal);

      // Allow custom partial MLA reductions on larger vectors than legally
      // supported. These reduction must be declared as Custom (or Legal)
      // for foldPartialReduceMLAMulOp() to fold the multiply by one pattern
      // inserted when the partial reduction intrinsic is converted to
      // PARTIAL_REDUCE_U/S/SUMLA. Otherwise, the Split action will apply
      // on the original pattern, including the extensions and multiplies,
      // which will make it impossible to match.
      // There are two independent ways to extend the
      // input size: 1. to concatenate the result - output vector is
      // proportionally extended, 2) to reduce the result - the output vector
      // size stays the same. We limit allowed combinations so that the total
      // number of generated reduction instructions is limited by a constant
      // number. This limit is arbitrary and can be revised. On one hand, it is
      // convenient to have more choices; on the other hand, there is a
      // diminishing benefit of very long sequences, which should probably be
      // written as loops instead.
      for (unsigned ConcatFactor = 1; ConcatFactor <= MaxExpandMLA;
           ConcatFactor <<= 1)
        for (unsigned ReductionFactor = 1; ReductionFactor <= MaxExpandMLA;
             ReductionFactor <<= 1)
          if (ConcatFactor * ReductionFactor != 1 &&
              ConcatFactor * ReductionFactor <= MaxExpandMLA)
                MLAOps, HvxType(AccType, Factor * ConcatFactor),
                HvxType(InputType, Factor * ConcatFactor * ReductionFactor),
                Custom);
    }
  }
}
573
// Decide the preferred legalization action for VecTy under HVX.
// Returns ~0u to defer to the default target-independent choice.
unsigned
HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
  // Early exit for invalid input types
  if (!VecTy.isVector())
    return ~0u;

  MVT ElemTy = VecTy.getVectorElementType();
  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();

  // Split vectors of i1 that exceed byte vector length.
  if (ElemTy == MVT::i1 && VecLen > HwLen)

  ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
  // For shorter vectors of i1, widen them if any of the corresponding
  // vectors of integers needs to be widened.
  if (ElemTy == MVT::i1) {
    for (MVT T : Tys) {
      assert(T != MVT::i1);
      // Recurse with the i1 element type replaced by each integer element
      // type; first non-default answer wins.
      auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
      if (A != ~0u)
        return A;
    }
    return ~0u;
  }

  // If the size of VecTy is at least half of the vector length,
  // widen the vector. Note: the threshold was not selected in
  // any scientific way.
  if (llvm::is_contained(Tys, ElemTy)) {
    unsigned VecWidth = VecTy.getSizeInBits();
    unsigned HwWidth = 8*HwLen;
    if (VecWidth > 2*HwWidth)

    // The command-line threshold (HvxWidenThreshold, in bytes) overrides
    // the built-in half-vector heuristic when explicitly given.
    bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
    if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
    if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
  }

  // Defer to default.
  return ~0u;
}
620
// Map a custom HexagonISD opcode to the operation action it should use.
// Only the multiply-high family is special-cased here.
unsigned
HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  case HexagonISD::SMUL_LOHI:
  case HexagonISD::UMUL_LOHI:
  case HexagonISD::USMUL_LOHI:
  }
}
632
// Build an INTRINSIC_WO_CHAIN node for intrinsic IntId with result type
// ResTy: the intrinsic id becomes operand 0 (as an i32 constant), followed
// by the caller-supplied operands.
HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
                              const SDLoc &dl, SelectionDAG &DAG) const {
  IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
  append_range(IntOps, Ops);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
}
641
642MVT
643HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
644 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
645
646 MVT ElemTy = Tys.first.getVectorElementType();
647 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
648 Tys.second.getVectorNumElements());
649}
650
651HexagonTargetLowering::TypePair
652HexagonTargetLowering::typeSplit(MVT VecTy) const {
653 assert(VecTy.isVector());
654 unsigned NumElem = VecTy.getVectorNumElements();
655 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
656 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
657 return { HalfTy, HalfTy };
658}
659
660MVT
661HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
662 MVT ElemTy = VecTy.getVectorElementType();
663 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
664 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
665}
666
667MVT
668HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
669 MVT ElemTy = VecTy.getVectorElementType();
670 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
671 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
672}
673
// Bitcast Vec to a vector with element type ElemTy (same total width);
// returns Vec unchanged if the element type already matches.
HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
                                  SelectionDAG &DAG) const {
  if (ty(Vec).getVectorElementType() == ElemTy)
    return Vec;
  MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
  return DAG.getBitcast(CastTy, Vec);
}
682
// Concatenate the two vectors of a VectorPair into a single value of the
// joined type (see typeJoin).
HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
                              SelectionDAG &DAG) const {
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
                     Ops.first, Ops.second);
}
689
690HexagonTargetLowering::VectorPair
691HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
692 SelectionDAG &DAG) const {
693 TypePair Tys = typeSplit(ty(Vec));
694 if (Vec.getOpcode() == HexagonISD::QCAT)
695 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
696 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
697}
698
699bool
700HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
701 return Subtarget.isHVXVectorType(Ty) &&
702 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
703}
704
705bool
706HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
707 return Subtarget.isHVXVectorType(Ty) &&
708 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
709}
710
711bool
712HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
713 return Subtarget.isHVXVectorType(Ty, true) &&
714 Ty.getVectorElementType() == MVT::i1;
715}
716
717bool HexagonTargetLowering::allowsHvxMemoryAccess(
718 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
719 // Bool vectors are excluded by default, but make it explicit to
720 // emphasize that bool vectors cannot be loaded or stored.
721 // Also, disallow double vector stores (to prevent unnecessary
722 // store widening in DAG combiner).
723 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
724 return false;
725 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
726 return false;
727 if (Fast)
728 *Fast = 1;
729 return true;
730}
731
732bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
733 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
734 if (!Subtarget.isHVXVectorType(VecTy))
735 return false;
736 // XXX Should this be false? vmemu are a bit slower than vmem.
737 if (Fast)
738 *Fast = 1;
739 return true;
740}
741
// Expand the PS_vsplat* pseudo-instructions after instruction selection.
// On HVX v62+ the native byte/halfword splats (V6_lvsplatb/h) are used;
// on older HVX the value is first replicated into a 32-bit scalar and then
// splatted with V6_lvsplatw.
void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
    MachineInstr &MI, SDNode *Node) const {
  unsigned Opc = MI.getOpcode();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock &MB = *MI.getParent();
  MachineFunction &MF = *MB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  auto At = MI.getIterator();

  switch (Opc) {
  case Hexagon::PS_vsplatib:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplatb SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .add(MI.getOperand(1));
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
          .addReg(SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      assert(InpOp.isImm());
      // Replicate the low byte of the immediate into all four byte lanes.
      uint32_t V = InpOp.getImm() & 0xFF;
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .addImm(V << 24 | V << 16 | V << 8 | V);
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    // Replace the pseudo: the expansion was inserted before MI.
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatrb:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplatb Inp
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
          .add(MI.getOperand(1));
    } else {
      // SplatV = S2_vsplatrb Inp
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
          .addReg(InpOp.getReg(), {}, InpOp.getSubReg());
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
          .addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatih:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplath SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .add(MI.getOperand(1));
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
          .addReg(SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      assert(InpOp.isImm());
      // Replicate the low halfword of the immediate into both lanes.
      uint32_t V = InpOp.getImm() & 0xFFFF;
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .addImm(V << 16 | V);
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatrh:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplath Inp
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
          .add(MI.getOperand(1));
    } else {
      // SplatV = A2_combine_ll Inp, Inp
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
          .addReg(InpOp.getReg(), {}, InpOp.getSubReg())
          .addReg(InpOp.getReg(), {}, InpOp.getSubReg());
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatiw:
  case Hexagon::PS_vsplatrw:
    if (Opc == Hexagon::PS_vsplatiw) {
      // SplatV = A2_tfrsi #imm
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .add(MI.getOperand(1));
      // Rewrite the immediate operand in place to the new register.
      MI.getOperand(1).ChangeToRegister(SplatV, false);
    }
    // OutV = V6_lvsplatw SplatV/Inp
    // Word splats reuse MI itself instead of erasing it.
    MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
    break;
  }
}
851
// Convert an element index into a byte index: scale ElemIdx by the element
// size in bytes (as a left shift by log2 of that size). Indices into byte
// vectors are returned unchanged.
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
                                          SelectionDAG &DAG) const {
  if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
    ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);

  unsigned ElemWidth = ElemTy.getSizeInBits();
  if (ElemWidth == 8)
    return ElemIdx;

  unsigned L = Log2_32(ElemWidth/8);
  const SDLoc &dl(ElemIdx);
  return DAG.getNode(ISD::SHL, dl, MVT::i32,
                     {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
}
867
// Reduce an element index to its sub-index within a 32-bit word: for
// elements narrower than 32 bits, mask the index with (elements-per-word -
// 1). 32-bit elements need no adjustment.
HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
                                        SelectionDAG &DAG) const {
  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  if (ElemWidth == 32)
    return Idx;

  if (ty(Idx) != MVT::i32)
    Idx = DAG.getBitcast(MVT::i32, Idx);
  const SDLoc &dl(Idx);
  SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
  SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
  return SubIdx;
}
883
// Lower a shuffle of wide-element vectors as an equivalent shuffle of
// bytes: every mask entry M expands to ElemSize consecutive byte indices
// (or ElemSize copies of -1 for an undef lane), and the operands are
// bitcast to byte vectors.
HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
                                      SDValue Op1, ArrayRef<int> Mask,
                                      SelectionDAG &DAG) const {
  MVT OpTy = ty(Op0);
  assert(OpTy == ty(Op1));

  MVT ElemTy = OpTy.getVectorElementType();
  // Byte vectors can be shuffled directly.
  if (ElemTy == MVT::i8)
    return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
  assert(ElemTy.getSizeInBits() >= 8);

  MVT ResTy = tyVector(OpTy, MVT::i8);
  unsigned ElemSize = ElemTy.getSizeInBits() / 8;

  SmallVector<int,128> ByteMask;
  for (int M : Mask) {
    if (M < 0) {
      // Undef lane: ElemSize undef byte lanes.
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(-1);
    } else {
      // Element M covers bytes [M*ElemSize, M*ElemSize+ElemSize).
      int NewM = M*ElemSize;
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(NewM+I);
    }
  }
  assert(ResTy.getVectorNumElements() == ByteMask.size());
  return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
                              opCastElem(Op1, MVT::i8, DAG), ByteMask);
}
914
// Materialize a full HVX vector register from the scalar Values of a
// BUILD_VECTOR. Strategy, in order: recognize splats (undef/zero/general),
// then all-constant vectors (via constant pool load), then vectors built
// from extracts of a single source vector (via shuffle), and finally a
// generic two-half insert/rotate sequence OR-ed together.
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);

  // First, normalize the inputs into a list of 32-bit words covering the
  // whole vector.
  if (VecTy.getVectorElementType() != MVT::i32 &&
      !(Subtarget.useHVXFloatingPoint() &&
        VecTy.getVectorElementType() == MVT::f32)) {
    // Sub-word elements: pack groups of 2 or 4 elements into 32-bit words.
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
      Words.push_back(DAG.getBitcast(MVT::i32, W));
    }
  } else {
    // 32-bit elements: just bitcast each to i32.
    for (SDValue V : Values)
      Words.push_back(DAG.getBitcast(MVT::i32, V));
  }
  // Detect a splat: all non-undef values equal. Sets SplatV to the common
  // value (or Values[0] if everything is undef) and returns true.
  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
    unsigned NumValues = Values.size();
    assert(NumValues > 0);
    bool IsUndef = true;
    for (unsigned i = 0; i != NumValues; ++i) {
      if (Values[i].isUndef())
        continue;
      IsUndef = false;
      if (!SplatV.getNode())
        SplatV = Values[i];
      else if (SplatV != Values[i])
        return false;
    }
    if (IsUndef)
      SplatV = Values[0];
    return true;
  };

  unsigned NumWords = Words.size();
  SDValue SplatV;
  bool IsSplat = isSplat(Words, SplatV);
  if (IsSplat && isUndef(SplatV))
    return DAG.getUNDEF(VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    if (isNullConstant(SplatV))
      return getZero(dl, VecTy, DAG);
    // General splat: splat the i32 word, then bitcast to the desired type.
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
    return DAG.getBitcast(VecTy, S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a vsplat.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    // All-constant, non-splat: load the value from the constant pool.
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(Tmp);
    Align Alignment(HwLen);
        DAG.getConstantPool(CV, getPointerTy(DAG.getDataLayout()), Alignment),
        DAG);
    return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(V)) {
        SrcIdx.push_back(-1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      // Only constant extract indices can be turned into a shuffle mask.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(I))
          Mask.push_back(I);
      }

      SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
                                       DAG.getUNDEF(ExtTy), Mask);
      // If the source was a pair, the result we want is its low half.
      return ExtLen == VecLen ? S : LoHalf(S, DAG);
    }
  }

  // Find most common element to initialize vector with. This is to avoid
  // unnecessary vinsert/valign for cases where the same value is present
  // many times. Creates a histogram of the vector's elements to find the
  // most common element n.
  assert(4*Words.size() == Subtarget.getVectorLength());
  int VecHist[32];
  int n = 0;
  for (unsigned i = 0; i != NumWords; ++i) {
    VecHist[i] = 0;
    if (Words[i].isUndef())
      continue;
    for (unsigned j = i; j != NumWords; ++j)
      if (Words[i] == Words[j])
        VecHist[i]++;

    if (VecHist[i] > VecHist[n])
      n = i;
  }

  SDValue HalfV = getZero(dl, VecTy, DAG);
  if (VecHist[n] > 1) {
    // Pre-fill both working halves with a splat of the most common word.
    SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
    HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
                        {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
  }
  SDValue HalfV0 = HalfV;
  SDValue HalfV1 = HalfV;

  // Construct two halves in parallel, then or them together. Rn and Rm count
  // number of rotations needed before the next element. One last rotation is
  // performed post-loop to position the last element.
  int Rn = 0, Rm = 0;
  SDValue Sn, Sm;
  SDValue N = HalfV0;
  SDValue M = HalfV1;
  for (unsigned i = 0; i != NumWords/2; ++i) {
    // Rotate by element count since last insertion.
    if (Words[i] != Words[n] || VecHist[n] <= 1) {
      Sn = DAG.getConstant(Rn, dl, MVT::i32);
      HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
      N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV0, Words[i]});
      Rn = 0;
    }
    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
      Sm = DAG.getConstant(Rm, dl, MVT::i32);
      HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
      M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV1, Words[i+NumWords/2]});
      Rm = 0;
    }
    Rn += 4;
    Rm += 4;
  }
  // Perform last rotation.
  Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
  Sm = DAG.getConstant(Rm, dl, MVT::i32);
  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});

  // Combine the two halves with a bitwise OR of their i32 views.
  SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
  SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);

  SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});

  SDValue OutV =
      DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
  return OutV;
}
1128
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  // Create a byte vector in which each i1 element of PredV is represented
  // by BitBytes consecutive bytes, packed at the front of the vector.
  // If ZeroFill is true, the bytes past the packed region are zeroed,
  // otherwise they are unspecified.
  MVT PredTy = ty(PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);

  if (Subtarget.isHVXVectorType(PredTy, true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Build a shuffle mask that moves every Scale-th byte to the front
    // (bytes [0..BlockLen), then the rest grouped similarly).
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
    SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                         {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
    SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
    return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Bytes currently representing each i1 element after the P2D transfer.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Ping-pong buffers: each iteration reads Words[IdxW^1], writes Words[IdxW].
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  SDValue W0 = isUndef(PredV)
                  ? DAG.getUNDEF(MVT::i64)
                  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
  Words[IdxW].push_back(HiHalf(W0, DAG));
  Words[IdxW].push_back(LoHalf(W0, DAG));

  // Repeatedly double the per-element byte width until it reaches BitBytes.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      // Widen each element within the word via expandPredicate.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(W, dl, DAG);
        Words[IdxW].push_back(HiHalf(T, DAG));
        Words[IdxW].push_back(LoHalf(T, DAG));
      }
    } else {
      // Elements are already full words: duplicate each word instead.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
        Words[IdxW].push_back(W);
      }
    }
    Bytes *= 2;
  }

  assert(Bytes == BitBytes);
  // Insert the words at element 0, rotating by HwLen-4 between insertions
  // so that the words end up in order at the front of the vector.
  SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
  SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
    Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
  }

  return Vec;
}
1212
SDValue
HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
                                          const SDLoc &dl, MVT VecTy,
                                          SelectionDAG &DAG) const {
  // Construct a vector V of bytes, such that a comparison V >u 0 would
  // produce the required vector predicate.
  unsigned VecLen = Values.size();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(VecLen <= HwLen || VecLen == 8*HwLen);
  bool AllT = true, AllF = true;

  // Classify constant i1 operands; any non-constant value is neither
  // true nor false for the purpose of the QTRUE/QFALSE shortcuts below.
  auto IsTrue = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
      return !N->isZero();
    return false;
  };
  auto IsFalse = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
      return N->isZero();
    return false;
  };

  if (VecLen <= HwLen) {
    // In the hardware, each bit of a vector predicate corresponds to a byte
    // of a vector register. Calculate how many bytes does a bit of VecTy
    // correspond to.
    assert(HwLen % VecLen == 0);
    unsigned BitBytes = HwLen / VecLen;
    for (SDValue V : Values) {
      AllT &= IsTrue(V);
      AllF &= IsFalse(V);

      // Replicate each i1 value into BitBytes consecutive bytes.
      SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
                                 : DAG.getUNDEF(MVT::i8);
      for (unsigned B = 0; B != BitBytes; ++B)
        Bytes.push_back(Ext);
    }
  } else {
    // There are as many i1 values, as there are bits in a vector register.
    // Divide the values into groups of 8 and check that each group consists
    // of the same value (ignoring undefs).
    for (unsigned I = 0; I != VecLen; I += 8) {
      unsigned B = 0;
      // Find the first non-undef value in this group.
      for (; B != 8; ++B) {
        if (!Values[I+B].isUndef())
          break;
      }
      SDValue F = Values[I+B];
      AllT &= IsTrue(F);
      AllF &= IsFalse(F);

      // B == 8 means the whole group was undef; emit an undef byte then.
      SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
                            : DAG.getUNDEF(MVT::i8);
      Bytes.push_back(Ext);
      // Verify that the rest of values in the group are the same as the
      // first.
      for (; B != 8; ++B)
        assert(Values[I+B].isUndef() || Values[I+B] == F);
    }
  }

  // All-true/all-false predicates have dedicated nodes.
  if (AllT)
    return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
  if (AllF)
    return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);

  // Materialize the byte vector, then convert it into a predicate.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1285
1286SDValue
1287HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1288 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1289 MVT ElemTy = ty(VecV).getVectorElementType();
1290
1291 unsigned ElemWidth = ElemTy.getSizeInBits();
1292 assert(ElemWidth >= 8 && ElemWidth <= 32);
1293 (void)ElemWidth;
1294
1295 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1296 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1297 {VecV, ByteIdx});
1298 if (ElemTy == MVT::i32)
1299 return ExWord;
1300
1301 // Have an extracted word, need to extract the smaller element out of it.
1302 // 1. Extract the bits of (the original) IdxV that correspond to the index
1303 // of the desired element in the 32-bit word.
1304 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1305 // 2. Extract the element from the word.
1306 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1307 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1308}
1309
1310SDValue
1311HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1312 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1313 // Implement other return types if necessary.
1314 assert(ResTy == MVT::i1);
1315
1316 unsigned HwLen = Subtarget.getVectorLength();
1317 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1318 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1319
1320 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1321 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1322 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1323
1324 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1325 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1326 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1327}
1328
1329SDValue
1330HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1331 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1332 MVT ElemTy = ty(VecV).getVectorElementType();
1333
1334 unsigned ElemWidth = ElemTy.getSizeInBits();
1335 assert(ElemWidth >= 8 && ElemWidth <= 32);
1336 (void)ElemWidth;
1337
1338 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1339 SDValue ByteIdxV) {
1340 MVT VecTy = ty(VecV);
1341 unsigned HwLen = Subtarget.getVectorLength();
1342 SDValue MaskV =
1343 DAG.getNode(ISD::AND, dl, MVT::i32,
1344 {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
1345 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1346 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1347 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1348 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1349 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1350 return TorV;
1351 };
1352
1353 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1354 if (ElemTy == MVT::i32)
1355 return InsertWord(VecV, ValV, ByteIdx);
1356
1357 // If this is not inserting a 32-bit word, convert it into such a thing.
1358 // 1. Extract the existing word from the target vector.
1359 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1360 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1361 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1362 dl, MVT::i32, DAG);
1363
1364 // 2. Treating the extracted word as a 32-bit vector, insert the given
1365 // value into it.
1366 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1367 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1368 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1369 ValV, SubIdx, dl, ElemTy, DAG);
1370
1371 // 3. Insert the 32-bit word back into the original vector.
1372 return InsertWord(VecV, Ins, ByteIdx);
1373}
1374
1375SDValue
1376HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1377 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1378 unsigned HwLen = Subtarget.getVectorLength();
1379 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1380 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1381
1382 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1383 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1384 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1385 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1386
1387 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1388 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1389}
1390
1391SDValue
1392HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1393 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1394 MVT VecTy = ty(VecV);
1395 unsigned HwLen = Subtarget.getVectorLength();
1396 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1397 MVT ElemTy = VecTy.getVectorElementType();
1398 unsigned ElemWidth = ElemTy.getSizeInBits();
1399
1400 // If the source vector is a vector pair, get the single vector containing
1401 // the subvector of interest. The subvector will never overlap two single
1402 // vectors.
1403 if (isHvxPairTy(VecTy)) {
1404 unsigned SubIdx = Hexagon::vsub_lo;
1405 if (Idx * ElemWidth >= 8 * HwLen) {
1406 SubIdx = Hexagon::vsub_hi;
1407 Idx -= VecTy.getVectorNumElements() / 2;
1408 }
1409
1410 VecTy = typeSplit(VecTy).first;
1411 VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
1412 if (VecTy == ResTy)
1413 return VecV;
1414 }
1415
1416 // The only meaningful subvectors of a single HVX vector are those that
1417 // fit in a scalar register.
1418 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1419
1420 MVT WordTy = tyVector(VecTy, MVT::i32);
1421 SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1422 unsigned WordIdx = (Idx*ElemWidth) / 32;
1423
1424 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1425 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1426 if (ResTy.getSizeInBits() == 32)
1427 return DAG.getBitcast(ResTy, W0);
1428
1429 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1430 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1431 SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1432 return DAG.getBitcast(ResTy, WW);
1433}
1434
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Extract a subvector of the vector predicate VecV, starting at the
  // constant element index IdxV. The result is either a shorter HVX
  // predicate, or a scalar predicate type.
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes of ByteVec per single i1 element of VecTy.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  // Starting byte of the requested subvector in ByteVec.
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen / 8; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
  SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
                  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
}
1498
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the subvector SubV into VecV at element index IdxV. SubV is
  // either a single HVX vector (when VecV is a pair), or a subvector that
  // fits in a scalar register (32 or 64 bits).
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(VecTy);
  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(VecV, DAG);
    V1 = HiHalf(VecV, DAG);

    SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
                                    dl, MVT::i32);
    // PickHi: does the insertion land in the high half of the pair?
    PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
    if (isHvxSingleTy(SubTy)) {
      // Inserting a whole single vector: becomes a subregister update.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
      SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
      return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
    IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
    SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to word 0 of the vector.
    IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                       DAG.getConstant(ElemWidth/8, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    // Single-word insert at position 0.
    SDValue V = DAG.getBitcast(MVT::i32, SubV);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
  } else {
    // Two-word insert: insert the low word, rotate by 4 bytes, insert
    // the high word.
    SDValue V = DAG.getBitcast(MVT::i64, SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
                          DAG.getConstant(4, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
  }

  if (IsPair) {
    // Rebuild the pair with the updated half, selecting by PickHi.
    SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
    SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
    return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
  }
  return SingleV;
}
1585
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the predicate subvector SubV into the vector predicate VecV at
  // element index IdxV, working on the byte-vector representation.
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale: how many SubV-sized subvectors fit into VecV.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // BitBytes: bytes of the byte-vector per one i1 element of VecV.
  unsigned BitBytes = HwLen / VecLen;
  // BlockLen: bytes of the byte-vector covered by SubV.
  unsigned BlockLen = HwLen / Scale;

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  // Rotate the insertion point down to byte 0, unless the index is a
  // constant zero.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1631
1632SDValue
1633HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1634 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1635 // Sign- and any-extending of a vector predicate to a vector register is
1636 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1637 // a vector of 1s (where the 1s are of type matching the vector type).
1638 assert(Subtarget.isHVXVectorType(ResTy));
1639 if (!ZeroExt)
1640 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1641
1642 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1643 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1644 DAG.getConstant(1, dl, MVT::i32));
1645 SDValue False = getZero(dl, ResTy, DAG);
1646 return DAG.getSelect(dl, ResTy, VecV, True, False);
1647}
1648
SDValue
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
      MVT ResTy, SelectionDAG &DAG) const {
  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
  // vector register. The remaining bits of the vector register are
  // unspecified.

  MachineFunction &MF = DAG.getMachineFunction();
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT PredTy = ty(VecQ);
  unsigned PredLen = PredTy.getVectorNumElements();
  assert(HwLen % PredLen == 0);
  // One integer element (of 8*HwLen/PredLen bits) per predicate element.
  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);

  Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
  // These are bytes with the LSB rotated left with respect to their index.
  for (unsigned i = 0; i != HwLen/8; ++i) {
    for (unsigned j = 0; j != 8; ++j)
      Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
  }
  Constant *CV = ConstantVector::get(Tmp);
  Align Alignment(HwLen);
      DAG.getConstantPool(CV, getPointerTy(DAG.getDataLayout()), Alignment),
      DAG);
  SDValue Bytes =
      DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,

  // Select the bytes that correspond to true bits in the vector predicate.
  SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
                              getZero(dl, VecTy, DAG));
  // Calculate the OR of all bytes in each group of 8. That will compress
  // all the individual bits into a single byte.
  // First, OR groups of 4, via vrmpy with 0x01010101.
  SDValue All1 =
      DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
  SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
  SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
      {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
  SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});

  // Pick every 8th byte and coalesce them at the beginning of the output.
  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
  // byte and so on.
  SmallVector<int,128> Mask;
  for (unsigned i = 0; i != HwLen; ++i)
    Mask.push_back((8*i) % HwLen + i/(HwLen/8));
  SDValue Collect =
      DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
  return DAG.getBitcast(ResTy, Collect);
}
1706
1707SDValue
1708HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1709 const SDLoc &dl, SelectionDAG &DAG) const {
1710 // Take a vector and resize the element type to match the given type.
1711 MVT InpTy = ty(VecV);
1712 if (InpTy == ResTy)
1713 return VecV;
1714
1715 unsigned InpWidth = InpTy.getSizeInBits();
1716 unsigned ResWidth = ResTy.getSizeInBits();
1717
1718 if (InpTy.isFloatingPoint()) {
1719 return InpWidth < ResWidth
1720 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1721 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1722 DAG.getTargetConstant(0, dl, MVT::i32));
1723 }
1724
1725 assert(InpTy.isInteger());
1726
1727 if (InpWidth < ResWidth) {
1728 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1729 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1730 } else {
1731 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1732 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1733 }
1734}
1735
1736SDValue
1737HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1738 SelectionDAG &DAG) const {
1739 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1740
1741 const SDLoc &dl(Vec);
1742 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1743 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1744 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1745}
1746
SDValue
HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
    const {
  // Lower BUILD_VECTOR for HVX: dispatch to the predicate builder for i1
  // elements, handle f16/bf16 via an i16 bitcast, split vector pairs, and
  // build everything else directly into a vector register.
  const SDLoc &dl(Op);
  MVT VecTy = ty(Op);

  unsigned Size = Op.getNumOperands();
  for (unsigned i = 0; i != Size; ++i)
    Ops.push_back(Op.getOperand(i));

  // Predicate vectors (i1 elements) are built by a dedicated routine.
  if (VecTy.getVectorElementType() == MVT::i1)
    return buildHvxVectorPred(Ops, dl, VecTy, DAG);

  // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
  // not a legal type, just bitcast the node to use i16
  // types and bitcast the result back to f16
  if (VecTy.getVectorElementType() == MVT::f16 ||
      VecTy.getVectorElementType() == MVT::bf16) {
    for (unsigned i = 0; i != Size; i++)
      NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));

    SDValue T0 =
        DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps);
    return DAG.getBitcast(tyVector(VecTy, VecTy.getVectorElementType()), T0);
  }

  // First, split the BUILD_VECTOR for vector pairs. We could generate
  // some pairs directly (via splat), but splats should be generated
  // by the combiner prior to getting here.
  if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
    MVT SingleTy = typeSplit(VecTy).first;
    SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
    SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
  }

  return buildHvxVectorReg(Ops, dl, VecTy, DAG);
}
1788
1789SDValue
1790HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1791 const {
1792 const SDLoc &dl(Op);
1793 MVT VecTy = ty(Op);
1794 MVT ArgTy = ty(Op.getOperand(0));
1795
1796 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1797 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1798 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1799 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1800 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1801 return DAG.getBitcast(VecTy, Splat);
1802 }
1803
1804 return SDValue();
1805}
1806
SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
    const {
  // Vector concatenation of two integer (non-bool) vectors does not need
  // special lowering. Custom-lower concats of bool vectors and expand
  // concats of more than 2 vectors.
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  unsigned NumOp = Op.getNumOperands();
  if (VecTy.getVectorElementType() != MVT::i1) {
    if (NumOp == 2)
      return Op;
    // Expand the other cases into a build-vector.
    for (SDValue V : Op.getNode()->ops())
      DAG.ExtractVectorElements(V, Elems);
    // A vector of i16 will be broken up into a build_vector of i16's.
    // This is a problem, since at the time of operation legalization,
    // all operations are expected to be type-legalized, and i16 is not
    // a legal type. If any of the extracted elements is not of a valid
    // type, sign-extend it to a valid one.
    for (SDValue &V : Elems) {
      MVT Ty = ty(V);
      if (!isTypeLegal(Ty)) {
        MVT NTy = typeLegalize(Ty, DAG);
        if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
          // Re-extract at the legal type, then sign-extend in-register
          // from the original narrow type.
          V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
                                      V.getOperand(0), V.getOperand(1)),
                          DAG.getValueType(Ty));
          continue;
        }
        // A few less complicated cases.
        switch (V.getOpcode()) {
          case ISD::Constant:
            V = DAG.getSExtOrTrunc(V, dl, NTy);
            break;
          case ISD::UNDEF:
            V = DAG.getUNDEF(NTy);
            break;
          case ISD::TRUNCATE:
            V = V.getOperand(0);
            break;
          default:
            llvm_unreachable("Unexpected vector element");
        }
      }
    }
    return DAG.getBuildVector(VecTy, dl, Elems);
  }

  assert(VecTy.getVectorElementType() == MVT::i1);
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);

  SDValue Op0 = Op.getOperand(0);

  // If the operands are HVX types (i.e. not scalar predicates), then
  // defer the concatenation, and create QCAT instead.
  if (Subtarget.isHVXVectorType(ty(Op0), true)) {
    if (NumOp == 2)
      return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));

    ArrayRef<SDUse> U(Op.getNode()->ops());

    // More than two operands: concatenate each half recursively, then
    // QCAT the two halves.
    MVT HalfTy = typeSplit(VecTy).first;
    SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_front(NumOp/2));
    SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_back(NumOp/2));
    return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();

  // Make sure that createHvxPrefixPred will only ever need to expand
  // the predicate, i.e. bytes-per-bit in the input is not greater than
  // the target bytes-per-bit in the result.
  SDValue Combined = combineConcatOfScalarPreds(Op, BitBytes, DAG);
  SmallVector<SDValue,8> Prefixes;
  for (SDValue V : Combined.getNode()->op_values()) {
    SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
    Prefixes.push_back(P);
  }

  // Accumulate the prefixes (last first) into a single byte vector by
  // rotating and OR-ing, then convert to a vector predicate.
  unsigned InpLen = ty(Combined.getOperand(0)).getVectorNumElements();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
  SDValue Res = getZero(dl, ByteTy, DAG);
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
    Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
    Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
}
1906
1907SDValue
1908HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1909 const {
1910 // Change the type of the extracted element to i32.
1911 SDValue VecV = Op.getOperand(0);
1912 MVT ElemTy = ty(VecV).getVectorElementType();
1913 const SDLoc &dl(Op);
1914 SDValue IdxV = Op.getOperand(1);
1915 if (ElemTy == MVT::i1)
1916 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1917
1918 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1919}
1920
1921SDValue
1922HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1923 const {
1924 const SDLoc &dl(Op);
1925 MVT VecTy = ty(Op);
1926 SDValue VecV = Op.getOperand(0);
1927 SDValue ValV = Op.getOperand(1);
1928 SDValue IdxV = Op.getOperand(2);
1929 MVT ElemTy = ty(VecV).getVectorElementType();
1930 if (ElemTy == MVT::i1)
1931 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1932
1933 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1935 tyVector(VecTy, MVT::i16),
1936 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1937 DAG.getBitcast(MVT::i16, ValV), IdxV);
1938 return DAG.getBitcast(tyVector(VecTy, ElemTy), T0);
1939 }
1940
1941 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1942}
1943
1944SDValue
1945HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1946 const {
1947 SDValue SrcV = Op.getOperand(0);
1948 MVT SrcTy = ty(SrcV);
1949 MVT DstTy = ty(Op);
1950 SDValue IdxV = Op.getOperand(1);
1951 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1952 assert(Idx % DstTy.getVectorNumElements() == 0);
1953 (void)Idx;
1954 const SDLoc &dl(Op);
1955
1956 MVT ElemTy = SrcTy.getVectorElementType();
1957 if (ElemTy == MVT::i1)
1958 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1959
1960 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1961}
1962
1963SDValue
1964HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1965 const {
1966 // Idx does not need to be a constant.
1967 SDValue VecV = Op.getOperand(0);
1968 SDValue ValV = Op.getOperand(1);
1969 SDValue IdxV = Op.getOperand(2);
1970
1971 const SDLoc &dl(Op);
1972 MVT VecTy = ty(VecV);
1973 MVT ElemTy = VecTy.getVectorElementType();
1974 if (ElemTy == MVT::i1)
1975 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1976
1977 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1978}
1979
1980SDValue
1981HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1982 // Lower any-extends of boolean vectors to sign-extends, since they
1983 // translate directly to Q2V. Zero-extending could also be done equally
1984 // fast, but Q2V is used/recognized in more places.
1985 // For all other vectors, use zero-extend.
1986 MVT ResTy = ty(Op);
1987 SDValue InpV = Op.getOperand(0);
1988 MVT ElemTy = ty(InpV).getVectorElementType();
1989 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1990 return LowerHvxSignExt(Op, DAG);
1991 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1992}
1993
1994SDValue
1995HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1996 MVT ResTy = ty(Op);
1997 SDValue InpV = Op.getOperand(0);
1998 MVT ElemTy = ty(InpV).getVectorElementType();
1999 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
2000 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
2001 return Op;
2002}
2003
2004SDValue
2005HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
2006 MVT ResTy = ty(Op);
2007 SDValue InpV = Op.getOperand(0);
2008 MVT ElemTy = ty(InpV).getVectorElementType();
2009 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
2010 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
2011 return Op;
2012}
2013
2014SDValue
2015HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
2016 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
2017 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
2018 const SDLoc &dl(Op);
2019 MVT ResTy = ty(Op);
2020 SDValue InpV = Op.getOperand(0);
2021 assert(ResTy == ty(InpV));
2022
2023 // Calculate the vectors of 1 and bitwidth(x).
2024 MVT ElemTy = ty(InpV).getVectorElementType();
2025 unsigned ElemWidth = ElemTy.getSizeInBits();
2026
2027 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
2028 DAG.getConstant(1, dl, MVT::i32));
2029 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
2030 DAG.getConstant(ElemWidth, dl, MVT::i32));
2031 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
2032 DAG.getAllOnesConstant(dl, MVT::i32));
2033
2034 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
2035 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
2036 // it separately in custom combine or selection).
2037 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
2038 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
2039 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
2040 return DAG.getNode(ISD::SUB, dl, ResTy,
2041 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
2042}
2043
2044SDValue
2045HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
2046 const SDLoc &dl(Op);
2047 MVT ResTy = ty(Op);
2048 assert(ResTy.getVectorElementType() == MVT::i32);
2049
2050 SDValue Vs = Op.getOperand(0);
2051 SDValue Vt = Op.getOperand(1);
2052
2053 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
2054 unsigned Opc = Op.getOpcode();
2055
2056 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
2057 if (Opc == ISD::MULHU)
2058 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
2059 if (Opc == ISD::MULHS)
2060 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
2061
2062#ifndef NDEBUG
2063 Op.dump(&DAG);
2064#endif
2065 llvm_unreachable("Unexpected mulh operation");
2066}
2067
2068SDValue
2069HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
2070 const SDLoc &dl(Op);
2071 unsigned Opc = Op.getOpcode();
2072 SDValue Vu = Op.getOperand(0);
2073 SDValue Vv = Op.getOperand(1);
2074
2075 // If the HI part is not used, convert it to a regular MUL.
2076 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
2077 // Need to preserve the types and the number of values.
2078 SDValue Hi = DAG.getUNDEF(ty(HiVal));
2079 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
2080 return DAG.getMergeValues({Lo, Hi}, dl);
2081 }
2082
2083 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
2084 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
2085
2086 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
2087 // valued nodes.
2088 if (Subtarget.useHVXV62Ops())
2089 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
2090
2091 if (Opc == HexagonISD::SMUL_LOHI) {
2092 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
2093 // for other signedness LOHI is cheaper.
2094 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
2095 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
2096 SDValue Lo = DAG.getUNDEF(ty(LoVal));
2097 return DAG.getMergeValues({Lo, Hi}, dl);
2098 }
2099 }
2100
2101 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
2102}
2103
2104SDValue
2105HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
2106 SDValue Val = Op.getOperand(0);
2107 MVT ResTy = ty(Op);
2108 MVT ValTy = ty(Val);
2109 const SDLoc &dl(Op);
2110
2111 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
2112 unsigned HwLen = Subtarget.getVectorLength();
2113 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
2114 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2115 unsigned BitWidth = ResTy.getSizeInBits();
2116
2117 if (BitWidth < 64) {
2118 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
2119 dl, MVT::i32, DAG);
2120 if (BitWidth == 32)
2121 return W0;
2122 assert(BitWidth < 32u);
2123 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2124 }
2125
2126 // The result is >= 64 bits. The only options are 64 or 128.
2127 assert(BitWidth == 64 || BitWidth == 128);
2129 for (unsigned i = 0; i != BitWidth/32; ++i) {
2130 SDValue W = extractHvxElementReg(
2131 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2132 Words.push_back(W);
2133 }
2134 SmallVector<SDValue,2> Combines;
2135 assert(Words.size() % 2 == 0);
2136 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2137 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2138 Combines.push_back(C);
2139 }
2140
2141 if (BitWidth == 64)
2142 return Combines[0];
2143
2144 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2145 }
2146
2147 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2148 // Splat the input into a 32-element i32 vector, then AND each element
2149 // with a unique bitmask to isolate individual bits.
2150 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2151 assert(Val32.getValueType().getSizeInBits() == 32 &&
2152 "Input must be 32 bits");
2153 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2154 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2156 for (unsigned i = 0; i < 32; ++i)
2157 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2158
2159 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2160 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2161 return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2162 };
2163 // === Case: v32i1 ===
2164 if (ResTy == MVT::v32i1 &&
2165 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2166 Subtarget.useHVX128BOps()) {
2167 SDValue Val32 = Val;
2168 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2169 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2170 return bitcastI32ToV32I1(Val32);
2171 }
2172 // === Case: v64i1 ===
2173 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2174 // Split i64 into lo/hi 32-bit halves.
2175 SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2176 SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2177 DAG.getConstant(32, dl, MVT::i64));
2178 SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2179
2180 // Reuse the same 32-bit logic twice.
2181 SDValue LoRes = bitcastI32ToV32I1(Lo);
2182 SDValue HiRes = bitcastI32ToV32I1(Hi);
2183
2184 // Concatenate into a v64i1 predicate.
2185 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
2186 }
2187
2188 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2189 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2190 unsigned BitWidth = ValTy.getSizeInBits();
2191 unsigned HwLen = Subtarget.getVectorLength();
2192 assert(BitWidth == HwLen);
2193
2194 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2195 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2196 // Splat each byte of Val 8 times.
2197 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2198 // where b0, b1,..., b15 are least to most significant bytes of I.
2200 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2201 // These are bytes with the LSB rotated left with respect to their index.
2203 for (unsigned I = 0; I != HwLen / 8; ++I) {
2204 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2205 SDValue Byte =
2206 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2207 for (unsigned J = 0; J != 8; ++J) {
2208 Bytes.push_back(Byte);
2209 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2210 }
2211 }
2212
2213 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2214 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2215 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2216
2217 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2218 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2219 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2220 }
2221
2222 return Op;
2223}
2224
2225SDValue HexagonTargetLowering::LowerHvxStore(SDValue Op,
2226 SelectionDAG &DAG) const {
2227 const SDLoc &dl(Op);
2228 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
2229 SDValue Val = SN->getValue();
2230 MVT ValTy = ty(Val);
2231
2232 // Check if this is a store of an HVX bool vector (predicate)
2233 if (!isHvxBoolTy(ValTy))
2234 return SDValue();
2235
2236 unsigned NumBits = ValTy.getVectorNumElements();
2237 MachineMemOperand *MMO = SN->getMemOperand();
2238
2239 // Check alignment requirements based on predicate size
2240 unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
2241 if (MMO->getBaseAlign().value() % RequiredAlign != 0)
2242 return SDValue();
2243
2244 unsigned HwLen = Subtarget.getVectorLength();
2245 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen / 4);
2246
2247 // Compress the predicate into a vector register
2248 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2249
2250 // Extract words from the compressed vector
2252 for (unsigned i = 0; i != NumBits / 32; ++i) {
2253 SDValue W = extractHvxElementReg(VQ, DAG.getConstant(i, dl, MVT::i32), dl,
2254 MVT::i32, DAG);
2255 Words.push_back(W);
2256 }
2257
2258 SDValue Chain = SN->getChain();
2259 SDValue BasePtr = SN->getBasePtr();
2260 MachinePointerInfo PtrInfo = MMO->getPointerInfo();
2261
2262 if (NumBits == 32)
2263 return DAG.getStore(Chain, dl, Words[0], BasePtr, PtrInfo,
2264 MMO->getBaseAlign());
2265
2266 if (NumBits == 64) {
2267 SDValue W64 = getCombine(Words[1], Words[0], dl, MVT::i64, DAG);
2268 return DAG.getStore(Chain, dl, W64, BasePtr, PtrInfo, MMO->getBaseAlign());
2269 }
2270
2271 if (NumBits == 128) {
2272 SDValue Lo64 = getCombine(Words[1], Words[0], dl, MVT::i64, DAG);
2273 SDValue Hi64 = getCombine(Words[3], Words[2], dl, MVT::i64, DAG);
2274
2275 Chain =
2276 DAG.getStore(Chain, dl, Lo64, BasePtr, PtrInfo, MMO->getBaseAlign());
2277
2278 SDValue Offset8 = DAG.getConstant(8, dl, MVT::i32);
2279 SDValue Ptr8 = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, Offset8);
2280 return DAG.getStore(Chain, dl, Hi64, Ptr8, PtrInfo.getWithOffset(8),
2281 Align(8));
2282 }
2283
2284 return SDValue();
2285}
2286
SDValue HexagonTargetLowering::LowerHvxLoad(SDValue Op,
                                            SelectionDAG &DAG) const {
  // Custom-lower loads of HVX predicate (bool) vectors by loading the
  // bits as scalar integers and bitcasting to the predicate type.
  // Returns SDValue() to fall back to default handling otherwise.
  const SDLoc &dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  MVT ResTy = ty(Op);

  // Check if this is a load of an HVX bool vector (predicate)
  if (!isHvxBoolTy(ResTy))
    return SDValue();

  unsigned NumBits = ResTy.getVectorNumElements();
  MachineMemOperand *MMO = LN->getMemOperand();

  // Only handle sufficiently-aligned accesses; bail out otherwise.
  unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
  if (MMO->getBaseAlign().value() % RequiredAlign != 0)
    return SDValue();

  SDValue Chain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();
  MachinePointerInfo PtrInfo = MMO->getPointerInfo();

  if (NumBits == 32) {
    // Single i32 load, bitcast to v32i1. Merge the loaded value with the
    // output chain of the load.
    SDValue W32 =
        DAG.getLoad(MVT::i32, dl, Chain, BasePtr, PtrInfo, MMO->getBaseAlign());
    SDValue Pred = DAG.getNode(ISD::BITCAST, dl, MVT::v32i1, W32);
    SDValue Ops[] = {Pred, W32.getValue(1)};
    return DAG.getMergeValues(Ops, dl);
  }

  if (NumBits == 64) {
    // Single i64 load, bitcast to v64i1.
    SDValue W64 =
        DAG.getLoad(MVT::i64, dl, Chain, BasePtr, PtrInfo, MMO->getBaseAlign());
    SDValue Pred = DAG.getNode(ISD::BITCAST, dl, MVT::v64i1, W64);
    SDValue Ops[] = {Pred, W64.getValue(1)};
    return DAG.getMergeValues(Ops, dl);
  }

  if (NumBits == 128) {
    // Two i64 loads (base, base+8), chained so the second depends on the
    // first; each half becomes a v64i1 and the halves are concatenated.
    SDValue Lo64 =
        DAG.getLoad(MVT::i64, dl, Chain, BasePtr, PtrInfo, MMO->getBaseAlign());
    Chain = Lo64.getValue(1);

    SDValue Offset8 = DAG.getConstant(8, dl, MVT::i32);
    SDValue Ptr8 = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, Offset8);
    SDValue Hi64 = DAG.getLoad(MVT::i64, dl, Chain, Ptr8,
                               PtrInfo.getWithOffset(8), Align(8));

    SDValue LoPred = DAG.getNode(ISD::BITCAST, dl, MVT::v64i1, Lo64);
    SDValue HiPred = DAG.getNode(ISD::BITCAST, dl, MVT::v64i1, Hi64);
    SDValue Pred =
        DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v128i1, LoPred, HiPred);

    SDValue Ops[] = {Pred, Hi64.getValue(1)};
    return DAG.getMergeValues(Ops, dl);
  }

  return SDValue();
}
2345
2346SDValue
2347HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2348 // Sign- and zero-extends are legal.
2349 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2350 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2351 Op.getOperand(0));
2352}
2353
2354SDValue
2355HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2356 MVT ResTy = ty(Op);
2357 if (ResTy.getVectorElementType() != MVT::i1)
2358 return Op;
2359
2360 const SDLoc &dl(Op);
2361 unsigned HwLen = Subtarget.getVectorLength();
2362 unsigned VecLen = ResTy.getVectorNumElements();
2363 assert(HwLen % VecLen == 0);
2364 unsigned ElemSize = HwLen / VecLen;
2365
2366 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2367 SDValue S =
2368 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2369 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2370 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2371 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2372}
2373
2374SDValue
2375HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2376 if (SDValue S = getVectorShiftByInt(Op, DAG))
2377 return S;
2378 return Op;
2379}
2380
2381SDValue
2382HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2383 SelectionDAG &DAG) const {
2384 unsigned Opc = Op.getOpcode();
2385 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2386
2387 // Make sure the shift amount is within the range of the bitwidth
2388 // of the element type.
2389 SDValue A = Op.getOperand(0);
2390 SDValue B = Op.getOperand(1);
2391 SDValue S = Op.getOperand(2);
2392
2393 MVT InpTy = ty(A);
2394 MVT ElemTy = InpTy.getVectorElementType();
2395
2396 const SDLoc &dl(Op);
2397 unsigned ElemWidth = ElemTy.getSizeInBits();
2398 bool IsLeft = Opc == ISD::FSHL;
2399
2400 // The expansion into regular shifts produces worse code for i8 and for
2401 // right shift of i32 on v65+.
2402 bool UseShifts = ElemTy != MVT::i8;
2403 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2404 UseShifts = false;
2405
2406 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2407 // If this is a funnel shift by a scalar, lower it into regular shifts.
2408 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2409 SDValue ModS =
2410 DAG.getNode(ISD::AND, dl, MVT::i32,
2411 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2412 SDValue NegS =
2413 DAG.getNode(ISD::SUB, dl, MVT::i32,
2414 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2415 SDValue IsZero =
2416 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2417 // FSHL A, B => A << | B >>n
2418 // FSHR A, B => A <<n | B >>
2419 SDValue Part1 =
2420 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2421 SDValue Part2 =
2422 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2423 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2424 // If the shift amount was 0, pick A or B, depending on the direction.
2425 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2426 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2427 }
2428
2430 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2431
2432 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2433 return DAG.getNode(MOpc, dl, ty(Op),
2434 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2435}
2436
2437SDValue
2438HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2439 const SDLoc &dl(Op);
2440 unsigned IntNo = Op.getConstantOperandVal(0);
2441 SmallVector<SDValue> Ops(Op->ops());
2442
2443 auto Swap = [&](SDValue P) {
2444 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2445 };
2446
2447 switch (IntNo) {
2448 case Intrinsic::hexagon_V6_pred_typecast:
2449 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2450 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2451 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2452 if (ResTy == InpTy)
2453 return Ops[1];
2454 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2455 }
2456 break;
2457 }
2458 case Intrinsic::hexagon_V6_vmpyss_parts:
2459 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2460 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2461 {Ops[1], Ops[2]}));
2462 case Intrinsic::hexagon_V6_vmpyuu_parts:
2463 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2464 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2465 {Ops[1], Ops[2]}));
2466 case Intrinsic::hexagon_V6_vmpyus_parts:
2467 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2468 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2469 {Ops[1], Ops[2]}));
2470 }
2471 } // switch
2472
2473 return Op;
2474}
2475
2476SDValue
2477HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2478 const SDLoc &dl(Op);
2479 unsigned HwLen = Subtarget.getVectorLength();
2480 MachineFunction &MF = DAG.getMachineFunction();
2481 auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
2482 SDValue Mask = MaskN->getMask();
2483 SDValue Chain = MaskN->getChain();
2484 SDValue Base = MaskN->getBasePtr();
2485 auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
2486
2487 unsigned Opc = Op->getOpcode();
2489
2490 if (Opc == ISD::MLOAD) {
2491 MVT ValTy = ty(Op);
2492 SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
2493 SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
2494 if (isUndef(Thru))
2495 return Load;
2496 SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
2497 return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
2498 }
2499
2500 // MSTORE
2501 // HVX only has aligned masked stores.
2502
2503 // TODO: Fold negations of the mask into the store.
2504 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2505 SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
2506 SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
2507
2508 if (MaskN->getAlign().value() % HwLen == 0) {
2509 SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2510 {Mask, Base, Offset0, Value, Chain}, DAG);
2511 DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
2512 return Store;
2513 }
2514
2515 // Unaligned case.
2516 auto StoreAlign = [&](SDValue V, SDValue A) {
2517 SDValue Z = getZero(dl, ty(V), DAG);
2518 // TODO: use funnel shifts?
2519 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2520 // upper half.
2521 SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2522 SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2523 return std::make_pair(LoV, HiV);
2524 };
2525
2526 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2527 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2528 SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2529 VectorPair Tmp = StoreAlign(MaskV, Base);
2530 VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2531 DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2532 VectorPair ValueU = StoreAlign(Value, Base);
2533
2534 SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2535 SDValue StoreLo =
2536 getInstr(StoreOpc, dl, MVT::Other,
2537 {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2538 SDValue StoreHi =
2539 getInstr(StoreOpc, dl, MVT::Other,
2540 {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2541 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2542 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2543 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2544}
2545
2546SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2547 SelectionDAG &DAG) const {
2548 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2549 // is legal (done via a pattern).
2550 assert(Subtarget.useHVXQFloatOps());
2551
2552 assert(Op->getOpcode() == ISD::FP_EXTEND);
2553
2554 MVT VecTy = ty(Op);
2555 MVT ArgTy = ty(Op.getOperand(0));
2556 const SDLoc &dl(Op);
2557
2558 if (ArgTy == MVT::v64bf16) {
2559 MVT HalfTy = typeSplit(VecTy).first;
2560 SDValue BF16Vec = Op.getOperand(0);
2561 SDValue Zeroes =
2562 getInstr(Hexagon::V6_vxor, dl, HalfTy, {BF16Vec, BF16Vec}, DAG);
2563 // Interleave zero vector with the bf16 vector, with zeroes in the lower
2564 // half of each 32 bit lane, effectively extending the bf16 values to fp32
2565 // values.
2566 SDValue ShuffVec =
2567 getInstr(Hexagon::V6_vshufoeh, dl, VecTy, {BF16Vec, Zeroes}, DAG);
2568 VectorPair VecPair = opSplit(ShuffVec, dl, DAG);
2569 SDValue Result = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2570 {VecPair.second, VecPair.first,
2571 DAG.getSignedConstant(-4, dl, MVT::i32)},
2572 DAG);
2573 return Result;
2574 }
2575
2576 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2577
2578 SDValue F16Vec = Op.getOperand(0);
2579
2580 APFloat FloatVal = APFloat(1.0f);
2581 bool Ignored;
2583 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2584 SDValue VmpyVec =
2585 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2586
2587 MVT HalfTy = typeSplit(VecTy).first;
2588 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2589 SDValue LoVec =
2590 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2591 SDValue HiVec =
2592 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2593
2594 SDValue ShuffVec =
2595 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2596 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2597
2598 return ShuffVec;
2599}
2600
2601SDValue
2602HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2603 // Catch invalid conversion ops (just in case).
2604 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2605 Op.getOpcode() == ISD::FP_TO_UINT);
2606
2607 MVT ResTy = ty(Op);
2608 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2609 MVT IntTy = ResTy.getVectorElementType();
2610
2611 if (Subtarget.useHVXIEEEFPOps()) {
2612 // There are only conversions from f16.
2613 if (FpTy == MVT::f16) {
2614 // Other int types aren't legal in HVX, so we shouldn't see them here.
2615 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2616 // Conversions to i8 and i16 are legal.
2617 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2618 return Op;
2619 }
2620 }
2621
2622 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2623 return EqualizeFpIntConversion(Op, DAG);
2624
2625 return ExpandHvxFpToInt(Op, DAG);
2626}
2627
// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
// R1 = #1, R2 holds the v32i1 param
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V2 = vand(V2,V1)
// V2.sf = V2.w
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // R1 = #1; V1 = vsplat(R1): broadcast the constant 1 into each word.
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Q0 = vand(V1,R1): form a predicate from the splatted constant.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // V0.w = prefixsum(Q0): running count used to derive per-lane indices.
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // V2 = vsplat(R2): broadcast the 32 predicate bits (as an i32) per lane.
  SDNode *SplatParam = DAG.getMachineNode(
      Hexagon::V6_lvsplatw, dl, MVT::v32i32,
      DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
  // V0.w = vsub(V0.w,V1.w): make the per-lane shift amounts zero-based.
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // V2.w = vlsr(V2.w,V0.w): move each lane's predicate bit into bit 0.
  SDNode *IndexShift =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatParam, 0), SDValue(Vsub, 0));
  // V2 = vand(V2,V1): keep only bit 0, yielding 0 or 1 per lane.
  SDNode *MaskOff =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift, 0), SDValue(SplatConst, 0));
  // V2.sf = V2.w: convert the 0/1 words to single-precision floats.
  SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
                                       SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2669
// For vector type v64i1 uint_to_fp to v64f16:
// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
// R3 = subreg_high (R32)
// R2 = subreg_low (R32)
// R1 = #1
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// V3 = vsplat(R3)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V3.w = vlsr(V3.w,V0.w)
// V2 = vand(V2,V1)
// V3 = vand(V3,V1)
// V2.h = vpacke(V3.w,V2.w)
// V2.hf = V2.h
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {

  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // View the 64 predicate bits as a single i64 value.
  SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
  // vand between splatted const and const
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Get the prefixsum (used to derive per-lane bit indices)
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Get the vsub (make the per-lane shift amounts zero-based)
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Get vlsr for hi and lo (move each lane's bit into bit 0)
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatHi, 0), SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatLo, 0), SDValue(Vsub, 0));
  // Get vand of hi and lo (isolate bit 0, yielding 0 or 1 per lane)
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
  // Pack them into one v64i16 vector of halfword 0/1 values
  SDNode *Pack =
      DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
                         SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
  // Convert the packed halfwords to half-precision floats
  SDNode *Convert =
      DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2744
2745SDValue
2746HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2747 // Catch invalid conversion ops (just in case).
2748 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2749 Op.getOpcode() == ISD::UINT_TO_FP);
2750
2751 MVT ResTy = ty(Op);
2752 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2753 MVT FpTy = ResTy.getVectorElementType();
2754
2755 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2756 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2757 return LowerHvxPred32ToFp(Op, DAG);
2758 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2759 return LowerHvxPred64ToFp(Op, DAG);
2760 }
2761
2762 if (Subtarget.useHVXIEEEFPOps()) {
2763 // There are only conversions to f16.
2764 if (FpTy == MVT::f16) {
2765 // Other int types aren't legal in HVX, so we shouldn't see them here.
2766 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2767 // i8, i16 -> f16 is legal.
2768 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2769 return Op;
2770 }
2771 }
2772
2773 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2774 return EqualizeFpIntConversion(Op, DAG);
2775
2776 return ExpandHvxIntToFp(Op, DAG);
2777}
2778
2779HexagonTargetLowering::TypePair
2780HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2781 // Compare the widths of elements of the two types, and extend the narrower
2782 // type to match the with of the wider type. For vector types, apply this
2783 // to the element type.
2784 assert(Ty0.isVector() == Ty1.isVector());
2785
2786 MVT ElemTy0 = Ty0.getScalarType();
2787 MVT ElemTy1 = Ty1.getScalarType();
2788
2789 unsigned Width0 = ElemTy0.getSizeInBits();
2790 unsigned Width1 = ElemTy1.getSizeInBits();
2791 unsigned MaxWidth = std::max(Width0, Width1);
2792
2793 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2794 if (ScalarTy.isInteger())
2795 return MVT::getIntegerVT(Width);
2796 assert(ScalarTy.isFloatingPoint());
2797 return MVT::getFloatingPointVT(Width);
2798 };
2799
2800 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2801 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2802
2803 if (!Ty0.isVector()) {
2804 // Both types are scalars.
2805 return {WideETy0, WideETy1};
2806 }
2807
2808 // Vector types.
2809 unsigned NumElem = Ty0.getVectorNumElements();
2810 assert(NumElem == Ty1.getVectorNumElements());
2811
2812 return {MVT::getVectorVT(WideETy0, NumElem),
2813 MVT::getVectorVT(WideETy1, NumElem)};
2814}
2815
2816HexagonTargetLowering::TypePair
2817HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2818 // Compare the numbers of elements of two vector types, and widen the
2819 // narrower one to match the number of elements in the wider one.
2820 assert(Ty0.isVector() && Ty1.isVector());
2821
2822 unsigned Len0 = Ty0.getVectorNumElements();
2823 unsigned Len1 = Ty1.getVectorNumElements();
2824 if (Len0 == Len1)
2825 return {Ty0, Ty1};
2826
2827 unsigned MaxLen = std::max(Len0, Len1);
2828 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2829 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2830}
2831
2832MVT
2833HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2834 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2835 assert(LegalTy.isSimple());
2836 return LegalTy.getSimpleVT();
2837}
2838
2839MVT
2840HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2841 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2842 assert(Ty.getSizeInBits() <= HwWidth);
2843 if (Ty.getSizeInBits() == HwWidth)
2844 return Ty;
2845
2846 MVT ElemTy = Ty.getScalarType();
2847 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2848}
2849
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
      const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
  // Compute A+B, return {A+B, O}, where O = vector predicate indicating
  // whether an overflow has occurred.
  MVT ResTy = ty(A);
  assert(ResTy == ty(B));
  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());

  if (!Signed) {
    // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
    // save any instructions.
    // Unsigned overflow iff the sum wrapped, i.e. A+B <u A.
    SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
    SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
    return {Add, Ovf};
  }

  // Signed overflow has happened, if:
  // (A, B have the same sign) and (A+B has a different sign from either)
  // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
  SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
  // ~A computed as A xor all-ones.
  SDValue NotA =
      DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
  SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
  SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
  // Overflow iff the resulting sign bit is set (value < 0).
  SDValue MSB =
      DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
  return {Add, MSB};
}
2880
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {rounded value, overflow predicate from the rounding addition}.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  // bool ovf = (inp > tmp0);
  // uint rup = inp & (1 << (Amt+1));
  //
  // uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(ElemWidth);
  MVT IntTy = tyVector(ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  // Work on an integer view of the input.
  SDValue Inp = DAG.getBitcast(IntTy, Val);
  SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);

  // "rup" (round-up bit): the bit just above the discarded half, used for
  // the round-half-to-even tie break.
  SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
  SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
  SDValue Zero = getZero(dl, IntTy, DAG);
  SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);

  SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
  SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
  SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);

  // Select between the two candidates depending on whether the discarded
  // low bits were all zero (exact half vs. strictly above half).
  SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
  SDValue One = DAG.getConstant(1, dl, IntTy);
  SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
  SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2928
// Emit a lane-wise high-half signed multiply (mulhs) of two v*i32 HVX
// vectors for V60, which has no single instruction for it: the 32x32
// product is assembled from halfword multiplies with careful carry
// handling, and the top 32 bits are returned.
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // mulhs(A,B) =
  //   = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //      + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //                Hi(B) Lo(B)
  //                Hi(A) Lo(A)
  //               --------------
  //                Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //         Hi(B)*Lo(A)         | + dropping the low 16 bits
  //         Hi(A)*Lo(B)         | T2
  //  Hi(B)*Hi(A)

  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
  // T4 = get Hi(B) into low halves (mirror of T1).
  SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
  SDValue T5 = LoHalf(P3, DAG);
  // Add:
  SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
  return T6;
}
2985
// Emit a full 64-bit lane-wise multiply of two v*i32 HVX vectors for V60,
// returning the {Lo, Hi} 32-bit halves of each product as merged values.
// SignedA/SignedB give the signedness of each operand; the product is
// computed unsigned and then corrected for the signed operand(s).
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);

  // Swap low/high halves in B
  // 0x02020202 is the vdelta control to swap the 16-bit halves of each
  // 32-bit lane.
  SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
                        {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
                {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
  Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Signed*signed correction: subtract (B if A<0) + (A if B<0) from Hi.
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
    SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
3061
// Emit a full 64-bit lane-wise multiply of two v*i32 HVX vectors for V62+,
// returning the {Lo, Hi} 32-bit halves of each product as merged values.
// The signed*signed product is computed directly with vmpyewuh_64 /
// vmpyowh_64_acc, and corrections are applied when either operand is
// unsigned.
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
  SDValue P1 =
      getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
  SDValue Lo = LoHalf(P1, DAG);
  SDValue Hi = HiHalf(P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
3115
3116SDValue
3117HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
3118 const {
3119 // Rewrite conversion between integer and floating-point in such a way that
3120 // the integer type is extended/narrowed to match the bitwidth of the
3121 // floating-point type, combined with additional integer-integer extensions
3122 // or narrowings to match the original input/result types.
3123 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
3124 //
3125 // The input/result types are not required to be legal, but if they are
3126 // legal, this function should not introduce illegal types.
3127
3128 unsigned Opc = Op.getOpcode();
3131
3132 SDValue Inp = Op.getOperand(0);
3133 MVT InpTy = ty(Inp);
3134 MVT ResTy = ty(Op);
3135
3136 if (InpTy == ResTy)
3137 return Op;
3138
3139 const SDLoc &dl(Op);
3141
3142 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
3143 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
3144 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
3145 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
3146 return Res;
3147}
3148
3149SDValue
3150HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
3151 unsigned Opc = Op.getOpcode();
3153
3154 const SDLoc &dl(Op);
3155 SDValue Op0 = Op.getOperand(0);
3156 MVT InpTy = ty(Op0);
3157 MVT ResTy = ty(Op);
3158 assert(InpTy.changeTypeToInteger() == ResTy);
3159
3160 // At this point this is an experiment under a flag.
3161 // In arch before V81 the rounding mode is towards nearest value.
3162 // The C/C++ standard requires rounding towards zero:
3163 // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
3164 // finite value of real floating type is converted to an integer type, the
3165 // fractional part is discarded (i.e., the value is truncated toward zero)."
3166 // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
3167 // floating-point type can be converted to a prvalue of an integer type. The
3168 // conversion truncates; that is, the fractional part is discarded."
3169 if (InpTy == MVT::v64f16) {
3170 if (Subtarget.useHVXV81Ops()) {
3171 // This is c/c++ compliant
3172 SDValue ConvVec =
3173 getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG);
3174 return ConvVec;
3175 } else if (EnableFpFastConvert) {
3176 // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
3177 SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG);
3178 return ConvVec;
3179 }
3180 } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
3181 // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
3182 SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG);
3183 return ConvVec;
3184 }
3185
3186 // int32_t conv_f32_to_i32(uint32_t inp) {
3187 // // s | exp8 | frac23
3188 //
3189 // int neg = (int32_t)inp < 0;
3190 //
3191 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
3192 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
3193 // // produce a large positive "expm1", which will result in max u/int.
3194 // // In all IEEE formats, bias is the largest positive number that can be
3195 // // represented in bias-width bits (i.e. 011..1).
3196 // int32_t expm1 = (inp << 1) - 0x80000000;
3197 // expm1 >>= 24;
3198 //
3199 // // Always insert the "implicit 1". Subnormal numbers will become 0
3200 // // regardless.
3201 // uint32_t frac = (inp << 8) | 0x80000000;
3202 //
3203 // // "frac" is the fraction part represented as Q1.31. If it was
3204 // // interpreted as uint32_t, it would be the fraction part multiplied
3205 // // by 2^31.
3206 //
3207 // // Calculate the amount of right shift, since shifting further to the
3208 // // left would lose significant bits. Limit it to 32, because we want
3209 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
3210 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
3211 // // left by 31). "rsh" can be negative.
3212 // int32_t rsh = min(31 - (expm1 + 1), 32);
3213 //
3214 // frac >>= rsh; // rsh == 32 will produce 0
3215 //
3216 // // Everything up to this point is the same for conversion to signed
3217 // // unsigned integer.
3218 //
3219 // if (neg) // Only for signed int
3220 // frac = -frac; //
3221 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
3222 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
3223 // if (rsh <= 0 && !neg) //
3224 // frac = 0x7fffffff; //
3225 //
3226 // if (neg) // Only for unsigned int
3227 // frac = 0; //
3228 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
3229 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
3230 //
3231 // return frac;
3232 // }
3233
3234 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
3235
3236 // Zero = V6_vd0();
3237 // Neg = V6_vgtw(Zero, Inp);
3238 // One = V6_lvsplatw(1);
3239 // M80 = V6_lvsplatw(0x80000000);
3240 // Exp00 = V6_vaslwv(Inp, One);
3241 // Exp01 = V6_vsubw(Exp00, M80);
3242 // ExpM1 = V6_vasrw(Exp01, 24);
3243 // Frc00 = V6_vaslw(Inp, 8);
3244 // Frc01 = V6_vor(Frc00, M80);
3245 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
3246 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
3247 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
3248
3249 // if signed int:
3250 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
3251 // Pos = V6_vgtw(Rsh01, Zero);
3252 // Frc13 = V6_vsubw(Zero, Frc02);
3253 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
3254 // Int = V6_vmux(Pos, Frc14, Bnd);
3255 //
3256 // if unsigned int:
3257 // Rsn = V6_vgtw(Zero, Rsh01)
3258 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
3259 // Int = V6_vmux(Neg, Zero, Frc23)
3260
3261 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
3262 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3263 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
3264
3265 SDValue Inp = DAG.getBitcast(ResTy, Op0);
3266 SDValue Zero = getZero(dl, ResTy, DAG);
3267 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
3268 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
3269 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
3270 SDValue One = DAG.getConstant(1, dl, ResTy);
3271 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
3272 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
3273 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
3274 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
3275
3276 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
3277 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
3278 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
3279
3280 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
3281 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
3282 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
3283 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
3284 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
3285
3286 SDValue Int;
3287
3288 if (Opc == ISD::FP_TO_SINT) {
3289 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
3290 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
3291 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
3292 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
3293 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
3294 } else {
3296 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
3297 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
3298 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
3299 }
3300
3301 return Int;
3302}
3303
3304SDValue
3305HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
3306 unsigned Opc = Op.getOpcode();
3308
3309 const SDLoc &dl(Op);
3310 SDValue Op0 = Op.getOperand(0);
3311 MVT InpTy = ty(Op0);
3312 MVT ResTy = ty(Op);
3313 assert(ResTy.changeTypeToInteger() == InpTy);
3314
3315 // uint32_t vnoc1_rnd(int32_t w) {
3316 // int32_t iszero = w == 0;
3317 // int32_t isneg = w < 0;
3318 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
3319 //
3320 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
3321 // uint32_t frac0 = (uint64_t)u << norm_left;
3322 //
3323 // // Rounding:
3324 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
3325 // uint32_t renorm = (frac0 > frac1);
3326 // uint32_t rup = (int)(frac0 << 22) < 0;
3327 //
3328 // uint32_t frac2 = frac0 >> 8;
3329 // uint32_t frac3 = frac1 >> 8;
3330 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
3331 //
3332 // int32_t exp = 32 - norm_left + renorm + 127;
3333 // exp <<= 23;
3334 //
3335 // uint32_t sign = 0x80000000 * isneg;
3336 // uint32_t f = sign | exp | frac;
3337 // return iszero ? 0 : f;
3338 // }
3339
3340 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
3341 bool Signed = Opc == ISD::SINT_TO_FP;
3342
3343 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
3344 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3345
3346 SDValue Zero = getZero(dl, InpTy, DAG);
3347 SDValue One = DAG.getConstant(1, dl, InpTy);
3348 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
3349 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
3350 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
3351 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
3352 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
3353
3354 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
3355 if (Signed) {
3356 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
3357 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
3358 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
3359 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
3360 }
3361
3362 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
3363 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
3364 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
3365 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
3366 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
3367 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
3368 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
3369 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
3370 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
3371
3372 return Flt;
3373}
3374
3375SDValue
3376HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3377 unsigned Opc = Op.getOpcode();
3378 unsigned TLOpc;
3379 switch (Opc) {
3380 case ISD::ANY_EXTEND:
3381 case ISD::SIGN_EXTEND:
3382 case ISD::ZERO_EXTEND:
3383 TLOpc = HexagonISD::TL_EXTEND;
3384 break;
3385 case ISD::TRUNCATE:
3387 break;
3388#ifndef NDEBUG
3389 Op.dump(&DAG);
3390#endif
3391 llvm_unreachable("Unexpected operator");
3392 }
3393
3394 const SDLoc &dl(Op);
3395 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
3396 DAG.getUNDEF(MVT::i128), // illegal type
3397 DAG.getConstant(Opc, dl, MVT::i32));
3398}
3399
3400SDValue
3401HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3402 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3403 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3404 unsigned Opc = Op.getConstantOperandVal(2);
3405 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3406}
3407
HexagonTargetLowering::VectorPair
HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a vector operation into two identical operations on the low and
  // high halves of its vector operands, returning the {low, high} results.
  // Non-vector operands are passed unchanged to both halves.
  assert(!Op.isMachineOpcode());
  SmallVector<SDValue, 2> OpsL, OpsH;
  const SDLoc &dl(Op);

  // A VT operand (e.g. the type of SIGN_EXTEND_INREG) must be split as a
  // type, not as a value.
  auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
    MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
    SDValue TV = DAG.getValueType(Ty);
    return std::make_pair(TV, TV);
  };

  for (SDValue A : Op.getNode()->ops()) {
    auto [Lo, Hi] =
        ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
    // Special case for type operand.
    switch (Op.getOpcode()) {
      case ISD::SIGN_EXTEND_INREG:
      case HexagonISD::SSAT:
      case HexagonISD::USAT:
        if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
          std::tie(Lo, Hi) = SplitVTNode(N);
        break;
    }
    OpsL.push_back(Lo);
    OpsH.push_back(Hi);
  }

  // Re-create the operation at half the result type on each operand list.
  MVT ResTy = ty(Op);
  MVT HalfTy = typeSplit(ResTy).first;
  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
  return {L, H};
}
3442
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a memory operation on an HVX vector pair into two single-vector
  // memory operations (LOAD/STORE/MLOAD/MSTORE), chaining them with a
  // TokenFactor. Returns Op unchanged if no split is needed.
  auto *MemN = cast<MemSDNode>(Op.getNode());

  if (!MemN->getMemoryVT().isSimple())
    return Op;

  // Only vector-pair memory types need splitting.
  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  // The second half lives one full HVX register past the first.
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    // For masked ops the accessed size is not known statically (the mask
    // may disable trailing lanes), so use UnknownSize.
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  // Masked load/store: the mask (and pass-through/value) must be split too.
  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}
3527
// Widen a load of a short (sub-HVX) vector into a masked load of one full
// HVX vector: read HwLen bytes under a Q-register mask that enables only
// the first ResLen bytes, then cast the loaded bytes back to the requested
// element type. Returns merged {value, chain}.
// NOTE(review): the extraction this chunk came from dropped original line
// 3555 (the trailing arguments of the getMaskedLoad call below; presumably
// the ISD::UNINDEXED / ISD::NON_EXTLOAD flags) - confirm against upstream.
3528SDValue
3529HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3530 const SDLoc &dl(Op);
3531 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3532 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3533 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3534 "Not widening loads of i1 yet");
3535
3536 SDValue Chain = LoadN->getChain();
3537 SDValue Base = LoadN->getBasePtr();
3538 SDValue Offset = DAG.getUNDEF(MVT::i32);
3539
3540 MVT ResTy = ty(Op);
3541 unsigned HwLen = Subtarget.getVectorLength();
3542 unsigned ResLen = ResTy.getStoreSize();
3543 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3544
// Build the byte-enable predicate; the "vsetq" assert above suggests
// V6_pred_scalar2 sets the first ResLen lanes - confirm.
3545 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3546 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3547 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3548
// The widened load is done as a full vector of bytes, with a memory
// operand widened to HwLen bytes.
3549 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3550 MachineFunction &MF = DAG.getMachineFunction();
3551 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3552
3553 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3554 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3556 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3557 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3558}
3559
3560SDValue
3561HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3562 const SDLoc &dl(Op);
3563 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3564 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3565 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3566 "Not widening stores of i1 yet");
3567
3568 SDValue Chain = StoreN->getChain();
3569 SDValue Base = StoreN->getBasePtr();
3570 SDValue Offset = DAG.getUNDEF(MVT::i32);
3571
3572 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3573 MVT ValueTy = ty(Value);
3574 unsigned ValueLen = ValueTy.getVectorNumElements();
3575 unsigned HwLen = Subtarget.getVectorLength();
3576 assert(isPowerOf2_32(ValueLen));
3577
3578 for (unsigned Len = ValueLen; Len < HwLen; ) {
3579 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3580 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3581 }
3582 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3583
3584 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3585 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3586 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3587 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3588 MachineFunction &MF = DAG.getMachineFunction();
3589 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3590 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3591 MemOp, ISD::UNINDEXED, false, false);
3592}
3593
3594SDValue
3595HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3596 const SDLoc &dl(Op);
3597 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3598 MVT ElemTy = ty(Op0).getVectorElementType();
3599 unsigned HwLen = Subtarget.getVectorLength();
3600
3601 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3602 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3603 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3604 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3605 return SDValue();
3606
3607 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3608 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3609 EVT ResTy =
3610 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3611 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3612 {WideOp0, WideOp1, Op.getOperand(2)});
3613
3614 EVT RetTy = typeLegalize(ty(Op), DAG);
3615 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3616 {SetCC, getZero(dl, MVT::i32, DAG)});
3617}
3618
// Main custom-lowering dispatch for HVX operations. Operations on
// vector-pair types (two HVX registers) are split into single-register
// halves first; everything else dispatches to per-opcode LowerHvx*
// routines.
// NOTE(review): the extraction dropped original line 3669 (a case label
// between VSELECT and SPLAT_VECTOR) and lines 3728-3730 (the case labels
// in front of the LowerHvxPartialReduceMLA return) - confirm upstream.
3619SDValue
3620HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3621 unsigned Opc = Op.getOpcode();
// Pair ops: either the result or any operand is a vector-pair type.
3622 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3623 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3624 return isHvxPairTy(ty(V));
3625 });
3626
3627 if (IsPairOp) {
3628 switch (Opc) {
3629 default:
3630 break;
3631 case ISD::LOAD:
3632 case ISD::STORE:
3633 case ISD::MLOAD:
3634 case ISD::MSTORE:
3635 return SplitHvxMemOp(Op, DAG);
3636 case ISD::SINT_TO_FP:
3637 case ISD::UINT_TO_FP:
3638 case ISD::FP_TO_SINT:
3639 case ISD::FP_TO_UINT:
// Only split same-width conversions; mixed-width ones are equalized
// elsewhere (see EqualizeFpIntConversion in the wrapper below).
3640 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3641 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3642 break;
3643 case ISD::ABS:
3644 case ISD::CTPOP:
3645 case ISD::CTLZ:
3646 case ISD::CTTZ:
3647 case ISD::MUL:
3648 case ISD::FADD:
3649 case ISD::FSUB:
3650 case ISD::FMUL:
3651 case ISD::FMINIMUMNUM:
3652 case ISD::FMAXIMUMNUM:
3653 case ISD::MULHS:
3654 case ISD::MULHU:
3655 case ISD::AND:
3656 case ISD::OR:
3657 case ISD::XOR:
3658 case ISD::SRA:
3659 case ISD::SHL:
3660 case ISD::SRL:
3661 case ISD::FSHL:
3662 case ISD::FSHR:
3663 case ISD::SMIN:
3664 case ISD::SMAX:
3665 case ISD::UMIN:
3666 case ISD::UMAX:
3667 case ISD::SETCC:
3668 case ISD::VSELECT:
// NOTE(review): original line 3669 (another case label) was dropped here.
3670 case ISD::SPLAT_VECTOR:
3671 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3672 case ISD::SIGN_EXTEND:
3673 case ISD::ZERO_EXTEND:
3674 // In general, sign- and zero-extends can't be split and still
3675 // be legal. The only exception is extending bool vectors.
3676 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3677 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3678 break;
3679 }
3680 }
3681
3682 switch (Opc) {
3683 default:
3684 break;
3685 // clang-format off
3686 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3687 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3688 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3689 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3690 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3691 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3692 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3693 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3694 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3695 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3696 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3697 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3698 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3699 case ISD::SRA:
3700 case ISD::SHL:
3701 case ISD::SRL: return LowerHvxShift(Op, DAG);
3702 case ISD::FSHL:
3703 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3704 case ISD::MULHS:
3705 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3706 case ISD::SMUL_LOHI:
3707 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3708 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3709 case ISD::SETCC:
3710 case ISD::INTRINSIC_VOID: return Op;
3711 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3712 case ISD::MLOAD:
3713 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3714 // Unaligned loads will be handled by the default lowering.
3715 case ISD::LOAD: return LowerHvxLoad(Op, DAG);
3716 case ISD::STORE: return LowerHvxStore(Op, DAG);
3717 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3718 case ISD::FP_TO_SINT:
3719 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3720 case ISD::SINT_TO_FP:
3721 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3722
3723 // Special nodes:
3724 case HexagonISD::SMUL_LOHI:
3725 case HexagonISD::UMUL_LOHI:
3726 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3727
// NOTE(review): original lines 3728-3730 (case labels, presumably the
// ISD::PARTIAL_REDUCE_* opcodes) were dropped here.
3731 return LowerHvxPartialReduceMLA(Op, DAG);
3732 // clang-format on
3733 }
3734#ifndef NDEBUG
3735 Op.dumpr(&DAG);
3736#endif
3737 llvm_unreachable("Unhandled HVX operation");
3738}
3739
// Expand a width-changing op (saturation or TL-wrapped resize) into a
// chain of steps, each changing the element width by exactly 2x.
// NOTE(review): the extraction dropped original lines 3753-3754 (case
// labels following SSAT/USAT, presumably HexagonISD::TL_EXTEND and
// TL_TRUNCATE) and lines 3789-3790 (the matching case labels inside
// repeatOp below) - confirm upstream.
3740SDValue
3741HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3742 const {
3743 // Rewrite the extension/truncation/saturation op into steps where each
3744 // step changes the type widths by a factor of 2.
3745 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3746 //
3747 // Some of the vector types in Op may not be legal.
3748
3749 unsigned Opc = Op.getOpcode();
3750 switch (Opc) {
3751 case HexagonISD::SSAT:
3752 case HexagonISD::USAT:
3755 break;
3756 case ISD::ANY_EXTEND:
3757 case ISD::ZERO_EXTEND:
3758 case ISD::SIGN_EXTEND:
3759 case ISD::TRUNCATE:
3760 llvm_unreachable("ISD:: ops will be auto-folded");
3761 break;
3762#ifndef NDEBUG
3763 Op.dump(&DAG);
3764#endif
3765 llvm_unreachable("Unexpected operation");
3766 }
3767
3768 SDValue Inp = Op.getOperand(0);
3769 MVT InpTy = ty(Inp);
3770 MVT ResTy = ty(Op);
3771
3772 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3773 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3774 assert(InpWidth != ResWidth);
3775
// A single 2x step needs no expansion.
3776 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3777 return Op;
3778
3779 const SDLoc &dl(Op);
3780 unsigned NumElems = InpTy.getVectorNumElements();
3781 assert(NumElems == ResTy.getVectorNumElements());
3782
// Emit one step of the same opcode at the given new element width.
3783 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3784 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3785 switch (Opc) {
3786 case HexagonISD::SSAT:
3787 case HexagonISD::USAT:
3788 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3791 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3792 default:
3793 llvm_unreachable("Unexpected opcode");
3794 }
3795 };
3796
// Walk the width towards ResWidth, doubling or halving each step.
3797 SDValue S = Inp;
3798 if (InpWidth < ResWidth) {
3799 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3800 while (InpWidth * 2 <= ResWidth)
3801 S = repeatOp(InpWidth *= 2, S);
3802 } else {
3803 // InpWidth > ResWidth
3804 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3805 while (InpWidth / 2 >= ResWidth)
3806 S = repeatOp(InpWidth /= 2, S);
3807 }
3808 return S;
3809}
3810
// Legalize a width-changing HVX op by widening to HVX, splitting into
// HVX-sized halves, or unwrapping the TL wrapper, depending on how the
// involved types legalize.
// NOTE(review): the extraction dropped original line 3830 (the `if`
// that opens the three-operand branch whose `} else {` appears below;
// presumably a check for HexagonISD::TL_EXTEND/TL_TRUNCATE) - confirm
// upstream.
3811SDValue
3812HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3813 SDValue Inp0 = Op.getOperand(0);
3814 MVT InpTy = ty(Inp0);
3815 MVT ResTy = ty(Op);
3816 unsigned InpWidth = InpTy.getSizeInBits();
3817 unsigned ResWidth = ResTy.getSizeInBits();
3818 unsigned Opc = Op.getOpcode();
3819
3820 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3821 // First, make sure that the narrower type is widened to HVX.
3822 // This may cause the result to be wider than what the legalizer
3823 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3824 // desired type.
3825 auto [WInpTy, WResTy] =
3826 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3827 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy))
3828 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3829 SDValue S;
3831 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3832 Op.getOperand(2));
3833 } else {
3834 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3835 }
3836 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3837 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3838 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3839 // For multi-step extends/truncates (e.g., i8->i32), expand into
3840 // single-step operations first. Splitting a multi-step TL_EXTEND
3841 // would halve the operand type to a sub-HVX size (e.g., v128i8 ->
3842 // v64i8), creating illegal types that cause issues in the type
3843 // legalizer's map tracking. Single-step operations (e.g., i16->i32)
3844 // are safe to split because their halved operand types remain legal.
3845 SDValue T = ExpandHvxResizeIntoSteps(Op, DAG);
3846 if (T != Op)
3847 return T;
3848 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3849 } else {
3850 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3851 return RemoveTLWrapper(Op, DAG);
3852 }
3853 llvm_unreachable("Unexpected situation");
3854}
3855
// Custom operation-legalization hook for HVX: pushes replacement values
// for nodes that need TL-wrapping, widening, splitting, or conversion
// equalization into Results.
// NOTE(review): the extraction dropped original line 3858 (the rest of
// the parameter list, presumably `SmallVectorImpl<SDValue> &Results,
// SelectionDAG &DAG) const {`), line 3891 (likely an assert that the
// split MLOAD produced MERGE_VALUES), and lines 3913-3914 (case labels
// following SSAT/USAT, presumably TL_EXTEND/TL_TRUNCATE) - confirm
// upstream.
3856void
3857HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3859 unsigned Opc = N->getOpcode();
3860 SDValue Op(N, 0);
3861 SDValue Inp0; // Optional first argument.
3862 if (N->getNumOperands() > 0)
3863 Inp0 = Op.getOperand(0);
3864
3865 switch (Opc) {
3866 case ISD::ANY_EXTEND:
3867 case ISD::SIGN_EXTEND:
3868 case ISD::ZERO_EXTEND:
3869 case ISD::TRUNCATE:
3870 if (Subtarget.isHVXElementType(ty(Op)) &&
3871 Subtarget.isHVXElementType(ty(Inp0))) {
3872 Results.push_back(CreateTLWrapper(Op, DAG));
3873 }
3874 break;
3875 case ISD::SETCC:
3876 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3877 if (SDValue T = WidenHvxSetCC(Op, DAG))
3878 Results.push_back(T);
3879 }
3880 break;
3881 case ISD::STORE: {
3882 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3883 SDValue Store = WidenHvxStore(Op, DAG);
3884 Results.push_back(Store);
3885 }
3886 break;
3887 }
3888 case ISD::MLOAD:
3889 if (isHvxPairTy(ty(Op))) {
3890 SDValue S = SplitHvxMemOp(Op, DAG);
3892 Results.push_back(S.getOperand(0));
3893 Results.push_back(S.getOperand(1));
3894 }
3895 break;
3896 case ISD::MSTORE:
3897 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3898 SDValue S = SplitHvxMemOp(Op, DAG);
3899 Results.push_back(S);
3900 }
3901 break;
3902 case ISD::SINT_TO_FP:
3903 case ISD::UINT_TO_FP:
3904 case ISD::FP_TO_SINT:
3905 case ISD::FP_TO_UINT:
// Mixed-width int/fp conversions get their widths equalized first.
3906 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3907 SDValue T = EqualizeFpIntConversion(Op, DAG);
3908 Results.push_back(T);
3909 }
3910 break;
3911 case HexagonISD::SSAT:
3912 case HexagonISD::USAT:
3915 Results.push_back(LegalizeHvxResize(Op, DAG));
3916 break;
3917 default:
3918 break;
3919 }
3920}
3921
// Custom type-legalization hook for HVX: pushes replacement values for
// nodes whose result types need TL-wrapping, widening, or resizing.
// NOTE(review): the extraction dropped original line 3924 (the rest of
// the parameter list, presumably `SmallVectorImpl<SDValue> &Results,
// SelectionDAG &DAG) const {`) and lines 3971-3972 (case labels after
// SSAT/USAT, presumably TL_EXTEND/TL_TRUNCATE) - confirm upstream.
3922void
3923HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3925 unsigned Opc = N->getOpcode();
3926 SDValue Op(N, 0);
3927 SDValue Inp0; // Optional first argument.
3928 if (N->getNumOperands() > 0)
3929 Inp0 = Op.getOperand(0);
3930
3931 switch (Opc) {
3932 case ISD::ANY_EXTEND:
3933 case ISD::SIGN_EXTEND:
3934 case ISD::ZERO_EXTEND:
3935 case ISD::TRUNCATE:
3936 if (Subtarget.isHVXElementType(ty(Op)) &&
3937 Subtarget.isHVXElementType(ty(Inp0))) {
3938 Results.push_back(CreateTLWrapper(Op, DAG));
3939 }
3940 break;
3941 case ISD::SETCC:
3942 if (shouldWidenToHvx(ty(Op), DAG)) {
3943 if (SDValue T = WidenHvxSetCC(Op, DAG))
3944 Results.push_back(T);
3945 }
3946 break;
3947 case ISD::LOAD: {
3948 if (shouldWidenToHvx(ty(Op), DAG)) {
// WidenHvxLoad returns MERGE_VALUES{value, chain}; push both.
3949 SDValue Load = WidenHvxLoad(Op, DAG);
3950 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3951 Results.push_back(Load.getOperand(0));
3952 Results.push_back(Load.getOperand(1));
3953 }
3954 break;
3955 }
3956 case ISD::BITCAST:
3957 if (isHvxBoolTy(ty(Inp0))) {
3958 SDValue C = LowerHvxBitcast(Op, DAG);
3959 Results.push_back(C);
3960 }
3961 break;
3962 case ISD::FP_TO_SINT:
3963 case ISD::FP_TO_UINT:
3964 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3965 SDValue T = EqualizeFpIntConversion(Op, DAG);
3966 Results.push_back(T);
3967 }
3968 break;
3969 case HexagonISD::SSAT:
3970 case HexagonISD::USAT:
3973 Results.push_back(LegalizeHvxResize(Op, DAG));
3974 break;
3975 default:
3976 break;
3977 }
3978}
3979
3980SDValue
3981HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3982 DAGCombinerInfo &DCI) const {
3983 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3984 // to extract-subvector (shuffle V, pick even, pick odd)
3985
3986 assert(Op.getOpcode() == ISD::TRUNCATE);
3987 SelectionDAG &DAG = DCI.DAG;
3988 const SDLoc &dl(Op);
3989
3990 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3991 return SDValue();
3992 SDValue Cast = Op.getOperand(0);
3993 SDValue Src = Cast.getOperand(0);
3994
3995 EVT TruncTy = Op.getValueType();
3996 EVT CastTy = Cast.getValueType();
3997 EVT SrcTy = Src.getValueType();
3998 if (SrcTy.isSimple())
3999 return SDValue();
4000 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
4001 return SDValue();
4002 unsigned SrcLen = SrcTy.getVectorNumElements();
4003 unsigned CastLen = CastTy.getVectorNumElements();
4004 if (2 * CastLen != SrcLen)
4005 return SDValue();
4006
4007 SmallVector<int, 128> Mask(SrcLen);
4008 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
4009 Mask[i] = 2 * i;
4010 Mask[i + CastLen] = 2 * i + 1;
4011 }
4012 SDValue Deal =
4013 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
4014 return opSplit(Deal, dl, DAG).first;
4015}
4016
4017SDValue
4018HexagonTargetLowering::combineConcatOfShuffles(SDValue Op,
4019 SelectionDAG &DAG) const {
4020 // Fold
4021 // concat (shuffle x, y, m1), (shuffle x, y, m2)
4022 // into
4023 // shuffle (concat x, y), undef, m3
4024 if (Op.getNumOperands() != 2)
4025 return SDValue();
4026
4027 const SDLoc &dl(Op);
4028 SDValue V0 = Op.getOperand(0);
4029 SDValue V1 = Op.getOperand(1);
4030
4031 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
4032 return SDValue();
4033 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
4034 return SDValue();
4035
4036 SetVector<SDValue> Order;
4037 Order.insert(V0.getOperand(0));
4038 Order.insert(V0.getOperand(1));
4039 Order.insert(V1.getOperand(0));
4040 Order.insert(V1.getOperand(1));
4041
4042 if (Order.size() > 2)
4043 return SDValue();
4044
4045 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
4046 // result must be the same.
4047 EVT InpTy = V0.getValueType();
4048 assert(InpTy.isVector());
4049 unsigned InpLen = InpTy.getVectorNumElements();
4050
4051 SmallVector<int, 128> LongMask;
4052 auto AppendToMask = [&](SDValue Shuffle) {
4053 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
4054 ArrayRef<int> Mask = SV->getMask();
4055 SDValue X = Shuffle.getOperand(0);
4056 SDValue Y = Shuffle.getOperand(1);
4057 for (int M : Mask) {
4058 if (M == -1) {
4059 LongMask.push_back(M);
4060 continue;
4061 }
4062 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
4063 if (static_cast<unsigned>(M) >= InpLen)
4064 M -= InpLen;
4065
4066 int OutOffset = Order[0] == Src ? 0 : InpLen;
4067 LongMask.push_back(M + OutOffset);
4068 }
4069 };
4070
4071 AppendToMask(V0);
4072 AppendToMask(V1);
4073
4074 SDValue C0 = Order.front();
4075 SDValue C1 = Order.back(); // Can be same as front
4076 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
4077
4078 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
4079 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
4080}
4081
4082// Reassociate concat(p1, p2, ...) into
4083// concat(concat(p1, ...), concat(pi, ...), ...)
4084// where each inner concat produces a predicate where each bit corresponds
4085// to at most BitBytes bytes.
4086// Concatenating predicates decreases the number of bytes per each predicate
4087// bit.
4088SDValue
4089HexagonTargetLowering::combineConcatOfScalarPreds(SDValue Op, unsigned BitBytes,
4090 SelectionDAG &DAG) const {
4091 const SDLoc &dl(Op);
4092 SmallVector<SDValue> Ops(Op->ops());
4093 MVT ResTy = ty(Op);
4094 MVT InpTy = ty(Ops[0]);
4095 unsigned InpLen = InpTy.getVectorNumElements(); // Scalar predicate
4096 unsigned ResLen = ResTy.getVectorNumElements(); // HVX vector predicate
4097 assert(InpLen <= 8 && "Too long for scalar predicate");
4098 assert(ResLen > 8 && "Too short for HVX vector predicate");
4099
4100 unsigned Bytes = 8 / InpLen; // Bytes-per-bit in input
4101
4102 // Already in the right form?
4103 if (Bytes <= BitBytes)
4104 return Op;
4105
4106 ArrayRef<SDValue> Inputs(Ops);
4107 unsigned SliceLen = Bytes / BitBytes;
4108
4110 // (8 / BitBytes) is the desired length of the result of the inner concat.
4111 for (unsigned i = 0; i != ResLen / (8 / BitBytes); ++i) {
4112 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1,
4113 Inputs.slice(SliceLen * i, SliceLen));
4114 Cats.push_back(Cat);
4115 }
4116
4117 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Cats);
4118}
4119
4120SDValue HexagonTargetLowering::combineConcatVectorsBeforeLegal(
4121 SDValue Op, DAGCombinerInfo &DCI) const {
4122 MVT ResTy = ty(Op);
4123 MVT ElemTy = ResTy.getVectorElementType();
4124
4125 if (ElemTy != MVT::i1) {
4126 return combineConcatOfShuffles(Op, DCI.DAG);
4127 }
4128 return SDValue();
4129}
4130
4131// Create the inner partial reduction MLA that can be efficiently lowered. This
4132// function is used by partial and full reductions.
4133SDValue HexagonTargetLowering::createExtendingPartialReduceMLA(
4134 unsigned Opcode, EVT AccEltType, unsigned AccNumElements, EVT InputType,
4135 const SDValue &A, const SDValue &B, unsigned &RemainingReductionRatio,
4136 const SDLoc &DL, SelectionDAG &DAG) const {
4137 const auto &Subtarget = DAG.getSubtarget<HexagonSubtarget>();
4138 if (!Subtarget.useHVXOps())
4139 return SDValue();
4140
4141 EVT InputEltType = InputType.getVectorElementType();
4142
4143 // Find if an optimized instruction for the sub-reduction is available.
4144 unsigned NativeRatio;
4145 if (AccEltType == MVT::i32 && InputEltType == MVT::i8)
4146 NativeRatio = 4;
4147 else
4148 return SDValue();
4149
4150 // We only handle the case when additional reduction will be needed, i.e.
4151 // input is longer by a larger factor than the result.
4152 ElementCount InputEC = InputType.getVectorElementCount();
4153 if (!InputEC.isKnownMultipleOf(AccNumElements * NativeRatio))
4154 return SDValue();
4155
4156 unsigned InputNumElements = InputEC.getFixedValue();
4157 RemainingReductionRatio = InputNumElements / (AccNumElements * NativeRatio);
4158 if (RemainingReductionRatio == 1)
4159 return SDValue();
4160
4161 // Create a reduction by the natively supported factor.
4162 EVT IntermediateType = EVT::getVectorVT(*DAG.getContext(), AccEltType,
4163 InputNumElements / NativeRatio);
4164
4165 SDValue Zero = DAG.getConstant(0, DL, IntermediateType);
4166 return DAG.getNode(Opcode, DL, IntermediateType, Zero, A, B);
4167}
4168
// Match N against mul(ext(A), ext(B)) where the mul's element type equals
// ScalarType. On success, returns the pre-extension operands in A/B and
// the matching PARTIAL_REDUCE_* opcode in Opcode (operands swapped so the
// zero-extended one comes first in the mixed-sign case).
// NOTE(review): the extraction dropped original lines 4185 and 4190 (the
// Opcode assignments for the mixed zext/sext cases, presumably
// ISD::PARTIAL_REDUCE_SUMLA) - confirm upstream.
4169static bool DetectExtendingMultiply(const SDValue &N, EVT ScalarType,
4170 unsigned &Opcode, SDValue &A, SDValue &B) {
4171 SDValue Mul = N;
4172 EVT AccType = Mul.getValueType(); // Vector input type after extension.
4173 if (ScalarType != AccType.getVectorElementType())
4174 return false;
4175 bool swap = false;
4176 if (Mul->getOpcode() != ISD::MUL)
4177 return false;
4178 A = Mul->getOperand(0);
4179 B = Mul->getOperand(1);
4180 if (A.getOpcode() == ISD::ZERO_EXTEND) {
4181 if (B.getOpcode() == ISD::ZERO_EXTEND)
4182 Opcode = ISD::PARTIAL_REDUCE_UMLA;
4183 else if (B.getOpcode() == ISD::SIGN_EXTEND) {
4184 swap = true;
4186 } else
4187 return false;
4188 } else if (A.getOpcode() == ISD::SIGN_EXTEND) {
4189 if (B.getOpcode() == ISD::ZERO_EXTEND)
4191 else if (B.getOpcode() == ISD::SIGN_EXTEND)
4192 Opcode = ISD::PARTIAL_REDUCE_SMLA;
4193 else
4194 return false;
4195 } else
4196 return false;
4197
4198 // Get multiplication arguments before extension.
4199 A = A->getOperand(0);
4200 B = B->getOperand(0);
4201 if (A.getValueType() != B.getValueType())
4202 return false;
4203
4204 if (swap)
4205 std::swap(A, B);
4206
4207 return true;
4208}
4209
4210SDValue HexagonTargetLowering::splitVecReduceAdd(SDNode *N,
4211 SelectionDAG &DAG) const {
4212 if (!Subtarget.useHVXOps())
4213 return SDValue();
4214
4215 EVT ScalarType = N->getValueType(0);
4216 unsigned Opcode;
4217 SDValue A, B;
4218 if (!DetectExtendingMultiply(N->getOperand(0), ScalarType, Opcode, A, B))
4219 return SDValue();
4220
4221 SDLoc DL(N);
4222 unsigned RemainingReductionRatio;
4223 SDValue Partial =
4224 createExtendingPartialReduceMLA(Opcode, ScalarType, 1, A.getValueType(),
4225 A, B, RemainingReductionRatio, DL, DAG);
4226 if (!Partial)
4227 return SDValue();
4228
4229 // We could have inserted a trivial MLA and rely on the folding action,
4230 // similar to how vector_partial_reduce_add is lowered to an MLA in
4231 // SelectionDAGBuilder. However, we just replace the final result since we
4232 // have analyzed the input completely.
4233 return DAG.getNode(ISD::VECREDUCE_ADD, DL, ScalarType, Partial);
4234}
4235
// NOTE(review): the extraction dropped original lines 4274-4275 (the two
// branches of the ternary selecting the follow-up reduction opcode in the
// final getNode call) - confirm upstream.
4236// When possible, separate an MLA reduction with extended operands but
4237// unsupported reduction factor into an extending partial reduction that
4238// can be efficiently lowered, and a follow-up partial reduction.
4239// partial_reduce_mla(a, x, y) ->
4240// partial_reduce_mla(a, partial_reduce_mla(0, x, y), 1)
4241SDValue
4242HexagonTargetLowering::splitExtendingPartialReduceMLA(SDNode *N,
4243 SelectionDAG &DAG) const {
4244 if (!Subtarget.useHVXOps())
4245 return SDValue();
4246
4247 SDValue Acc = N->getOperand(0);
4248 SDValue A = N->getOperand(1);
4249 SDValue B = N->getOperand(2);
4250 if (A.getValueType() != B.getValueType())
4251 return SDValue();
4252
4253 // The types should be declared as custom, but do not split already legal
4254 // operation.
4255 EVT AccType = Acc.getValueType();
4256 EVT InputType = A.getValueType();
4257 if (getPartialReduceMLAAction(N->getOpcode(), AccType, InputType) != Custom)
4258 return SDValue();
4259
4260 SDLoc DL(N);
4261 unsigned RemainingReductionRatio;
4262 SDValue Partial = createExtendingPartialReduceMLA(
4263 N->getOpcode(), AccType.getVectorElementType(),
4264 AccType.getVectorNumElements(), InputType, A, B, RemainingReductionRatio,
4265 DL, DAG);
4266 if (!Partial)
4267 return SDValue();
4268 assert(RemainingReductionRatio <= MaxExpandMLA);
4269
4270 // Create the reduction for the remaining ratio.
4271 EVT IntermediateType = Partial->getOperand(0).getValueType();
4272 SDValue One = DAG.getConstant(1, DL, IntermediateType);
4273 return DAG.getNode(N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
4276 DL, AccType, Acc, Partial, One);
4277}
4278
4279SDValue
4280HexagonTargetLowering::LowerHvxPartialReduceMLA(SDValue Op,
4281 SelectionDAG &DAG) const {
4282 const SDLoc &DL(Op);
4283 SDValue Acc = Op.getOperand(0);
4284 SDValue A = Op.getOperand(1);
4285 SDValue B = Op.getOperand(2);
4286
4287 // Split the input vectors into units of one HVX vector length.
4288 unsigned HwVectorSizeInBits = Subtarget.getVectorLength() * 8;
4289
4290 EVT AccType = Acc.getValueType();
4291 EVT AccEltType = AccType.getVectorElementType();
4292 unsigned AccSubvectorNumElements =
4293 HwVectorSizeInBits / AccEltType.getSizeInBits();
4294 EVT AccSubvectorType =
4295 EVT::getVectorVT(*DAG.getContext(), AccEltType, AccSubvectorNumElements);
4296
4297 EVT InputType = A.getValueType();
4298 assert(InputType.getSizeInBits() % HwVectorSizeInBits == 0);
4299 EVT InputEltType = InputType.getVectorElementType();
4300 unsigned InputSubvectorNumElements =
4301 HwVectorSizeInBits / InputEltType.getSizeInBits();
4302 EVT InputSubvectorType = EVT::getVectorVT(*DAG.getContext(), InputEltType,
4303 InputSubvectorNumElements);
4304
4305 unsigned SubvectorNum = InputType.getFixedSizeInBits() / HwVectorSizeInBits;
4307
4308 for (unsigned I = 0; I != SubvectorNum; ++I) {
4309 SDValue SubvectorAcc = DAG.getExtractSubvector(DL, AccSubvectorType, Acc,
4310 I * AccSubvectorNumElements);
4311 SDValue SubvectorA = DAG.getExtractSubvector(DL, InputSubvectorType, A,
4312 I * InputSubvectorNumElements);
4313 SDValue SubvectorB = DAG.getExtractSubvector(DL, InputSubvectorType, B,
4314 I * InputSubvectorNumElements);
4315 SDValue SubvectorMLA = DAG.getNode(Op.getOpcode(), DL, AccSubvectorType,
4316 SubvectorAcc, SubvectorA, SubvectorB);
4317 Subvectors.push_back(SubvectorMLA);
4318 }
4319
4320 return DAG.getNode(ISD::CONCAT_VECTORS, DL, AccType, Subvectors);
4321}
4322
4323SDValue
4324HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
4325 const {
4326 const SDLoc &dl(N);
4327 SelectionDAG &DAG = DCI.DAG;
4328 SDValue Op(N, 0);
4329 unsigned Opc = Op.getOpcode();
4330
4332
4333 if (Opc == ISD::TRUNCATE)
4334 return combineTruncateBeforeLegal(Op, DCI);
4335 if (Opc == ISD::CONCAT_VECTORS)
4336 return combineConcatVectorsBeforeLegal(Op, DCI);
4337
4338 if (DCI.isBeforeLegalizeOps())
4339 return SDValue();
4340
4341 switch (Opc) {
4342 case HexagonISD::V2Q:
4343 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
4344 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
4345 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
4346 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
4347 }
4348 break;
4349 case HexagonISD::Q2V:
4350 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
4351 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
4352 DAG.getAllOnesConstant(dl, MVT::i32));
4353 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
4354 return getZero(dl, ty(Op), DAG);
4355 break;
4356 case HexagonISD::VINSERTW0:
4357 if (isUndef(Ops[1]))
4358 return Ops[0];
4359 break;
4360 case HexagonISD::VROR: {
4361 if (Ops[0].getOpcode() == HexagonISD::VROR) {
4362 SDValue Vec = Ops[0].getOperand(0);
4363 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
4364 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
4365 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
4366 }
4367 break;
4368 }
4369 }
4370
4371 return SDValue();
4372}
4373
4374bool
4375HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
4376 if (Subtarget.isHVXVectorType(Ty, true))
4377 return false;
4378 auto Action = getPreferredHvxVectorAction(Ty);
4380 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
4381 return false;
4382}
4383
4384bool
4385HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
4386 if (Subtarget.isHVXVectorType(Ty, true))
4387 return false;
4388 auto Action = getPreferredHvxVectorAction(Ty);
4390 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
4391 return false;
4392}
4393
4394bool
4395HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
4396 if (!Subtarget.useHVXOps())
4397 return false;
4398 // If the type of any result, or any operand type are HVX vector types,
4399 // this is an HVX operation.
4400 auto IsHvxTy = [this](EVT Ty) {
4401 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
4402 };
4403 auto IsHvxOp = [this](SDValue Op) {
4404 return Op.getValueType().isSimple() &&
4405 Subtarget.isHVXVectorType(ty(Op), true);
4406 };
4407 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
4408 return true;
4409
4410 // Check if this could be an HVX operation after type widening.
4411 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
4412 if (!Op.getValueType().isSimple())
4413 return false;
4414 MVT ValTy = ty(Op);
4415 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
4416 };
4417
4418 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
4419 if (IsWidenedToHvx(SDValue(N, i)))
4420 return true;
4421 }
4422 return llvm::any_of(N->ops(), IsWidenedToHvx);
4423}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const unsigned MaxExpandMLA
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static bool DetectExtendingMultiply(const SDValue &N, EVT ScalarType, unsigned &Opcode, SDValue &A, SDValue &B)
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
static cl::opt< bool > EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false), cl::desc("Enable FP fast conversion routine."))
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5975
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
const SDValue & getBasePtr() const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
const MachinePointerInfo & getPointerInfo() const
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
iterator_range< value_op_iterator > op_values() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:132
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:138
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
const SDValue & getBasePtr() const
const SDValue & getValue() const
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
LegalizeAction getPartialReduceMLAAction(unsigned Opc, EVT AccVT, EVT InputVT) const
Return how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treated.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const