HexagonISelLoweringHVX.cpp
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
34static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(5, 15, 10);
45 case MVT::f32:
46 return std::make_tuple(8, 127, 23);
47 case MVT::f64:
48 return std::make_tuple(11, 1023, 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
54
55void
56HexagonTargetLowering::initializeHVXLowering() {
57 if (Subtarget.useHVX64BOps()) {
58 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
59 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
60 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
61 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
62 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
63 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
71 // as possible.
72 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
73 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
74 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
75 } else if (Subtarget.useHVX128BOps()) {
76 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
77 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
78 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
79 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
80 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
81 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
82 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
85 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90 }
91 }
92
93 // Set up operation actions.
94
95 bool Use64b = Subtarget.useHVX64BOps();
96 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
99 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101
102 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
103 setOperationAction(Opc, FromTy, Promote);
104 AddPromotedToType(Opc, FromTy, ToTy);
105 };
106
107 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108 // Note: v16i1 -> i16 is handled in type legalization instead of op
109 // legalization.
119
120 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
121 Subtarget.useHVXFloatingPoint()) {
122
123 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
124 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
125
126 for (MVT T : FloatV) {
132
135
138
141 // Custom-lower BUILD_VECTOR. The standard (target-independent)
142 // handling of it would convert it to a load, which is not always
143 // the optimal choice.
145 }
146
147
148 // BUILD_VECTOR with f16 operands cannot be promoted without
149 // promoting the result, so lower the node to vsplat or constant pool
153
154 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
155 // generated.
156 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
157 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
158 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
159 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
160
161 for (MVT P : FloatW) {
171
172 // Custom-lower BUILD_VECTOR. The standard (target-independent)
173 // handling of it would convert it to a load, which is not always
174 // the optimal choice.
176 // Make concat-vectors custom to handle concats of more than 2 vectors.
178
181 }
182
183 if (Subtarget.useHVXQFloatOps()) {
186 } else if (Subtarget.useHVXIEEEFPOps()) {
189 }
190 }
191
192 for (MVT T : LegalV) {
195
207 if (T != ByteV) {
211 }
212
215 if (T.getScalarType() != MVT::i32) {
218 }
219
224 if (T.getScalarType() != MVT::i32) {
227 }
228
230 // Make concat-vectors custom to handle concats of more than 2 vectors.
241 if (T != ByteV) {
243 // HVX only has shifts of words and halfwords.
247
248 // Promote all shuffles to operate on vectors of bytes.
249 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
250 }
251
252 if (Subtarget.useHVXFloatingPoint()) {
253 // Same action for both QFloat and IEEE.
258 }
259
267 }
268
269 for (MVT T : LegalW) {
270 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
271 // independent) handling of it would convert it to a load, which is
272 // not always the optimal choice.
274 // Make concat-vectors custom to handle concats of more than 2 vectors.
276
277 // Custom-lower these operations for pairs. Expand them into a concat
278 // of the corresponding operations on individual vectors.
287
296
307 if (T != ByteW) {
311
312 // Promote all shuffles to operate on vectors of bytes.
313 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
314 }
317
320 if (T.getScalarType() != MVT::i32) {
323 }
324
325 if (Subtarget.useHVXFloatingPoint()) {
326 // Same action for both QFloat and IEEE.
331 }
332 }
333
334 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
335 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
336 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
339
340 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
341 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
342 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
343 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
344 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
345 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
346 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
347 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
348 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
349 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
350 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
351 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
352
353 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
354 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
355 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
356 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
357 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
358 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
359 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
360 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
361 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
362 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
363 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
364 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
365
366 // Boolean vectors.
367
368 for (MVT T : LegalW) {
369 // Boolean types for vector pairs will overlap with the boolean
370 // types for single vectors, e.g.
371 // v64i8 -> v64i1 (single)
372 // v64i16 -> v64i1 (pair)
373 // Set these actions first, and allow the single actions to overwrite
374 // any duplicates.
375 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
380 // Masked load/store takes a mask that may need splitting.
383 }
384
385 for (MVT T : LegalV) {
386 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
397 }
398
399 if (Use64b) {
400 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
402 } else {
403 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
405 }
406
407 // Handle store widening for short vectors.
408 unsigned HwLen = Subtarget.getVectorLength();
409 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
410 if (ElemTy == MVT::i1)
411 continue;
412 int ElemWidth = ElemTy.getFixedSizeInBits();
413 int MaxElems = (8*HwLen) / ElemWidth;
414 for (int N = 2; N < MaxElems; N *= 2) {
415 MVT VecTy = MVT::getVectorVT(ElemTy, N);
416 auto Action = getPreferredVectorAction(VecTy);
425 if (Subtarget.useHVXFloatingPoint()) {
430 }
431
432 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
433 if (!isTypeLegal(BoolTy))
435 }
436 }
437 }
438
440}
441
442unsigned
443HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
444 MVT ElemTy = VecTy.getVectorElementType();
445 unsigned VecLen = VecTy.getVectorNumElements();
446 unsigned HwLen = Subtarget.getVectorLength();
447
448 // Split vectors of i1 that exceed byte vector length.
449 if (ElemTy == MVT::i1 && VecLen > HwLen)
450 return TargetLoweringBase::TypeSplitVector;
451
452 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
453 // For shorter vectors of i1, widen them if any of the corresponding
454 // vectors of integers needs to be widened.
455 if (ElemTy == MVT::i1) {
456 for (MVT T : Tys) {
457 assert(T != MVT::i1);
458 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
459 if (A != ~0u)
460 return A;
461 }
462 return ~0u;
463 }
464
465 // If the size of VecTy is at least half of the vector length,
466 // widen the vector. Note: the threshold was not selected in
467 // any scientific way.
468 if (llvm::is_contained(Tys, ElemTy)) {
469 unsigned VecWidth = VecTy.getSizeInBits();
470 unsigned HwWidth = 8*HwLen;
471 if (VecWidth > 2*HwWidth)
472 return TargetLoweringBase::TypeSplitVector;
473
474 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
475 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
476 return TargetLoweringBase::TypeWidenVector;
477 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
478 return TargetLoweringBase::TypeWidenVector;
479 }
480
481 // Defer to default.
482 return ~0u;
483}
484
485unsigned
486HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
487 unsigned Opc = Op.getOpcode();
488 switch (Opc) {
493 }
495}
496
497SDValue
498HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
499 const SDLoc &dl, SelectionDAG &DAG) const {
500 SmallVector<SDValue> IntOps;
501 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
502 append_range(IntOps, Ops);
503 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
504}
505
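// Concatenate two vector types that have the same element type into one type
// with the combined element count, e.g. (v32i16, v32i16) -> v64i16.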
506MVT
507HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
508 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
509
510 MVT ElemTy = Tys.first.getVectorElementType();
511 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
512 Tys.second.getVectorNumElements());
513}
514
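// Split an even-length vector type into two identical halves,
// e.g. v64i16 -> (v32i16, v32i16).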
515HexagonTargetLowering::TypePair
516HexagonTargetLowering::typeSplit(MVT VecTy) const {
517 assert(VecTy.isVector());
518 unsigned NumElem = VecTy.getVectorNumElements();
519 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
520 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
521 return { HalfTy, HalfTy };
522}
523
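// typeExtElem/typeTruncElem scale the integer element width by Factor while
// keeping the element count, e.g. typeExtElem(v64i8, 2) -> v64i16.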
524MVT
525HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
526 MVT ElemTy = VecTy.getVectorElementType();
527 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
528 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
529}
530
531MVT
532HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
533 MVT ElemTy = VecTy.getVectorElementType();
534 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
535 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
536}
537
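// Bitcast Vec so that its elements have type ElemTy. The total bit width is
// unchanged; only the element count is adjusted.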
538SDValue
539HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
540 SelectionDAG &DAG) const {
541 if (ty(Vec).getVectorElementType() == ElemTy)
542 return Vec;
543 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
544 return DAG.getBitcast(CastTy, Vec);
545}
546
547SDValue
548HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
549 SelectionDAG &DAG) const {
550 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
551 Ops.first, Ops.second);
552}
553
554HexagonTargetLowering::VectorPair
555HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
556 SelectionDAG &DAG) const {
557 TypePair Tys = typeSplit(ty(Vec));
558 if (Vec.getOpcode() == HexagonISD::QCAT)
559 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
560 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
561}
562
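// A "single" HVX type occupies one vector register (HwLen bytes), a "pair"
// occupies two registers, and a "bool" HVX type is a vector-predicate type.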
563bool
564HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
565 return Subtarget.isHVXVectorType(Ty) &&
566 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
567}
568
569bool
570HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
571 return Subtarget.isHVXVectorType(Ty) &&
572 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
573}
574
575bool
576HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
577 return Subtarget.isHVXVectorType(Ty, true) &&
578 Ty.getVectorElementType() == MVT::i1;
579}
580
581bool HexagonTargetLowering::allowsHvxMemoryAccess(
582 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
583 // Bool vectors are excluded by default, but make it explicit to
584 // emphasize that bool vectors cannot be loaded or stored.
585 // Also, disallow double vector stores (to prevent unnecessary
586 // store widening in DAG combiner).
587 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
588 return false;
589 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
590 return false;
591 if (Fast)
592 *Fast = 1;
593 return true;
594}
595
596bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
597 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
598 if (!Subtarget.isHVXVectorType(VecTy))
599 return false;
600 // XXX Should this be false? vmemu are a bit slower than vmem.
601 if (Fast)
602 *Fast = 1;
603 return true;
604}
605
606void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
607 MachineInstr &MI, SDNode *Node) const {
608 unsigned Opc = MI.getOpcode();
609 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
610 MachineBasicBlock &MB = *MI.getParent();
611 MachineFunction &MF = *MB.getParent();
613 DebugLoc DL = MI.getDebugLoc();
614 auto At = MI.getIterator();
615
616 switch (Opc) {
617 case Hexagon::PS_vsplatib:
618 if (Subtarget.useHVXV62Ops()) {
619 // SplatV = A2_tfrsi #imm
620 // OutV = V6_lvsplatb SplatV
621 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
622 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
623 .add(MI.getOperand(1));
624 Register OutV = MI.getOperand(0).getReg();
625 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
626 .addReg(SplatV);
627 } else {
628 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
629 // OutV = V6_lvsplatw SplatV
630 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
631 const MachineOperand &InpOp = MI.getOperand(1);
632 assert(InpOp.isImm());
633 uint32_t V = InpOp.getImm() & 0xFF;
634 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
635 .addImm(V << 24 | V << 16 | V << 8 | V);
636 Register OutV = MI.getOperand(0).getReg();
637 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
638 }
639 MB.erase(At);
640 break;
641 case Hexagon::PS_vsplatrb:
642 if (Subtarget.useHVXV62Ops()) {
643 // OutV = V6_lvsplatb Inp
644 Register OutV = MI.getOperand(0).getReg();
645 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
646 .add(MI.getOperand(1));
647 } else {
648 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
649 const MachineOperand &InpOp = MI.getOperand(1);
650 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
651 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
652 Register OutV = MI.getOperand(0).getReg();
653 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
654 .addReg(SplatV);
655 }
656 MB.erase(At);
657 break;
658 case Hexagon::PS_vsplatih:
659 if (Subtarget.useHVXV62Ops()) {
660 // SplatV = A2_tfrsi #imm
661 // OutV = V6_lvsplath SplatV
662 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
663 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
664 .add(MI.getOperand(1));
665 Register OutV = MI.getOperand(0).getReg();
666 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
667 .addReg(SplatV);
668 } else {
669 // SplatV = A2_tfrsi #imm:#imm
670 // OutV = V6_lvsplatw SplatV
671 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
672 const MachineOperand &InpOp = MI.getOperand(1);
673 assert(InpOp.isImm());
674 uint32_t V = InpOp.getImm() & 0xFFFF;
675 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
676 .addImm(V << 16 | V);
677 Register OutV = MI.getOperand(0).getReg();
678 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
679 }
680 MB.erase(At);
681 break;
682 case Hexagon::PS_vsplatrh:
683 if (Subtarget.useHVXV62Ops()) {
684 // OutV = V6_lvsplath Inp
685 Register OutV = MI.getOperand(0).getReg();
686 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
687 .add(MI.getOperand(1));
688 } else {
689 // SplatV = A2_combine_ll Inp, Inp
690 // OutV = V6_lvsplatw SplatV
691 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
692 const MachineOperand &InpOp = MI.getOperand(1);
693 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
694 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
695 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
696 Register OutV = MI.getOperand(0).getReg();
697 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
698 }
699 MB.erase(At);
700 break;
701 case Hexagon::PS_vsplatiw:
702 case Hexagon::PS_vsplatrw:
703 if (Opc == Hexagon::PS_vsplatiw) {
704 // SplatV = A2_tfrsi #imm
705 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
706 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
707 .add(MI.getOperand(1));
708 MI.getOperand(1).ChangeToRegister(SplatV, false);
709 }
710 // OutV = V6_lvsplatw SplatV/Inp
711 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
712 break;
713 }
714}
715
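// Convert an element index into a byte index by scaling it with the element
// size, e.g. for 32-bit elements the byte index is ElemIdx << 2.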
716SDValue
717HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
718 SelectionDAG &DAG) const {
719 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
720 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
721
722 unsigned ElemWidth = ElemTy.getSizeInBits();
723 if (ElemWidth == 8)
724 return ElemIdx;
725
726 unsigned L = Log2_32(ElemWidth/8);
727 const SDLoc &dl(ElemIdx);
728 return DAG.getNode(ISD::SHL, dl, MVT::i32,
729 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
730}
731
732SDValue
733HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
734 SelectionDAG &DAG) const {
735 unsigned ElemWidth = ElemTy.getSizeInBits();
736 assert(ElemWidth >= 8 && ElemWidth <= 32);
737 if (ElemWidth == 32)
738 return Idx;
739
740 if (ty(Idx) != MVT::i32)
741 Idx = DAG.getBitcast(MVT::i32, Idx);
742 const SDLoc &dl(Idx);
743 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
744 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
745 return SubIdx;
746}
747
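// Rewrite a shuffle of wider elements as a shuffle of bytes, since HVX
// shuffles are promoted to byte vectors. For example, a mask <1,0> on
// 32-bit elements becomes the byte mask <4,5,6,7, 0,1,2,3>.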
748SDValue
749HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
750 SDValue Op1, ArrayRef<int> Mask,
751 SelectionDAG &DAG) const {
752 MVT OpTy = ty(Op0);
753 assert(OpTy == ty(Op1));
754
755 MVT ElemTy = OpTy.getVectorElementType();
756 if (ElemTy == MVT::i8)
757 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
758 assert(ElemTy.getSizeInBits() >= 8);
759
760 MVT ResTy = tyVector(OpTy, MVT::i8);
761 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
762
763 SmallVector<int,128> ByteMask;
764 for (int M : Mask) {
765 if (M < 0) {
766 for (unsigned I = 0; I != ElemSize; ++I)
767 ByteMask.push_back(-1);
768 } else {
769 int NewM = M*ElemSize;
770 for (unsigned I = 0; I != ElemSize; ++I)
771 ByteMask.push_back(NewM+I);
772 }
773 }
774 assert(ResTy.getVectorNumElements() == ByteMask.size());
775 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
776 opCastElem(Op1, MVT::i8, DAG), ByteMask);
777}
778
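// Build a single HVX vector register from a list of scalar values (the main
// BUILD_VECTOR lowering). Strategy: use a splat if all defined values are
// equal, a constant-pool load if all values are constants, a shuffle if all
// values are extracts from one vector, and otherwise assemble two halves
// with rotate+insert and OR them together.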
779SDValue
780HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
781 const SDLoc &dl, MVT VecTy,
782 SelectionDAG &DAG) const {
783 unsigned VecLen = Values.size();
784 MachineFunction &MF = DAG.getMachineFunction();
785 MVT ElemTy = VecTy.getVectorElementType();
786 unsigned ElemWidth = ElemTy.getSizeInBits();
787 unsigned HwLen = Subtarget.getVectorLength();
788
789 unsigned ElemSize = ElemWidth / 8;
790 assert(ElemSize*VecLen == HwLen);
791 SmallVector<SDValue,32> Words;
792
793 if (VecTy.getVectorElementType() != MVT::i32 &&
794 !(Subtarget.useHVXFloatingPoint() &&
795 VecTy.getVectorElementType() == MVT::f32)) {
796 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
797 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
798 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
799 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
800 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
801 Words.push_back(DAG.getBitcast(MVT::i32, W));
802 }
803 } else {
804 for (SDValue V : Values)
805 Words.push_back(DAG.getBitcast(MVT::i32, V));
806 }
807 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
808 unsigned NumValues = Values.size();
809 assert(NumValues > 0);
810 bool IsUndef = true;
811 for (unsigned i = 0; i != NumValues; ++i) {
812 if (Values[i].isUndef())
813 continue;
814 IsUndef = false;
815 if (!SplatV.getNode())
816 SplatV = Values[i];
817 else if (SplatV != Values[i])
818 return false;
819 }
820 if (IsUndef)
821 SplatV = Values[0];
822 return true;
823 };
824
825 unsigned NumWords = Words.size();
826 SDValue SplatV;
827 bool IsSplat = isSplat(Words, SplatV);
828 if (IsSplat && isUndef(SplatV))
829 return DAG.getUNDEF(VecTy);
830 if (IsSplat) {
831 assert(SplatV.getNode());
832 if (isNullConstant(SplatV))
833 return getZero(dl, VecTy, DAG);
834 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
835 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
836 return DAG.getBitcast(VecTy, S);
837 }
838
839 // Delay recognizing constant vectors until here, so that we can generate
840 // a vsplat.
841 SmallVector<ConstantInt*, 128> Consts(VecLen);
842 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
843 if (AllConst) {
844 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
845 (Constant**)Consts.end());
846 Constant *CV = ConstantVector::get(Tmp);
847 Align Alignment(HwLen);
848 SDValue CP =
849 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
850 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
851 MachinePointerInfo::getConstantPool(MF), Alignment);
852 }
853
854 // A special case is a situation where the vector is built entirely from
855 // elements extracted from another vector. This could be done via a shuffle
856 // more efficiently, but typically, the size of the source vector will not
857 // match the size of the vector being built (which precludes the use of a
858 // shuffle directly).
859 // This only handles a single source vector, and the vector being built
860 // should be of a sub-vector type of the source vector type.
861 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
862 SmallVectorImpl<int> &SrcIdx) {
863 SDValue Vec;
864 for (SDValue V : Values) {
865 if (isUndef(V)) {
866 SrcIdx.push_back(-1);
867 continue;
868 }
869 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
870 return false;
871 // All extracts should come from the same vector.
872 SDValue T = V.getOperand(0);
873 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
874 return false;
875 Vec = T;
876 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
877 if (C == nullptr)
878 return false;
879 int I = C->getSExtValue();
880 assert(I >= 0 && "Negative element index");
881 SrcIdx.push_back(I);
882 }
883 SrcVec = Vec;
884 return true;
885 };
886
887 SmallVector<int,128> ExtIdx;
888 SDValue ExtVec;
889 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
890 MVT ExtTy = ty(ExtVec);
891 unsigned ExtLen = ExtTy.getVectorNumElements();
892 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
893 // Construct a new shuffle mask that will produce a vector with the same
894 // number of elements as the input vector, and such that the vector we
895 // want will be the initial subvector of it.
896 SmallVector<int,128> Mask;
897 BitVector Used(ExtLen);
898
899 for (int M : ExtIdx) {
900 Mask.push_back(M);
901 if (M >= 0)
902 Used.set(M);
903 }
904 // Fill the rest of the mask with the unused elements of ExtVec in hopes
905 // that it will result in a permutation of ExtVec's elements. It's still
906 // fine if it doesn't (e.g. if undefs are present, or elements are
907 // repeated), but permutations can always be done efficiently via vdelta
908 // and vrdelta.
909 for (unsigned I = 0; I != ExtLen; ++I) {
910 if (Mask.size() == ExtLen)
911 break;
912 if (!Used.test(I))
913 Mask.push_back(I);
914 }
915
916 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
917 DAG.getUNDEF(ExtTy), Mask);
918 return ExtLen == VecLen ? S : LoHalf(S, DAG);
919 }
920 }
921
922 // Find the most common element to initialize the vector with. This avoids
923 // unnecessary vinsert/valign for cases where the same value is present
924 // many times. Create a histogram of the vector's elements to find the
925 // most common element n.
926 assert(4*Words.size() == Subtarget.getVectorLength());
927 int VecHist[32];
928 int n = 0;
929 for (unsigned i = 0; i != NumWords; ++i) {
930 VecHist[i] = 0;
931 if (Words[i].isUndef())
932 continue;
933 for (unsigned j = i; j != NumWords; ++j)
934 if (Words[i] == Words[j])
935 VecHist[i]++;
936
937 if (VecHist[i] > VecHist[n])
938 n = i;
939 }
940
941 SDValue HalfV = getZero(dl, VecTy, DAG);
942 if (VecHist[n] > 1) {
943 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
944 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
945 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
946 }
947 SDValue HalfV0 = HalfV;
948 SDValue HalfV1 = HalfV;
949
950 // Construct two halves in parallel, then OR them together. Rn and Rm track
951 // the rotation amount (in bytes) needed before the next insertion. One last
952 // rotation is performed post-loop to position the last element.
953 int Rn = 0, Rm = 0;
954 SDValue Sn, Sm;
955 SDValue N = HalfV0;
956 SDValue M = HalfV1;
957 for (unsigned i = 0; i != NumWords/2; ++i) {
958 // Rotate by element count since last insertion.
959 if (Words[i] != Words[n] || VecHist[n] <= 1) {
960 Sn = DAG.getConstant(Rn, dl, MVT::i32);
961 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
962 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
963 {HalfV0, Words[i]});
964 Rn = 0;
965 }
966 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
967 Sm = DAG.getConstant(Rm, dl, MVT::i32);
968 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
969 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
970 {HalfV1, Words[i+NumWords/2]});
971 Rm = 0;
972 }
973 Rn += 4;
974 Rm += 4;
975 }
976 // Perform last rotation.
977 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
978 Sm = DAG.getConstant(Rm, dl, MVT::i32);
979 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
980 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
981
982 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
983 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
984
985 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
986
987 SDValue OutV =
988 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
989 return OutV;
990}
991
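// Create a byte vector in which every group of BitBytes bytes reproduces one
// bit of the predicate PredV, with the used bytes packed at the front of the
// vector ("prefix"); optionally zero-fill the remaining bytes (ZeroFill).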
992SDValue
993HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
994 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
995 MVT PredTy = ty(PredV);
996 unsigned HwLen = Subtarget.getVectorLength();
997 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
998
999 if (Subtarget.isHVXVectorType(PredTy, true)) {
1000 // Move the vector predicate SubV to a vector register, and scale it
1001 // down to match the representation (bytes per type element) that VecV
1002 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1003 // in general) element and put them at the front of the resulting
1004 // vector. This subvector will then be inserted into the Q2V of VecV.
1005 // To avoid having an operation that generates an illegal type (short
1006 // vector), generate a full size vector.
1007 //
1008 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1009 SmallVector<int,128> Mask(HwLen);
1010 // Scale = BitBytes(PredV) / Given BitBytes.
1011 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1012 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1013
1014 for (unsigned i = 0; i != HwLen; ++i) {
1015 unsigned Num = i % Scale;
1016 unsigned Off = i / Scale;
1017 Mask[BlockLen*Num + Off] = i;
1018 }
1019 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1020 if (!ZeroFill)
1021 return S;
1022 // Fill the bytes beyond BlockLen with 0s.
1023 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1024 // when BlockLen < HwLen.
1025 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1026 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1027 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1028 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1029 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
1030 return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1031 }
1032
1033 // Make sure that this is a valid scalar predicate.
1034 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1035
1036 unsigned Bytes = 8 / PredTy.getVectorNumElements();
1037 SmallVector<SDValue,4> Words[2];
1038 unsigned IdxW = 0;
1039
1040 SDValue W0 = isUndef(PredV)
1041 ? DAG.getUNDEF(MVT::i64)
1042 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1043 Words[IdxW].push_back(HiHalf(W0, DAG));
1044 Words[IdxW].push_back(LoHalf(W0, DAG));
1045
1046 while (Bytes < BitBytes) {
1047 IdxW ^= 1;
1048 Words[IdxW].clear();
1049
1050 if (Bytes < 4) {
1051 for (const SDValue &W : Words[IdxW ^ 1]) {
1052 SDValue T = expandPredicate(W, dl, DAG);
1053 Words[IdxW].push_back(HiHalf(T, DAG));
1054 Words[IdxW].push_back(LoHalf(T, DAG));
1055 }
1056 } else {
1057 for (const SDValue &W : Words[IdxW ^ 1]) {
1058 Words[IdxW].push_back(W);
1059 Words[IdxW].push_back(W);
1060 }
1061 }
1062 Bytes *= 2;
1063 }
1064
1065 assert(Bytes == BitBytes);
1066
1067 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1068 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1069 for (const SDValue &W : Words[IdxW]) {
1070 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1071 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1072 }
1073
1074 return Vec;
1075}
1076
1077SDValue
1078HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1079 const SDLoc &dl, MVT VecTy,
1080 SelectionDAG &DAG) const {
1081 // Construct a vector V of bytes, such that a comparison V >u 0 would
1082 // produce the required vector predicate.
1083 unsigned VecLen = Values.size();
1084 unsigned HwLen = Subtarget.getVectorLength();
1085 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1086 SmallVector<SDValue,128> Bytes;
1087 bool AllT = true, AllF = true;
1088
1089 auto IsTrue = [] (SDValue V) {
1090 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1091 return !N->isZero();
1092 return false;
1093 };
1094 auto IsFalse = [] (SDValue V) {
1095 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1096 return N->isZero();
1097 return false;
1098 };
1099
1100 if (VecLen <= HwLen) {
1101 // In the hardware, each bit of a vector predicate corresponds to a byte
1102 // of a vector register. Calculate how many bytes a bit of VecTy
1103 // corresponds to.
1104 assert(HwLen % VecLen == 0);
1105 unsigned BitBytes = HwLen / VecLen;
1106 for (SDValue V : Values) {
1107 AllT &= IsTrue(V);
1108 AllF &= IsFalse(V);
1109
1110 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1111 : DAG.getUNDEF(MVT::i8);
1112 for (unsigned B = 0; B != BitBytes; ++B)
1113 Bytes.push_back(Ext);
1114 }
1115 } else {
1116 // There are as many i1 values as there are bits in a vector register.
1117 // Divide the values into groups of 8 and check that each group consists
1118 // of the same value (ignoring undefs).
1119 for (unsigned I = 0; I != VecLen; I += 8) {
1120 unsigned B = 0;
1121 // Find the first non-undef value in this group.
1122 for (; B != 8; ++B) {
1123 if (!Values[I+B].isUndef())
1124 break;
1125 }
1126 SDValue F = Values[I+B];
1127 AllT &= IsTrue(F);
1128 AllF &= IsFalse(F);
1129
1130 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1131 : DAG.getUNDEF(MVT::i8);
1132 Bytes.push_back(Ext);
1133 // Verify that the rest of values in the group are the same as the
1134 // first.
1135 for (; B != 8; ++B)
1136 assert(Values[I+B].isUndef() || Values[I+B] == F);
1137 }
1138 }
1139
1140 if (AllT)
1141 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1142 if (AllF)
1143 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1144
1145 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1146 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1147 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1148}
1149
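// Extract a single element that fits in 32 bits: extract the containing
// 32-bit word with VEXTRACTW, then pick the desired sub-element out of that
// word for 8- and 16-bit element types.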
1150SDValue
1151HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1152 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1153 MVT ElemTy = ty(VecV).getVectorElementType();
1154
1155 unsigned ElemWidth = ElemTy.getSizeInBits();
1156 assert(ElemWidth >= 8 && ElemWidth <= 32);
1157 (void)ElemWidth;
1158
1159 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1160 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1161 {VecV, ByteIdx});
1162 if (ElemTy == MVT::i32)
1163 return ExWord;
1164
1165 // Have an extracted word, need to extract the smaller element out of it.
1166 // 1. Extract the bits of (the original) IdxV that correspond to the index
1167 // of the desired element in the 32-bit word.
1168 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1169 // 2. Extract the element from the word.
1170 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1171 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1172}
1173
1174SDValue
1175HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1176 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1177 // Implement other return types if necessary.
1178 assert(ResTy == MVT::i1);
1179
1180 unsigned HwLen = Subtarget.getVectorLength();
1181 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1182 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1183
1184 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1185 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1186 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1187
1188 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1189 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1190 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1191}
1192
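// Insert a single element: rotate the vector so that the target word lands in
// lane 0, insert with VINSERTW0, and rotate back. Elements narrower than
// 32 bits are first merged into the existing word.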
1193SDValue
1194HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1195 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1196 MVT ElemTy = ty(VecV).getVectorElementType();
1197
1198 unsigned ElemWidth = ElemTy.getSizeInBits();
1199 assert(ElemWidth >= 8 && ElemWidth <= 32);
1200 (void)ElemWidth;
1201
1202 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1203 SDValue ByteIdxV) {
1204 MVT VecTy = ty(VecV);
1205 unsigned HwLen = Subtarget.getVectorLength();
1206 SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
1207 {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)});
1208 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1209 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1210 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1211 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1212 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1213 return TorV;
1214 };
1215
1216 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1217 if (ElemTy == MVT::i32)
1218 return InsertWord(VecV, ValV, ByteIdx);
1219
1220 // If this is not inserting a 32-bit word, convert it into such a thing.
1221 // 1. Extract the existing word from the target vector.
1222 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1223 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1224 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1225 dl, MVT::i32, DAG);
1226
1227 // 2. Treating the extracted word as a 32-bit vector, insert the given
1228 // value into it.
1229 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1230 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1231 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1232 ValV, SubIdx, dl, ElemTy, DAG);
1233
1234 // 3. Insert the 32-bit word back into the original vector.
1235 return InsertWord(VecV, Ins, ByteIdx);
1236}
1237
1238SDValue
1239HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1240 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1241 unsigned HwLen = Subtarget.getVectorLength();
1242 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1243 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1244
1245 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1246 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1247 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1248 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1249
1250 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1251 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1252}
1253
1254SDValue
1255HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1256 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1257 MVT VecTy = ty(VecV);
1258 unsigned HwLen = Subtarget.getVectorLength();
1259 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1260 MVT ElemTy = VecTy.getVectorElementType();
1261 unsigned ElemWidth = ElemTy.getSizeInBits();
1262
1263 // If the source vector is a vector pair, get the single vector containing
1264 // the subvector of interest. The subvector will never overlap two single
1265 // vectors.
1266 if (isHvxPairTy(VecTy)) {
1267 if (Idx * ElemWidth >= 8*HwLen)
1268 Idx -= VecTy.getVectorNumElements() / 2;
1269
1270 VecV = OrigOp;
1271 if (typeSplit(VecTy).first == ResTy)
1272 return VecV;
1273 }
1274
1275 // The only meaningful subvectors of a single HVX vector are those that
1276 // fit in a scalar register.
1277 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1278
1279 MVT WordTy = tyVector(VecTy, MVT::i32);
1280 SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1281 unsigned WordIdx = (Idx*ElemWidth) / 32;
1282
1283 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1284 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1285 if (ResTy.getSizeInBits() == 32)
1286 return DAG.getBitcast(ResTy, W0);
1287
1288 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1289 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1290 SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1291 return DAG.getBitcast(ResTy, WW);
1292}
1293
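// Extract a subvector of a vector predicate. The result is either a shorter
// HVX predicate (handled with a replicating byte shuffle) or a scalar
// predicate such as v8i1 (handled by comparing the selected bytes against 0).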
1294SDValue
1295HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1296 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1297 MVT VecTy = ty(VecV);
1298 unsigned HwLen = Subtarget.getVectorLength();
1299 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1300 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1301 // IdxV is required to be a constant.
1302 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1303
1304 unsigned ResLen = ResTy.getVectorNumElements();
1305 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1306 unsigned Offset = Idx * BitBytes;
1307 SDValue Undef = DAG.getUNDEF(ByteTy);
1308 SmallVector<int,128> Mask;
1309
1310 if (Subtarget.isHVXVectorType(ResTy, true)) {
1311 // Converting between two vector predicates. Since the result is shorter
1312 // than the source, it will correspond to a vector predicate with the
1313 // relevant bits replicated. The replication count is the ratio of the
1314 // source and target vector lengths.
1315 unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1316 assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1317 for (unsigned i = 0; i != HwLen/Rep; ++i) {
1318 for (unsigned j = 0; j != Rep; ++j)
1319 Mask.push_back(i + Offset);
1320 }
1321 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1322 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
1323 }
1324
1325 // Converting between a vector predicate and a scalar predicate. In the
1326 // vector predicate, a group of BitBytes bits will correspond to a single
1327 // i1 element of the source vector type. Those bits will all have the same
1328 // value. The same will be true for ByteVec, where each byte corresponds
1329 // to a bit in the vector predicate.
1330 // The algorithm is to traverse the ByteVec, going over the i1 values from
1331 // the source vector, and generate the corresponding representation in an
1332 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1333 // elements so that the interesting 8 bytes will be in the low end of the
1334 // vector.
1335 unsigned Rep = 8 / ResLen;
1336 // Make sure the output fills the entire vector register, so repeat the
1337 // 8-byte groups as many times as necessary.
1338 for (unsigned r = 0; r != HwLen/ResLen; ++r) {
1339 // This will generate the indexes of the 8 interesting bytes.
1340 for (unsigned i = 0; i != ResLen; ++i) {
1341 for (unsigned j = 0; j != Rep; ++j)
1342 Mask.push_back(Offset + i*BitBytes);
1343 }
1344 }
1345
1346 SDValue Zero = getZero(dl, MVT::i32, DAG);
1347 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1348 // Combine the two low words from ShuffV into a v8i8, and byte-compare
1349 // them against 0.
1350 SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1351 SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1352 {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
1353 SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
1354 return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1355 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
1356}
1357
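// Insert a subvector (a 32- or 64-bit scalar-sized piece, or a full single
// vector) into an HVX vector or vector pair at a possibly non-constant index.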
1358SDValue
1359HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1360 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1361 MVT VecTy = ty(VecV);
1362 MVT SubTy = ty(SubV);
1363 unsigned HwLen = Subtarget.getVectorLength();
1364 MVT ElemTy = VecTy.getVectorElementType();
1365 unsigned ElemWidth = ElemTy.getSizeInBits();
1366
1367 bool IsPair = isHvxPairTy(VecTy);
1368 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1369 // The two single vectors that VecV consists of, if it's a pair.
1370 SDValue V0, V1;
1371 SDValue SingleV = VecV;
1372 SDValue PickHi;
1373
1374 if (IsPair) {
1375 V0 = LoHalf(VecV, DAG);
1376 V1 = HiHalf(VecV, DAG);
1377
1378 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1379 dl, MVT::i32);
1380 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1381 if (isHvxSingleTy(SubTy)) {
1382 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1383 unsigned Idx = CN->getZExtValue();
1384 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1385 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1386 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1387 }
1388 // If IdxV is not a constant, generate the two variants: with the
1389 // SubV as the high and as the low subregister, and select the right
1390 // pair based on the IdxV.
1391 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1392 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1393 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1394 }
1395 // The subvector being inserted must be entirely contained in one of
1396 // the vectors V0 or V1. Set SingleV to the correct one, and update
1397 // IdxV to be the index relative to the beginning of that vector.
1398 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1399 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1400 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1401 }
1402
1403 // The only meaningful subvectors of a single HVX vector are those that
1404 // fit in a scalar register.
1405 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1406 // Convert IdxV to be index in bytes.
1407 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1408 if (!IdxN || !IdxN->isZero()) {
1409 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1410 DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1411 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1412 }
1413 // When inserting a single word, the rotation back to the original position
1414 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1415 // by (HwLen-4)-Idx.
1416 unsigned RolBase = HwLen;
1417 if (SubTy.getSizeInBits() == 32) {
1418 SDValue V = DAG.getBitcast(MVT::i32, SubV);
1419 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
1420 } else {
1421 SDValue V = DAG.getBitcast(MVT::i64, SubV);
1422 SDValue R0 = LoHalf(V, DAG);
1423 SDValue R1 = HiHalf(V, DAG);
1424 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1425 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1426 DAG.getConstant(4, dl, MVT::i32));
1427 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1428 RolBase = HwLen-4;
1429 }
1430 // If the vector wasn't ror'ed, don't ror it back.
1431 if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1432 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1433 DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1434 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1435 }
1436
1437 if (IsPair) {
1438 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1439 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1440 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1441 }
1442 return SingleV;
1443}
1444
1445SDValue
1446HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1447 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1448 MVT VecTy = ty(VecV);
1449 MVT SubTy = ty(SubV);
1450 assert(Subtarget.isHVXVectorType(VecTy, true));
1451 // VecV is an HVX vector predicate. SubV may be either an HVX vector
1452 // predicate as well, or it can be a scalar predicate.
1453
1454 unsigned VecLen = VecTy.getVectorNumElements();
1455 unsigned HwLen = Subtarget.getVectorLength();
1456 assert(HwLen % VecLen == 0 && "Unexpected vector type");
1457
1458 unsigned Scale = VecLen / SubTy.getVectorNumElements();
1459 unsigned BitBytes = HwLen / VecLen;
1460 unsigned BlockLen = HwLen / Scale;
1461
1462 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1463 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1464 SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
1465 SDValue ByteIdx;
1466
1467 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1468 if (!IdxN || !IdxN->isZero()) {
1469 ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1470 DAG.getConstant(BitBytes, dl, MVT::i32));
1471 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
1472 }
1473
1474 // ByteVec is the target vector VecV rotated in such a way that the
1475 // subvector should be inserted at index 0. Generate a predicate mask
1476 // and use vmux to do the insertion.
1477 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1478 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1479 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1480 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1481 ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1482 // Rotate ByteVec back, and convert to a vector predicate.
1483 if (!IdxN || !IdxN->isZero()) {
1484 SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1485 SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1486 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
1487 }
1488 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1489}
1490
1491SDValue
1492HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1493 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1494 // Sign- and any-extending of a vector predicate to a vector register is
1495 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1496 // a vector of 1s (where the 1s are of type matching the vector type).
1497 assert(Subtarget.isHVXVectorType(ResTy));
1498 if (!ZeroExt)
1499 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1500
1501 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1502 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1503 DAG.getConstant(1, dl, MVT::i32));
1504 SDValue False = getZero(dl, ResTy, DAG);
1505 return DAG.getSelect(dl, ResTy, VecV, True, False);
1506}
1507
1508SDValue
1509HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1510 MVT ResTy, SelectionDAG &DAG) const {
1511 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1512 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1513 // vector register. The remaining bits of the vector register are
1514 // unspecified.
1515
1516 MachineFunction &MF = DAG.getMachineFunction();
1517 unsigned HwLen = Subtarget.getVectorLength();
1518 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1519 MVT PredTy = ty(VecQ);
1520 unsigned PredLen = PredTy.getVectorNumElements();
1521 assert(HwLen % PredLen == 0);
1522 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1523
1524 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1525 SmallVector<Constant*, 128> Tmp;
1526 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1527 // These are bytes with the LSB rotated left with respect to their index.
1528 for (unsigned i = 0; i != HwLen/8; ++i) {
1529 for (unsigned j = 0; j != 8; ++j)
1530 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1531 }
1532 Constant *CV = ConstantVector::get(Tmp);
1533 Align Alignment(HwLen);
1534 SDValue CP =
1535 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1536 SDValue Bytes =
1537 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1538 MachinePointerInfo::getConstantPool(MF), Alignment);
1539
1540 // Select the bytes that correspond to true bits in the vector predicate.
1541 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1542 getZero(dl, VecTy, DAG));
1543 // Calculate the OR of all bytes in each group of 8. That will compress
1544 // all the individual bits into a single byte.
1545 // First, OR groups of 4, via vrmpy with 0x01010101.
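 // Each selected byte has a single, distinct bit set within its 4-byte group,
 // so the vrmpy sum over a group equals the bitwise OR of its bytes.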
1546 SDValue All1 =
1547 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1548 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1549 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1550 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1551 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1552 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1553
1554 // Pick every 8th byte and coalesce them at the beginning of the output.
1555 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1556 // byte and so on.
1558 for (unsigned i = 0; i != HwLen; ++i)
1559 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1560 SDValue Collect =
1561 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1562 return DAG.getBitcast(ResTy, Collect);
1563}
1564
1565SDValue
1566HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1567 const SDLoc &dl, SelectionDAG &DAG) const {
1568 // Take a vector and resize the element type to match the given type.
1569 MVT InpTy = ty(VecV);
1570 if (InpTy == ResTy)
1571 return VecV;
1572
1573 unsigned InpWidth = InpTy.getSizeInBits();
1574 unsigned ResWidth = ResTy.getSizeInBits();
1575
1576 if (InpTy.isFloatingPoint()) {
1577 return InpWidth < ResWidth ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1578 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1579 getZero(dl, MVT::i32, DAG));
1580 }
1581
1582 assert(InpTy.isInteger());
1583
1584 if (InpWidth < ResWidth) {
1585 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1586 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1587 } else {
1588 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1589 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1590 }
1591}
1592
1593SDValue
1594HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1595 SelectionDAG &DAG) const {
1596 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1597
1598 const SDLoc &dl(Vec);
1599 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1600 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1601 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1602}
1603
1604SDValue
1605HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1606 const {
1607 const SDLoc &dl(Op);
1608 MVT VecTy = ty(Op);
1609
1610 unsigned Size = Op.getNumOperands();
1611 SmallVector<SDValue,128> Ops;
1612 for (unsigned i = 0; i != Size; ++i)
1613 Ops.push_back(Op.getOperand(i));
1614
1615 // First, split the BUILD_VECTOR for vector pairs. We could generate
1616 // some pairs directly (via splat), but splats should be generated
1617 // by the combiner prior to getting here.
1618 if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
1619 ArrayRef<SDValue> A(Ops);
1620 MVT SingleTy = typeSplit(VecTy).first;
1621 SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
1622 SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
1623 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1624 }
1625
1626 if (VecTy.getVectorElementType() == MVT::i1)
1627 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1628
1629 // For a BUILD_VECTOR with MVT::f16 operands: since MVT::f16 is not a
1630 // legal type, build the vector with the operands bitcast to i16, then
1631 // bitcast the result back to f16.
1632 if (VecTy.getVectorElementType() == MVT::f16) {
1633 SmallVector<SDValue,64> NewOps;
1634 for (unsigned i = 0; i != Size; i++)
1635 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1636
1637 SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1638 tyVector(VecTy, MVT::i16), NewOps);
1639 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1640 }
1641
1642 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1643}
1644
1645SDValue
1646HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1647 const {
1648 const SDLoc &dl(Op);
1649 MVT VecTy = ty(Op);
1650 MVT ArgTy = ty(Op.getOperand(0));
1651
1652 if (ArgTy == MVT::f16) {
1653 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1654 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1655 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1656 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1657 return DAG.getBitcast(VecTy, Splat);
1658 }
1659
1660 return SDValue();
1661}
1662
1663SDValue
1664HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1665 const {
1666 // Vector concatenation of two integer (non-bool) vectors does not need
1667 // special lowering. Custom-lower concats of bool vectors and expand
1668 // concats of more than 2 vectors.
1669 MVT VecTy = ty(Op);
1670 const SDLoc &dl(Op);
1671 unsigned NumOp = Op.getNumOperands();
1672 if (VecTy.getVectorElementType() != MVT::i1) {
1673 if (NumOp == 2)
1674 return Op;
1675 // Expand the other cases into a build-vector.
1676 SmallVector<SDValue,8> Elems;
1677 for (SDValue V : Op.getNode()->ops())
1678 DAG.ExtractVectorElements(V, Elems);
1679 // A vector of i16 will be broken up into a build_vector of i16's.
1680 // This is a problem, since at the time of operation legalization,
1681 // all operations are expected to be type-legalized, and i16 is not
1682 // a legal type. If any of the extracted elements is not of a valid
1683 // type, sign-extend it to a valid one.
1684 for (unsigned i = 0, e = Elems.size(); i != e; ++i) {
1685 SDValue V = Elems[i];
1686 MVT Ty = ty(V);
1687 if (!isTypeLegal(Ty)) {
1688 MVT NTy = typeLegalize(Ty, DAG);
1689 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1690 Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1691 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1692 V.getOperand(0), V.getOperand(1)),
1693 DAG.getValueType(Ty));
1694 continue;
1695 }
1696 // A few less complicated cases.
1697 switch (V.getOpcode()) {
1698 case ISD::Constant:
1699 Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
1700 break;
1701 case ISD::UNDEF:
1702 Elems[i] = DAG.getUNDEF(NTy);
1703 break;
1704 case ISD::TRUNCATE:
1705 Elems[i] = V.getOperand(0);
1706 break;
1707 default:
1708 llvm_unreachable("Unexpected vector element");
1709 }
1710 }
1711 }
1712 return DAG.getBuildVector(VecTy, dl, Elems);
1713 }
1714
1715 assert(VecTy.getVectorElementType() == MVT::i1);
1716 unsigned HwLen = Subtarget.getVectorLength();
1717 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1718
1719 SDValue Op0 = Op.getOperand(0);
1720
1721 // If the operands are HVX types (i.e. not scalar predicates), then
1722 // defer the concatenation, and create QCAT instead.
1723 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1724 if (NumOp == 2)
1725 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1726
1727 ArrayRef<SDUse> U(Op.getNode()->ops());
1728 SmallVector<SDValue, 4> SV(U.begin(), U.end());
1729 ArrayRef<SDValue> Ops(SV);
1730
1731 MVT HalfTy = typeSplit(VecTy).first;
1732 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1733 Ops.take_front(NumOp/2));
1734 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1735 Ops.take_back(NumOp/2));
1736 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1737 }
1738
1739 // Count how many bytes (in a vector register) each bit in VecTy
1740 // corresponds to.
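// For instance (a sketch): concatenating four v8i1 operands into v32i1
// with HwLen = 128 gives BitBytes = 4 and S = 128 - 8*4 = 96, and each
// prefix holds its operand expanded to 8*4 = 32 bytes at the start of a
// byte vector. Each VROR below then moves what has been accumulated so
// far up by 32 bytes before the next prefix is ORed in (last operand
// first), so operand k ends up in bytes [32k, 32k+32) and the final V2Q
// turns that byte layout back into a v32i1 predicate.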
1741 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1742
1743 SmallVector<SDValue,8> Prefixes;
1744 for (SDValue V : Op.getNode()->op_values()) {
1745 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1746 Prefixes.push_back(P);
1747 }
1748
1749 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1750 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1751 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1752 SDValue Res = getZero(dl, ByteTy, DAG);
1753 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1754 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1755 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1756 }
1757 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1758}
1759
1760SDValue
1761HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1762 const {
1763 // Change the type of the extracted element to i32.
1764 SDValue VecV = Op.getOperand(0);
1765 MVT ElemTy = ty(VecV).getVectorElementType();
1766 const SDLoc &dl(Op);
1767 SDValue IdxV = Op.getOperand(1);
1768 if (ElemTy == MVT::i1)
1769 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1770
1771 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1772}
1773
1774SDValue
1775HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1776 const {
1777 const SDLoc &dl(Op);
1778 MVT VecTy = ty(Op);
1779 SDValue VecV = Op.getOperand(0);
1780 SDValue ValV = Op.getOperand(1);
1781 SDValue IdxV = Op.getOperand(2);
1782 MVT ElemTy = ty(VecV).getVectorElementType();
1783 if (ElemTy == MVT::i1)
1784 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1785
1786 if (ElemTy == MVT::f16) {
1787 SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
1788 tyVector(VecTy, MVT::i16),
1789 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1790 DAG.getBitcast(MVT::i16, ValV), IdxV);
1791 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1792 }
1793
1794 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1795}
1796
1797SDValue
1798HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1799 const {
1800 SDValue SrcV = Op.getOperand(0);
1801 MVT SrcTy = ty(SrcV);
1802 MVT DstTy = ty(Op);
1803 SDValue IdxV = Op.getOperand(1);
1804 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1805 assert(Idx % DstTy.getVectorNumElements() == 0);
1806 (void)Idx;
1807 const SDLoc &dl(Op);
1808
1809 MVT ElemTy = SrcTy.getVectorElementType();
1810 if (ElemTy == MVT::i1)
1811 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1812
1813 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1814}
1815
1816SDValue
1817HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1818 const {
1819 // Idx does not need to be a constant.
1820 SDValue VecV = Op.getOperand(0);
1821 SDValue ValV = Op.getOperand(1);
1822 SDValue IdxV = Op.getOperand(2);
1823
1824 const SDLoc &dl(Op);
1825 MVT VecTy = ty(VecV);
1826 MVT ElemTy = VecTy.getVectorElementType();
1827 if (ElemTy == MVT::i1)
1828 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1829
1830 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1831}
1832
1833SDValue
1834HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1835 // Lower any-extends of boolean vectors to sign-extends, since they
1836 // translate directly to Q2V. Zero-extending could also be done equally
1837 // fast, but Q2V is used/recognized in more places.
1838 // For all other vectors, use zero-extend.
1839 MVT ResTy = ty(Op);
1840 SDValue InpV = Op.getOperand(0);
1841 MVT ElemTy = ty(InpV).getVectorElementType();
1842 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1843 return LowerHvxSignExt(Op, DAG);
1844 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1845}
1846
1847SDValue
1848HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1849 MVT ResTy = ty(Op);
1850 SDValue InpV = Op.getOperand(0);
1851 MVT ElemTy = ty(InpV).getVectorElementType();
1852 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1853 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1854 return Op;
1855}
1856
1857SDValue
1858HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1859 MVT ResTy = ty(Op);
1860 SDValue InpV = Op.getOperand(0);
1861 MVT ElemTy = ty(InpV).getVectorElementType();
1862 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1863 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1864 return Op;
1865}
1866
1867SDValue
1868HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1869 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1870 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
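// As a quick check of the identity (illustrative values only): for an
// 8-bit lane x = 0b00010100 (20),
//   x - 1 = 0b00010011, ~x = 0b11101011, ~x & (x-1) = 0b00000011,
//   ctlz(0b00000011) = 6, so cttz(x) = 8 - 6 = 2, matching the two
//   trailing zeros of x.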
1871 const SDLoc &dl(Op);
1872 MVT ResTy = ty(Op);
1873 SDValue InpV = Op.getOperand(0);
1874 assert(ResTy == ty(InpV));
1875
1876 // Calculate the vectors of 1 and bitwidth(x).
1877 MVT ElemTy = ty(InpV).getVectorElementType();
1878 unsigned ElemWidth = ElemTy.getSizeInBits();
1879
1880 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1881 DAG.getConstant(1, dl, MVT::i32));
1882 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1883 DAG.getConstant(ElemWidth, dl, MVT::i32));
1884 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1885 DAG.getConstant(-1, dl, MVT::i32));
1886
1887 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1888 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1889 // it separately in custom combine or selection).
1890 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1891 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1892 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1893 return DAG.getNode(ISD::SUB, dl, ResTy,
1894 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1895}
1896
1897SDValue
1898HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1899 const SDLoc &dl(Op);
1900 MVT ResTy = ty(Op);
1901 assert(ResTy.getVectorElementType() == MVT::i32);
1902
1903 SDValue Vs = Op.getOperand(0);
1904 SDValue Vt = Op.getOperand(1);
1905
1906 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1907 unsigned Opc = Op.getOpcode();
1908
1909 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
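// In scalar terms (a reference sketch; "mulhu_ref" is only a name used here):
//   uint32_t mulhu_ref(uint32_t a, uint32_t b) {
//     return (uint32_t)(((uint64_t)a * b) >> 32);
//   }
// i.e. MULHU/MULHS are exactly the HI half of the corresponding LOHI node,
// which is why value #1 of the node is returned below.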
1910 if (Opc == ISD::MULHU)
1911 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1912 if (Opc == ISD::MULHS)
1913 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1914
1915#ifndef NDEBUG
1916 Op.dump(&DAG);
1917#endif
1918 llvm_unreachable("Unexpected mulh operation");
1919}
1920
1921SDValue
1922HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
1923 const SDLoc &dl(Op);
1924 unsigned Opc = Op.getOpcode();
1925 SDValue Vu = Op.getOperand(0);
1926 SDValue Vv = Op.getOperand(1);
1927
1928 // If the HI part is not used, convert it to a regular MUL.
1929 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
1930 // Need to preserve the types and the number of values.
1931 SDValue Hi = DAG.getUNDEF(ty(HiVal));
1932 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
1933 return DAG.getMergeValues({Lo, Hi}, dl);
1934 }
1935
1936 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
1937 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
1938
1939 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
1940 // valued nodes.
1941 if (Subtarget.useHVXV62Ops())
1942 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1943
1944 if (Opc == HexagonISD::SMUL_LOHI) {
1945 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI;
1946 // for the other signedness combinations, LOHI is cheaper.
1947 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
1948 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
1949 SDValue Lo = DAG.getUNDEF(ty(LoVal));
1950 return DAG.getMergeValues({Lo, Hi}, dl);
1951 }
1952 }
1953
1954 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1955}
1956
1957SDValue
1958HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1959 SDValue Val = Op.getOperand(0);
1960 MVT ResTy = ty(Op);
1961 MVT ValTy = ty(Val);
1962 const SDLoc &dl(Op);
1963
1964 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
1965 unsigned HwLen = Subtarget.getVectorLength();
1966 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
1967 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
1968 unsigned BitWidth = ResTy.getSizeInBits();
1969
1970 if (BitWidth < 64) {
1971 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
1972 dl, MVT::i32, DAG);
1973 if (BitWidth == 32)
1974 return W0;
1975 assert(BitWidth < 32u);
1976 return DAG.getZExtOrTrunc(W0, dl, ResTy);
1977 }
1978
1979 // The result is >= 64 bits. The only options are 64 or 128.
1980 assert(BitWidth == 64 || BitWidth == 128);
1981 SmallVector<SDValue, 4> Words;
1982 for (unsigned i = 0; i != BitWidth/32; ++i) {
1983 SDValue W = extractHvxElementReg(
1984 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
1985 Words.push_back(W);
1986 }
1987 SmallVector<SDValue,2> Combines;
1988 assert(Words.size() % 2 == 0);
1989 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
1990 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
1991 Combines.push_back(C);
1992 }
1993
1994 if (BitWidth == 64)
1995 return Combines[0];
1996
1997 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
1998 }
1999 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2000 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2001 unsigned BitWidth = ValTy.getSizeInBits();
2002 unsigned HwLen = Subtarget.getVectorLength();
2003 assert(BitWidth == HwLen);
2004
2005 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2006 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2007 // Splat each byte of Val 8 times.
2008 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2009 // where b0, b1,..., b15 are least to most significant bytes of Val.
2010 SmallVector<SDValue, 128> Bytes;
2011 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2012 // These are bytes with the LSB rotated left with respect to their index.
2013 SmallVector<SDValue, 128> Tmp;
2014 for (unsigned I = 0; I != HwLen / 8; ++I) {
2015 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2016 SDValue Byte =
2017 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2018 for (unsigned J = 0; J != 8; ++J) {
2019 Bytes.push_back(Byte);
2020 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2021 }
2022 }
2023
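// As a sketch with only the least significant byte of Val shown: if that
// byte is 0b10110001, lanes 0..7 of I2V all hold 0b10110001, Tmp holds
// 0x01,0x02,...,0x80 in those lanes, and the AND below leaves lanes
// 0, 4, 5 and 7 non-zero. V2Q then sets exactly those predicate lanes,
// so lane k of the result equals bit k of Val, as intended.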
2024 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2025 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2026 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2027
2028 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2029 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2030 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2031 }
2032
2033 return Op;
2034}
2035
2036SDValue
2037HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2038 // Sign- and zero-extends are legal.
2039 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2040 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2041 Op.getOperand(0));
2042}
2043
2044SDValue
2045HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2046 MVT ResTy = ty(Op);
2047 if (ResTy.getVectorElementType() != MVT::i1)
2048 return Op;
2049
2050 const SDLoc &dl(Op);
2051 unsigned HwLen = Subtarget.getVectorLength();
2052 unsigned VecLen = ResTy.getVectorNumElements();
2053 assert(HwLen % VecLen == 0);
2054 unsigned ElemSize = HwLen / VecLen;
2055
2056 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2057 SDValue S =
2058 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2059 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2060 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2061 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2062}
2063
2064SDValue
2065HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2066 if (SDValue S = getVectorShiftByInt(Op, DAG))
2067 return S;
2068 return Op;
2069}
2070
2071SDValue
2072HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2073 SelectionDAG &DAG) const {
2074 unsigned Opc = Op.getOpcode();
2075 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2076
2077 // Make sure the shift amount is within the range of the bitwidth
2078 // of the element type.
2079 SDValue A = Op.getOperand(0);
2080 SDValue B = Op.getOperand(1);
2081 SDValue S = Op.getOperand(2);
2082
2083 MVT InpTy = ty(A);
2084 MVT ElemTy = InpTy.getVectorElementType();
2085
2086 const SDLoc &dl(Op);
2087 unsigned ElemWidth = ElemTy.getSizeInBits();
2088 bool IsLeft = Opc == ISD::FSHL;
2089
2090 // The expansion into regular shifts produces worse code for i8 and for
2091 // right shift of i32 on v65+.
2092 bool UseShifts = ElemTy != MVT::i8;
2093 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2094 UseShifts = false;
2095
2096 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2097 // If this is a funnel shift by a scalar, lower it into regular shifts.
2098 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2099 SDValue ModS =
2100 DAG.getNode(ISD::AND, dl, MVT::i32,
2101 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2102 SDValue NegS =
2103 DAG.getNode(ISD::SUB, dl, MVT::i32,
2104 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2105 SDValue IsZero =
2106 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2107 // FSHL A, B => (A << ModS) | (B >> NegS)
2108 // FSHR A, B => (A << NegS) | (B >> ModS)
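// For example (a sketch): for v32i16 A, B and a splat shift amount of 3,
// ModS = 3 & 15 = 3 and NegS = 16 - 3 = 13, so FSHL produces
// (A << 3) | (B >> 13) and FSHR produces (A << 13) | (B >> 3); when the
// masked amount is 0 the select below returns A (FSHL) or B (FSHR)
// instead of the bogus OR.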
2109 SDValue Part1 =
2110 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2111 SDValue Part2 =
2112 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2113 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2114 // If the shift amount was 0, pick A or B, depending on the direction.
2115 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2116 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2117 }
2118
2119 SDValue Mask = DAG.getSplatBuildVector(
2120 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2121
2122 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2123 return DAG.getNode(MOpc, dl, ty(Op),
2124 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2125}
2126
2127SDValue
2128HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2129 const SDLoc &dl(Op);
2130 unsigned IntNo = Op.getConstantOperandVal(0);
2131 SmallVector<SDValue> Ops(Op->ops());
2132
2133 auto Swap = [&](SDValue P) {
2134 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2135 };
2136
2137 switch (IntNo) {
2138 case Intrinsic::hexagon_V6_pred_typecast:
2139 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2140 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2141 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2142 if (ResTy == InpTy)
2143 return Ops[1];
2144 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2145 }
2146 break;
2147 }
2148 case Intrinsic::hexagon_V6_vmpyss_parts:
2149 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2150 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2151 {Ops[1], Ops[2]}));
2152 case Intrinsic::hexagon_V6_vmpyuu_parts:
2153 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2154 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2155 {Ops[1], Ops[2]}));
2156 case Intrinsic::hexagon_V6_vmpyus_parts:
2157 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2158 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2159 {Ops[1], Ops[2]}));
2160 }
2161 } // switch
2162
2163 return Op;
2164}
2165
2166SDValue
2167HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2168 const SDLoc &dl(Op);
2169 unsigned HwLen = Subtarget.getVectorLength();
2170 MachineFunction &MF = DAG.getMachineFunction();
2171 auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
2172 SDValue Mask = MaskN->getMask();
2173 SDValue Chain = MaskN->getChain();
2174 SDValue Base = MaskN->getBasePtr();
2175 auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
2176
2177 unsigned Opc = Op->getOpcode();
2178 assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);
2179
2180 if (Opc == ISD::MLOAD) {
2181 MVT ValTy = ty(Op);
2182 SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
2183 SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
2184 if (isUndef(Thru))
2185 return Load;
2186 SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
2187 return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
2188 }
2189
2190 // MSTORE
2191 // HVX only has aligned masked stores.
2192
2193 // TODO: Fold negations of the mask into the store.
2194 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2195 SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
2196 SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
2197
2198 if (MaskN->getAlign().value() % HwLen == 0) {
2199 SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2200 {Mask, Base, Offset0, Value, Chain}, DAG);
2201 DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
2202 return Store;
2203 }
2204
2205 // Unaligned case.
2206 auto StoreAlign = [&](SDValue V, SDValue A) {
2207 SDValue Z = getZero(dl, ty(V), DAG);
2208 // TODO: use funnel shifts?
2209 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2210 // upper half.
2211 SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2212 SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2213 return std::make_pair(LoV, HiV);
2214 };
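// A sketch of the intent (not a spec of vlalign): the same rotation by the
// base address is applied to both the value and the byte-expanded mask, so
// every byte of Value ends up in the aligned HwLen-byte block that contains
// its target address, with the rotated mask enabling only those bytes. The
// two predicated stores at offsets 0 and HwLen below then cover the whole
// unaligned span without touching bytes outside the original mask.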
2215
2216 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2217 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2218 SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2219 VectorPair Tmp = StoreAlign(MaskV, Base);
2220 VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2221 DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2222 VectorPair ValueU = StoreAlign(Value, Base);
2223
2224 SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2225 SDValue StoreLo =
2226 getInstr(StoreOpc, dl, MVT::Other,
2227 {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2228 SDValue StoreHi =
2229 getInstr(StoreOpc, dl, MVT::Other,
2230 {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2231 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2232 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2233 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2234}
2235
2236SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2237 SelectionDAG &DAG) const {
2238 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2239 // is legal (done via a pattern).
2240 assert(Subtarget.useHVXQFloatOps());
2241
2242 assert(Op->getOpcode() == ISD::FP_EXTEND);
2243
2244 MVT VecTy = ty(Op);
2245 MVT ArgTy = ty(Op.getOperand(0));
2246 const SDLoc &dl(Op);
2247 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2248
2249 SDValue F16Vec = Op.getOperand(0);
2250
2251 APFloat FloatVal = APFloat(1.0f);
2252 bool Ignored;
2253 FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
2254 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2255 SDValue VmpyVec =
2256 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2257
2258 MVT HalfTy = typeSplit(VecTy).first;
2259 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2260 SDValue LoVec =
2261 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2262 SDValue HiVec =
2263 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2264
2265 SDValue ShuffVec =
2266 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2267 {HiVec, LoVec, DAG.getConstant(-4, dl, MVT::i32)}, DAG);
2268
2269 return ShuffVec;
2270}
2271
2272SDValue
2273HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2274 // Catch invalid conversion ops (just in case).
2275 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2276 Op.getOpcode() == ISD::FP_TO_UINT);
2277
2278 MVT ResTy = ty(Op);
2279 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2280 MVT IntTy = ResTy.getVectorElementType();
2281
2282 if (Subtarget.useHVXIEEEFPOps()) {
2283 // There are only conversions from f16.
2284 if (FpTy == MVT::f16) {
2285 // Other int types aren't legal in HVX, so we shouldn't see them here.
2286 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2287 // Conversions to i8 and i16 are legal.
2288 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2289 return Op;
2290 }
2291 }
2292
2293 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2294 return EqualizeFpIntConversion(Op, DAG);
2295
2296 return ExpandHvxFpToInt(Op, DAG);
2297}
2298
2299SDValue
2300HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2301 // Catch invalid conversion ops (just in case).
2302 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2303 Op.getOpcode() == ISD::UINT_TO_FP);
2304
2305 MVT ResTy = ty(Op);
2306 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2307 MVT FpTy = ResTy.getVectorElementType();
2308
2309 if (Subtarget.useHVXIEEEFPOps()) {
2310 // There are only conversions to f16.
2311 if (FpTy == MVT::f16) {
2312 // Other int types aren't legal in HVX, so we shouldn't see them here.
2313 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2314 // i8, i16 -> f16 is legal.
2315 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2316 return Op;
2317 }
2318 }
2319
2320 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2321 return EqualizeFpIntConversion(Op, DAG);
2322
2323 return ExpandHvxIntToFp(Op, DAG);
2324}
2325
2326HexagonTargetLowering::TypePair
2327HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2328 // Compare the widths of elements of the two types, and extend the narrower
2329 // type to match the width of the wider type. For vector types, apply this
2330 // to the element type.
2331 assert(Ty0.isVector() == Ty1.isVector());
2332
2333 MVT ElemTy0 = Ty0.getScalarType();
2334 MVT ElemTy1 = Ty1.getScalarType();
2335
2336 unsigned Width0 = ElemTy0.getSizeInBits();
2337 unsigned Width1 = ElemTy1.getSizeInBits();
2338 unsigned MaxWidth = std::max(Width0, Width1);
2339
2340 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2341 if (ScalarTy.isInteger())
2342 return MVT::getIntegerVT(Width);
2343 assert(ScalarTy.isFloatingPoint());
2344 return MVT::getFloatingPointVT(Width);
2345 };
2346
2347 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2348 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2349
2350 if (!Ty0.isVector()) {
2351 // Both types are scalars.
2352 return {WideETy0, WideETy1};
2353 }
2354
2355 // Vector types.
2356 unsigned NumElem = Ty0.getVectorNumElements();
2357 assert(NumElem == Ty1.getVectorNumElements());
2358
2359 return {MVT::getVectorVT(WideETy0, NumElem),
2360 MVT::getVectorVT(WideETy1, NumElem)};
2361}
2362
2363HexagonTargetLowering::TypePair
2364HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2365 // Compare the numbers of elements of two vector types, and widen the
2366 // narrower one to match the number of elements in the wider one.
2367 assert(Ty0.isVector() && Ty1.isVector());
2368
2369 unsigned Len0 = Ty0.getVectorNumElements();
2370 unsigned Len1 = Ty1.getVectorNumElements();
2371 if (Len0 == Len1)
2372 return {Ty0, Ty1};
2373
2374 unsigned MaxLen = std::max(Len0, Len1);
2375 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2376 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2377}
2378
2379MVT
2380HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2381 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2382 assert(LegalTy.isSimple());
2383 return LegalTy.getSimpleVT();
2384}
2385
2386MVT
2387HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2388 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2389 assert(Ty.getSizeInBits() <= HwWidth);
2390 if (Ty.getSizeInBits() == HwWidth)
2391 return Ty;
2392
2393 MVT ElemTy = Ty.getScalarType();
2394 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2395}
2396
2397HexagonTargetLowering::VectorPair
2398HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2399 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2400 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2401 // whether an overflow has occurred.
2402 MVT ResTy = ty(A);
2403 assert(ResTy == ty(B));
2404 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2405
2406 if (!Signed) {
2407 // V62+ has V6_vaddcarry, but it requires an input predicate, so it doesn't
2408 // save any instructions.
2409 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2410 SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2411 return {Add, Ovf};
2412 }
2413
2414 // Signed overflow has happened if:
2415 // (A, B have the same sign) and (A+B has a different sign from either)
2416 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
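// For example (8-bit lanes for brevity): A = 100 (0x64), B = 50 (0x32);
// A+B wraps to 0x96 (-106). Then ~A ^ B = 0xA9 and (A+B) ^ B = 0xA4, whose
// AND is 0xA0: the sign bit is set, so the overflow is detected, matching
// 100 + 50 = 150 being out of range for int8.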
2417 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2418 SDValue NotA =
2419 DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getConstant(-1, dl, ResTy)});
2420 SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2421 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2422 SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
2423 SDValue MSB =
2424 DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2425 return {Add, MSB};
2426}
2427
2428HexagonTargetLowering::VectorPair
2429HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
2430 bool Signed, SelectionDAG &DAG) const {
2431 // Shift Val right by Amt bits, round the result to the nearest integer,
2432 // tie-break by rounding halves to even integer.
2433
2434 const SDLoc &dl(Val);
2435 MVT ValTy = ty(Val);
2436
2437 // This should also work for signed integers.
2438 //
2439 // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2440 // bool ovf = (inp > tmp0);
2441 // uint rup = (inp >> Amt) & 1;
2442 //
2443 // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
2444 // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
2445 // uint tmp3 = tmp2 + rup;
2446 // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
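// A couple of worked cases for Amt = 3 (illustrative only):
//   inp = 20: tmp0 = 23, rup = 0, tmp1 = tmp2 = 5, frac = (5+0) >> 1 = 2
//             (20/8 = 2.5 rounds to the even value 2);
//   inp = 28: tmp0 = 31, rup = 1, tmp1 = tmp2 = 7, frac = (7+1) >> 1 = 4
//             (28/8 = 3.5 rounds to the even value 4);
//   inp = 21: tmp0 = 24, tmp1 = 5 != tmp2 = 6, frac = 6 >> 1 = 3
//             (21/8 = 2.625 rounds to 3).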
2447 unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
2448 MVT ElemTy = MVT::getIntegerVT(ElemWidth);
2449 MVT IntTy = tyVector(ValTy, ElemTy);
2450 MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
2451 unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;
2452
2453 SDValue Inp = DAG.getBitcast(IntTy, Val);
2454 SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);
2455
2456 SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
2457 SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
2458 SDValue Zero = getZero(dl, IntTy, DAG);
2459 SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
2460 SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
2461 auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);
2462
2463 SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
2464 SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
2465 SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
2466 SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);
2467
2468 SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
2469 SDValue One = DAG.getConstant(1, dl, IntTy);
2470 SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
2471 SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
2472 SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
2473 return {Mux, Ovf};
2474}
2475
2476SDValue
2477HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
2478 SelectionDAG &DAG) const {
2479 MVT VecTy = ty(A);
2480 MVT PairTy = typeJoin({VecTy, VecTy});
2481 assert(VecTy.getVectorElementType() == MVT::i32);
2482
2483 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2484
2485 // mulhs(A,B) =
2486 // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
2487 // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
2488 // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
2489 // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
2490 // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
2491 // anything, so it cannot produce any carry over to higher bits),
2492 // so everything in [] can be shifted by 16 without loss of precision.
2493 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
2494 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
2495 // The final additions need to make sure to properly maintain any carry-
2496 // out bits.
2497 //
2498 // Hi(B) Lo(B)
2499 // Hi(A) Lo(A)
2500 // --------------
2501 // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this,
2502 // Hi(B)*Lo(A) | + dropping the low 16 bits
2503 // Hi(A)*Lo(B) | T2
2504 // Hi(B)*Hi(A)
2505
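// A scalar reference for what this sequence computes per 32-bit lane
// (a sketch; "mulhs_ref" is only a name used here):
//   int32_t mulhs_ref(int32_t a, int32_t b) {
//     return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
//   }
// The half-word decomposition above reproduces this using only halfword
// multiplies, which is all this v60 path relies on.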
2506 SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
2507 // T1 = get Hi(A) into low halves.
2508 SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
2509 // P0 = interleaved T1.h*B.uh (full precision product)
2510 SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
2511 // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
2512 SDValue T2 = LoHalf(P0, DAG);
2513 // We need to add T0+T2, recording the carry-out, which will be 1<<16
2514 // added to the final sum.
2515 // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2516 SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
2517 // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2518 SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
2519 // T3 = full-precision(T0+T2) >> 16
2520 // The low halves are added-unsigned, the high ones are added-signed.
2521 SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2522 {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
2523 SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
2524 // P3 = interleaved Hi(B)*Hi(A) (full precision),
2525 // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
2526 SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
2527 SDValue T5 = LoHalf(P3, DAG);
2528 // Add:
2529 SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
2530 return T6;
2531}
2532
2533SDValue
2534HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
2535 bool SignedB, const SDLoc &dl,
2536 SelectionDAG &DAG) const {
2537 MVT VecTy = ty(A);
2538 MVT PairTy = typeJoin({VecTy, VecTy});
2539 assert(VecTy.getVectorElementType() == MVT::i32);
2540
2541 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2542
2543 if (SignedA && !SignedB) {
2544 // Make A:unsigned, B:signed.
2545 std::swap(A, B);
2546 std::swap(SignedA, SignedB);
2547 }
2548
2549 // Do halfword-wise multiplications for the unsigned*unsigned product, then
2550 // add corrections for the signed*signed and unsigned*signed cases.
2551
2552 SDValue Lo, Hi;
2553
2554 // P0:lo = (uu) products of low halves of A and B,
2555 // P0:hi = (uu) products of high halves.
2556 SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);
2557
2558 // Swap low/high halves in B
2559 SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
2560 {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
2561 SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
2562 // P1 = products of even/odd halfwords.
2563 // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
2564 // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
2565 SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);
2566
2567 // P2:lo = low halves of P1:lo + P1:hi,
2568 // P2:hi = high halves of P1:lo + P1:hi.
2569 SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
2570 {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
2571 // Still need to add the high halves of P0:lo to P2:lo
2572 SDValue T2 =
2573 getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
2574 SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});
2575
2576 // The high halves of T3 will contribute to the HI part of LOHI.
2577 SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2578 {HiHalf(P2, DAG), T3, S16}, DAG);
2579
2580 // The low halves of P2 need to be added to high halves of the LO part.
2581 Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
2582 {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
2583 Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});
2584
2585 if (SignedA) {
2586 assert(SignedB && "Signed A and unsigned B should have been inverted");
2587
2588 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2589 SDValue Zero = getZero(dl, VecTy, DAG);
2590 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2591 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2592 SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
2593 SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
2594 Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
2595 } else if (SignedB) {
2596 // Same correction as for mulhus:
2597 // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
2598 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2599 SDValue Zero = getZero(dl, VecTy, DAG);
2600 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2601 Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
2602 } else {
2603 assert(!SignedA && !SignedB);
2604 }
2605
2606 return DAG.getMergeValues({Lo, Hi}, dl);
2607}
2608
2609SDValue
2610HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
2611 SDValue B, bool SignedB,
2612 const SDLoc &dl,
2613 SelectionDAG &DAG) const {
2614 MVT VecTy = ty(A);
2615 MVT PairTy = typeJoin({VecTy, VecTy});
2616 assert(VecTy.getVectorElementType() == MVT::i32);
2617
2618 if (SignedA && !SignedB) {
2619 // Make A:unsigned, B:signed.
2620 std::swap(A, B);
2621 std::swap(SignedA, SignedB);
2622 }
2623
2624 // Do S*S first, then make corrections for U*S or U*U if needed.
2625 SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
2626 SDValue P1 =
2627 getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
2628 SDValue Lo = LoHalf(P1, DAG);
2629 SDValue Hi = HiHalf(P1, DAG);
2630
2631 if (!SignedB) {
2632 assert(!SignedA && "Signed A and unsigned B should have been inverted");
2633 SDValue Zero = getZero(dl, VecTy, DAG);
2634 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2635
2636 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2637 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2638 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
2639 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2640 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2641 // $A))>;
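// The identity can be checked by writing the unsigned value of a register
// as U(X) = S(X) + 2^32*[X<0] (a worked equation, not extra lowering):
//   U(X)*U(Y) = S(X)*S(Y) + 2^32*([X<0]*S(Y) + [Y<0]*S(X)) + 2^64*[X<0][Y<0]
// Both products have the same low 32 bits, so modulo 2^32 the high words
// differ by exactly (Y if X<0) + (X if Y<0), which is what the conditional
// adds below apply on top of the signed HI half.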
2642 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2643 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2644 SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
2645 SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
2646 Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
2647 } else if (!SignedA) {
2648 SDValue Zero = getZero(dl, VecTy, DAG);
2649 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2650
2651 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2652 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2653 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2654 // (HiHalf (Muls64O $A, $B)),
2655 // $B)>;
2656 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2657 Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
2658 }
2659
2660 return DAG.getMergeValues({Lo, Hi}, dl);
2661}
2662
2663SDValue
2664HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2665 const {
2666 // Rewrite conversion between integer and floating-point in such a way that
2667 // the integer type is extended/narrowed to match the bitwidth of the
2668 // floating-point type, combined with additional integer-integer extensions
2669 // or narrowings to match the original input/result types.
2670 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2671 //
2672 // The input/result types are not required to be legal, but if they are
2673 // legal, this function should not introduce illegal types.
2674
2675 unsigned Opc = Op.getOpcode();
2676 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2677 Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2678
2679 SDValue Inp = Op.getOperand(0);
2680 MVT InpTy = ty(Inp);
2681 MVT ResTy = ty(Op);
2682
2683 if (InpTy == ResTy)
2684 return Op;
2685
2686 const SDLoc &dl(Op);
2687 bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
2688
2689 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2690 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2691 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2692 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2693 return Res;
2694}
2695
2696SDValue
2697HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2698 unsigned Opc = Op.getOpcode();
2699 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);
2700
2701 const SDLoc &dl(Op);
2702 SDValue Op0 = Op.getOperand(0);
2703 MVT InpTy = ty(Op0);
2704 MVT ResTy = ty(Op);
2705 assert(InpTy.changeTypeToInteger() == ResTy);
2706
2707 // int32_t conv_f32_to_i32(uint32_t inp) {
2708 // // s | exp8 | frac23
2709 //
2710 // int neg = (int32_t)inp < 0;
2711 //
2712 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2713 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2714 // // produce a large positive "expm1", which will result in max u/int.
2715 // // In all IEEE formats, bias is the largest positive number that can be
2716 // // represented in bias-width bits (i.e. 011..1).
2717 // int32_t expm1 = (inp << 1) - 0x80000000;
2718 // expm1 >>= 24;
2719 //
2720 // // Always insert the "implicit 1". Subnormal numbers will become 0
2721 // // regardless.
2722 // uint32_t frac = (inp << 8) | 0x80000000;
2723 //
2724 // // "frac" is the fraction part represented as Q1.31. If it was
2725 // // interpreted as uint32_t, it would be the fraction part multiplied
2726 // // by 2^31.
2727 //
2728 // // Calculate the amount of right shift, since shifting further to the
2729 // // left would lose significant bits. Limit it to 32, because we want
2730 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2731 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2732 // // left by 31). "rsh" can be negative.
2733 // int32_t rsh = min(31 - (expm1 + 1), 32);
2734 //
2735 // frac >>= rsh; // rsh == 32 will produce 0
2736 //
2737 // // Everything up to this point is the same for conversion to signed
2738 // // unsigned integer.
2739 //
2740 // if (neg) // Only for signed int
2741 // frac = -frac; //
2742 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
2743 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
2744 // if (rsh <= 0 && !neg) //
2745 // frac = 0x7fffffff; //
2746 //
2747 // if (neg) // Only for unsigned int
2748 // frac = 0; //
2749 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
2750 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
2751 //
2752 // return frac;
2753 // }
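// A worked instance of the C model above (values only, f32 -> i32):
// inp = 0x41200000 (10.0f): neg = 0; expm1 = ((inp << 1) - 0x80000000) >> 24
// = 0x02400000 >> 24 = 2; frac = (inp << 8) | 0x80000000 = 0xA0000000,
// i.e. 1.25 in Q1.31; rsh = min(31 - (2 + 1), 32) = 28; frac >> 28 = 10,
// the expected result. For inputs with an all-ones exponent field
// (inf/NaN), expm1 becomes large, rsh goes negative, and the clamping at
// the end produces the saturated bound instead.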
2754
2755 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
2756
2757 // Zero = V6_vd0();
2758 // Neg = V6_vgtw(Zero, Inp);
2759 // One = V6_lvsplatw(1);
2760 // M80 = V6_lvsplatw(0x80000000);
2761 // Exp00 = V6_vaslwv(Inp, One);
2762 // Exp01 = V6_vsubw(Exp00, M80);
2763 // ExpM1 = V6_vasrw(Exp01, 24);
2764 // Frc00 = V6_vaslw(Inp, 8);
2765 // Frc01 = V6_vor(Frc00, M80);
2766 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2767 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2768 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
2769
2770 // if signed int:
2771 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2772 // Pos = V6_vgtw(Rsh01, Zero);
2773 // Frc13 = V6_vsubw(Zero, Frc02);
2774 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
2775 // Int = V6_vmux(Pos, Frc14, Bnd);
2776 //
2777 // if unsigned int:
2778 // Rsn = V6_vgtw(Zero, Rsh01)
2779 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2780 // Int = V6_vmux(Neg, Zero, Frc23)
2781
2782 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
2783 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2784 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
2785
2786 SDValue Inp = DAG.getBitcast(ResTy, Op0);
2787 SDValue Zero = getZero(dl, ResTy, DAG);
2788 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
2789 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
2790 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
2791 SDValue One = DAG.getConstant(1, dl, ResTy);
2792 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
2793 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
2794 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
2795 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
2796
2797 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
2798 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
2799 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
2800
2801 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
2802 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
2803 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
2804 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
2805 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
2806
2807 SDValue Int;
2808
2809 if (Opc == ISD::FP_TO_SINT) {
2810 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
2811 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
2812 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
2813 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
2814 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
2815 } else {
2816 assert(Opc == ISD::FP_TO_UINT);
2817 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
2818 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
2819 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
2820 }
2821
2822 return Int;
2823}
2824
2825SDValue
2826HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2827 unsigned Opc = Op.getOpcode();
2828 assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2829
2830 const SDLoc &dl(Op);
2831 SDValue Op0 = Op.getOperand(0);
2832 MVT InpTy = ty(Op0);
2833 MVT ResTy = ty(Op);
2834 assert(ResTy.changeTypeToInteger() == InpTy);
2835
2836 // uint32_t vnoc1_rnd(int32_t w) {
2837 // int32_t iszero = w == 0;
2838 // int32_t isneg = w < 0;
2839 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
2840 //
2841 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
2842 // uint32_t frac0 = (uint64_t)u << norm_left;
2843 //
2844 // // Rounding:
2845 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
2846 // uint32_t renorm = (frac0 > frac1);
2847 // uint32_t rup = (int)(frac0 << 22) < 0;
2848 //
2849 // uint32_t frac2 = frac0 >> 8;
2850 // uint32_t frac3 = frac1 >> 8;
2851 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
2852 //
2853 // int32_t exp = 32 - norm_left + renorm + 127;
2854 // exp <<= 23;
2855 //
2856 // uint32_t sign = 0x80000000 * isneg;
2857 // uint32_t f = sign | exp | frac;
2858 // return iszero ? 0 : f;
2859 // }
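// A worked instance of the C model above (values only, i32 -> f32):
// w = 10: isneg = 0, u = 10, cl0(u) = 28, norm_left = 29;
// frac0 = 10 << 29 = 0x40000000 (the leading 1 is shifted out, leaving the
// fraction 0.25); the round-to-nearest shift right by 9 (8 then 1 in the
// model) gives frac = 0x00200000 with renorm = 0;
// exp = 32 - 29 + 0 + 127 = 130; f = (130 << 23) | 0x00200000 = 0x41200000,
// i.e. 10.0f.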
2860
2861 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
2862 bool Signed = Opc == ISD::SINT_TO_FP;
2863
2864 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
2865 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2866
2867 SDValue Zero = getZero(dl, InpTy, DAG);
2868 SDValue One = DAG.getConstant(1, dl, InpTy);
2869 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
2870 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
2871 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
2872 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
2873 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
2874
2875 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
2876 if (Signed) {
2877 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
2878 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
2879 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
2880 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
2881 }
2882
2883 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
2884 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
2885 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
2886 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
2887 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
2888 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
2889 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
2890 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
2891 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
2892
2893 return Flt;
2894}
2895
2896SDValue
2897HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2898 unsigned Opc = Op.getOpcode();
2899 unsigned TLOpc;
2900 switch (Opc) {
2901 case ISD::ANY_EXTEND:
2902 case ISD::SIGN_EXTEND:
2903 case ISD::ZERO_EXTEND:
2904 TLOpc = HexagonISD::TL_EXTEND;
2905 break;
2906 case ISD::TRUNCATE:
2907 TLOpc = HexagonISD::TL_TRUNCATE;
2908 break;
2909#ifndef NDEBUG
2910 Op.dump(&DAG);
2911#endif
2912 llvm_unreachable("Unexpected operator");
2913 }
2914
2915 const SDLoc &dl(Op);
2916 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
2917 DAG.getUNDEF(MVT::i128), // illegal type
2918 DAG.getConstant(Opc, dl, MVT::i32));
2919}
2920
2921SDValue
2922HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2923 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
2924 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
2925 unsigned Opc = Op.getConstantOperandVal(2);
2926 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
2927}
2928
2929HexagonTargetLowering::VectorPair
2930HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
2931 assert(!Op.isMachineOpcode());
2932 SmallVector<SDValue, 2> OpsL, OpsH;
2933 const SDLoc &dl(Op);
2934
2935 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
2936 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
2937 SDValue TV = DAG.getValueType(Ty);
2938 return std::make_pair(TV, TV);
2939 };
2940
2941 for (SDValue A : Op.getNode()->ops()) {
2942 auto [Lo, Hi] =
2943 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
2944 // Special case for type operand.
2945 switch (Op.getOpcode()) {
2946 case ISD::SIGN_EXTEND_INREG:
2947 case HexagonISD::SSAT:
2948 case HexagonISD::USAT:
2949 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
2950 std::tie(Lo, Hi) = SplitVTNode(N);
2951 break;
2952 }
2953 OpsL.push_back(Lo);
2954 OpsH.push_back(Hi);
2955 }
2956
2957 MVT ResTy = ty(Op);
2958 MVT HalfTy = typeSplit(ResTy).first;
2959 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
2960 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
2961 return {L, H};
2962}
2963
2964SDValue
2965HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
2966 auto *MemN = cast<MemSDNode>(Op.getNode());
2967
2968 MVT MemTy = MemN->getMemoryVT().getSimpleVT();
2969 if (!isHvxPairTy(MemTy))
2970 return Op;
2971
2972 const SDLoc &dl(Op);
2973 unsigned HwLen = Subtarget.getVectorLength();
2974 MVT SingleTy = typeSplit(MemTy).first;
2975 SDValue Chain = MemN->getChain();
2976 SDValue Base0 = MemN->getBasePtr();
2977 SDValue Base1 =
2978 DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
2979 unsigned MemOpc = MemN->getOpcode();
2980
2981 MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
2982 if (MachineMemOperand *MMO = MemN->getMemOperand()) {
2983 MachineFunction &MF = DAG.getMachineFunction();
2984 uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
2985 ? (uint64_t)MemoryLocation::UnknownSize
2986 : HwLen;
2987 MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
2988 MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
2989 }
2990
2991 if (MemOpc == ISD::LOAD) {
2992 assert(cast<LoadSDNode>(Op)->isUnindexed());
2993 SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
2994 SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
2995 return DAG.getMergeValues(
2996 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
2997 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2998 Load0.getValue(1), Load1.getValue(1)) }, dl);
2999 }
3000 if (MemOpc == ISD::STORE) {
3001 assert(cast<StoreSDNode>(Op)->isUnindexed());
3002 VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
3003 SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
3004 SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
3005 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
3006 }
3007
3008 assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
3009
3010 auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
3011 assert(MaskN->isUnindexed());
3012 VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
3013 SDValue Offset = DAG.getUNDEF(MVT::i32);
3014
3015 if (MemOpc == ISD::MLOAD) {
3016 VectorPair Thru =
3017 opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
3018 SDValue MLoad0 =
3019 DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
3020 Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
3021 ISD::NON_EXTLOAD, false);
3022 SDValue MLoad1 =
3023 DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
3024 Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
3025 ISD::NON_EXTLOAD, false);
3026 return DAG.getMergeValues(
3027 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
3028 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3029 MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
3030 }
3031 if (MemOpc == ISD::MSTORE) {
3032 VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
3033 SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
3034 Masks.first, SingleTy, MOp0,
3035 ISD::UNINDEXED, false, false);
3036 SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
3037 Masks.second, SingleTy, MOp1,
3038 ISD::UNINDEXED, false, false);
3039 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
3040 }
3041
3042 std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
3043 llvm_unreachable(Name.c_str());
3044}
3045
3046SDValue
3047HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3048 const SDLoc &dl(Op);
3049 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3050 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3051 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3052 "Not widening loads of i1 yet");
3053
3054 SDValue Chain = LoadN->getChain();
3055 SDValue Base = LoadN->getBasePtr();
3056 SDValue Offset = DAG.getUNDEF(MVT::i32);
3057
3058 MVT ResTy = ty(Op);
3059 unsigned HwLen = Subtarget.getVectorLength();
3060 unsigned ResLen = ResTy.getStoreSize();
3061 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3062
3063 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3064 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3065 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3066
3067 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3068 MachineFunction &MF = DAG.getMachineFunction();
3069 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3070
3071 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3072 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3073 ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
3074 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3075 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3076}
3077
3078SDValue
3079HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3080 const SDLoc &dl(Op);
3081 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3082 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3083 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3084 "Not widening stores of i1 yet");
3085
3086 SDValue Chain = StoreN->getChain();
3087 SDValue Base = StoreN->getBasePtr();
3088 SDValue Offset = DAG.getUNDEF(MVT::i32);
3089
3090 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3091 MVT ValueTy = ty(Value);
3092 unsigned ValueLen = ValueTy.getVectorNumElements();
3093 unsigned HwLen = Subtarget.getVectorLength();
3094 assert(isPowerOf2_32(ValueLen));
3095
3096 for (unsigned Len = ValueLen; Len < HwLen; ) {
3097 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3098 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3099 }
3100 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3101
3102 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3103 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3104 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3105 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3106 MachineFunction &MF = DAG.getMachineFunction();
3107 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3108 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3109 MemOp, ISD::UNINDEXED, false, false);
3110}
3111
3112SDValue
3113HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3114 const SDLoc &dl(Op);
3115 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3116 MVT ElemTy = ty(Op0).getVectorElementType();
3117 unsigned HwLen = Subtarget.getVectorLength();
3118
3119 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3120 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3121 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3122 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3123 return SDValue();
3124
3125 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3126 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3127 EVT ResTy =
3128 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3129 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3130 {WideOp0, WideOp1, Op.getOperand(2)});
3131
3132 EVT RetTy = typeLegalize(ty(Op), DAG);
3133 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3134 {SetCC, getZero(dl, MVT::i32, DAG)});
3135}
3136
3137SDValue
3138HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3139 unsigned Opc = Op.getOpcode();
3140 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3141 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3142 return isHvxPairTy(ty(V));
3143 });
3144
3145 if (IsPairOp) {
3146 switch (Opc) {
3147 default:
3148 break;
3149 case ISD::LOAD:
3150 case ISD::STORE:
3151 case ISD::MLOAD:
3152 case ISD::MSTORE:
3153 return SplitHvxMemOp(Op, DAG);
3154 case ISD::SINT_TO_FP:
3155 case ISD::UINT_TO_FP:
3156 case ISD::FP_TO_SINT:
3157 case ISD::FP_TO_UINT:
3158 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3159 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3160 break;
3161 case ISD::ABS:
3162 case ISD::CTPOP:
3163 case ISD::CTLZ:
3164 case ISD::CTTZ:
3165 case ISD::MUL:
3166 case ISD::FADD:
3167 case ISD::FSUB:
3168 case ISD::FMUL:
3169 case ISD::FMINNUM:
3170 case ISD::FMAXNUM:
3171 case ISD::MULHS:
3172 case ISD::MULHU:
3173 case ISD::AND:
3174 case ISD::OR:
3175 case ISD::XOR:
3176 case ISD::SRA:
3177 case ISD::SHL:
3178 case ISD::SRL:
3179 case ISD::FSHL:
3180 case ISD::FSHR:
3181 case ISD::SMIN:
3182 case ISD::SMAX:
3183 case ISD::UMIN:
3184 case ISD::UMAX:
3185 case ISD::SETCC:
3186 case ISD::VSELECT:
3188 case ISD::SPLAT_VECTOR:
3189 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3190 case ISD::SIGN_EXTEND:
3191 case ISD::ZERO_EXTEND:
3192 // In general, sign- and zero-extends can't be split and still
3193 // be legal. The only exception is extending bool vectors.
3194 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3195 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3196 break;
3197 }
3198 }
3199
3200 switch (Opc) {
3201 default:
3202 break;
3203 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3204 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3205 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3206 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3207 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3208 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3209 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3210 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3211 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3212 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3213 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3214 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3215 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3216 case ISD::SRA:
3217 case ISD::SHL:
3218 case ISD::SRL: return LowerHvxShift(Op, DAG);
3219 case ISD::FSHL:
3220 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3221 case ISD::MULHS:
3222 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3223 case ISD::SMUL_LOHI:
3224 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3225 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3226 case ISD::SETCC:
3227 case ISD::INTRINSIC_VOID: return Op;
3228 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3229 case ISD::MLOAD:
3230 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3231 // Unaligned loads will be handled by the default lowering.
3232 case ISD::LOAD: return SDValue();
3233 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3234 case ISD::FP_TO_SINT:
3235 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3236 case ISD::SINT_TO_FP:
3237 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3238
3239 // Special nodes:
3240 case HexagonISD::SMUL_LOHI:
3241 case HexagonISD::UMUL_LOHI:
3242 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3243 }
3244#ifndef NDEBUG
3245 Op.dumpr(&DAG);
3246#endif
3247 llvm_unreachable("Unhandled HVX operation");
3248}
3249
3250SDValue
3251HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3252 const {
3253 // Rewrite the extension/truncation/saturation op into steps where each
3254 // step changes the type widths by a factor of 2.
3255 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3256 //
3257 // Some of the vector types in Op may not be legal.
3258
3259 unsigned Opc = Op.getOpcode();
3260 switch (Opc) {
3261 case HexagonISD::SSAT:
3262 case HexagonISD::USAT:
3263 case HexagonISD::TL_EXTEND:
3264 case HexagonISD::TL_TRUNCATE:
3265 break;
3266 case ISD::ANY_EXTEND:
3267 case ISD::ZERO_EXTEND:
3268 case ISD::SIGN_EXTEND:
3269 case ISD::TRUNCATE:
3270 llvm_unreachable("ISD:: ops will be auto-folded");
3271 break;
3272#ifndef NDEBUG
3273 Op.dump(&DAG);
3274#endif
3275 llvm_unreachable("Unexpected operation");
3276 }
3277
3278 SDValue Inp = Op.getOperand(0);
3279 MVT InpTy = ty(Inp);
3280 MVT ResTy = ty(Op);
3281
3282 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3283 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3284 assert(InpWidth != ResWidth);
3285
3286 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3287 return Op;
3288
3289 const SDLoc &dl(Op);
3290 unsigned NumElems = InpTy.getVectorNumElements();
3291 assert(NumElems == ResTy.getVectorNumElements());
3292
3293 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3294 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3295 switch (Opc) {
3296 case HexagonISD::SSAT:
3297 case HexagonISD::USAT:
3298 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3299 case HexagonISD::TL_EXTEND:
3300 case HexagonISD::TL_TRUNCATE:
3301 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3302 default:
3303 llvm_unreachable("Unexpected opcode");
3304 }
3305 };
3306
3307 SDValue S = Inp;
3308 if (InpWidth < ResWidth) {
3309 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3310 while (InpWidth * 2 <= ResWidth)
3311 S = repeatOp(InpWidth *= 2, S);
3312 } else {
3313 // InpWidth > ResWidth
3314 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3315 while (InpWidth / 2 >= ResWidth)
3316 S = repeatOp(InpWidth /= 2, S);
3317 }
3318 return S;
3319}
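// A minimal standalone sketch (illustration only; not part of this file) of
// the stepping performed by ExpandHvxResizeIntoSteps: the element width is
// repeatedly doubled (or halved) until the result width is reached, emitting
// one saturation/extension/truncation node per step.  Widths are assumed to
// be powers of two, as asserted above.
static inline unsigned sketchCountResizeSteps(unsigned InpWidth,
                                              unsigned ResWidth) {
  unsigned Steps = 0;
  if (InpWidth < ResWidth) {
    for (unsigned W = InpWidth; W * 2 <= ResWidth; W *= 2)
      ++Steps;    // e.g. 8 -> 32 visits 16 and 32: two steps
  } else {
    for (unsigned W = InpWidth; W / 2 >= ResWidth; W /= 2)
      ++Steps;    // e.g. 32 -> 8 visits 16 and 8: two steps
  }
  return Steps;
}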
3320
3321SDValue
3322HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3323 SDValue Inp0 = Op.getOperand(0);
3324 MVT InpTy = ty(Inp0);
3325 MVT ResTy = ty(Op);
3326 unsigned InpWidth = InpTy.getSizeInBits();
3327 unsigned ResWidth = ResTy.getSizeInBits();
3328 unsigned Opc = Op.getOpcode();
3329
3330 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3331 // First, make sure that the narrower type is widened to HVX.
3332 // This may cause the result to be wider than what the legalizer
3333 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3334 // desired type.
3335 auto [WInpTy, WResTy] =
3336 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3337 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3338 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3339 SDValue S;
3340 if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
3341 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3342 Op.getOperand(2));
3343 } else {
3344 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3345 }
3346 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3347 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3348 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3349 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3350 } else {
3351 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3352 return RemoveTLWrapper(Op, DAG);
3353 }
3354 llvm_unreachable("Unexpected situation");
3355}
3356
3357void
3358HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3359 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3360 unsigned Opc = N->getOpcode();
3361 SDValue Op(N, 0);
3362 SDValue Inp0; // Optional first argument.
3363 if (N->getNumOperands() > 0)
3364 Inp0 = Op.getOperand(0);
3365
3366 switch (Opc) {
3367 case ISD::ANY_EXTEND:
3368 case ISD::SIGN_EXTEND:
3369 case ISD::ZERO_EXTEND:
3370 case ISD::TRUNCATE:
3371 if (Subtarget.isHVXElementType(ty(Op)) &&
3372 Subtarget.isHVXElementType(ty(Inp0))) {
3373 Results.push_back(CreateTLWrapper(Op, DAG));
3374 }
3375 break;
3376 case ISD::SETCC:
3377 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3378 if (SDValue T = WidenHvxSetCC(Op, DAG))
3379 Results.push_back(T);
3380 }
3381 break;
3382 case ISD::STORE: {
3383 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3384 SDValue Store = WidenHvxStore(Op, DAG);
3385 Results.push_back(Store);
3386 }
3387 break;
3388 }
3389 case ISD::MLOAD:
3390 if (isHvxPairTy(ty(Op))) {
3391 SDValue S = SplitHvxMemOp(Op, DAG);
3392 assert(S->getOpcode() == ISD::MERGE_VALUES);
3393 Results.push_back(S.getOperand(0));
3394 Results.push_back(S.getOperand(1));
3395 }
3396 break;
3397 case ISD::MSTORE:
3398 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3399 SDValue S = SplitHvxMemOp(Op, DAG);
3400 Results.push_back(S);
3401 }
3402 break;
3403 case ISD::SINT_TO_FP:
3404 case ISD::UINT_TO_FP:
3405 case ISD::FP_TO_SINT:
3406 case ISD::FP_TO_UINT:
3407 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3408 SDValue T = EqualizeFpIntConversion(Op, DAG);
3409 Results.push_back(T);
3410 }
3411 break;
3412 case HexagonISD::SSAT:
3413 case HexagonISD::USAT:
3414 case HexagonISD::TL_EXTEND:
3415 case HexagonISD::TL_TRUNCATE:
3416 Results.push_back(LegalizeHvxResize(Op, DAG));
3417 break;
3418 default:
3419 break;
3420 }
3421}
3422
3423void
3424HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3425 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3426 unsigned Opc = N->getOpcode();
3427 SDValue Op(N, 0);
3428 SDValue Inp0; // Optional first argument.
3429 if (N->getNumOperands() > 0)
3430 Inp0 = Op.getOperand(0);
3431
3432 switch (Opc) {
3433 case ISD::ANY_EXTEND:
3434 case ISD::SIGN_EXTEND:
3435 case ISD::ZERO_EXTEND:
3436 case ISD::TRUNCATE:
3437 if (Subtarget.isHVXElementType(ty(Op)) &&
3438 Subtarget.isHVXElementType(ty(Inp0))) {
3439 Results.push_back(CreateTLWrapper(Op, DAG));
3440 }
3441 break;
3442 case ISD::SETCC:
3443 if (shouldWidenToHvx(ty(Op), DAG)) {
3444 if (SDValue T = WidenHvxSetCC(Op, DAG))
3445 Results.push_back(T);
3446 }
3447 break;
3448 case ISD::LOAD: {
3449 if (shouldWidenToHvx(ty(Op), DAG)) {
3450 SDValue Load = WidenHvxLoad(Op, DAG);
3451 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3452 Results.push_back(Load.getOperand(0));
3453 Results.push_back(Load.getOperand(1));
3454 }
3455 break;
3456 }
3457 case ISD::BITCAST:
3458 if (isHvxBoolTy(ty(Inp0))) {
3459 SDValue C = LowerHvxBitcast(Op, DAG);
3460 Results.push_back(C);
3461 }
3462 break;
3463 case ISD::FP_TO_SINT:
3464 case ISD::FP_TO_UINT:
3465 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3466 SDValue T = EqualizeFpIntConversion(Op, DAG);
3467 Results.push_back(T);
3468 }
3469 break;
3470 case HexagonISD::SSAT:
3471 case HexagonISD::USAT:
3472 case HexagonISD::TL_EXTEND:
3473 case HexagonISD::TL_TRUNCATE:
3474 Results.push_back(LegalizeHvxResize(Op, DAG));
3475 break;
3476 default:
3477 break;
3478 }
3479}
3480
3481SDValue
3482HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3483 DAGCombinerInfo &DCI) const {
3484 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3485 // to extract-subvector (shuffle V, pick even, pick odd)
3486
3487 assert(Op.getOpcode() == ISD::TRUNCATE);
3488 SelectionDAG &DAG = DCI.DAG;
3489 const SDLoc &dl(Op);
3490
3491 if (Op.getOperand(0).getOpcode() != ISD::BITCAST)
3492 return SDValue();
3493 SDValue Cast = Op.getOperand(0);
3494 SDValue Src = Cast.getOperand(0);
3495
3496 EVT TruncTy = Op.getValueType();
3497 EVT CastTy = Cast.getValueType();
3498 EVT SrcTy = Src.getValueType();
3499 if (SrcTy.isSimple())
3500 return SDValue();
3501 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3502 return SDValue();
3503 unsigned SrcLen = SrcTy.getVectorNumElements();
3504 unsigned CastLen = CastTy.getVectorNumElements();
3505 if (2 * CastLen != SrcLen)
3506 return SDValue();
3507
3508 SmallVector<int, 128> Mask(SrcLen);
3509 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3510 Mask[i] = 2 * i;
3511 Mask[i + CastLen] = 2 * i + 1;
3512 }
3513 SDValue Deal =
3514 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3515 return opSplit(Deal, dl, DAG).first;
3516}
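// A minimal standalone sketch (illustration only; not part of this file) of
// the "deal" mask built in combineTruncateBeforeLegal: even source elements
// are packed into the low half and odd ones into the high half, so the low
// half of the shuffled vector is exactly the truncated value.  For
// CastLen == 4 the full mask is {0, 2, 4, 6, 1, 3, 5, 7}.
static inline int sketchDealMaskElt(unsigned Pos, unsigned CastLen) {
  // Pos indexes the 2 * CastLen mask entries; low half = evens, high = odds.
  return Pos < CastLen ? int(2 * Pos) : int(2 * (Pos - CastLen) + 1);
}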
3517
3518SDValue
3519HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3520 SDValue Op, DAGCombinerInfo &DCI) const {
3521 // Fold
3522 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3523 // into
3524 // shuffle (concat x, y), undef, m3
3525 if (Op.getNumOperands() != 2)
3526 return SDValue();
3527
3528 SelectionDAG &DAG = DCI.DAG;
3529 const SDLoc &dl(Op);
3530 SDValue V0 = Op.getOperand(0);
3531 SDValue V1 = Op.getOperand(1);
3532
3533 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3534 return SDValue();
3535 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3536 return SDValue();
3537
3538 SetVector<SDValue> Order;
3539 Order.insert(V0.getOperand(0));
3540 Order.insert(V0.getOperand(1));
3541 Order.insert(V1.getOperand(0));
3542 Order.insert(V1.getOperand(1));
3543
3544 if (Order.size() > 2)
3545 return SDValue();
3546
3547 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3548 // result must be the same.
3549 EVT InpTy = V0.getValueType();
3550 assert(InpTy.isVector());
3551 unsigned InpLen = InpTy.getVectorNumElements();
3552
3553 SmallVector<int, 128> LongMask;
3554 auto AppendToMask = [&](SDValue Shuffle) {
3555 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3556 ArrayRef<int> Mask = SV->getMask();
3557 SDValue X = Shuffle.getOperand(0);
3558 SDValue Y = Shuffle.getOperand(1);
3559 for (int M : Mask) {
3560 if (M == -1) {
3561 LongMask.push_back(M);
3562 continue;
3563 }
3564 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3565 if (static_cast<unsigned>(M) >= InpLen)
3566 M -= InpLen;
3567
3568 int OutOffset = Order[0] == Src ? 0 : InpLen;
3569 LongMask.push_back(M + OutOffset);
3570 }
3571 };
3572
3573 AppendToMask(V0);
3574 AppendToMask(V1);
3575
3576 SDValue C0 = Order.front();
3577 SDValue C1 = Order.back(); // Can be same as front
3578 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3579
3580 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3581 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3582}
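// A minimal standalone sketch (illustration only; not part of this file) of
// the mask rewriting in combineConcatVectorsBeforeLegal: an index M from one
// of the original shuffles names lane (M % InpLen) of either x or y; in
// (concat x, y) that lane keeps its position when its source is the first
// concatenated input, and moves InpLen higher otherwise.  Undef entries (-1)
// pass through unchanged.
static inline int sketchRemapShuffleIndex(int M, unsigned InpLen,
                                          bool SrcIsFirstConcatInput) {
  if (M < 0)
    return M;                                // keep undef lanes
  unsigned Lane = unsigned(M) % InpLen;      // lane within its source vector
  return int(Lane + (SrcIsFirstConcatInput ? 0u : InpLen));
}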
3583
3584SDValue
3585HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3586 const {
3587 const SDLoc &dl(N);
3588 SelectionDAG &DAG = DCI.DAG;
3589 SDValue Op(N, 0);
3590 unsigned Opc = Op.getOpcode();
3591
3592 SmallVector<SDValue, 4> Ops(N->ops());
3593
3594 if (Opc == ISD::TRUNCATE)
3595 return combineTruncateBeforeLegal(Op, DCI);
3596 if (Opc == ISD::CONCAT_VECTORS)
3597 return combineConcatVectorsBeforeLegal(Op, DCI);
3598
3599 if (DCI.isBeforeLegalizeOps())
3600 return SDValue();
3601
3602 switch (Opc) {
3603 case ISD::VSELECT: {
3604 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3605 SDValue Cond = Ops[0];
3606 if (Cond->getOpcode() == ISD::XOR) {
3607 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3608 if (C1->getOpcode() == HexagonISD::QTRUE)
3609 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3610 }
3611 break;
3612 }
3613 case HexagonISD::V2Q:
3614 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3615 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3616 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3617 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3618 }
3619 break;
3620 case HexagonISD::Q2V:
3621 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3622 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3623 DAG.getConstant(-1, dl, MVT::i32));
3624 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3625 return getZero(dl, ty(Op), DAG);
3626 break;
3627 case HexagonISD::VINSERTW0:
3628 if (isUndef(Ops[1]))
3629 return Ops[0];
3630 break;
3631 case HexagonISD::VROR: {
3632 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3633 SDValue Vec = Ops[0].getOperand(0);
3634 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3635 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3636 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3637 }
3638 break;
3639 }
3640 }
3641
3642 return SDValue();
3643}
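// A minimal standalone sketch (illustration only; not part of this file) of
// the VROR folding above: rotating a vector by Rot1 and then by Rot0 is the
// same as a single rotation by Rot0 + Rot1 modulo the vector length, shown
// here on a plain lane index instead of an HVX register.
static inline unsigned sketchComposeRotations(unsigned Lane, unsigned Rot0,
                                              unsigned Rot1, unsigned Len) {
  // Two successive rotations collapse into one, matching the ADD above.
  return (Lane + Rot0 + Rot1) % Len;
}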
3644
3645bool
3646HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3647 if (Subtarget.isHVXVectorType(Ty, true))
3648 return false;
3649 auto Action = getPreferredHvxVectorAction(Ty);
3650 if (Action == TargetLoweringBase::TypeSplitVector)
3651 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3652 return false;
3653}
3654
3655bool
3656HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3657 if (Subtarget.isHVXVectorType(Ty, true))
3658 return false;
3659 auto Action = getPreferredHvxVectorAction(Ty);
3660 if (Action == TargetLoweringBase::TypeWidenVector)
3661 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3662 return false;
3663}
3664
3665bool
3666HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3667 if (!Subtarget.useHVXOps())
3668 return false;
3669 // If the type of any result, or any operand type are HVX vector types,
3670 // this is an HVX operation.
3671 auto IsHvxTy = [this](EVT Ty) {
3672 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3673 };
3674 auto IsHvxOp = [this](SDValue Op) {
3675 return Op.getValueType().isSimple() &&
3676 Subtarget.isHVXVectorType(ty(Op), true);
3677 };
3678 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3679 return true;
3680
3681 // Check if this could be an HVX operation after type widening.
3682 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3683 if (!Op.getValueType().isSimple())
3684 return false;
3685 MVT ValTy = ty(Op);
3686 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3687 };
3688
3689 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3690 if (IsWidenedToHvx(SDValue(N, i)))
3691 return true;
3692 }
3693 return llvm::any_of(N->ops(), IsWidenedToHvx);
3694}