1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
34static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(5, 15, 10);
45 case MVT::f32:
46 return std::make_tuple(8, 127, 23);
47 case MVT::f64:
48 return std::make_tuple(11, 1023, 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
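// Illustrative sketch (not part of the original source): the returned triple
// is (exponent bits, exponent bias, fraction bits) of the scalar type, e.g.
//   auto [ExpBits, ExpBias, FracBits] = getIEEEProperties(MVT::v64f16);
//   // ExpBits == 5, ExpBias == 15, FracBits == 10
// so the f16 value 1.5 (0x3E00) has a biased exponent field of 15 (unbiased 0)
// and a fraction field of 0x200.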
54
55void
56HexagonTargetLowering::initializeHVXLowering() {
57 if (Subtarget.useHVX64BOps()) {
58 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
59 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
60 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
61 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
62 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
63 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
71 // as possible.
72 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
73 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
74 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
75 } else if (Subtarget.useHVX128BOps()) {
76 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
77 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
78 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
79 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
80 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
81 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
82 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
85 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90 }
91 }
92
93 // Set up operation actions.
94
95 bool Use64b = Subtarget.useHVX64BOps();
96 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
99 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101
102 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
103 setOperationAction(Opc, FromTy, Promote);
104 AddPromotedToType(Opc, FromTy, ToTy);
105 };
106
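// Illustrative sketch (not part of the original source): setPromoteTo marks an
// operation as Promote and records the type it is promoted to, e.g.
//   setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV /*v128i8*/);
// tells the legalizer to bitcast the v64f16 operands to v128i8, perform the
// shuffle in the byte type, and bitcast the result back.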
107 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108 // Note: v16i1 -> i16 is handled in type legalization instead of op
109 // legalization.
119
120 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
121 Subtarget.useHVXFloatingPoint()) {
122
123 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
124 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
125
126 for (MVT T : FloatV) {
132
135
138
141 // Custom-lower BUILD_VECTOR. The standard (target-independent)
142 // handling of it would convert it to a load, which is not always
143 // the optimal choice.
145 }
146
147
148 // BUILD_VECTOR with f16 operands cannot be promoted without
149 // promoting the result, so lower the node to vsplat or constant pool
153
154 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
155 // generated.
156 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
157 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
158 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
159 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
160
161 for (MVT P : FloatW) {
171
172 // Custom-lower BUILD_VECTOR. The standard (target-independent)
173 // handling of it would convert it to a load, which is not always
174 // the optimal choice.
176 // Make concat-vectors custom to handle concats of more than 2 vectors.
178
181 }
182
183 if (Subtarget.useHVXQFloatOps()) {
186 } else if (Subtarget.useHVXIEEEFPOps()) {
189 }
190 }
191
192 for (MVT T : LegalV) {
195
207 if (T != ByteV) {
211 }
212
215 if (T.getScalarType() != MVT::i32) {
218 }
219
224 if (T.getScalarType() != MVT::i32) {
227 }
228
230 // Make concat-vectors custom to handle concats of more than 2 vectors.
241 if (T != ByteV) {
243 // HVX only has shifts of words and halfwords.
247
248 // Promote all shuffles to operate on vectors of bytes.
249 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
250 }
251
252 if (Subtarget.useHVXFloatingPoint()) {
253 // Same action for both QFloat and IEEE.
258 }
259
267 }
268
269 for (MVT T : LegalW) {
270 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
271 // independent) handling of it would convert it to a load, which is
272 // not always the optimal choice.
274 // Make concat-vectors custom to handle concats of more than 2 vectors.
276
277 // Custom-lower these operations for pairs. Expand them into a concat
278 // of the corresponding operations on individual vectors.
287
296
307 if (T != ByteW) {
311
312 // Promote all shuffles to operate on vectors of bytes.
313 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
314 }
317
320 if (T.getScalarType() != MVT::i32) {
323 }
324
325 if (Subtarget.useHVXFloatingPoint()) {
326 // Same action for both QFloat and IEEE.
331 }
332 }
333
334 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
335 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
336 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
339
340 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
341 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
342 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
343 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
344 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
345 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
346 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
347 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
348 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
349 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
350 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
351 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
352
353 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
354 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
355 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
356 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
357 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
358 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
359 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
360 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
361 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
362 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
363 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
364 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
365
366 // Boolean vectors.
367
368 for (MVT T : LegalW) {
369 // Boolean types for vector pairs will overlap with the boolean
370 // types for single vectors, e.g.
371 // v64i8 -> v64i1 (single)
372 // v64i16 -> v64i1 (pair)
373 // Set these actions first, and allow the single actions to overwrite
374 // any duplicates.
375 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
380 // Masked load/store takes a mask that may need splitting.
383 }
384
385 for (MVT T : LegalV) {
386 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
397 }
398
399 if (Use64b) {
400 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
402 } else {
403 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
405 }
406
407 // Handle store widening for short vectors.
408 unsigned HwLen = Subtarget.getVectorLength();
409 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
410 if (ElemTy == MVT::i1)
411 continue;
412 int ElemWidth = ElemTy.getFixedSizeInBits();
413 int MaxElems = (8*HwLen) / ElemWidth;
414 for (int N = 2; N < MaxElems; N *= 2) {
415 MVT VecTy = MVT::getVectorVT(ElemTy, N);
416 auto Action = getPreferredVectorAction(VecTy);
425 if (Subtarget.useHVXFloatingPoint()) {
430 }
431
432 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
433 if (!isTypeLegal(BoolTy))
435 }
436 }
437 }
438
440}
441
442unsigned
443HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
444 MVT ElemTy = VecTy.getVectorElementType();
445 unsigned VecLen = VecTy.getVectorNumElements();
446 unsigned HwLen = Subtarget.getVectorLength();
447
448 // Split vectors of i1 that exceed byte vector length.
449 if (ElemTy == MVT::i1 && VecLen > HwLen)
451
453 // For shorter vectors of i1, widen them if any of the corresponding
454 // vectors of integers needs to be widened.
455 if (ElemTy == MVT::i1) {
456 for (MVT T : Tys) {
457 assert(T != MVT::i1);
458 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
459 if (A != ~0u)
460 return A;
461 }
462 return ~0u;
463 }
464
465 // If the size of VecTy is at least half of the vector length,
466 // widen the vector. Note: the threshold was not selected in
467 // any scientific way.
468 if (llvm::is_contained(Tys, ElemTy)) {
469 unsigned VecWidth = VecTy.getSizeInBits();
470 unsigned HwWidth = 8*HwLen;
471 if (VecWidth > 2*HwWidth)
473
474 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
475 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
477 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
479 }
480
481 // Defer to default.
482 return ~0u;
483}
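// Illustrative sketch (not part of the original source), assuming 128-byte HVX
// (HwLen = 128, so HwWidth = 1024 bits) and no -hexagon-hvx-widen override:
//   v256i8 (2048 bits) -> wider than 2*HwWidth, so the vector is split
//   v64i8  (512 bits)  -> at least HwWidth/2 but below HwWidth, so widened
//   v32i8  (256 bits)  -> below the threshold, ~0u is returned (default action)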
484
485unsigned
486HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
487 unsigned Opc = Op.getOpcode();
488 switch (Opc) {
493 }
495}
496
498HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
499 const SDLoc &dl, SelectionDAG &DAG) const {
501 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
502 append_range(IntOps, Ops);
503 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
504}
505
506MVT
507HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
508 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
509
510 MVT ElemTy = Tys.first.getVectorElementType();
511 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
512 Tys.second.getVectorNumElements());
513}
514
515HexagonTargetLowering::TypePair
516HexagonTargetLowering::typeSplit(MVT VecTy) const {
517 assert(VecTy.isVector());
518 unsigned NumElem = VecTy.getVectorNumElements();
519 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
520 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
521 return { HalfTy, HalfTy };
522}
523
524MVT
525HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
526 MVT ElemTy = VecTy.getVectorElementType();
527 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
528 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
529}
530
531MVT
532HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
533 MVT ElemTy = VecTy.getVectorElementType();
534 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
535 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
536}
537
539HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
540 SelectionDAG &DAG) const {
541 if (ty(Vec).getVectorElementType() == ElemTy)
542 return Vec;
543 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
544 return DAG.getBitcast(CastTy, Vec);
545}
546
548HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
549 SelectionDAG &DAG) const {
550 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
551 Ops.first, Ops.second);
552}
553
554HexagonTargetLowering::VectorPair
555HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
556 SelectionDAG &DAG) const {
557 TypePair Tys = typeSplit(ty(Vec));
558 if (Vec.getOpcode() == HexagonISD::QCAT)
559 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
560 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
561}
562
563bool
564HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
565 return Subtarget.isHVXVectorType(Ty) &&
566 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
567}
568
569bool
570HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
571 return Subtarget.isHVXVectorType(Ty) &&
572 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
573}
574
575bool
576HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
577 return Subtarget.isHVXVectorType(Ty, true) &&
578 Ty.getVectorElementType() == MVT::i1;
579}
580
581bool HexagonTargetLowering::allowsHvxMemoryAccess(
582 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
583 // Bool vectors are excluded by default, but make it explicit to
584 // emphasize that bool vectors cannot be loaded or stored.
585 // Also, disallow double vector stores (to prevent unnecessary
586 // store widening in DAG combiner).
587 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
588 return false;
589 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
590 return false;
591 if (Fast)
592 *Fast = 1;
593 return true;
594}
595
596bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
597 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
598 if (!Subtarget.isHVXVectorType(VecTy))
599 return false;
600 // XXX Should this be false? vmemu accesses are a bit slower than vmem.
601 if (Fast)
602 *Fast = 1;
603 return true;
604}
605
606void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
607 MachineInstr &MI, SDNode *Node) const {
608 unsigned Opc = MI.getOpcode();
609 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
610 MachineBasicBlock &MB = *MI.getParent();
611 MachineFunction &MF = *MB.getParent();
613 DebugLoc DL = MI.getDebugLoc();
614 auto At = MI.getIterator();
615
616 switch (Opc) {
617 case Hexagon::PS_vsplatib:
618 if (Subtarget.useHVXV62Ops()) {
619 // SplatV = A2_tfrsi #imm
620 // OutV = V6_lvsplatb SplatV
621 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
622 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
623 .add(MI.getOperand(1));
624 Register OutV = MI.getOperand(0).getReg();
625 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
626 .addReg(SplatV);
627 } else {
628 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
629 // OutV = V6_lvsplatw SplatV
630 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
631 const MachineOperand &InpOp = MI.getOperand(1);
632 assert(InpOp.isImm());
633 uint32_t V = InpOp.getImm() & 0xFF;
634 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
635 .addImm(V << 24 | V << 16 | V << 8 | V);
636 Register OutV = MI.getOperand(0).getReg();
637 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
638 }
639 MB.erase(At);
640 break;
641 case Hexagon::PS_vsplatrb:
642 if (Subtarget.useHVXV62Ops()) {
643 // OutV = V6_lvsplatb Inp
644 Register OutV = MI.getOperand(0).getReg();
645 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
646 .add(MI.getOperand(1));
647 } else {
648 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
649 const MachineOperand &InpOp = MI.getOperand(1);
650 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
651 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
652 Register OutV = MI.getOperand(0).getReg();
653 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
654 .addReg(SplatV);
655 }
656 MB.erase(At);
657 break;
658 case Hexagon::PS_vsplatih:
659 if (Subtarget.useHVXV62Ops()) {
660 // SplatV = A2_tfrsi #imm
661 // OutV = V6_lvsplath SplatV
662 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
663 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
664 .add(MI.getOperand(1));
665 Register OutV = MI.getOperand(0).getReg();
666 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
667 .addReg(SplatV);
668 } else {
669 // SplatV = A2_tfrsi #imm:#imm
670 // OutV = V6_lvsplatw SplatV
671 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
672 const MachineOperand &InpOp = MI.getOperand(1);
673 assert(InpOp.isImm());
674 uint32_t V = InpOp.getImm() & 0xFFFF;
675 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
676 .addImm(V << 16 | V);
677 Register OutV = MI.getOperand(0).getReg();
678 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
679 }
680 MB.erase(At);
681 break;
682 case Hexagon::PS_vsplatrh:
683 if (Subtarget.useHVXV62Ops()) {
684 // OutV = V6_lvsplath Inp
685 Register OutV = MI.getOperand(0).getReg();
686 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
687 .add(MI.getOperand(1));
688 } else {
689 // SplatV = A2_combine_ll Inp, Inp
690 // OutV = V6_lvsplatw SplatV
691 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
692 const MachineOperand &InpOp = MI.getOperand(1);
693 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
694 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
695 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
696 Register OutV = MI.getOperand(0).getReg();
697 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
698 }
699 MB.erase(At);
700 break;
701 case Hexagon::PS_vsplatiw:
702 case Hexagon::PS_vsplatrw:
703 if (Opc == Hexagon::PS_vsplatiw) {
704 // SplatV = A2_tfrsi #imm
705 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
706 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
707 .add(MI.getOperand(1));
708 MI.getOperand(1).ChangeToRegister(SplatV, false);
709 }
710 // OutV = V6_lvsplatw SplatV/Inp
711 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
712 break;
713 }
714}
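// Illustrative sketch (not part of the original source): on targets without
// the HVX v62 byte/half splats, PS_vsplatib with #0x2A is expanded as
//   SplatV = A2_tfrsi #0x2A2A2A2A   // the byte replicated into all four lanes
//   OutV   = V6_lvsplatw SplatV
// matching the (V << 24 | V << 16 | V << 8 | V) computation above.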
715
717HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
718 SelectionDAG &DAG) const {
719 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
720 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
721
722 unsigned ElemWidth = ElemTy.getSizeInBits();
723 if (ElemWidth == 8)
724 return ElemIdx;
725
726 unsigned L = Log2_32(ElemWidth/8);
727 const SDLoc &dl(ElemIdx);
728 return DAG.getNode(ISD::SHL, dl, MVT::i32,
729 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
730}
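// Illustrative sketch (not part of the original source): the byte index is the
// element index shifted left by log2 of the element size in bytes, e.g. for
// i16 elements (ElemWidth == 16, L == 1)
//   element 5  ->  byte index 5 << 1 == 10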
731
733HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
734 SelectionDAG &DAG) const {
735 unsigned ElemWidth = ElemTy.getSizeInBits();
736 assert(ElemWidth >= 8 && ElemWidth <= 32);
737 if (ElemWidth == 32)
738 return Idx;
739
740 if (ty(Idx) != MVT::i32)
741 Idx = DAG.getBitcast(MVT::i32, Idx);
742 const SDLoc &dl(Idx);
743 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
744 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
745 return SubIdx;
746}
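// Illustrative sketch (not part of the original source): the sub-word index is
// Idx & (32/ElemWidth - 1), e.g. for i8 elements
//   element 13 -> byte index 13, word 3, sub-index 13 & 3 == 1
// i.e. the element occupies the second byte of the extracted 32-bit word.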
747
749HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
750 SDValue Op1, ArrayRef<int> Mask,
751 SelectionDAG &DAG) const {
752 MVT OpTy = ty(Op0);
753 assert(OpTy == ty(Op1));
754
755 MVT ElemTy = OpTy.getVectorElementType();
756 if (ElemTy == MVT::i8)
757 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
758 assert(ElemTy.getSizeInBits() >= 8);
759
760 MVT ResTy = tyVector(OpTy, MVT::i8);
761 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
762
763 SmallVector<int,128> ByteMask;
764 for (int M : Mask) {
765 if (M < 0) {
766 for (unsigned I = 0; I != ElemSize; ++I)
767 ByteMask.push_back(-1);
768 } else {
769 int NewM = M*ElemSize;
770 for (unsigned I = 0; I != ElemSize; ++I)
771 ByteMask.push_back(NewM+I);
772 }
773 }
774 assert(ResTy.getVectorNumElements() == ByteMask.size());
775 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
776 opCastElem(Op1, MVT::i8, DAG), ByteMask);
777}
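// Illustrative sketch (not part of the original source): a shuffle mask over
// i16 elements is widened to a byte mask by replacing each entry M with the
// ElemSize consecutive byte positions it covers, e.g. for v4i16 inputs
//   Mask = {1, -1, 2, 0}  ->  ByteMask = {2,3, -1,-1, 4,5, 0,1}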
778
780HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
781 const SDLoc &dl, MVT VecTy,
782 SelectionDAG &DAG) const {
783 unsigned VecLen = Values.size();
785 MVT ElemTy = VecTy.getVectorElementType();
786 unsigned ElemWidth = ElemTy.getSizeInBits();
787 unsigned HwLen = Subtarget.getVectorLength();
788
789 unsigned ElemSize = ElemWidth / 8;
790 assert(ElemSize*VecLen == HwLen);
792
793 if (VecTy.getVectorElementType() != MVT::i32 &&
794 !(Subtarget.useHVXFloatingPoint() &&
795 VecTy.getVectorElementType() == MVT::f32)) {
796 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
797 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
798 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
799 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
800 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
801 Words.push_back(DAG.getBitcast(MVT::i32, W));
802 }
803 } else {
804 for (SDValue V : Values)
805 Words.push_back(DAG.getBitcast(MVT::i32, V));
806 }
807 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
808 unsigned NumValues = Values.size();
809 assert(NumValues > 0);
810 bool IsUndef = true;
811 for (unsigned i = 0; i != NumValues; ++i) {
812 if (Values[i].isUndef())
813 continue;
814 IsUndef = false;
815 if (!SplatV.getNode())
816 SplatV = Values[i];
817 else if (SplatV != Values[i])
818 return false;
819 }
820 if (IsUndef)
821 SplatV = Values[0];
822 return true;
823 };
824
825 unsigned NumWords = Words.size();
826 SDValue SplatV;
827 bool IsSplat = isSplat(Words, SplatV);
828 if (IsSplat && isUndef(SplatV))
829 return DAG.getUNDEF(VecTy);
830 if (IsSplat) {
831 assert(SplatV.getNode());
832 if (isNullConstant(SplatV))
833 return getZero(dl, VecTy, DAG);
834 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
835 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
836 return DAG.getBitcast(VecTy, S);
837 }
838
839 // Delay recognizing constant vectors until here, so that we can generate
840 // a vsplat.
841 SmallVector<ConstantInt*, 128> Consts(VecLen);
842 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
843 if (AllConst) {
844 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
845 (Constant**)Consts.end());
846 Constant *CV = ConstantVector::get(Tmp);
847 Align Alignment(HwLen);
848 SDValue CP =
849 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
850 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
852 }
853
854 // A special case is a situation where the vector is built entirely from
855 // elements extracted from another vector. This could be done via a shuffle
856 // more efficiently, but typically, the size of the source vector will not
857 // match the size of the vector being built (which precludes the use of a
858 // shuffle directly).
859 // This only handles a single source vector, and the vector being built
860 // should be of a sub-vector type of the source vector type.
861 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
862 SmallVectorImpl<int> &SrcIdx) {
863 SDValue Vec;
864 for (SDValue V : Values) {
865 if (isUndef(V)) {
866 SrcIdx.push_back(-1);
867 continue;
868 }
869 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
870 return false;
871 // All extracts should come from the same vector.
872 SDValue T = V.getOperand(0);
873 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
874 return false;
875 Vec = T;
876 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
877 if (C == nullptr)
878 return false;
879 int I = C->getSExtValue();
880 assert(I >= 0 && "Negative element index");
881 SrcIdx.push_back(I);
882 }
883 SrcVec = Vec;
884 return true;
885 };
886
888 SDValue ExtVec;
889 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
890 MVT ExtTy = ty(ExtVec);
891 unsigned ExtLen = ExtTy.getVectorNumElements();
892 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
893 // Construct a new shuffle mask that will produce a vector with the same
894 // number of elements as the input vector, and such that the vector we
895 // want will be the initial subvector of it.
897 BitVector Used(ExtLen);
898
899 for (int M : ExtIdx) {
900 Mask.push_back(M);
901 if (M >= 0)
902 Used.set(M);
903 }
904 // Fill the rest of the mask with the unused elements of ExtVec in hopes
905 // that it will result in a permutation of ExtVec's elements. It's still
906 // fine if it doesn't (e.g. if undefs are present, or elements are
907 // repeated), but permutations can always be done efficiently via vdelta
908 // and vrdelta.
909 for (unsigned I = 0; I != ExtLen; ++I) {
910 if (Mask.size() == ExtLen)
911 break;
912 if (!Used.test(I))
913 Mask.push_back(I);
914 }
915
916 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
917 DAG.getUNDEF(ExtTy), Mask);
918 return ExtLen == VecLen ? S : LoHalf(S, DAG);
919 }
920 }
921
 922 // Find the most common element to initialize the vector with, to avoid
 923 // unnecessary vinsert/valign when the same value is present many times.
 924 // A histogram of the vector's elements identifies the most common
 925 // element n.
926 assert(4*Words.size() == Subtarget.getVectorLength());
927 int VecHist[32];
928 int n = 0;
929 for (unsigned i = 0; i != NumWords; ++i) {
930 VecHist[i] = 0;
931 if (Words[i].isUndef())
932 continue;
933 for (unsigned j = i; j != NumWords; ++j)
934 if (Words[i] == Words[j])
935 VecHist[i]++;
936
937 if (VecHist[i] > VecHist[n])
938 n = i;
939 }
940
941 SDValue HalfV = getZero(dl, VecTy, DAG);
942 if (VecHist[n] > 1) {
943 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
944 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
945 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
946 }
947 SDValue HalfV0 = HalfV;
948 SDValue HalfV1 = HalfV;
949
 950 // Construct two halves in parallel, then OR them together. Rn and Rm track
 951 // the rotation amount (in bytes) needed before the next insertion. One last
 952 // rotation is performed after the loop to position the last element.
953 int Rn = 0, Rm = 0;
954 SDValue Sn, Sm;
955 SDValue N = HalfV0;
956 SDValue M = HalfV1;
957 for (unsigned i = 0; i != NumWords/2; ++i) {
958 // Rotate by element count since last insertion.
959 if (Words[i] != Words[n] || VecHist[n] <= 1) {
960 Sn = DAG.getConstant(Rn, dl, MVT::i32);
961 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
962 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
963 {HalfV0, Words[i]});
964 Rn = 0;
965 }
966 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
967 Sm = DAG.getConstant(Rm, dl, MVT::i32);
968 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
969 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
970 {HalfV1, Words[i+NumWords/2]});
971 Rm = 0;
972 }
973 Rn += 4;
974 Rm += 4;
975 }
976 // Perform last rotation.
977 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
978 Sm = DAG.getConstant(Rm, dl, MVT::i32);
979 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
980 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
981
982 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
983 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
984
985 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
986
987 SDValue OutV =
988 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
989 return OutV;
990}
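// Illustrative sketch (not part of the original source) of the build scheme
// above, for a 64-byte vector (NumWords == 16): words 0..7 feed one accumulator
// and words 8..15 the other; a word that differs from the pre-splatted most
// common element is placed with
//   Acc = VROR Acc, #bytes-skipped-since-last-insert
//   Acc = VINSERTW0 Acc, Words[i]
// and after the loop each accumulator is rotated into its final position (by
// Rn + HwLen/2 and by Rm, respectively) before the two halves are OR'ed.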
991
993HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
994 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
995 MVT PredTy = ty(PredV);
996 unsigned HwLen = Subtarget.getVectorLength();
997 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
998
999 if (Subtarget.isHVXVectorType(PredTy, true)) {
1000 // Move the vector predicate SubV to a vector register, and scale it
1001 // down to match the representation (bytes per type element) that VecV
1002 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1003 // in general) element and put them at the front of the resulting
1004 // vector. This subvector will then be inserted into the Q2V of VecV.
1005 // To avoid having an operation that generates an illegal type (short
1006 // vector), generate a full size vector.
1007 //
1008 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1010 // Scale = BitBytes(PredV) / Given BitBytes.
1011 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1012 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1013
1014 for (unsigned i = 0; i != HwLen; ++i) {
1015 unsigned Num = i % Scale;
1016 unsigned Off = i / Scale;
1017 Mask[BlockLen*Num + Off] = i;
1018 }
1019 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1020 if (!ZeroFill)
1021 return S;
1022 // Fill the bytes beyond BlockLen with 0s.
1023 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1024 // when BlockLen < HwLen.
1025 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1026 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1027 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1028 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1029 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
1030 return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1031 }
1032
1033 // Make sure that this is a valid scalar predicate.
1034 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1035
1036 unsigned Bytes = 8 / PredTy.getVectorNumElements();
1037 SmallVector<SDValue,4> Words[2];
1038 unsigned IdxW = 0;
1039
1040 SDValue W0 = isUndef(PredV)
1041 ? DAG.getUNDEF(MVT::i64)
1042 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1043 Words[IdxW].push_back(HiHalf(W0, DAG));
1044 Words[IdxW].push_back(LoHalf(W0, DAG));
1045
1046 while (Bytes < BitBytes) {
1047 IdxW ^= 1;
1048 Words[IdxW].clear();
1049
1050 if (Bytes < 4) {
1051 for (const SDValue &W : Words[IdxW ^ 1]) {
1052 SDValue T = expandPredicate(W, dl, DAG);
1053 Words[IdxW].push_back(HiHalf(T, DAG));
1054 Words[IdxW].push_back(LoHalf(T, DAG));
1055 }
1056 } else {
1057 for (const SDValue &W : Words[IdxW ^ 1]) {
1058 Words[IdxW].push_back(W);
1059 Words[IdxW].push_back(W);
1060 }
1061 }
1062 Bytes *= 2;
1063 }
1064
1065 assert(Bytes == BitBytes);
1066
1067 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1068 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1069 for (const SDValue &W : Words[IdxW]) {
1070 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1071 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1072 }
1073
1074 return Vec;
1075}
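// Illustrative sketch (not part of the original source): with HwLen = 128 and
// PredV of type v32i1 (BitBytes = 1), Scale = 128/32 = 4 and BlockLen = 32, so
// the shuffle mask maps byte 4*j of the Q2V expansion to output byte j,
// compressing every 4th byte into the first 32 bytes of the result; with
// ZeroFill set, bytes 32..127 are then cleared by AND'ing with vsetq(32).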
1076
1077SDValue
1078HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1079 const SDLoc &dl, MVT VecTy,
1080 SelectionDAG &DAG) const {
1081 // Construct a vector V of bytes, such that a comparison V >u 0 would
1082 // produce the required vector predicate.
1083 unsigned VecLen = Values.size();
1084 unsigned HwLen = Subtarget.getVectorLength();
1085 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1087 bool AllT = true, AllF = true;
1088
1089 auto IsTrue = [] (SDValue V) {
1090 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1091 return !N->isZero();
1092 return false;
1093 };
1094 auto IsFalse = [] (SDValue V) {
1095 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1096 return N->isZero();
1097 return false;
1098 };
1099
1100 if (VecLen <= HwLen) {
1101 // In the hardware, each bit of a vector predicate corresponds to a byte
1102 // of a vector register. Calculate how many bytes a bit of VecTy
1103 // corresponds to.
1104 assert(HwLen % VecLen == 0);
1105 unsigned BitBytes = HwLen / VecLen;
1106 for (SDValue V : Values) {
1107 AllT &= IsTrue(V);
1108 AllF &= IsFalse(V);
1109
1110 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1111 : DAG.getUNDEF(MVT::i8);
1112 for (unsigned B = 0; B != BitBytes; ++B)
1113 Bytes.push_back(Ext);
1114 }
1115 } else {
1116 // There are as many i1 values as there are bits in a vector register.
1117 // Divide the values into groups of 8 and check that each group consists
1118 // of the same value (ignoring undefs).
1119 for (unsigned I = 0; I != VecLen; I += 8) {
1120 unsigned B = 0;
1121 // Find the first non-undef value in this group.
1122 for (; B != 8; ++B) {
1123 if (!Values[I+B].isUndef())
1124 break;
1125 }
1126 SDValue F = Values[I+B];
1127 AllT &= IsTrue(F);
1128 AllF &= IsFalse(F);
1129
1130 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1131 : DAG.getUNDEF(MVT::i8);
1132 Bytes.push_back(Ext);
1133 // Verify that the rest of the values in the group are the same as the
1134 // first.
1135 for (; B != 8; ++B)
1136 assert(Values[I+B].isUndef() || Values[I+B] == F);
1137 }
1138 }
1139
1140 if (AllT)
1141 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1142 if (AllF)
1143 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1144
1145 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1146 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1147 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1148}
1149
1150SDValue
1151HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1152 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1153 MVT ElemTy = ty(VecV).getVectorElementType();
1154
1155 unsigned ElemWidth = ElemTy.getSizeInBits();
1156 assert(ElemWidth >= 8 && ElemWidth <= 32);
1157 (void)ElemWidth;
1158
1159 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1160 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1161 {VecV, ByteIdx});
1162 if (ElemTy == MVT::i32)
1163 return ExWord;
1164
1165 // Have an extracted word, need to extract the smaller element out of it.
1166 // 1. Extract the bits of (the original) IdxV that correspond to the index
1167 // of the desired element in the 32-bit word.
1168 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1169 // 2. Extract the element from the word.
1170 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1171 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1172}
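// Illustrative sketch (not part of the original source): extracting element 5
// from a v64i16 vector becomes
//   ByteIdx = 5 << 1 == 10                   // convertToByteIndex
//   Word    = VEXTRACTW VecV, ByteIdx        // the containing 32-bit word
//   SubIdx  = 5 & 1 == 1                     // getIndexInWord32
// and the i16 element is then taken from the upper half of that word.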
1173
1174SDValue
1175HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1176 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1177 // Implement other return types if necessary.
1178 assert(ResTy == MVT::i1);
1179
1180 unsigned HwLen = Subtarget.getVectorLength();
1181 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1182 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1183
1184 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1185 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1186 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1187
1188 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1189 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1190 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1191}
1192
1193SDValue
1194HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1195 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1196 MVT ElemTy = ty(VecV).getVectorElementType();
1197
1198 unsigned ElemWidth = ElemTy.getSizeInBits();
1199 assert(ElemWidth >= 8 && ElemWidth <= 32);
1200 (void)ElemWidth;
1201
1202 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1203 SDValue ByteIdxV) {
1204 MVT VecTy = ty(VecV);
1205 unsigned HwLen = Subtarget.getVectorLength();
1206 SDValue MaskV =
1207 DAG.getNode(ISD::AND, dl, MVT::i32,
1208 {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
1209 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1210 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1211 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1212 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1213 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1214 return TorV;
1215 };
1216
1217 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1218 if (ElemTy == MVT::i32)
1219 return InsertWord(VecV, ValV, ByteIdx);
1220
1221 // If this is not a 32-bit word insertion, reduce it to one:
1222 // 1. Extract the existing word from the target vector.
1223 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1224 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1225 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1226 dl, MVT::i32, DAG);
1227
1228 // 2. Treating the extracted word as a 32-bit vector, insert the given
1229 // value into it.
1230 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1231 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1232 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1233 ValV, SubIdx, dl, ElemTy, DAG);
1234
1235 // 3. Insert the 32-bit word back into the original vector.
1236 return InsertWord(VecV, Ins, ByteIdx);
1237}
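// Illustrative sketch (not part of the original source) of the InsertWord trick
// above, for a 128-byte vector and a byte index of 20:
//   MaskV = 20 & -4 == 20           // word-aligned byte offset
//   RotV  = VROR VecV, #20          // bring the target word to position 0
//   InsV  = VINSERTW0 RotV, ValV    // overwrite word 0
//   TorV  = VROR InsV, #(128 - 20)  // rotate back to the original layout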
1238
1239SDValue
1240HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1241 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1242 unsigned HwLen = Subtarget.getVectorLength();
1243 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1244 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1245
1246 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1247 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1248 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1249 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1250
1251 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1252 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1253}
1254
1255SDValue
1256HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1257 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1258 MVT VecTy = ty(VecV);
1259 unsigned HwLen = Subtarget.getVectorLength();
1260 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1261 MVT ElemTy = VecTy.getVectorElementType();
1262 unsigned ElemWidth = ElemTy.getSizeInBits();
1263
1264 // If the source vector is a vector pair, get the single vector containing
1265 // the subvector of interest. The subvector will never overlap two single
1266 // vectors.
1267 if (isHvxPairTy(VecTy)) {
1268 if (Idx * ElemWidth >= 8*HwLen)
1269 Idx -= VecTy.getVectorNumElements() / 2;
1270
1271 VecV = OrigOp;
1272 if (typeSplit(VecTy).first == ResTy)
1273 return VecV;
1274 }
1275
1276 // The only meaningful subvectors of a single HVX vector are those that
1277 // fit in a scalar register.
1278 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1279
1280 MVT WordTy = tyVector(VecTy, MVT::i32);
1281 SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1282 unsigned WordIdx = (Idx*ElemWidth) / 32;
1283
1284 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1285 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1286 if (ResTy.getSizeInBits() == 32)
1287 return DAG.getBitcast(ResTy, W0);
1288
1289 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1290 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1291 SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1292 return DAG.getBitcast(ResTy, WW);
1293}
1294
1295SDValue
1296HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1297 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1298 MVT VecTy = ty(VecV);
1299 unsigned HwLen = Subtarget.getVectorLength();
1300 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1301 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1302 // IdxV is required to be a constant.
1303 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1304
1305 unsigned ResLen = ResTy.getVectorNumElements();
1306 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1307 unsigned Offset = Idx * BitBytes;
1308 SDValue Undef = DAG.getUNDEF(ByteTy);
1310
1311 if (Subtarget.isHVXVectorType(ResTy, true)) {
1312 // Converting between two vector predicates. Since the result is shorter
1313 // than the source, it will correspond to a vector predicate with the
1314 // relevant bits replicated. The replication count is the ratio of the
1315 // source and target vector lengths.
1316 unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1317 assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1318 for (unsigned i = 0; i != HwLen/Rep; ++i) {
1319 for (unsigned j = 0; j != Rep; ++j)
1320 Mask.push_back(i + Offset);
1321 }
1322 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1323 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
1324 }
1325
1326 // Converting between a vector predicate and a scalar predicate. In the
1327 // vector predicate, a group of BitBytes bits will correspond to a single
1328 // i1 element of the source vector type. Those bits will all have the same
1329 // value. The same will be true for ByteVec, where each byte corresponds
1330 // to a bit in the vector predicate.
1331 // The algorithm is to traverse the ByteVec, going over the i1 values from
1332 // the source vector, and generate the corresponding representation in an
1333 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1334 // elements so that the interesting 8 bytes will be in the low end of the
1335 // vector.
1336 unsigned Rep = 8 / ResLen;
1337 // Make sure the output fills the entire vector register, so repeat the
1338 // 8-byte groups as many times as necessary.
1339 for (unsigned r = 0; r != HwLen/ResLen; ++r) {
1340 // This will generate the indexes of the 8 interesting bytes.
1341 for (unsigned i = 0; i != ResLen; ++i) {
1342 for (unsigned j = 0; j != Rep; ++j)
1343 Mask.push_back(Offset + i*BitBytes);
1344 }
1345 }
1346
1347 SDValue Zero = getZero(dl, MVT::i32, DAG);
1348 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1349 // Combine the two low words from ShuffV into a v8i8, and byte-compare
1350 // them against 0.
1351 SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1352 SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1353 {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
1354 SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
1355 return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1356 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
1357}
1358
1359SDValue
1360HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1361 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1362 MVT VecTy = ty(VecV);
1363 MVT SubTy = ty(SubV);
1364 unsigned HwLen = Subtarget.getVectorLength();
1365 MVT ElemTy = VecTy.getVectorElementType();
1366 unsigned ElemWidth = ElemTy.getSizeInBits();
1367
1368 bool IsPair = isHvxPairTy(VecTy);
1369 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1370 // The two single vectors that VecV consists of, if it's a pair.
1371 SDValue V0, V1;
1372 SDValue SingleV = VecV;
1373 SDValue PickHi;
1374
1375 if (IsPair) {
1376 V0 = LoHalf(VecV, DAG);
1377 V1 = HiHalf(VecV, DAG);
1378
1379 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1380 dl, MVT::i32);
1381 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1382 if (isHvxSingleTy(SubTy)) {
1383 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1384 unsigned Idx = CN->getZExtValue();
1385 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1386 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1387 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1388 }
1389 // If IdxV is not a constant, generate the two variants: with the
1390 // SubV as the high and as the low subregister, and select the right
1391 // pair based on the IdxV.
1392 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1393 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1394 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1395 }
1396 // The subvector being inserted must be entirely contained in one of
1397 // the vectors V0 or V1. Set SingleV to the correct one, and update
1398 // IdxV to be the index relative to the beginning of that vector.
1399 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1400 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1401 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1402 }
1403
1404 // The only meaningful subvectors of a single HVX vector are those that
1405 // fit in a scalar register.
1406 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1407 // Convert IdxV to be index in bytes.
1408 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1409 if (!IdxN || !IdxN->isZero()) {
1410 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1411 DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1412 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1413 }
1414 // When inserting a single word, the rotation back to the original position
1415 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1416 // by (HwLen-4)-Idx.
1417 unsigned RolBase = HwLen;
1418 if (SubTy.getSizeInBits() == 32) {
1419 SDValue V = DAG.getBitcast(MVT::i32, SubV);
1420 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
1421 } else {
1422 SDValue V = DAG.getBitcast(MVT::i64, SubV);
1423 SDValue R0 = LoHalf(V, DAG);
1424 SDValue R1 = HiHalf(V, DAG);
1425 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1426 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1427 DAG.getConstant(4, dl, MVT::i32));
1428 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1429 RolBase = HwLen-4;
1430 }
1431 // If the vector wasn't ror'ed, don't ror it back.
1432 if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1433 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1434 DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1435 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1436 }
1437
1438 if (IsPair) {
1439 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1440 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1441 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1442 }
1443 return SingleV;
1444}
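// Illustrative sketch (not part of the original source): inserting a 64-bit
// subvector at a non-zero element index Idx into a single HVX vector is
//   SingleV = VROR SingleV, #(Idx * ElemWidth/8)   // insertion point to front
//   SingleV = VINSERTW0 SingleV, R0                // low word
//   SingleV = VROR SingleV, #4
//   SingleV = VINSERTW0 SingleV, R1                // high word
//   SingleV = VROR SingleV, #((HwLen - 4) - Idx * ElemWidth/8)
// which is why RolBase becomes HwLen-4 in the 64-bit case above.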
1445
1446SDValue
1447HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1448 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1449 MVT VecTy = ty(VecV);
1450 MVT SubTy = ty(SubV);
1451 assert(Subtarget.isHVXVectorType(VecTy, true));
1452 // VecV is an HVX vector predicate. SubV may be either an HVX vector
1453 // predicate as well, or it can be a scalar predicate.
1454
1455 unsigned VecLen = VecTy.getVectorNumElements();
1456 unsigned HwLen = Subtarget.getVectorLength();
1457 assert(HwLen % VecLen == 0 && "Unexpected vector type");
1458
1459 unsigned Scale = VecLen / SubTy.getVectorNumElements();
1460 unsigned BitBytes = HwLen / VecLen;
1461 unsigned BlockLen = HwLen / Scale;
1462
1463 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1464 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1465 SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
1466 SDValue ByteIdx;
1467
1468 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1469 if (!IdxN || !IdxN->isZero()) {
1470 ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1471 DAG.getConstant(BitBytes, dl, MVT::i32));
1472 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
1473 }
1474
1475 // ByteVec is the target vector VecV rotated in such a way that the
1476 // subvector should be inserted at index 0. Generate a predicate mask
1477 // and use vmux to do the insertion.
1478 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1479 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1480 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1481 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1482 ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1483 // Rotate ByteVec back, and convert to a vector predicate.
1484 if (!IdxN || !IdxN->isZero()) {
1485 SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1486 SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1487 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
1488 }
1489 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1490}
1491
1492SDValue
1493HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1494 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1495 // Sign- and any-extending of a vector predicate to a vector register is
1496 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1497 // a vector of 1s (where the 1s are of type matching the vector type).
1498 assert(Subtarget.isHVXVectorType(ResTy));
1499 if (!ZeroExt)
1500 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1501
1502 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1503 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1504 DAG.getConstant(1, dl, MVT::i32));
1505 SDValue False = getZero(dl, ResTy, DAG);
1506 return DAG.getSelect(dl, ResTy, VecV, True, False);
1507}
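// Illustrative sketch (not part of the original source): zero-extending a
// v32i1 predicate to v32i32 becomes a select between splats,
//   select VecV, splat_vector(1), splat_vector(0)   // both of type v32i32
// while sign- and any-extension map directly onto HexagonISD::Q2V.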
1508
1509SDValue
1510HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1511 MVT ResTy, SelectionDAG &DAG) const {
1512 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1513 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1514 // vector register. The remaining bits of the vector register are
1515 // unspecified.
1516
1518 unsigned HwLen = Subtarget.getVectorLength();
1519 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1520 MVT PredTy = ty(VecQ);
1521 unsigned PredLen = PredTy.getVectorNumElements();
1522 assert(HwLen % PredLen == 0);
1523 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1524
1525 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1527 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1528 // These are bytes with the LSB rotated left with respect to their index.
1529 for (unsigned i = 0; i != HwLen/8; ++i) {
1530 for (unsigned j = 0; j != 8; ++j)
1531 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1532 }
1533 Constant *CV = ConstantVector::get(Tmp);
1534 Align Alignment(HwLen);
1535 SDValue CP =
1536 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1537 SDValue Bytes =
1538 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1540
1541 // Select the bytes that correspond to true bits in the vector predicate.
1542 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1543 getZero(dl, VecTy, DAG));
1544 // Calculate the OR of all bytes in each group of 8. That will compress
1545 // all the individual bits into a single byte.
1546 // First, OR groups of 4, via vrmpy with 0x01010101.
1547 SDValue All1 =
1548 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1549 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1550 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1551 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1552 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1553 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1554
1555 // Pick every 8th byte and coalesce them at the beginning of the output.
1556 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1557 // byte and so on.
1559 for (unsigned i = 0; i != HwLen; ++i)
1560 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1561 SDValue Collect =
1562 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1563 return DAG.getBitcast(ResTy, Collect);
1564}
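// Illustrative sketch (not part of the original source): the vrmpy against
// 0x01010101 acts as an OR of each 4-byte group because the selected bytes are
// distinct powers of two, so their sum never carries, e.g.
//   {0x01, 0x00, 0x04, 0x08}  ->  0x01 + 0x00 + 0x04 + 0x08 == 0x0D == OR
// the 4-byte rotate plus the final OR then merge two adjacent groups, leaving
// one byte that holds 8 predicate bits, which the shuffle gathers to the front.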
1565
1566SDValue
1567HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1568 const SDLoc &dl, SelectionDAG &DAG) const {
1569 // Take a vector and resize the element type to match the given type.
1570 MVT InpTy = ty(VecV);
1571 if (InpTy == ResTy)
1572 return VecV;
1573
1574 unsigned InpWidth = InpTy.getSizeInBits();
1575 unsigned ResWidth = ResTy.getSizeInBits();
1576
1577 if (InpTy.isFloatingPoint()) {
1578 return InpWidth < ResWidth
1579 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1580 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1581 DAG.getTargetConstant(0, dl, MVT::i32));
1582 }
1583
1584 assert(InpTy.isInteger());
1585
1586 if (InpWidth < ResWidth) {
1587 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1588 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1589 } else {
1590 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1591 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1592 }
1593}
1594
1595SDValue
1596HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1597 SelectionDAG &DAG) const {
1598 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1599
1600 const SDLoc &dl(Vec);
1601 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1602 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1603 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1604}
1605
1606SDValue
1607HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1608 const {
1609 const SDLoc &dl(Op);
1610 MVT VecTy = ty(Op);
1611
1612 unsigned Size = Op.getNumOperands();
1614 for (unsigned i = 0; i != Size; ++i)
1615 Ops.push_back(Op.getOperand(i));
1616
1617 // First, split the BUILD_VECTOR for vector pairs. We could generate
1618 // some pairs directly (via splat), but splats should be generated
1619 // by the combiner prior to getting here.
1620 if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
1621 ArrayRef<SDValue> A(Ops);
1622 MVT SingleTy = typeSplit(VecTy).first;
1623 SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
1624 SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
1625 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1626 }
1627
1628 if (VecTy.getVectorElementType() == MVT::i1)
1629 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1630
1631 // For a BUILD_VECTOR with MVT::f16 operands, f16 is not a legal
1632 // scalar type, so bitcast the operands to i16, build the vector in
1633 // i16, and bitcast the result back to f16.
1634 if (VecTy.getVectorElementType() == MVT::f16) {
1636 for (unsigned i = 0; i != Size; i++)
1637 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1638
1639 SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1640 tyVector(VecTy, MVT::i16), NewOps);
1641 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1642 }
1643
1644 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1645}
1646
1647SDValue
1648HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1649 const {
1650 const SDLoc &dl(Op);
1651 MVT VecTy = ty(Op);
1652 MVT ArgTy = ty(Op.getOperand(0));
1653
1654 if (ArgTy == MVT::f16) {
1655 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1656 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1657 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1658 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1659 return DAG.getBitcast(VecTy, Splat);
1660 }
1661
1662 return SDValue();
1663}
1664
1665SDValue
1666HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1667 const {
1668 // Vector concatenation of two integer (non-bool) vectors does not need
1669 // special lowering. Custom-lower concats of bool vectors and expand
1670 // concats of more than 2 vectors.
1671 MVT VecTy = ty(Op);
1672 const SDLoc &dl(Op);
1673 unsigned NumOp = Op.getNumOperands();
1674 if (VecTy.getVectorElementType() != MVT::i1) {
1675 if (NumOp == 2)
1676 return Op;
1677 // Expand the other cases into a build-vector.
1679 for (SDValue V : Op.getNode()->ops())
1680 DAG.ExtractVectorElements(V, Elems);
1681 // A vector of i16 will be broken up into a build_vector of i16's.
1682 // This is a problem, since at the time of operation legalization,
1683 // all operations are expected to be type-legalized, and i16 is not
1684 // a legal type. If any of the extracted elements is not of a valid
1685 // type, sign-extend it to a valid one.
1686 for (unsigned i = 0, e = Elems.size(); i != e; ++i) {
1687 SDValue V = Elems[i];
1688 MVT Ty = ty(V);
1689 if (!isTypeLegal(Ty)) {
1690 MVT NTy = typeLegalize(Ty, DAG);
1691 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1692 Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1693 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1694 V.getOperand(0), V.getOperand(1)),
1695 DAG.getValueType(Ty));
1696 continue;
1697 }
1698 // A few less complicated cases.
1699 switch (V.getOpcode()) {
1700 case ISD::Constant:
1701 Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
1702 break;
1703 case ISD::UNDEF:
1704 Elems[i] = DAG.getUNDEF(NTy);
1705 break;
1706 case ISD::TRUNCATE:
1707 Elems[i] = V.getOperand(0);
1708 break;
1709 default:
1710 llvm_unreachable("Unexpected vector element");
1711 }
1712 }
1713 }
1714 return DAG.getBuildVector(VecTy, dl, Elems);
1715 }
1716
1717 assert(VecTy.getVectorElementType() == MVT::i1);
1718 unsigned HwLen = Subtarget.getVectorLength();
1719 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1720
1721 SDValue Op0 = Op.getOperand(0);
1722
1723 // If the operands are HVX types (i.e. not scalar predicates), then
1724 // defer the concatenation, and create QCAT instead.
1725 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1726 if (NumOp == 2)
1727 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1728
1729 ArrayRef<SDUse> U(Op.getNode()->ops());
1730 SmallVector<SDValue,4> SV(U.begin(), U.end());
1731 ArrayRef<SDValue> Ops(SV);
1732
1733 MVT HalfTy = typeSplit(VecTy).first;
1734 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1735 Ops.take_front(NumOp/2));
1736 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1737 Ops.take_back(NumOp/2));
1738 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1739 }
1740
1741 // Count how many bytes (in a vector register) each bit in VecTy
1742 // corresponds to.
1743 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1744
1745 SmallVector<SDValue,8> Prefixes;
1746 for (SDValue V : Op.getNode()->op_values()) {
1747 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1748 Prefixes.push_back(P);
1749 }
1750
1751 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1752 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1753 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1754 SDValue Res = getZero(dl, ByteTy, DAG);
1755 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1756 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1757 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1758 }
1759 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1760}
1761
1762SDValue
1763HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1764 const {
1765 // Change the type of the extracted element to i32.
1766 SDValue VecV = Op.getOperand(0);
1767 MVT ElemTy = ty(VecV).getVectorElementType();
1768 const SDLoc &dl(Op);
1769 SDValue IdxV = Op.getOperand(1);
1770 if (ElemTy == MVT::i1)
1771 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1772
1773 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1774}
1775
1776SDValue
1777HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1778 const {
1779 const SDLoc &dl(Op);
1780 MVT VecTy = ty(Op);
1781 SDValue VecV = Op.getOperand(0);
1782 SDValue ValV = Op.getOperand(1);
1783 SDValue IdxV = Op.getOperand(2);
1784 MVT ElemTy = ty(VecV).getVectorElementType();
1785 if (ElemTy == MVT::i1)
1786 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1787
1788 if (ElemTy == MVT::f16) {
1789 SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
1790 tyVector(VecTy, MVT::i16),
1791 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1792 DAG.getBitcast(MVT::i16, ValV), IdxV);
1793 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1794 }
1795
1796 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1797}
1798
1799SDValue
1800HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1801 const {
1802 SDValue SrcV = Op.getOperand(0);
1803 MVT SrcTy = ty(SrcV);
1804 MVT DstTy = ty(Op);
1805 SDValue IdxV = Op.getOperand(1);
1806 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1807 assert(Idx % DstTy.getVectorNumElements() == 0);
1808 (void)Idx;
1809 const SDLoc &dl(Op);
1810
1811 MVT ElemTy = SrcTy.getVectorElementType();
1812 if (ElemTy == MVT::i1)
1813 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1814
1815 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1816}
1817
1818SDValue
1819HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1820 const {
1821 // Idx does not need to be a constant.
1822 SDValue VecV = Op.getOperand(0);
1823 SDValue ValV = Op.getOperand(1);
1824 SDValue IdxV = Op.getOperand(2);
1825
1826 const SDLoc &dl(Op);
1827 MVT VecTy = ty(VecV);
1828 MVT ElemTy = VecTy.getVectorElementType();
1829 if (ElemTy == MVT::i1)
1830 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1831
1832 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1833}
1834
1835SDValue
1836HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1837 // Lower any-extends of boolean vectors to sign-extends, since they
1838 // translate directly to Q2V. Zero-extending could also be done equally
1839 // fast, but Q2V is used/recognized in more places.
1840 // For all other vectors, use zero-extend.
1841 MVT ResTy = ty(Op);
1842 SDValue InpV = Op.getOperand(0);
1843 MVT ElemTy = ty(InpV).getVectorElementType();
1844 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1845 return LowerHvxSignExt(Op, DAG);
1846 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1847}
1848
1849SDValue
1850HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1851 MVT ResTy = ty(Op);
1852 SDValue InpV = Op.getOperand(0);
1853 MVT ElemTy = ty(InpV).getVectorElementType();
1854 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1855 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1856 return Op;
1857}
1858
1859SDValue
1860HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1861 MVT ResTy = ty(Op);
1862 SDValue InpV = Op.getOperand(0);
1863 MVT ElemTy = ty(InpV).getVectorElementType();
1864 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1865 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1866 return Op;
1867}
1868
1869SDValue
1870HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1871 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1872 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1873 const SDLoc &dl(Op);
1874 MVT ResTy = ty(Op);
1875 SDValue InpV = Op.getOperand(0);
1876 assert(ResTy == ty(InpV));
1877
1878 // Calculate the vectors of 1 and bitwidth(x).
1879 MVT ElemTy = ty(InpV).getVectorElementType();
1880 unsigned ElemWidth = ElemTy.getSizeInBits();
1881
1882 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1883 DAG.getConstant(1, dl, MVT::i32));
1884 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1885 DAG.getConstant(ElemWidth, dl, MVT::i32));
1886 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1887 DAG.getAllOnesConstant(dl, MVT::i32));
1888
1889 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1890 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1891 // it separately in custom combine or selection).
1892 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1893 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1894 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1895 return DAG.getNode(ISD::SUB, dl, ResTy,
1896 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1897}
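// A minimal scalar sketch of the cttz-via-ctlz identity used above, assuming
// 32-bit lanes and that <cstdint> is available; cttz32 is a hypothetical helper
// for illustration only, with llvm::countl_zero (llvm/ADT/bit.h) standing in
// for the vector CTLZ node.
static inline uint32_t cttz32(uint32_t x) {
  // cttz(x) = bitwidth(x) - ctlz(~x & (x - 1)); x == 0 correctly yields 32,
  // since ~0u & (0u - 1u) == ~0u and ctlz(~0u) == 0.
  return 32u - (uint32_t)llvm::countl_zero(~x & (x - 1u));
}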
1898
1899SDValue
1900HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1901 const SDLoc &dl(Op);
1902 MVT ResTy = ty(Op);
1903 assert(ResTy.getVectorElementType() == MVT::i32);
1904
1905 SDValue Vs = Op.getOperand(0);
1906 SDValue Vt = Op.getOperand(1);
1907
1908 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1909 unsigned Opc = Op.getOpcode();
1910
1911 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1912 if (Opc == ISD::MULHU)
1913 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1914 if (Opc == ISD::MULHS)
1915 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1916
1917#ifndef NDEBUG
1918 Op.dump(&DAG);
1919#endif
1920 llvm_unreachable("Unexpected mulh operation");
1921}
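// For reference, a scalar model of what each lane of MULHS/MULHU (the HI half
// of the *MUL_LOHI nodes above) computes; mulhs_ref and mulhu_ref are
// hypothetical names used only for illustration, assuming 32-bit lanes and
// <cstdint>.
static inline int32_t mulhs_ref(int32_t a, int32_t b) {
  return (int32_t)(((int64_t)a * (int64_t)b) >> 32);    // high word of signed product
}
static inline uint32_t mulhu_ref(uint32_t a, uint32_t b) {
  return (uint32_t)(((uint64_t)a * (uint64_t)b) >> 32); // high word of unsigned product
}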
1922
1923SDValue
1924HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
1925 const SDLoc &dl(Op);
1926 unsigned Opc = Op.getOpcode();
1927 SDValue Vu = Op.getOperand(0);
1928 SDValue Vv = Op.getOperand(1);
1929
1930 // If the HI part is not used, convert it to a regular MUL.
1931 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
1932 // Need to preserve the types and the number of values.
1933 SDValue Hi = DAG.getUNDEF(ty(HiVal));
1934 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
1935 return DAG.getMergeValues({Lo, Hi}, dl);
1936 }
1937
1938 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
1939 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
1940
1941 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
1942 // valued nodes.
1943 if (Subtarget.useHVXV62Ops())
1944 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1945
1946 if (Opc == HexagonISD::SMUL_LOHI) {
1947 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI;
1948 // for the other signedness combinations, LOHI is cheaper.
1949 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
1950 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
1951 SDValue Lo = DAG.getUNDEF(ty(LoVal));
1952 return DAG.getMergeValues({Lo, Hi}, dl);
1953 }
1954 }
1955
1956 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1957}
1958
1959SDValue
1960HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1961 SDValue Val = Op.getOperand(0);
1962 MVT ResTy = ty(Op);
1963 MVT ValTy = ty(Val);
1964 const SDLoc &dl(Op);
1965
1966 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
1967 unsigned HwLen = Subtarget.getVectorLength();
1968 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
1969 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
1970 unsigned BitWidth = ResTy.getSizeInBits();
1971
1972 if (BitWidth < 64) {
1973 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
1974 dl, MVT::i32, DAG);
1975 if (BitWidth == 32)
1976 return W0;
1977 assert(BitWidth < 32u);
1978 return DAG.getZExtOrTrunc(W0, dl, ResTy);
1979 }
1980
1981 // The result is >= 64 bits. The only options are 64 or 128.
1982 assert(BitWidth == 64 || BitWidth == 128);
1983 SmallVector<SDValue,4> Words;
1984 for (unsigned i = 0; i != BitWidth/32; ++i) {
1985 SDValue W = extractHvxElementReg(
1986 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
1987 Words.push_back(W);
1988 }
1989 SmallVector<SDValue,2> Combines;
1990 assert(Words.size() % 2 == 0);
1991 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
1992 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
1993 Combines.push_back(C);
1994 }
1995
1996 if (BitWidth == 64)
1997 return Combines[0];
1998
1999 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2000 }
2001 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2002 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2003 unsigned BitWidth = ValTy.getSizeInBits();
2004 unsigned HwLen = Subtarget.getVectorLength();
2005 assert(BitWidth == HwLen);
2006
2007 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2008 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2009 // Splat each byte of Val 8 times.
2010 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2011 // where b0, b1,..., b15 are least to most significant bytes of I.
2012 SmallVector<SDValue, 128> Bytes;
2013 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2014 // These are bytes with the LSB rotated left with respect to their index.
2015 SmallVector<SDValue, 128> Tmp;
2016 for (unsigned I = 0; I != HwLen / 8; ++I) {
2017 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2018 SDValue Byte =
2019 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2020 for (unsigned J = 0; J != 8; ++J) {
2021 Bytes.push_back(Byte);
2022 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2023 }
2024 }
2025
2026 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2027 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2028 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2029
2030 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2031 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2032 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2033 }
2034
2035 return Op;
2036}
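// A scalar sketch of the byte-splat-and-mask trick used above for the
// scalar-integer -> bool-vector direction; byteToLanes is a hypothetical helper
// shown only to illustrate how one input byte yields eight predicate lanes,
// assuming <cstdint>.
static inline void byteToLanes(uint8_t B, bool Lanes[8]) {
  for (unsigned J = 0; J != 8; ++J) {
    // Mirrors AND-ing the splatted byte with 0x01,0x02,...,0x80 and then V2Q.
    Lanes[J] = (B & (1u << J)) != 0;
  }
}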
2037
2038SDValue
2039HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2040 // Sign- and zero-extends are legal.
2041 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2042 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2043 Op.getOperand(0));
2044}
2045
2046SDValue
2047HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2048 MVT ResTy = ty(Op);
2049 if (ResTy.getVectorElementType() != MVT::i1)
2050 return Op;
2051
2052 const SDLoc &dl(Op);
2053 unsigned HwLen = Subtarget.getVectorLength();
2054 unsigned VecLen = ResTy.getVectorNumElements();
2055 assert(HwLen % VecLen == 0);
2056 unsigned ElemSize = HwLen / VecLen;
2057
2058 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2059 SDValue S =
2060 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2061 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2062 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2063 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2064}
2065
2066SDValue
2067HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2068 if (SDValue S = getVectorShiftByInt(Op, DAG))
2069 return S;
2070 return Op;
2071}
2072
2073SDValue
2074HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2075 SelectionDAG &DAG) const {
2076 unsigned Opc = Op.getOpcode();
2077 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2078
2079 // Make sure the shift amount is within the range of the bitwidth
2080 // of the element type.
2081 SDValue A = Op.getOperand(0);
2082 SDValue B = Op.getOperand(1);
2083 SDValue S = Op.getOperand(2);
2084
2085 MVT InpTy = ty(A);
2086 MVT ElemTy = InpTy.getVectorElementType();
2087
2088 const SDLoc &dl(Op);
2089 unsigned ElemWidth = ElemTy.getSizeInBits();
2090 bool IsLeft = Opc == ISD::FSHL;
2091
2092 // The expansion into regular shifts produces worse code for i8 and for
2093 // right shift of i32 on v65+.
2094 bool UseShifts = ElemTy != MVT::i8;
2095 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2096 UseShifts = false;
2097
2098 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2099 // If this is a funnel shift by a scalar, lower it into regular shifts.
2100 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2101 SDValue ModS =
2102 DAG.getNode(ISD::AND, dl, MVT::i32,
2103 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2104 SDValue NegS =
2105 DAG.getNode(ISD::SUB, dl, MVT::i32,
2106 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2107 SDValue IsZero =
2108 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2109 // FSHL A, B => (A << ModS) | (B >> NegS)
2110 // FSHR A, B => (A << NegS) | (B >> ModS)
2111 SDValue Part1 =
2112 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2113 SDValue Part2 =
2114 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2115 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2116 // If the shift amount was 0, pick A or B, depending on the direction.
2117 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2118 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2119 }
2120
2121 SDValue Mask = DAG.getSplatBuildVector(
2122 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2123
2124 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2125 return DAG.getNode(MOpc, dl, ty(Op),
2126 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2127}
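// A scalar sketch of the splat-amount path above, assuming 32-bit elements;
// fshl32 is a hypothetical name used only for illustration.
static inline uint32_t fshl32(uint32_t A, uint32_t B, uint32_t S) {
  uint32_t ModS = S & 31u;                   // shift amount reduced mod the bit width
  if (ModS == 0)                             // the IsZero select: with ModS == 0 the
    return A;                                // opposite shift is also 0, so the Or is wrong
  return (A << ModS) | (B >> (32u - ModS));  // Part1 | Part2
}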
2128
2129SDValue
2130HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2131 const SDLoc &dl(Op);
2132 unsigned IntNo = Op.getConstantOperandVal(0);
2133 SmallVector<SDValue> Ops(Op->ops());
2134
2135 auto Swap = [&](SDValue P) {
2136 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2137 };
2138
2139 switch (IntNo) {
2140 case Intrinsic::hexagon_V6_pred_typecast:
2141 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2142 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2143 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2144 if (ResTy == InpTy)
2145 return Ops[1];
2146 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2147 }
2148 break;
2149 }
2150 case Intrinsic::hexagon_V6_vmpyss_parts:
2151 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2152 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2153 {Ops[1], Ops[2]}));
2154 case Intrinsic::hexagon_V6_vmpyuu_parts:
2155 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2156 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2157 {Ops[1], Ops[2]}));
2158 case Intrinsic::hexagon_V6_vmpyus_parts:
2159 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2160 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2161 {Ops[1], Ops[2]}));
2162 }
2163 } // switch
2164
2165 return Op;
2166}
2167
2168SDValue
2169HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2170 const SDLoc &dl(Op);
2171 unsigned HwLen = Subtarget.getVectorLength();
2172 MachineFunction &MF = DAG.getMachineFunction();
2173 auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
2174 SDValue Mask = MaskN->getMask();
2175 SDValue Chain = MaskN->getChain();
2176 SDValue Base = MaskN->getBasePtr();
2177 auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
2178
2179 unsigned Opc = Op->getOpcode();
2180 assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);
2181
2182 if (Opc == ISD::MLOAD) {
2183 MVT ValTy = ty(Op);
2184 SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
2185 SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
2186 if (isUndef(Thru))
2187 return Load;
2188 SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
2189 return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
2190 }
2191
2192 // MSTORE
2193 // HVX only has aligned masked stores.
2194
2195 // TODO: Fold negations of the mask into the store.
2196 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2197 SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
2198 SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
2199
2200 if (MaskN->getAlign().value() % HwLen == 0) {
2201 SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2202 {Mask, Base, Offset0, Value, Chain}, DAG);
2203 DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
2204 return Store;
2205 }
2206
2207 // Unaligned case.
2208 auto StoreAlign = [&](SDValue V, SDValue A) {
2209 SDValue Z = getZero(dl, ty(V), DAG);
2210 // TODO: use funnel shifts?
2211 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2212 // upper half.
2213 SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2214 SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2215 return std::make_pair(LoV, HiV);
2216 };
2217
2218 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2219 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2220 SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2221 VectorPair Tmp = StoreAlign(MaskV, Base);
2222 VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2223 DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2224 VectorPair ValueU = StoreAlign(Value, Base);
2225
2226 SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2227 SDValue StoreLo =
2228 getInstr(StoreOpc, dl, MVT::Other,
2229 {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2230 SDValue StoreHi =
2231 getInstr(StoreOpc, dl, MVT::Other,
2232 {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2233 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2234 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2235 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2236}
2237
2238SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2239 SelectionDAG &DAG) const {
2240 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2241 // is legal (done via a pattern).
2242 assert(Subtarget.useHVXQFloatOps());
2243
2244 assert(Op->getOpcode() == ISD::FP_EXTEND);
2245
2246 MVT VecTy = ty(Op);
2247 MVT ArgTy = ty(Op.getOperand(0));
2248 const SDLoc &dl(Op);
2249 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2250
2251 SDValue F16Vec = Op.getOperand(0);
2252
2253 APFloat FloatVal = APFloat(1.0f);
2254 bool Ignored;
2255 FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
2256 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2257 SDValue VmpyVec =
2258 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2259
2260 MVT HalfTy = typeSplit(VecTy).first;
2261 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2262 SDValue LoVec =
2263 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2264 SDValue HiVec =
2265 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2266
2267 SDValue ShuffVec =
2268 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2269 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2270
2271 return ShuffVec;
2272}
2273
2274SDValue
2275HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2276 // Catch invalid conversion ops (just in case).
2277 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2278 Op.getOpcode() == ISD::FP_TO_UINT);
2279
2280 MVT ResTy = ty(Op);
2281 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2282 MVT IntTy = ResTy.getVectorElementType();
2283
2284 if (Subtarget.useHVXIEEEFPOps()) {
2285 // There are only conversions from f16.
2286 if (FpTy == MVT::f16) {
2287 // Other int types aren't legal in HVX, so we shouldn't see them here.
2288 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2289 // Conversions to i8 and i16 are legal.
2290 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2291 return Op;
2292 }
2293 }
2294
2295 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2296 return EqualizeFpIntConversion(Op, DAG);
2297
2298 return ExpandHvxFpToInt(Op, DAG);
2299}
2300
2301SDValue
2302HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2303 // Catch invalid conversion ops (just in case).
2304 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2305 Op.getOpcode() == ISD::UINT_TO_FP);
2306
2307 MVT ResTy = ty(Op);
2308 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2309 MVT FpTy = ResTy.getVectorElementType();
2310
2311 if (Subtarget.useHVXIEEEFPOps()) {
2312 // There are only conversions to f16.
2313 if (FpTy == MVT::f16) {
2314 // Other int types aren't legal in HVX, so we shouldn't see them here.
2315 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2316 // i8, i16 -> f16 is legal.
2317 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2318 return Op;
2319 }
2320 }
2321
2322 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2323 return EqualizeFpIntConversion(Op, DAG);
2324
2325 return ExpandHvxIntToFp(Op, DAG);
2326}
2327
2328HexagonTargetLowering::TypePair
2329HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2330 // Compare the widths of elements of the two types, and extend the narrower
2331 // type to match the width of the wider type. For vector types, apply this
2332 // to the element type.
2333 assert(Ty0.isVector() == Ty1.isVector());
2334
2335 MVT ElemTy0 = Ty0.getScalarType();
2336 MVT ElemTy1 = Ty1.getScalarType();
2337
2338 unsigned Width0 = ElemTy0.getSizeInBits();
2339 unsigned Width1 = ElemTy1.getSizeInBits();
2340 unsigned MaxWidth = std::max(Width0, Width1);
2341
2342 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2343 if (ScalarTy.isInteger())
2344 return MVT::getIntegerVT(Width);
2345 assert(ScalarTy.isFloatingPoint());
2346 return MVT::getFloatingPointVT(Width);
2347 };
2348
2349 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2350 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2351
2352 if (!Ty0.isVector()) {
2353 // Both types are scalars.
2354 return {WideETy0, WideETy1};
2355 }
2356
2357 // Vector types.
2358 unsigned NumElem = Ty0.getVectorNumElements();
2359 assert(NumElem == Ty1.getVectorNumElements());
2360
2361 return {MVT::getVectorVT(WideETy0, NumElem),
2362 MVT::getVectorVT(WideETy1, NumElem)};
2363}
2364
2365HexagonTargetLowering::TypePair
2366HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2367 // Compare the numbers of elements of two vector types, and widen the
2368 // narrower one to match the number of elements in the wider one.
2369 assert(Ty0.isVector() && Ty1.isVector());
2370
2371 unsigned Len0 = Ty0.getVectorNumElements();
2372 unsigned Len1 = Ty1.getVectorNumElements();
2373 if (Len0 == Len1)
2374 return {Ty0, Ty1};
2375
2376 unsigned MaxLen = std::max(Len0, Len1);
2377 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2378 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2379}
2380
2381MVT
2382HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2383 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2384 assert(LegalTy.isSimple());
2385 return LegalTy.getSimpleVT();
2386}
2387
2388MVT
2389HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2390 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2391 assert(Ty.getSizeInBits() <= HwWidth);
2392 if (Ty.getSizeInBits() == HwWidth)
2393 return Ty;
2394
2395 MVT ElemTy = Ty.getScalarType();
2396 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2397}
2398
2399HexagonTargetLowering::VectorPair
2400HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2401 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2402 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2403 // whether an overflow has occurred.
2404 MVT ResTy = ty(A);
2405 assert(ResTy == ty(B));
2406 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2407
2408 if (!Signed) {
2409 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2410 // save any instructions.
2411 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2412 SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2413 return {Add, Ovf};
2414 }
2415
2416 // Signed overflow has happened if:
2417 // (A, B have the same sign) and (A+B has a different sign from either)
2418 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2419 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2420 SDValue NotA =
2421 DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
2422 SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2423 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2424 SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
2425 SDValue MSB =
2426 DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2427 return {Add, MSB};
2428}
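// Scalar sketches of the two overflow tests used above; uaddOverflow and
// saddOverflow are hypothetical helpers for illustration only, assuming 32-bit
// lanes and <cstdint>.
static inline bool uaddOverflow(uint32_t A, uint32_t B) {
  return (uint32_t)(A + B) < A;                    // the SETULT(Add, A) check
}
static inline bool saddOverflow(int32_t A, int32_t B) {
  uint32_t UA = (uint32_t)A, UB = (uint32_t)B, Sum = UA + UB;
  // (~A ^ B) & ((A + B) ^ B) has its sign bit set iff signed overflow occurred.
  return (int32_t)((~UA ^ UB) & (Sum ^ UB)) < 0;
}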
2429
2430HexagonTargetLowering::VectorPair
2431HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
2432 bool Signed, SelectionDAG &DAG) const {
2433 // Shift Val right by Amt bits, round the result to the nearest integer,
2434 // tie-break by rounding halves to even integer.
2435
2436 const SDLoc &dl(Val);
2437 MVT ValTy = ty(Val);
2438
2439 // This should also work for signed integers.
2440 //
2441 // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2442 // bool ovf = (inp > tmp0);
2443 // uint rup = inp & (1 << (Amt+1));
2444 //
2445 // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
2446 // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
2447 // uint tmp3 = tmp2 + rup;
2448 // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
2449 unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
2450 MVT ElemTy = MVT::getIntegerVT(ElemWidth);
2451 MVT IntTy = tyVector(ValTy, ElemTy);
2452 MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
2453 unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;
2454
2455 SDValue Inp = DAG.getBitcast(IntTy, Val);
2456 SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);
2457
2458 SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
2459 SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
2460 SDValue Zero = getZero(dl, IntTy, DAG);
2461 SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
2462 SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
2463 auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);
2464
2465 SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
2466 SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
2467 SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
2468 SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);
2469
2470 SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
2471 SDValue One = DAG.getConstant(1, dl, IntTy);
2472 SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
2473 SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
2474 SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
2475 return {Mux, Ovf};
2476}
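// A scalar model of the documented behavior of the helper above (shift right by
// Amt, round to nearest, ties to even), shown for the unsigned case with a
// 64-bit intermediate for clarity and without the Ovf result; rshiftRndEven is
// a hypothetical helper assuming 1 <= Amt <= 31 and <cstdint>.
static inline uint32_t rshiftRndEven(uint32_t V, unsigned Amt) {
  uint64_t Half = 1ull << (Amt - 1);
  uint64_t Q = ((uint64_t)V + Half) >> Amt;        // round half up...
  if (((uint64_t)V & ((Half << 1) - 1)) == Half)   // ...then fix exact ties to even
    Q &= ~1ull;
  return (uint32_t)Q;
}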
2477
2478SDValue
2479HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
2480 SelectionDAG &DAG) const {
2481 MVT VecTy = ty(A);
2482 MVT PairTy = typeJoin({VecTy, VecTy});
2483 assert(VecTy.getVectorElementType() == MVT::i32);
2484
2485 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2486
2487 // mulhs(A,B) =
2488 // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
2489 // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
2490 // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
2491 // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
2492 // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
2493 // anything, so it cannot produce any carry over to higher bits),
2494 // so everything in [] can be shifted by 16 without loss of precision.
2495 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
2496 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
2497 // The final additions need to make sure to properly maintain any carry-
2498 // out bits.
2499 //
2500 // Hi(B) Lo(B)
2501 // Hi(A) Lo(A)
2502 // --------------
2503 // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this,
2504 // Hi(B)*Lo(A) | + dropping the low 16 bits
2505 // Hi(A)*Lo(B) | T2
2506 // Hi(B)*Hi(A)
2507
2508 SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
2509 // T1 = get Hi(A) into low halves.
2510 SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
2511 // P0 = interleaved T1.h*B.uh (full precision product)
2512 SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
2513 // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
2514 SDValue T2 = LoHalf(P0, DAG);
2515 // We need to add T0+T2, recording the carry-out, which will be 1<<16
2516 // added to the final sum.
2517 // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2518 SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
2519 // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2520 SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
2521 // T3 = full-precision(T0+T2) >> 16
2522 // The low halves are added-unsigned, the high ones are added-signed.
2523 SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2524 {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
2525 SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
2526 // P3 = interleaved Hi(B)*Hi(A) (full precision),
2527 // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
2528 SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
2529 SDValue T5 = LoHalf(P3, DAG);
2530 // Add:
2531 SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
2532 return T6;
2533}
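// The halfword decomposition implemented above, written as a scalar sketch with
// 64-bit intermediates for clarity (the HVX sequence keeps everything in 32-bit
// lanes and handles the carries explicitly); mulhsByHalves is a hypothetical
// helper for illustration, assuming <cstdint>.
static inline int32_t mulhsByHalves(int32_t A, int32_t B) {
  int64_t AH = A >> 16, BH = B >> 16;                  // signed high halves
  int64_t AL = (uint16_t)A, BL = (uint16_t)B;          // unsigned low halves
  int64_t Mid = AH * BL + AL * BH + ((AL * BL) >> 16); // cross terms + carry from Lo*Lo
  return (int32_t)(AH * BH + (Mid >> 16));             // == (int32_t)(((int64_t)A * B) >> 32)
}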
2534
2535SDValue
2536HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
2537 bool SignedB, const SDLoc &dl,
2538 SelectionDAG &DAG) const {
2539 MVT VecTy = ty(A);
2540 MVT PairTy = typeJoin({VecTy, VecTy});
2541 assert(VecTy.getVectorElementType() == MVT::i32);
2542
2543 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2544
2545 if (SignedA && !SignedB) {
2546 // Make A:unsigned, B:signed.
2547 std::swap(A, B);
2548 std::swap(SignedA, SignedB);
2549 }
2550
2551 // Do halfword-wise multiplications for unsigned*unsigned product, then
2552 // add corrections for signed and unsigned*signed.
2553
2554 SDValue Lo, Hi;
2555
2556 // P0:lo = (uu) products of low halves of A and B,
2557 // P0:hi = (uu) products of high halves.
2558 SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);
2559
2560 // Swap low/high halves in B
2561 SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
2562 {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
2563 SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
2564 // P1 = products of even/odd halfwords.
2565 // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
2566 // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
2567 SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);
2568
2569 // P2:lo = low halves of P1:lo + P1:hi,
2570 // P2:hi = high halves of P1:lo + P1:hi.
2571 SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
2572 {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
2573 // Still need to add the high halves of P0:lo to P2:lo
2574 SDValue T2 =
2575 getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
2576 SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});
2577
2578 // The high halves of T3 will contribute to the HI part of LOHI.
2579 SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2580 {HiHalf(P2, DAG), T3, S16}, DAG);
2581
2582 // The low halves of P2 need to be added to high halves of the LO part.
2583 Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
2584 {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
2585 Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});
2586
2587 if (SignedA) {
2588 assert(SignedB && "Signed A and unsigned B should have been inverted");
2589
2590 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2591 SDValue Zero = getZero(dl, VecTy, DAG);
2592 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2593 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2594 SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
2595 SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
2596 Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
2597 } else if (SignedB) {
2598 // Same correction as for mulhus:
2599 // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
2600 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2601 SDValue Zero = getZero(dl, VecTy, DAG);
2602 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2603 Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
2604 } else {
2605 assert(!SignedA && !SignedB);
2606 }
2607
2608 return DAG.getMergeValues({Lo, Hi}, dl);
2609}
2610
2611SDValue
2612HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
2613 SDValue B, bool SignedB,
2614 const SDLoc &dl,
2615 SelectionDAG &DAG) const {
2616 MVT VecTy = ty(A);
2617 MVT PairTy = typeJoin({VecTy, VecTy});
2618 assert(VecTy.getVectorElementType() == MVT::i32);
2619
2620 if (SignedA && !SignedB) {
2621 // Make A:unsigned, B:signed.
2622 std::swap(A, B);
2623 std::swap(SignedA, SignedB);
2624 }
2625
2626 // Do S*S first, then make corrections for U*S or U*U if needed.
2627 SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
2628 SDValue P1 =
2629 getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
2630 SDValue Lo = LoHalf(P1, DAG);
2631 SDValue Hi = HiHalf(P1, DAG);
2632
2633 if (!SignedB) {
2634 assert(!SignedA && "Signed A and unsigned B should have been inverted");
2635 SDValue Zero = getZero(dl, VecTy, DAG);
2636 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2637
2638 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2639 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2640 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
2641 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2642 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2643 // $A))>;
2644 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2645 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2646 SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
2647 SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
2648 Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
2649 } else if (!SignedA) {
2650 SDValue Zero = getZero(dl, VecTy, DAG);
2651 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2652
2653 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2654 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2655 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2656 // (HiHalf (Muls64O $A, $B)),
2657 // $B)>;
2658 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2659 Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
2660 }
2661
2662 return DAG.getMergeValues({Lo, Hi}, dl);
2663}
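// A scalar check of the correction identity quoted above, assuming 32-bit lanes
// and wrap-around (modulo 2^32) arithmetic; mulhuFromMulhs is a hypothetical
// helper for illustration only.
static inline uint32_t mulhuFromMulhs(uint32_t X, uint32_t Y) {
  // mulhu(X, Y) = mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0), where "< 0"
  // tests the sign bit of the same 32-bit pattern reinterpreted as signed.
  int32_t HS = (int32_t)(((int64_t)(int32_t)X * (int32_t)Y) >> 32);
  uint32_t Corr = ((int32_t)Y < 0 ? X : 0u) + ((int32_t)X < 0 ? Y : 0u);
  return (uint32_t)HS + Corr;
}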
2664
2665SDValue
2666HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2667 const {
2668 // Rewrite conversion between integer and floating-point in such a way that
2669 // the integer type is extended/narrowed to match the bitwidth of the
2670 // floating-point type, combined with additional integer-integer extensions
2671 // or narrowings to match the original input/result types.
2672 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2673 //
2674 // The input/result types are not required to be legal, but if they are
2675 // legal, this function should not introduce illegal types.
2676
2677 unsigned Opc = Op.getOpcode();
2678 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2679 Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2680
2681 SDValue Inp = Op.getOperand(0);
2682 MVT InpTy = ty(Inp);
2683 MVT ResTy = ty(Op);
2684
2685 if (InpTy == ResTy)
2686 return Op;
2687
2688 const SDLoc &dl(Op);
2689 bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
2690
2691 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2692 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2693 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2694 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2695 return Res;
2696}
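// A scalar sketch of the equalization above for the f32 -> i8 example from the
// comment, assuming <algorithm> and <cstdint>; the narrowing step is shown as a
// saturating clamp because resizeToWidth narrows via SSAT/USAT. f32ToI8 is a
// hypothetical helper for illustration, and it assumes F is within i32 range.
static inline int8_t f32ToI8(float F) {
  int32_t Wide = (int32_t)F;                       // f32 -> i32 at the equalized width
  return (int8_t)std::clamp(Wide, (int32_t)INT8_MIN, (int32_t)INT8_MAX); // i32 -> i8
}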
2697
2698SDValue
2699HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2700 unsigned Opc = Op.getOpcode();
2701 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);
2702
2703 const SDLoc &dl(Op);
2704 SDValue Op0 = Op.getOperand(0);
2705 MVT InpTy = ty(Op0);
2706 MVT ResTy = ty(Op);
2707 assert(InpTy.changeTypeToInteger() == ResTy);
2708
2709 // int32_t conv_f32_to_i32(uint32_t inp) {
2710 // // s | exp8 | frac23
2711 //
2712 // int neg = (int32_t)inp < 0;
2713 //
2714 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2715 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2716 // // produce a large positive "expm1", which will result in max u/int.
2717 // // In all IEEE formats, bias is the largest positive number that can be
2718 // // represented in bias-width bits (i.e. 011..1).
2719 // int32_t expm1 = (inp << 1) - 0x80000000;
2720 // expm1 >>= 24;
2721 //
2722 // // Always insert the "implicit 1". Subnormal numbers will become 0
2723 // // regardless.
2724 // uint32_t frac = (inp << 8) | 0x80000000;
2725 //
2726 // // "frac" is the fraction part represented as Q1.31. If it was
2727 // // interpreted as uint32_t, it would be the fraction part multiplied
2728 // // by 2^31.
2729 //
2730 // // Calculate the amount of right shift, since shifting further to the
2731 // // left would lose significant bits. Limit it to 32, because we want
2732 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2733 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2734 // // left by 31). "rsh" can be negative.
2735 // int32_t rsh = min(31 - (expm1 + 1), 32);
2736 //
2737 // frac >>= rsh; // rsh == 32 will produce 0
2738 //
2739 // // Everything up to this point is the same for conversion to signed
2740 // // unsigned integer.
2741 //
2742 // if (neg) // Only for signed int
2743 // frac = -frac; //
2744 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
2745 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
2746 // if (rsh <= 0 && !neg) //
2747 // frac = 0x7fffffff; //
2748 //
2749 // if (neg) // Only for unsigned int
2750 // frac = 0; //
2751 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
2752 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
2753 //
2754 // return frac;
2755 // }
2756
2757 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
2758
2759 // Zero = V6_vd0();
2760 // Neg = V6_vgtw(Zero, Inp);
2761 // One = V6_lvsplatw(1);
2762 // M80 = V6_lvsplatw(0x80000000);
2763 // Exp00 = V6_vaslwv(Inp, One);
2764 // Exp01 = V6_vsubw(Exp00, M80);
2765 // ExpM1 = V6_vasrw(Exp01, 24);
2766 // Frc00 = V6_vaslw(Inp, 8);
2767 // Frc01 = V6_vor(Frc00, M80);
2768 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2769 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2770 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
2771
2772 // if signed int:
2773 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2774 // Pos = V6_vgtw(Rsh01, Zero);
2775 // Frc13 = V6_vsubw(Zero, Frc02);
2776 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
2777 // Int = V6_vmux(Pos, Frc14, Bnd);
2778 //
2779 // if unsigned int:
2780 // Rsn = V6_vgtw(Zero, Rsh01)
2781 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2782 // Int = V6_vmux(Neg, Zero, Frc23)
2783
2784 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
2785 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2786 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
2787
2788 SDValue Inp = DAG.getBitcast(ResTy, Op0);
2789 SDValue Zero = getZero(dl, ResTy, DAG);
2790 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
2791 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
2792 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
2793 SDValue One = DAG.getConstant(1, dl, ResTy);
2794 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
2795 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
2796 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
2797 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
2798
2799 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
2800 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
2801 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
2802
2803 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
2804 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
2805 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
2806 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
2807 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
2808
2809 SDValue Int;
2810
2811 if (Opc == ISD::FP_TO_SINT) {
2812 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
2813 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
2814 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
2815 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
2816 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
2817 } else {
2818 assert(Opc == ISD::FP_TO_UINT);
2819 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
2820 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
2821 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
2822 }
2823
2824 return Int;
2825}
2826
2827SDValue
2828HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2829 unsigned Opc = Op.getOpcode();
2830 assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2831
2832 const SDLoc &dl(Op);
2833 SDValue Op0 = Op.getOperand(0);
2834 MVT InpTy = ty(Op0);
2835 MVT ResTy = ty(Op);
2836 assert(ResTy.changeTypeToInteger() == InpTy);
2837
2838 // uint32_t vnoc1_rnd(int32_t w) {
2839 // int32_t iszero = w == 0;
2840 // int32_t isneg = w < 0;
2841 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
2842 //
2843 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
2844 // uint32_t frac0 = (uint64_t)u << norm_left;
2845 //
2846 // // Rounding:
2847 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
2848 // uint32_t renorm = (frac0 > frac1);
2849 // uint32_t rup = (int)(frac0 << 22) < 0;
2850 //
2851 // uint32_t frac2 = frac0 >> 8;
2852 // uint32_t frac3 = frac1 >> 8;
2853 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
2854 //
2855 // int32_t exp = 32 - norm_left + renorm + 127;
2856 // exp <<= 23;
2857 //
2858 // uint32_t sign = 0x80000000 * isneg;
2859 // uint32_t f = sign | exp | frac;
2860 // return iszero ? 0 : f;
2861 // }
2862
2863 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
2864 bool Signed = Opc == ISD::SINT_TO_FP;
2865
2866 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
2867 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2868
2869 SDValue Zero = getZero(dl, InpTy, DAG);
2870 SDValue One = DAG.getConstant(1, dl, InpTy);
2871 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
2872 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
2873 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
2874 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
2875 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
2876
2877 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
2878 if (Signed) {
2879 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
2880 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
2881 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
2882 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
2883 }
2884
2885 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
2886 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
2887 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
2888 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
2889 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
2890 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
2891 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
2892 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
2893 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
2894
2895 return Flt;
2896}
2897
2898SDValue
2899HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2900 unsigned Opc = Op.getOpcode();
2901 unsigned TLOpc;
2902 switch (Opc) {
2903 case ISD::ANY_EXTEND:
2904 case ISD::SIGN_EXTEND:
2905 case ISD::ZERO_EXTEND:
2906 TLOpc = HexagonISD::TL_EXTEND;
2907 break;
2908 case ISD::TRUNCATE:
2909 TLOpc = HexagonISD::TL_TRUNCATE;
2910 break;
2911#ifndef NDEBUG
2912 Op.dump(&DAG);
2913#endif
2914 llvm_unreachable("Unexpected operator");
2915 }
2916
2917 const SDLoc &dl(Op);
2918 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
2919 DAG.getUNDEF(MVT::i128), // illegal type
2920 DAG.getConstant(Opc, dl, MVT::i32));
2921}
2922
2923SDValue
2924HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2925 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
2926 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
2927 unsigned Opc = Op.getConstantOperandVal(2);
2928 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
2929}
2930
2931HexagonTargetLowering::VectorPair
2932HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
2933 assert(!Op.isMachineOpcode());
2934 SmallVector<SDValue, 2> OpsL, OpsH;
2935 const SDLoc &dl(Op);
2936
2937 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
2938 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
2939 SDValue TV = DAG.getValueType(Ty);
2940 return std::make_pair(TV, TV);
2941 };
2942
2943 for (SDValue A : Op.getNode()->ops()) {
2944 auto [Lo, Hi] =
2945 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
2946 // Special case for type operand.
2947 switch (Op.getOpcode()) {
2948 case ISD::SIGN_EXTEND_INREG:
2949 case HexagonISD::SSAT:
2950 case HexagonISD::USAT:
2951 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
2952 std::tie(Lo, Hi) = SplitVTNode(N);
2953 break;
2954 }
2955 OpsL.push_back(Lo);
2956 OpsH.push_back(Hi);
2957 }
2958
2959 MVT ResTy = ty(Op);
2960 MVT HalfTy = typeSplit(ResTy).first;
2961 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
2962 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
2963 return {L, H};
2964}
2965
2966SDValue
2967HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
2968 auto *MemN = cast<MemSDNode>(Op.getNode());
2969
2970 MVT MemTy = MemN->getMemoryVT().getSimpleVT();
2971 if (!isHvxPairTy(MemTy))
2972 return Op;
2973
2974 const SDLoc &dl(Op);
2975 unsigned HwLen = Subtarget.getVectorLength();
2976 MVT SingleTy = typeSplit(MemTy).first;
2977 SDValue Chain = MemN->getChain();
2978 SDValue Base0 = MemN->getBasePtr();
2979 SDValue Base1 =
2980 DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
2981 unsigned MemOpc = MemN->getOpcode();
2982
2983 MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
2984 if (MachineMemOperand *MMO = MemN->getMemOperand()) {
2985 MachineFunction &MF = DAG.getMachineFunction();
2986 uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
2988 : HwLen;
2989 MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
2990 MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
2991 }
2992
2993 if (MemOpc == ISD::LOAD) {
2994 assert(cast<LoadSDNode>(Op)->isUnindexed());
2995 SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
2996 SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
2997 return DAG.getMergeValues(
2998 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
2999 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3000 Load0.getValue(1), Load1.getValue(1)) }, dl);
3001 }
3002 if (MemOpc == ISD::STORE) {
3003 assert(cast<StoreSDNode>(Op)->isUnindexed());
3004 VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
3005 SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
3006 SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
3007 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
3008 }
3009
3010 assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
3011
3012 auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
3013 assert(MaskN->isUnindexed());
3014 VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
3015 SDValue Offset = DAG.getUNDEF(MVT::i32);
3016
3017 if (MemOpc == ISD::MLOAD) {
3018 VectorPair Thru =
3019 opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
3020 SDValue MLoad0 =
3021 DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
3022 Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
3023 ISD::NON_EXTLOAD, false);
3024 SDValue MLoad1 =
3025 DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
3026 Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
3027 ISD::NON_EXTLOAD, false);
3028 return DAG.getMergeValues(
3029 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
3030 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3031 MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
3032 }
3033 if (MemOpc == ISD::MSTORE) {
3034 VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
3035 SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
3036 Masks.first, SingleTy, MOp0,
3037 ISD::UNINDEXED, false, false);
3038 SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
3039 Masks.second, SingleTy, MOp1,
3040 ISD::UNINDEXED, false, false);
3041 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
3042 }
3043
3044 std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
3045 llvm_unreachable(Name.c_str());
3046}
3047
3048SDValue
3049HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3050 const SDLoc &dl(Op);
3051 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3052 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3053 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3054 "Not widening loads of i1 yet");
3055
3056 SDValue Chain = LoadN->getChain();
3057 SDValue Base = LoadN->getBasePtr();
3058 SDValue Offset = DAG.getUNDEF(MVT::i32);
3059
3060 MVT ResTy = ty(Op);
3061 unsigned HwLen = Subtarget.getVectorLength();
3062 unsigned ResLen = ResTy.getStoreSize();
3063 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3064
3065 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3066 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3067 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3068
3069 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3070 MachineFunction &MF = DAG.getMachineFunction();
3071 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3072
3073 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3074 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3075 ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
3076 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3077 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3078}
3079
3080SDValue
3081HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3082 const SDLoc &dl(Op);
3083 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3084 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3085 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3086 "Not widening stores of i1 yet");
3087
3088 SDValue Chain = StoreN->getChain();
3089 SDValue Base = StoreN->getBasePtr();
3090 SDValue Offset = DAG.getUNDEF(MVT::i32);
3091
3092 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3093 MVT ValueTy = ty(Value);
3094 unsigned ValueLen = ValueTy.getVectorNumElements();
3095 unsigned HwLen = Subtarget.getVectorLength();
3096 assert(isPowerOf2_32(ValueLen));
3097
3098 for (unsigned Len = ValueLen; Len < HwLen; ) {
3099 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3100 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3101 }
3102 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3103
3104 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3105 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3106 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3107 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3108 MachineFunction &MF = DAG.getMachineFunction();
3109 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3110 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3111 MemOp, ISD::UNINDEXED, false, false);
3112}
3113
3114SDValue
3115HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3116 const SDLoc &dl(Op);
3117 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3118 MVT ElemTy = ty(Op0).getVectorElementType();
3119 unsigned HwLen = Subtarget.getVectorLength();
3120
3121 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3122 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3123 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3124 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3125 return SDValue();
3126
3127 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3128 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3129 EVT ResTy =
3130 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3131 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3132 {WideOp0, WideOp1, Op.getOperand(2)});
3133
3134 EVT RetTy = typeLegalize(ty(Op), DAG);
3135 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3136 {SetCC, getZero(dl, MVT::i32, DAG)});
3137}
3138
3139SDValue
3140HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3141 unsigned Opc = Op.getOpcode();
3142 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3143 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3144 return isHvxPairTy(ty(V));
3145 });
3146
3147 if (IsPairOp) {
3148 switch (Opc) {
3149 default:
3150 break;
3151 case ISD::LOAD:
3152 case ISD::STORE:
3153 case ISD::MLOAD:
3154 case ISD::MSTORE:
3155 return SplitHvxMemOp(Op, DAG);
3156 case ISD::SINT_TO_FP:
3157 case ISD::UINT_TO_FP:
3158 case ISD::FP_TO_SINT:
3159 case ISD::FP_TO_UINT:
3160 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3161 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3162 break;
3163 case ISD::ABS:
3164 case ISD::CTPOP:
3165 case ISD::CTLZ:
3166 case ISD::CTTZ:
3167 case ISD::MUL:
3168 case ISD::FADD:
3169 case ISD::FSUB:
3170 case ISD::FMUL:
3171 case ISD::FMINNUM:
3172 case ISD::FMAXNUM:
3173 case ISD::MULHS:
3174 case ISD::MULHU:
3175 case ISD::AND:
3176 case ISD::OR:
3177 case ISD::XOR:
3178 case ISD::SRA:
3179 case ISD::SHL:
3180 case ISD::SRL:
3181 case ISD::FSHL:
3182 case ISD::FSHR:
3183 case ISD::SMIN:
3184 case ISD::SMAX:
3185 case ISD::UMIN:
3186 case ISD::UMAX:
3187 case ISD::SETCC:
3188 case ISD::VSELECT:
3189 case ISD::SIGN_EXTEND_INREG:
3190 case ISD::SPLAT_VECTOR:
3191 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3192 case ISD::SIGN_EXTEND:
3193 case ISD::ZERO_EXTEND:
3194 // In general, sign- and zero-extends can't be split and still
3195 // be legal. The only exception is extending bool vectors.
3196 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3197 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3198 break;
3199 }
3200 }
3201
3202 switch (Opc) {
3203 default:
3204 break;
3205 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3206 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3207 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3208 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3209 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3210 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3211 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3212 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3213 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3214 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3215 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3216 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3217 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3218 case ISD::SRA:
3219 case ISD::SHL:
3220 case ISD::SRL: return LowerHvxShift(Op, DAG);
3221 case ISD::FSHL:
3222 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3223 case ISD::MULHS:
3224 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3225 case ISD::SMUL_LOHI:
3226 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3227 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3228 case ISD::SETCC:
3229 case ISD::INTRINSIC_VOID: return Op;
3230 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3231 case ISD::MLOAD:
3232 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3233 // Unaligned loads will be handled by the default lowering.
3234 case ISD::LOAD: return SDValue();
3235 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3236 case ISD::FP_TO_SINT:
3237 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3238 case ISD::SINT_TO_FP:
3239 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3240
3241 // Special nodes:
3242 case HexagonISD::SMUL_LOHI:
3243 case HexagonISD::UMUL_LOHI:
3244 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3245 }
3246#ifndef NDEBUG
3247 Op.dumpr(&DAG);
3248#endif
3249 llvm_unreachable("Unhandled HVX operation");
3250}
3251
3252SDValue
3253HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3254 const {
3255 // Rewrite the extension/truncation/saturation op into steps where each
3256 // step changes the type widths by a factor of 2.
3257 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3258 //
3259 // Some of the vector types in Op may not be legal.
3260
3261 unsigned Opc = Op.getOpcode();
3262 switch (Opc) {
3263 case HexagonISD::SSAT:
3264 case HexagonISD::USAT:
3265 case HexagonISD::TL_EXTEND:
3266 case HexagonISD::TL_TRUNCATE:
3267 break;
3268 case ISD::ANY_EXTEND:
3269 case ISD::ZERO_EXTEND:
3270 case ISD::SIGN_EXTEND:
3271 case ISD::TRUNCATE:
3272 llvm_unreachable("ISD:: ops will be auto-folded");
3273 break;
3274#ifndef NDEBUG
3275 Op.dump(&DAG);
3276#endif
3277 llvm_unreachable("Unexpected operation");
3278 }
3279
3280 SDValue Inp = Op.getOperand(0);
3281 MVT InpTy = ty(Inp);
3282 MVT ResTy = ty(Op);
3283
3284 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3285 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3286 assert(InpWidth != ResWidth);
3287
3288 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3289 return Op;
3290
3291 const SDLoc &dl(Op);
3292 unsigned NumElems = InpTy.getVectorNumElements();
3293 assert(NumElems == ResTy.getVectorNumElements());
3294
3295 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3296 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3297 switch (Opc) {
3298 case HexagonISD::SSAT:
3299 case HexagonISD::USAT:
3300 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3301 case HexagonISD::TL_EXTEND:
3302 case HexagonISD::TL_TRUNCATE:
3303 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3304 default:
3305 llvm_unreachable("Unexpected opcode");
3306 }
3307 };
3308
3309 SDValue S = Inp;
3310 if (InpWidth < ResWidth) {
3311 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3312 while (InpWidth * 2 <= ResWidth)
3313 S = repeatOp(InpWidth *= 2, S);
3314 } else {
3315 // InpWidth > ResWidth
3316 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3317 while (InpWidth / 2 >= ResWidth)
3318 S = repeatOp(InpWidth /= 2, S);
3319 }
3320 return S;
3321}
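// Example: a HexagonISD::SSAT from v32i32 down to v32i8 changes the element
// width by a factor of 4, so it is rebuilt as two saturating steps,
//   v32i32 --SSAT--> v32i16 --SSAT--> v32i8
// whereas a single halving or doubling such as v32i16 -> v32i8 is returned
// unchanged.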
3322
3323SDValue
3324HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3325 SDValue Inp0 = Op.getOperand(0);
3326 MVT InpTy = ty(Inp0);
3327 MVT ResTy = ty(Op);
3328 unsigned InpWidth = InpTy.getSizeInBits();
3329 unsigned ResWidth = ResTy.getSizeInBits();
3330 unsigned Opc = Op.getOpcode();
3331
3332 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3333 // First, make sure that the narrower type is widened to HVX.
3334 // This may cause the result to be wider than what the legalizer
3335 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3336 // desired type.
3337 auto [WInpTy, WResTy] =
3338 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3339 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3340 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3341 SDValue S;
3342 if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
3343 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3344 Op.getOperand(2));
3345 } else {
3346 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3347 }
3348 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3349 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3350 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3351 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3352 } else {
3353 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3354 return RemoveTLWrapper(Op, DAG);
3355 }
3356 llvm_unreachable("Unexpected situation");
3357}
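// Summary of the three cases above: widen to HVX (redo the op in wide types,
// expand it into power-of-2 steps, then EXTRACT_SUBVECTOR back to the type
// the legalizer expects), split an HVX pair into two single-register ops, or,
// when both types are already legal, simply strip the TL_* wrapper.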
3358
3359void
3360HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3361 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3362 unsigned Opc = N->getOpcode();
3363 SDValue Op(N, 0);
3364 SDValue Inp0; // Optional first argument.
3365 if (N->getNumOperands() > 0)
3366 Inp0 = Op.getOperand(0);
3367
3368 switch (Opc) {
3369 case ISD::ANY_EXTEND:
3370 case ISD::SIGN_EXTEND:
3371 case ISD::ZERO_EXTEND:
3372 case ISD::TRUNCATE:
3373 if (Subtarget.isHVXElementType(ty(Op)) &&
3374 Subtarget.isHVXElementType(ty(Inp0))) {
3375 Results.push_back(CreateTLWrapper(Op, DAG));
3376 }
3377 break;
3378 case ISD::SETCC:
3379 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3380 if (SDValue T = WidenHvxSetCC(Op, DAG))
3381 Results.push_back(T);
3382 }
3383 break;
3384 case ISD::STORE: {
3385 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3386 SDValue Store = WidenHvxStore(Op, DAG);
3387 Results.push_back(Store);
3388 }
3389 break;
3390 }
3391 case ISD::MLOAD:
3392 if (isHvxPairTy(ty(Op))) {
3393 SDValue S = SplitHvxMemOp(Op, DAG);
3394 assert(S->getOpcode() == ISD::MERGE_VALUES);
3395 Results.push_back(S.getOperand(0));
3396 Results.push_back(S.getOperand(1));
3397 }
3398 break;
3399 case ISD::MSTORE:
3400 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3401 SDValue S = SplitHvxMemOp(Op, DAG);
3402 Results.push_back(S);
3403 }
3404 break;
3405 case ISD::SINT_TO_FP:
3406 case ISD::UINT_TO_FP:
3407 case ISD::FP_TO_SINT:
3408 case ISD::FP_TO_UINT:
3409 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3410 SDValue T = EqualizeFpIntConversion(Op, DAG);
3411 Results.push_back(T);
3412 }
3413 break;
3414 case HexagonISD::SSAT:
3415 case HexagonISD::USAT:
3416 case HexagonISD::TL_EXTEND:
3417 case HexagonISD::TL_TRUNCATE:
3418 Results.push_back(LegalizeHvxResize(Op, DAG));
3419 break;
3420 default:
3421 break;
3422 }
3423}
3424
3425void
3426HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3427 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3428 unsigned Opc = N->getOpcode();
3429 SDValue Op(N, 0);
3430 SDValue Inp0; // Optional first argument.
3431 if (N->getNumOperands() > 0)
3432 Inp0 = Op.getOperand(0);
3433
3434 switch (Opc) {
3435 case ISD::ANY_EXTEND:
3436 case ISD::SIGN_EXTEND:
3437 case ISD::ZERO_EXTEND:
3438 case ISD::TRUNCATE:
3439 if (Subtarget.isHVXElementType(ty(Op)) &&
3440 Subtarget.isHVXElementType(ty(Inp0))) {
3441 Results.push_back(CreateTLWrapper(Op, DAG));
3442 }
3443 break;
3444 case ISD::SETCC:
3445 if (shouldWidenToHvx(ty(Op), DAG)) {
3446 if (SDValue T = WidenHvxSetCC(Op, DAG))
3447 Results.push_back(T);
3448 }
3449 break;
3450 case ISD::LOAD: {
3451 if (shouldWidenToHvx(ty(Op), DAG)) {
3452 SDValue Load = WidenHvxLoad(Op, DAG);
3453 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3454 Results.push_back(Load.getOperand(0));
3455 Results.push_back(Load.getOperand(1));
3456 }
3457 break;
3458 }
3459 case ISD::BITCAST:
3460 if (isHvxBoolTy(ty(Inp0))) {
3461 SDValue C = LowerHvxBitcast(Op, DAG);
3462 Results.push_back(C);
3463 }
3464 break;
3465 case ISD::FP_TO_SINT:
3466 case ISD::FP_TO_UINT:
3467 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3468 SDValue T = EqualizeFpIntConversion(Op, DAG);
3469 Results.push_back(T);
3470 }
3471 break;
3472 case HexagonISD::SSAT:
3473 case HexagonISD::USAT:
3474 case HexagonISD::TL_EXTEND:
3475 case HexagonISD::TL_TRUNCATE:
3476 Results.push_back(LegalizeHvxResize(Op, DAG));
3477 break;
3478 default:
3479 break;
3480 }
3481}
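// Note: LowerHvxOperationWrapper and ReplaceHvxNodeResults are the HVX
// bodies of the target hooks the DAG type legalizer invokes for nodes with
// an illegal operand type or an illegal result type, respectively; both
// push the replacement value(s) into Results, and leaving Results empty
// falls back to the default handling.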
3482
3483SDValue
3484HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3485 DAGCombinerInfo &DCI) const {
3486 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3487 // to extract-subvector (shuffle V, pick even, pick odd)
3488
3489 assert(Op.getOpcode() == ISD::TRUNCATE);
3490 SelectionDAG &DAG = DCI.DAG;
3491 const SDLoc &dl(Op);
3492
3493 if (Op.getOperand(0).getOpcode() != ISD::BITCAST)
3494 return SDValue();
3495 SDValue Cast = Op.getOperand(0);
3496 SDValue Src = Cast.getOperand(0);
3497
3498 EVT TruncTy = Op.getValueType();
3499 EVT CastTy = Cast.getValueType();
3500 EVT SrcTy = Src.getValueType();
3501 if (SrcTy.isSimple())
3502 return SDValue();
3503 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3504 return SDValue();
3505 unsigned SrcLen = SrcTy.getVectorNumElements();
3506 unsigned CastLen = CastTy.getVectorNumElements();
3507 if (2 * CastLen != SrcLen)
3508 return SDValue();
3509
3510 SmallVector<int, 128> Mask(SrcLen);
3511 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3512 Mask[i] = 2 * i;
3513 Mask[i + CastLen] = 2 * i + 1;
3514 }
3515 SDValue Deal =
3516 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3517 return opSplit(Deal, dl, DAG).first;
3518}
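// Example (little-endian layout): truncating v64i16 (a bitcast of a v128i8
// value V) to v64i8 becomes a "deal" shuffle that gathers the even bytes of
// V into the low half and the odd bytes into the high half; the low half is
// returned, and those even bytes are exactly the per-element low bytes the
// truncate would produce.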
3519
3520SDValue
3521HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3522 SDValue Op, DAGCombinerInfo &DCI) const {
3523 // Fold
3524 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3525 // into
3526 // shuffle (concat x, y), undef, m3
3527 if (Op.getNumOperands() != 2)
3528 return SDValue();
3529
3530 SelectionDAG &DAG = DCI.DAG;
3531 const SDLoc &dl(Op);
3532 SDValue V0 = Op.getOperand(0);
3533 SDValue V1 = Op.getOperand(1);
3534
3535 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3536 return SDValue();
3537 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3538 return SDValue();
3539
3540 SetVector<SDValue> Order;
3541 Order.insert(V0.getOperand(0));
3542 Order.insert(V0.getOperand(1));
3543 Order.insert(V1.getOperand(0));
3544 Order.insert(V1.getOperand(1));
3545
3546 if (Order.size() > 2)
3547 return SDValue();
3548
3549 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3550 // result must be the same.
3551 EVT InpTy = V0.getValueType();
3552 assert(InpTy.isVector());
3553 unsigned InpLen = InpTy.getVectorNumElements();
3554
3555 SmallVector<int, 128> LongMask;
3556 auto AppendToMask = [&](SDValue Shuffle) {
3557 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3558 ArrayRef<int> Mask = SV->getMask();
3559 SDValue X = Shuffle.getOperand(0);
3560 SDValue Y = Shuffle.getOperand(1);
3561 for (int M : Mask) {
3562 if (M == -1) {
3563 LongMask.push_back(M);
3564 continue;
3565 }
3566 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3567 if (static_cast<unsigned>(M) >= InpLen)
3568 M -= InpLen;
3569
3570 int OutOffset = Order[0] == Src ? 0 : InpLen;
3571 LongMask.push_back(M + OutOffset);
3572 }
3573 };
3574
3575 AppendToMask(V0);
3576 AppendToMask(V1);
3577
3578 SDValue C0 = Order.front();
3579 SDValue C1 = Order.back(); // Can be same as front
3580 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3581
3582 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3583 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3584}
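// Example: concat (shuffle x, y, <0,2>), (shuffle x, y, <1,3>) with v2i32
// inputs becomes shuffle (concat x, y), undef, <0,2,1,3> on v4i32; each
// half-mask is rebased onto the single concatenated source, which is what
// AppendToMask does above.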
3585
3586SDValue
3587HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3588 const {
3589 const SDLoc &dl(N);
3590 SelectionDAG &DAG = DCI.DAG;
3591 SDValue Op(N, 0);
3592 unsigned Opc = Op.getOpcode();
3593
3594 SmallVector<SDValue, 4> Ops(N->ops());
3595
3596 if (Opc == ISD::TRUNCATE)
3597 return combineTruncateBeforeLegal(Op, DCI);
3598 if (Opc == ISD::CONCAT_VECTORS)
3599 return combineConcatVectorsBeforeLegal(Op, DCI);
3600
3601 if (DCI.isBeforeLegalizeOps())
3602 return SDValue();
3603
3604 switch (Opc) {
3605 case ISD::VSELECT: {
3606 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3607 SDValue Cond = Ops[0];
3608 if (Cond->getOpcode() == ISD::XOR) {
3609 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3610 if (C1->getOpcode() == HexagonISD::QTRUE)
3611 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3612 }
3613 break;
3614 }
3615 case HexagonISD::V2Q:
3616 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3617 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3618 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3619 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3620 }
3621 break;
3622 case HexagonISD::Q2V:
3623 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3624 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3625 DAG.getAllOnesConstant(dl, MVT::i32));
3626 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3627 return getZero(dl, ty(Op), DAG);
3628 break;
3629 case HexagonISD::VINSERTW0:
3630 if (isUndef(Ops[1]))
3631 return Ops[0];
3632 break;
3633 case HexagonISD::VROR: {
3634 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3635 SDValue Vec = Ops[0].getOperand(0);
3636 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3637 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3638 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3639 }
3640 break;
3641 }
3642 }
3643
3644 return SDValue();
3645}
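// Example: the VROR case above folds nested rotates,
//   vror (vror V, A), B  ->  vror V, (A + B)
// and the V2Q/Q2V cases replace conversions of constant splats and
// QTRUE/QFALSE with the equivalent constant form directly.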
3646
3647bool
3648HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3649 if (Subtarget.isHVXVectorType(Ty, true))
3650 return false;
3651 auto Action = getPreferredHvxVectorAction(Ty);
3652 if (Action == TargetLoweringBase::TypeSplitVector)
3653 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3654 return false;
3655}
3656
3657bool
3658HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3659 if (Subtarget.isHVXVectorType(Ty, true))
3660 return false;
3661 auto Action = getPreferredHvxVectorAction(Ty);
3662 if (Action == TargetLoweringBase::TypeWidenVector)
3663 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3664 return false;
3665}
3666
3667bool
3668HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3669 if (!Subtarget.useHVXOps())
3670 return false;
3671 // If the type of any result, or any operand type are HVX vector types,
3672 // this is an HVX operation.
3673 auto IsHvxTy = [this](EVT Ty) {
3674 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3675 };
3676 auto IsHvxOp = [this](SDValue Op) {
3677 return Op.getValueType().isSimple() &&
3678 Subtarget.isHVXVectorType(ty(Op), true);
3679 };
3680 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3681 return true;
3682
3683 // Check if this could be an HVX operation after type widening.
3684 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3685 if (!Op.getValueType().isSimple())
3686 return false;
3687 MVT ValTy = ty(Op);
3688 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3689 };
3690
3691 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3692 if (IsWidenedToHvx(SDValue(N, i)))
3693 return true;
3694 }
3695 return llvm::any_of(N->ops(), IsWidenedToHvx);
3696}