1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
23#include "llvm/IR/GlobalAlias.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/IntrinsicsS390.h"
30#include <cctype>
31#include <optional>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "systemz-lower"
36
37// Temporarily let this be disabled by default until all known problems
38// related to argument extensions are fixed.
40 "argext-abi-check", cl::init(false),
41 cl::desc("Verify that narrow int args are properly extended per the "
42 "SystemZ ABI."));
43
44namespace {
45// Represents information about a comparison.
46struct Comparison {
47 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
48 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
49 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
50
51 // The operands to the comparison.
52 SDValue Op0, Op1;
53
54 // Chain if this is a strict floating-point comparison.
55 SDValue Chain;
56
57 // The opcode that should be used to compare Op0 and Op1.
58 unsigned Opcode;
59
60 // A SystemZICMP value. Only used for integer comparisons.
61 unsigned ICmpType;
62
63 // The mask of CC values that Opcode can produce.
64 unsigned CCValid;
65
66 // The mask of CC values for which the original condition is true.
67 unsigned CCMask;
68};
69} // end anonymous namespace
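// Illustrative sketch (field values are an assumption, not taken from this
// file): for a plain integer equality test such as (setcc i64 %a, %b, seteq),
// the lowering code below typically fills in something like
//   C.Opcode   = SystemZISD::ICMP;
//   C.ICmpType = SystemZICMP::Any;           // equality ignores signedness
//   C.CCValid  = SystemZ::CCMASK_ICMP;
//   C.CCMask   = SystemZ::CCMASK_CMP_EQ;
// before the comparison is emitted.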
70
71// Classify VT as either 32 or 64 bit.
72static bool is32Bit(EVT VT) {
73 switch (VT.getSimpleVT().SimpleTy) {
74 case MVT::i32:
75 return true;
76 case MVT::i64:
77 return false;
78 default:
79 llvm_unreachable("Unsupported type");
80 }
81}
82
83// Return a version of MachineOperand that can be safely used before the
84// final use.
86 if (Op.isReg())
87 Op.setIsKill(false);
88 return Op;
89}
90
92 const SystemZSubtarget &STI)
93 : TargetLowering(TM), Subtarget(STI) {
94 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
95
96 auto *Regs = STI.getSpecialRegisters();
97
98 // Set up the register classes.
99 if (Subtarget.hasHighWord())
100 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
101 else
102 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
103 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
104 if (!useSoftFloat()) {
105 if (Subtarget.hasVector()) {
106 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
107 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
108 } else {
109 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
111 }
112 if (Subtarget.hasVectorEnhancements1())
113 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
114 else
115 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
116
117 if (Subtarget.hasVector()) {
118 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
119 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
120 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
121 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
122 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
124 }
125
126 if (Subtarget.hasVector())
127 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
128 }
129
130 // Compute derived properties from the register classes
132
133 // Set up special registers.
134 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
135
136 // TODO: It may be better to default to latency-oriented scheduling;
137 // however, LLVM's current latency-oriented scheduler can't handle physreg
138 // definitions such as SystemZ's CC, so use the register-pressure
139 // scheduler, which can.
141
144
146
147 // Instructions are strings of 2-byte aligned 2-byte values.
149 // For performance reasons we prefer 16-byte alignment.
151
152 // Handle operations that are handled in a similar way for all types.
153 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
154 I <= MVT::LAST_FP_VALUETYPE;
155 ++I) {
157 if (isTypeLegal(VT)) {
158 // Lower SET_CC into an IPM-based sequence.
162
163 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
165
166 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
169 }
170 }
171
172 // Expand jump table branches as address arithmetic followed by an
173 // indirect jump.
175
176 // Expand BRCOND into a BR_CC (see above).
178
179 // Handle integer types except i128.
180 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
181 I <= MVT::LAST_INTEGER_VALUETYPE;
182 ++I) {
184 if (isTypeLegal(VT) && VT != MVT::i128) {
186
187 // Expand individual DIV and REMs into DIVREMs.
194
195 // Support addition/subtraction with overflow.
198
199 // Support addition/subtraction with carry.
202
203 // Support carry in as value rather than glue.
206
207 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
208 // available, or if the operand is constant.
210
211 // Use POPCNT on z196 and above.
212 if (Subtarget.hasPopulationCount())
214 else
216
217 // No special instructions for these.
220
221 // Use *MUL_LOHI where possible instead of MULH*.
226
227 // Only z196 and above have native support for conversions to unsigned.
228 // On z10, promoting to i64 doesn't generate an inexact condition for
229 // values that are outside the i32 range but in the i64 range, so use
230 // the default expansion.
231 if (!Subtarget.hasFPExtension())
233
234 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
235 // default to Expand, so need to be modified to Legal where appropriate.
237 if (Subtarget.hasFPExtension())
239
240 // And similarly for STRICT_[SU]INT_TO_FP.
242 if (Subtarget.hasFPExtension())
244 }
245 }
246
247 // Handle i128 if legal.
248 if (isTypeLegal(MVT::i128)) {
249 // No special instructions for these.
265
266 // Support addition/subtraction with carry.
271
272 // Use VPOPCT and add up partial results.
274
275 // We have to use libcalls for these.
284 }
285
286 // Type legalization will convert 8- and 16-bit atomic operations into
287 // forms that operate on i32s (while still keeping the original memory VT).
288 // Lower them into full i32 operations.
300
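// As an informal sketch of the strategy (not the exact node sequence): an
// "atomicrmw add i16" ends up as a loop that operates on the naturally
// aligned i32 word containing the halfword, with the updated value masked
// and shifted into place.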
301 // Whether or not i128 is a legal type, we need to custom lower
302 // the atomic operations in order to exploit SystemZ instructions.
307
308 // Mark sign/zero extending atomic loads as legal, which will make
309 // DAGCombiner fold extensions into atomic loads if possible.
311 {MVT::i8, MVT::i16, MVT::i32}, Legal);
313 {MVT::i8, MVT::i16}, Legal);
315 MVT::i8, Legal);
316
317 // We can use the CC result of compare-and-swap to implement
318 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
322
324
325 // Traps are legal, as we will convert them to "j .+2".
326 setOperationAction(ISD::TRAP, MVT::Other, Legal);
327
328 // z10 has instructions for signed but not unsigned FP conversion.
329 // Handle unsigned 32-bit types as signed 64-bit types.
330 if (!Subtarget.hasFPExtension()) {
335 }
336
337 // We have native support for a 64-bit CTLZ, via FLOGR.
341
342 // On z15 we have native support for a 64-bit CTPOP.
343 if (Subtarget.hasMiscellaneousExtensions3()) {
346 }
347
348 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
350
351 // Expand 128 bit shifts without using a libcall.
355
356 // Also expand 256 bit shifts if i128 is a legal type.
357 if (isTypeLegal(MVT::i128)) {
361 }
362
363 // Handle bitcast from fp128 to i128.
364 if (!isTypeLegal(MVT::i128))
366
367 // We have native instructions for i8, i16 and i32 extensions, but not i1.
369 for (MVT VT : MVT::integer_valuetypes()) {
373 }
374
375 // Handle the various types of symbolic address.
381
382 // We need to handle dynamic allocations specially because of the
383 // 160-byte area at the bottom of the stack.
386
389
390 // Handle prefetches with PFD or PFDRL.
392
393 // Handle readcyclecounter with STCKF.
395
397 // Assume by default that all vector operations need to be expanded.
398 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
399 if (getOperationAction(Opcode, VT) == Legal)
400 setOperationAction(Opcode, VT, Expand);
401
402 // Likewise all truncating stores and extending loads.
403 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
404 setTruncStoreAction(VT, InnerVT, Expand);
407 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
408 }
409
410 if (isTypeLegal(VT)) {
411 // These operations are legal for anything that can be stored in a
412 // vector register, even if there is no native support for the format
413 // as such. In particular, we can do these for v4f32 even though there
414 // are no specific instructions for that format.
420
421 // Likewise, except that we need to replace the nodes with something
422 // more specific.
425 }
426 }
427
428 // Handle integer vector types.
430 if (isTypeLegal(VT)) {
431 // These operations have direct equivalents.
436 if (VT != MVT::v2i64)
442 if (Subtarget.hasVectorEnhancements1())
444 else
448
449 // Convert a GPR scalar to a vector by inserting it into element 0.
451
452 // Use a series of unpacks for extensions.
455
456 // Detect shifts/rotates by a scalar amount and convert them into
457 // V*_BY_SCALAR.
462
463 // Add ISD::VECREDUCE_ADD as custom in order to implement
464 // it with VZERO+VSUM
466
467 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
468 // and inverting the result as necessary.
470 }
471 }
472
473 if (Subtarget.hasVector()) {
474 // There should be no need to check for float types other than v2f64
475 // since <2 x f32> isn't a legal type.
484
493 }
494
495 if (Subtarget.hasVectorEnhancements2()) {
504
513 }
514
515 // Handle floating-point types.
516 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
517 I <= MVT::LAST_FP_VALUETYPE;
518 ++I) {
520 if (isTypeLegal(VT)) {
521 // We can use FI for FRINT.
523
524 // We can use the extended form of FI for other rounding operations.
525 if (Subtarget.hasFPExtension()) {
531 }
532
533 // No special instructions for these.
539
540 // Special treatment.
542
543 // Handle constrained floating-point operations.
553 if (Subtarget.hasFPExtension()) {
559 }
560 }
561 }
562
563 // Handle floating-point vector types.
564 if (Subtarget.hasVector()) {
565 // Scalar-to-vector conversion is just a subreg.
568
569 // Some insertions and extractions can be done directly but others
570 // need to go via integers.
575
576 // These operations have direct equivalents.
577 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
578 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
579 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
580 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
581 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
582 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
583 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
584 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
585 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
588 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
591
592 // Handle constrained floating-point operations.
605
610 if (Subtarget.hasVectorEnhancements1()) {
613 }
614 }
615
616 // The vector enhancements facility 1 has instructions for these.
617 if (Subtarget.hasVectorEnhancements1()) {
618 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
619 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
620 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
621 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
622 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
623 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
624 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
625 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
626 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
629 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
632
637
642
647
652
657
658 // Handle constrained floating-point operations.
671 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
672 MVT::v4f32, MVT::v2f64 }) {
677 }
678 }
679
680 // We only have fused f128 multiply-addition on vector registers.
681 if (!Subtarget.hasVectorEnhancements1()) {
684 }
685
686 // We don't have a copysign instruction on vector registers.
687 if (Subtarget.hasVectorEnhancements1())
689
690 // Needed so that we don't try to implement f128 constant loads using
691 // a load-and-extend of an f80 constant (in cases where the constant
692 // would fit in an f80).
693 for (MVT VT : MVT::fp_valuetypes())
694 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
695
696 // We don't have extending load instructions on vector registers.
697 if (Subtarget.hasVectorEnhancements1()) {
698 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
699 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
700 }
701
702 // Floating-point truncation and stores need to be done separately.
703 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
704 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
705 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
706
707 // We have 64-bit FPR<->GPR moves, but need special handling for
708 // 32-bit forms.
709 if (!Subtarget.hasVector()) {
712 }
713
714 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
715 // structure, but VAEND is a no-op.
719
720 if (Subtarget.isTargetzOS()) {
721 // Handle address space casts between mixed sized pointers.
724 }
725
727
728 // Codes for which we want to perform some z-specific combinations.
732 ISD::LOAD,
743 ISD::SDIV,
744 ISD::UDIV,
745 ISD::SREM,
746 ISD::UREM,
749
750 // Handle intrinsics.
753
754 // We're not using SJLJ for exception handling, but the SJLJ nodes are
755 // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
758
759 // We want to use MVC in preference to even a single load/store pair.
760 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
762
763 // The main memset sequence is a byte store followed by an MVC.
764 // Two STC or MV..I stores win over that, but the kind of fused stores
765 // generated by target-independent code doesn't when the byte value is
766 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
767 // than "STC;MVC". Handle the choice in target-specific code instead.
768 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
770
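// As a rough sketch of the intended code (register choice illustrative), a
// fixed-length memset such as memset(p, 1, 10) is expected to become a byte
// store followed by an overlapping MVC that propagates that byte:
//   MVI   0(%r2),1
//   MVC   1(9,%r2),0(%r2)
// rather than a run of scalar stores.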
771 // Default to having -disable-strictnode-mutation on
772 IsStrictFPEnabled = true;
773
774 if (Subtarget.isTargetzOS()) {
775 struct RTLibCallMapping {
776 RTLIB::Libcall Code;
777 const char *Name;
778 };
779 static RTLibCallMapping RTLibCallCommon[] = {
780#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
781#include "ZOSLibcallNames.def"
782 };
783 for (auto &E : RTLibCallCommon)
784 setLibcallName(E.Code, E.Name);
785 }
786}
787
789 return Subtarget.hasSoftFloat();
790}
791
793 LLVMContext &, EVT VT) const {
794 if (!VT.isVector())
795 return MVT::i32;
797}
798
800 const MachineFunction &MF, EVT VT) const {
801 if (useSoftFloat())
802 return false;
803
804 VT = VT.getScalarType();
805
806 if (!VT.isSimple())
807 return false;
808
809 switch (VT.getSimpleVT().SimpleTy) {
810 case MVT::f32:
811 case MVT::f64:
812 return true;
813 case MVT::f128:
814 return Subtarget.hasVectorEnhancements1();
815 default:
816 break;
817 }
818
819 return false;
820}
821
822// Return true if the constant can be generated with a vector instruction,
823// such as VGM, VGMB or VREPI.
825 const SystemZSubtarget &Subtarget) {
826 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
827 if (!Subtarget.hasVector() ||
828 (isFP128 && !Subtarget.hasVectorEnhancements1()))
829 return false;
830
831 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
832 // preferred way of creating all-zero and all-one vectors so give it
833 // priority over other methods below.
834 unsigned Mask = 0;
835 unsigned I = 0;
836 for (; I < SystemZ::VectorBytes; ++I) {
837 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
838 if (Byte == 0xff)
839 Mask |= 1ULL << I;
840 else if (Byte != 0)
841 break;
842 }
843 if (I == SystemZ::VectorBytes) {
845 OpVals.push_back(Mask);
847 return true;
848 }
849
850 if (SplatBitSize > 64)
851 return false;
852
853 auto tryValue = [&](uint64_t Value) -> bool {
854 // Try VECTOR REPLICATE IMMEDIATE
855 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
856 if (isInt<16>(SignedValue)) {
857 OpVals.push_back(((unsigned) SignedValue));
860 SystemZ::VectorBits / SplatBitSize);
861 return true;
862 }
863 // Try VECTOR GENERATE MASK
864 unsigned Start, End;
865 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
866 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
867 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
868 // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
869 OpVals.push_back(Start - (64 - SplatBitSize));
870 OpVals.push_back(End - (64 - SplatBitSize));
873 SystemZ::VectorBits / SplatBitSize);
874 return true;
875 }
876 return false;
877 };
878
879 // First try assuming that any undefined bits above the highest set bit
880 // and below the lowest set bit are 1s. This increases the likelihood of
881 // being able to use a sign-extended element value in VECTOR REPLICATE
882 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
883 uint64_t SplatBitsZ = SplatBits.getZExtValue();
884 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
885 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
886 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
887 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
888 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
889 if (tryValue(SplatBitsZ | Upper | Lower))
890 return true;
891
892 // Now try assuming that any undefined bits between the first and
893 // last defined set bits are set. This increases the chances of
894 // using a non-wraparound mask.
895 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
896 return tryValue(SplatBitsZ | Middle);
897}
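// Worked example (informal; exact operand encoding is an assumption): a
// v4i32 splat of 0x00fffc00 has one contiguous run of ones, so isRxSBGMask
// succeeds and the constant can be built with VECTOR GENERATE MASK, roughly
//   VGMF  %v0, 8, 21       // 32-bit elements, ones in bits 8..21 (0 = MSB)
// while all-zero and all-one vectors are caught earlier and use VGBM.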
898
900 if (IntImm.isSingleWord()) {
901 IntBits = APInt(128, IntImm.getZExtValue());
902 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
903 } else
904 IntBits = IntImm;
905 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
906
907 // Find the smallest splat.
908 SplatBits = IntImm;
909 unsigned Width = SplatBits.getBitWidth();
910 while (Width > 8) {
911 unsigned HalfSize = Width / 2;
912 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
913 APInt LowValue = SplatBits.trunc(HalfSize);
914
915 // If the two halves do not match, stop here.
916 if (HighValue != LowValue || 8 > HalfSize)
917 break;
918
919 SplatBits = HighValue;
920 Width = HalfSize;
921 }
922 SplatUndef = 0;
923 SplatBitSize = Width;
924}
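// Informal example: an i128 immediate with every byte equal to 0x01 halves
// down until SplatBits == 0x01 and SplatBitSize == 8, whereas a value such
// as 0x0001000100010001... stops at a 16-bit splat.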
925
927 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
928 bool HasAnyUndefs;
929
930 // Get IntBits by finding the 128 bit splat.
931 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
932 true);
933
934 // Get SplatBits by finding the 8 bit or greater splat.
935 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
936 true);
937}
938
940 bool ForCodeSize) const {
941 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
942 if (Imm.isZero() || Imm.isNegZero())
943 return true;
944
946}
947
950 MachineBasicBlock *MBB) const {
951 DebugLoc DL = MI.getDebugLoc();
952 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
953 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
954
957
958 const BasicBlock *BB = MBB->getBasicBlock();
960
961 Register DstReg = MI.getOperand(0).getReg();
962 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
963 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
964 (void)TRI;
965 Register mainDstReg = MRI.createVirtualRegister(RC);
966 Register restoreDstReg = MRI.createVirtualRegister(RC);
967
968 MVT PVT = getPointerTy(MF->getDataLayout());
969 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
970 // For v = setjmp(buf), we generate the code below.
971 // Algorithm:
972 //
973 // ---------
974 // | thisMBB |
975 // ---------
976 // |
977 // ------------------------
978 // | |
979 // ---------- ---------------
980 // | mainMBB | | restoreMBB |
981 // | v = 0 | | v = 1 |
982 // ---------- ---------------
983 // | |
984 // -------------------------
985 // |
986 // -----------------------------
987 // | sinkMBB |
988 // | phi(v_mainMBB,v_restoreMBB) |
989 // -----------------------------
990 // thisMBB:
991 // buf[FPOffset] = Frame Pointer if hasFP.
992 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
993 // buf[BCOffset] = Backchain value if building with -mbackchain.
994 // buf[SPOffset] = Stack Pointer.
995 // buf[LPOffset] = never written by us; gcc always stores R13 in this slot.
996 // SjLjSetup restoreMBB
997 // mainMBB:
998 // v_main = 0
999 // sinkMBB:
1000 // v = phi(v_main, v_restore)
1001 // restoreMBB:
1002 // v_restore = 1
1003
1004 MachineBasicBlock *thisMBB = MBB;
1005 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
1006 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
1007 MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
1008
1009 MF->insert(I, mainMBB);
1010 MF->insert(I, sinkMBB);
1011 MF->push_back(restoreMBB);
1012 restoreMBB->setMachineBlockAddressTaken();
1013
1015
1016 // Transfer the remainder of BB and its successor edges to sinkMBB.
1017 sinkMBB->splice(sinkMBB->begin(), MBB,
1018 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1020
1021 // thisMBB:
1022 const int64_t FPOffset = 0; // Slot 1.
1023 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1024 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1025 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1026
1027 // Buf address.
1028 Register BufReg = MI.getOperand(1).getReg();
1029
1030 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1031 unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
1032
1033 // Prepare IP for longjmp.
1034 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1035 .addMBB(restoreMBB);
1036 // Store IP for return from jmp, slot 2, offset = 1.
1037 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
1038 .addReg(LabelReg)
1039 .addReg(BufReg)
1040 .addImm(LabelOffset)
1041 .addReg(0);
1042
1043 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1044 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1045 if (HasFP) {
1046 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
1047 .addReg(SpecialRegs->getFramePointerRegister())
1048 .addReg(BufReg)
1049 .addImm(FPOffset)
1050 .addReg(0);
1051 }
1052
1053 // Store SP.
1054 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
1055 .addReg(SpecialRegs->getStackPointerRegister())
1056 .addReg(BufReg)
1057 .addImm(SPOffset)
1058 .addReg(0);
1059
1060 // Slot 3 (BCOffset): Backchain value (if building with -mbackchain).
1061 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1062 if (BackChain) {
1063 Register BCReg = MRI.createVirtualRegister(PtrRC);
1064 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1065 MIB = BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1066 .addReg(SpecialRegs->getStackPointerRegister())
1067 .addImm(TFL->getBackchainOffset(*MF))
1068 .addReg(0);
1069
1070 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
1071 .addReg(BCReg)
1072 .addReg(BufReg)
1073 .addImm(BCOffset)
1074 .addReg(0);
1075 }
1076
1077 // Setup.
1078 MIB = BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1079 .addMBB(restoreMBB);
1080
1081 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1082 MIB.addRegMask(RegInfo->getNoPreservedMask());
1083
1084 thisMBB->addSuccessor(mainMBB);
1085 thisMBB->addSuccessor(restoreMBB);
1086
1087 // mainMBB:
1088 BuildMI(mainMBB, DL, TII->get(SystemZ::LHI), mainDstReg).addImm(0);
1089 mainMBB->addSuccessor(sinkMBB);
1090
1091 // sinkMBB:
1092 BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1093 .addReg(mainDstReg)
1094 .addMBB(mainMBB)
1095 .addReg(restoreDstReg)
1096 .addMBB(restoreMBB);
1097
1098 // restoreMBB.
1099 BuildMI(restoreMBB, DL, TII->get(SystemZ::LHI), restoreDstReg).addImm(1);
1100 BuildMI(restoreMBB, DL, TII->get(SystemZ::J)).addMBB(sinkMBB);
1101 restoreMBB->addSuccessor(sinkMBB);
1102
1103 MI.eraseFromParent();
1104
1105 return sinkMBB;
1106}
1107
1110 MachineBasicBlock *MBB) const {
1111
1112 DebugLoc DL = MI.getDebugLoc();
1113 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1114
1115 MachineFunction *MF = MBB->getParent();
1117
1118 MVT PVT = getPointerTy(MF->getDataLayout());
1119 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1120 Register BufReg = MI.getOperand(0).getReg();
1121 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1122 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1123
1124 Register Tmp = MRI.createVirtualRegister(RC);
1125 Register BCReg = MRI.createVirtualRegister(RC);
1126
1128
1129 const int64_t FPOffset = 0;
1130 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1131 const int64_t BCOffset = 2 * PVT.getStoreSize();
1132 const int64_t SPOffset = 3 * PVT.getStoreSize();
1133 const int64_t LPOffset = 4 * PVT.getStoreSize();
1134
1135 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1136 .addReg(BufReg)
1137 .addImm(LabelOffset)
1138 .addReg(0);
1139
1140 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1141 SpecialRegs->getFramePointerRegister())
1142 .addReg(BufReg)
1143 .addImm(FPOffset)
1144 .addReg(0);
1145
1146 // We restore R13 even though we never stored it in setjmp from llvm,
1147 // as gcc always stores R13 in __builtin_setjmp. Code mixing gcc setjmp
1148 // and llvm longjmp is therefore handled correctly.
1149 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1150 .addReg(BufReg)
1151 .addImm(LPOffset)
1152 .addReg(0);
1153
1154 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1155 if (BackChain) {
1156 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1157 .addReg(BufReg)
1158 .addImm(BCOffset)
1159 .addReg(0);
1160 }
1161
1162 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1163 SpecialRegs->getStackPointerRegister())
1164 .addReg(BufReg)
1165 .addImm(SPOffset)
1166 .addReg(0);
1167
1168 if (BackChain) {
1169 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1170 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1171 .addReg(BCReg)
1172 .addReg(SpecialRegs->getStackPointerRegister())
1173 .addImm(TFL->getBackchainOffset(*MF))
1174 .addReg(0);
1175 }
1176
1177 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1178
1179 MI.eraseFromParent();
1180 return MBB;
1181}
1182
1183/// Returns true if stack probing through inline assembly is requested.
1185 // If the function specifically requests inline stack probes, emit them.
1186 if (MF.getFunction().hasFnAttribute("probe-stack"))
1187 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1188 "inline-asm";
1189 return false;
1190}
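// A minimal IR-level example (attribute spelling taken from the check above):
//   define void @f() "probe-stack"="inline-asm" {
//     ...
//   }
// makes this hook return true and requests inline stack probes.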
1191
1195}
1196
1200}
1201
1204 // Don't expand subword operations as they require special treatment.
1205 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1207
1208 // Don't expand if there is a target instruction available.
1209 if (Subtarget.hasInterlockedAccess1() &&
1210 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1217
1219}
1220
1222 // We can use CGFI or CLGFI.
1223 return isInt<32>(Imm) || isUInt<32>(Imm);
1224}
1225
1227 // We can use ALGFI or SLGFI.
1228 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1229}
1230
1232 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1233 // Unaligned accesses should never be slower than the expanded version.
1234 // We check specifically for aligned accesses in the few cases where
1235 // they are required.
1236 if (Fast)
1237 *Fast = 1;
1238 return true;
1239}
1240
1241// Information about the addressing mode for a memory access.
1243 // True if a long displacement is supported.
1245
1246 // True if use of index register is supported.
1248
1249 AddressingMode(bool LongDispl, bool IdxReg) :
1250 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1251};
1252
1253// Return the desired addressing mode for a Load whose only use (in the
1254// same block) is a Store.
1256 Type *Ty) {
1257 // With vector support, a Load->Store combination may be lowered to either
1258 // an MVC or vector operations, and it seems to work best to allow the
1259 // vector addressing mode.
1260 if (HasVector)
1261 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1262
1263 // Otherwise only the MVC case is special.
1264 bool MVC = Ty->isIntegerTy(8);
1265 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1266}
1267
1268// Return the addressing mode which seems most desirable given an LLVM
1269// Instruction pointer.
1270static AddressingMode
1272 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1273 switch (II->getIntrinsicID()) {
1274 default: break;
1275 case Intrinsic::memset:
1276 case Intrinsic::memmove:
1277 case Intrinsic::memcpy:
1278 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1279 }
1280 }
1281
1282 if (isa<LoadInst>(I) && I->hasOneUse()) {
1283 auto *SingleUser = cast<Instruction>(*I->user_begin());
1284 if (SingleUser->getParent() == I->getParent()) {
1285 if (isa<ICmpInst>(SingleUser)) {
1286 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1287 if (C->getBitWidth() <= 64 &&
1288 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1289 // Comparison of memory with 16 bit signed / unsigned immediate
1290 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1291 } else if (isa<StoreInst>(SingleUser))
1292 // Load->Store
1293 return getLoadStoreAddrMode(HasVector, I->getType());
1294 }
1295 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1296 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1297 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1298 // Load->Store
1299 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1300 }
1301
1302 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1303
1304 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1305 // dependencies (LDE only supports small offsets).
1306 // * Utilize the vector registers to hold floating point
1307 // values (vector load / store instructions only support small
1308 // offsets).
1309
1310 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1311 I->getOperand(0)->getType());
1312 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1313 bool IsVectorAccess = MemAccessTy->isVectorTy();
1314
1315 // A store of an extracted vector element will be combined into a VSTE type
1316 // instruction.
1317 if (!IsVectorAccess && isa<StoreInst>(I)) {
1318 Value *DataOp = I->getOperand(0);
1319 if (isa<ExtractElementInst>(DataOp))
1320 IsVectorAccess = true;
1321 }
1322
1323 // A load which gets inserted into a vector element will be combined into a
1324 // VLE type instruction.
1325 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1326 User *LoadUser = *I->user_begin();
1327 if (isa<InsertElementInst>(LoadUser))
1328 IsVectorAccess = true;
1329 }
1330
1331 if (IsFPAccess || IsVectorAccess)
1332 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1333 }
1334
1335 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1336}
1337
1339 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1340 // Punt on globals for now, although they can be used in limited
1341 // RELATIVE LONG cases.
1342 if (AM.BaseGV)
1343 return false;
1344
1345 // Require a 20-bit signed offset.
1346 if (!isInt<20>(AM.BaseOffs))
1347 return false;
1348
1349 bool RequireD12 =
1350 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1351 AddressingMode SupportedAM(!RequireD12, true);
1352 if (I != nullptr)
1353 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1354
1355 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1356 return false;
1357
1358 if (!SupportedAM.IndexReg)
1359 // No indexing allowed.
1360 return AM.Scale == 0;
1361 else
1362 // Indexing is OK but no scale factor can be applied.
1363 return AM.Scale == 0 || AM.Scale == 1;
1364}
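// Informal examples of the effect: an address of the form
//   base + index + Disp      // Disp within signed 20 bits, Scale == 1
// is accepted, while a scaled index (e.g. base + 2*index) or a global-based
// address is rejected here.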
1365
1367 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1368 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1369 const int MVCFastLen = 16;
1370
1371 if (Limit != ~unsigned(0)) {
1372 // Don't expand Op into scalar loads/stores in these cases:
1373 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1374 return false; // Small memcpy: Use MVC
1375 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1376 return false; // Small memset (first byte with STC/MVI): Use MVC
1377 if (Op.isZeroMemset())
1378 return false; // Memset zero: Use XC
1379 }
1380
1381 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1382 SrcAS, FuncAttributes);
1383}
1384
1386 const AttributeList &FuncAttributes) const {
1387 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1388}
1389
1390bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1391 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1392 return false;
1393 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1394 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1395 return FromBits > ToBits;
1396}
1397
1399 if (!FromVT.isInteger() || !ToVT.isInteger())
1400 return false;
1401 unsigned FromBits = FromVT.getFixedSizeInBits();
1402 unsigned ToBits = ToVT.getFixedSizeInBits();
1403 return FromBits > ToBits;
1404}
1405
1406//===----------------------------------------------------------------------===//
1407// Inline asm support
1408//===----------------------------------------------------------------------===//
1409
1412 if (Constraint.size() == 1) {
1413 switch (Constraint[0]) {
1414 case 'a': // Address register
1415 case 'd': // Data register (equivalent to 'r')
1416 case 'f': // Floating-point register
1417 case 'h': // High-part register
1418 case 'r': // General-purpose register
1419 case 'v': // Vector register
1420 return C_RegisterClass;
1421
1422 case 'Q': // Memory with base and unsigned 12-bit displacement
1423 case 'R': // Likewise, plus an index
1424 case 'S': // Memory with base and signed 20-bit displacement
1425 case 'T': // Likewise, plus an index
1426 case 'm': // Equivalent to 'T'.
1427 return C_Memory;
1428
1429 case 'I': // Unsigned 8-bit constant
1430 case 'J': // Unsigned 12-bit constant
1431 case 'K': // Signed 16-bit constant
1432 case 'L': // Signed 20-bit displacement (on all targets we support)
1433 case 'M': // 0x7fffffff
1434 return C_Immediate;
1435
1436 default:
1437 break;
1438 }
1439 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1440 switch (Constraint[1]) {
1441 case 'Q': // Address with base and unsigned 12-bit displacement
1442 case 'R': // Likewise, plus an index
1443 case 'S': // Address with base and signed 20-bit displacement
1444 case 'T': // Likewise, plus an index
1445 return C_Address;
1446
1447 default:
1448 break;
1449 }
1450 }
1451 return TargetLowering::getConstraintType(Constraint);
1452}
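// A hypothetical inline-asm snippet showing two of these constraints, where
// "d" selects a GPR and "K" accepts a signed 16-bit immediate:
//   asm("ahi %0,%1" : "+d"(x) : "K"(100));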
1453
1456 const char *constraint) const {
1458 Value *CallOperandVal = info.CallOperandVal;
1459 // If we don't have a value, we can't do a match,
1460 // but allow it at the lowest weight.
1461 if (!CallOperandVal)
1462 return CW_Default;
1463 Type *type = CallOperandVal->getType();
1464 // Look at the constraint type.
1465 switch (*constraint) {
1466 default:
1468 break;
1469
1470 case 'a': // Address register
1471 case 'd': // Data register (equivalent to 'r')
1472 case 'h': // High-part register
1473 case 'r': // General-purpose register
1474 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1475 break;
1476
1477 case 'f': // Floating-point register
1478 if (!useSoftFloat())
1479 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1480 break;
1481
1482 case 'v': // Vector register
1483 if (Subtarget.hasVector())
1484 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1485 : CW_Default;
1486 break;
1487
1488 case 'I': // Unsigned 8-bit constant
1489 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1490 if (isUInt<8>(C->getZExtValue()))
1491 weight = CW_Constant;
1492 break;
1493
1494 case 'J': // Unsigned 12-bit constant
1495 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1496 if (isUInt<12>(C->getZExtValue()))
1497 weight = CW_Constant;
1498 break;
1499
1500 case 'K': // Signed 16-bit constant
1501 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1502 if (isInt<16>(C->getSExtValue()))
1503 weight = CW_Constant;
1504 break;
1505
1506 case 'L': // Signed 20-bit displacement (on all targets we support)
1507 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1508 if (isInt<20>(C->getSExtValue()))
1509 weight = CW_Constant;
1510 break;
1511
1512 case 'M': // 0x7fffffff
1513 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1514 if (C->getZExtValue() == 0x7fffffff)
1515 weight = CW_Constant;
1516 break;
1517 }
1518 return weight;
1519}
1520
1521// Parse a "{tNNN}" register constraint for which the register type "t"
1522// has already been verified. RC is the register class associated with "t"
1523// and Map maps 0-based register numbers to LLVM register numbers.
1524static std::pair<unsigned, const TargetRegisterClass *>
1526 const unsigned *Map, unsigned Size) {
1527 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1528 if (isdigit(Constraint[2])) {
1529 unsigned Index;
1530 bool Failed =
1531 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1532 if (!Failed && Index < Size && Map[Index])
1533 return std::make_pair(Map[Index], RC);
1534 }
1535 return std::make_pair(0U, nullptr);
1536}
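// For instance (assuming the usual SystemZMC register tables), the
// constraint "{r7}" with a 64-bit value type resolves to R7D in
// GR64BitRegClass, and "{f2}" with a 32-bit type resolves to F2S in
// FP32BitRegClass.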
1537
1538std::pair<unsigned, const TargetRegisterClass *>
1540 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1541 if (Constraint.size() == 1) {
1542 // GCC Constraint Letters
1543 switch (Constraint[0]) {
1544 default: break;
1545 case 'd': // Data register (equivalent to 'r')
1546 case 'r': // General-purpose register
1547 if (VT.getSizeInBits() == 64)
1548 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1549 else if (VT.getSizeInBits() == 128)
1550 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1551 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1552
1553 case 'a': // Address register
1554 if (VT == MVT::i64)
1555 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1556 else if (VT == MVT::i128)
1557 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1558 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1559
1560 case 'h': // High-part register (an LLVM extension)
1561 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1562
1563 case 'f': // Floating-point register
1564 if (!useSoftFloat()) {
1565 if (VT.getSizeInBits() == 64)
1566 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1567 else if (VT.getSizeInBits() == 128)
1568 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1569 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1570 }
1571 break;
1572
1573 case 'v': // Vector register
1574 if (Subtarget.hasVector()) {
1575 if (VT.getSizeInBits() == 32)
1576 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1577 if (VT.getSizeInBits() == 64)
1578 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1579 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1580 }
1581 break;
1582 }
1583 }
1584 if (Constraint.starts_with("{")) {
1585
1586 // A clobber constraint (e.g. ~{f0}) will have MVT::Other, whose size
1587 // cannot legally be queried.
1588 auto getVTSizeInBits = [&VT]() {
1589 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1590 };
1591
1592 // We need to override the default register parsing for GPRs and FPRs
1593 // because the interpretation depends on VT. The internal names of
1594 // the registers are also different from the external names
1595 // (F0D and F0S instead of F0, etc.).
1596 if (Constraint[1] == 'r') {
1597 if (getVTSizeInBits() == 32)
1598 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1600 if (getVTSizeInBits() == 128)
1601 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1603 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1605 }
1606 if (Constraint[1] == 'f') {
1607 if (useSoftFloat())
1608 return std::make_pair(
1609 0u, static_cast<const TargetRegisterClass *>(nullptr));
1610 if (getVTSizeInBits() == 32)
1611 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1613 if (getVTSizeInBits() == 128)
1614 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1616 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1618 }
1619 if (Constraint[1] == 'v') {
1620 if (!Subtarget.hasVector())
1621 return std::make_pair(
1622 0u, static_cast<const TargetRegisterClass *>(nullptr));
1623 if (getVTSizeInBits() == 32)
1624 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1626 if (getVTSizeInBits() == 64)
1627 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1629 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1631 }
1632 }
1633 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1634}
1635
1636// FIXME? Maybe this could be a TableGen attribute on some registers and
1637// this table could be generated automatically from RegInfo.
1640 const MachineFunction &MF) const {
1641 Register Reg =
1643 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1644 : SystemZ::NoRegister)
1645 .Case("r15",
1646 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1647 .Default(SystemZ::NoRegister);
1648
1649 if (Reg)
1650 return Reg;
1651 report_fatal_error("Invalid register name global variable");
1652}
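// A usage sketch at the IR level (names assumed): reading the ELF stack
// pointer reaches this hook through the read_register intrinsic, e.g.
//   %sp = call i64 @llvm.read_register.i64(metadata !0)
//   !0 = !{!"r15"}
// Unrecognized register names hit the report_fatal_error above.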
1653
1655 const Constant *PersonalityFn) const {
1656 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1657}
1658
1660 const Constant *PersonalityFn) const {
1661 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1662}
1663
1665 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1666 SelectionDAG &DAG) const {
1667 // Only support length 1 constraints for now.
1668 if (Constraint.size() == 1) {
1669 switch (Constraint[0]) {
1670 case 'I': // Unsigned 8-bit constant
1671 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1672 if (isUInt<8>(C->getZExtValue()))
1673 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1674 Op.getValueType()));
1675 return;
1676
1677 case 'J': // Unsigned 12-bit constant
1678 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1679 if (isUInt<12>(C->getZExtValue()))
1680 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1681 Op.getValueType()));
1682 return;
1683
1684 case 'K': // Signed 16-bit constant
1685 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1686 if (isInt<16>(C->getSExtValue()))
1687 Ops.push_back(DAG.getSignedTargetConstant(
1688 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1689 return;
1690
1691 case 'L': // Signed 20-bit displacement (on all targets we support)
1692 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1693 if (isInt<20>(C->getSExtValue()))
1694 Ops.push_back(DAG.getSignedTargetConstant(
1695 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1696 return;
1697
1698 case 'M': // 0x7fffffff
1699 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1700 if (C->getZExtValue() == 0x7fffffff)
1701 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1702 Op.getValueType()));
1703 return;
1704 }
1705 }
1706 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1707}
1708
1709//===----------------------------------------------------------------------===//
1710// Calling conventions
1711//===----------------------------------------------------------------------===//
1712
1713#include "SystemZGenCallingConv.inc"
1714
1716 CallingConv::ID) const {
1717 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1718 SystemZ::R14D, 0 };
1719 return ScratchRegs;
1720}
1721
1723 Type *ToType) const {
1724 return isTruncateFree(FromType, ToType);
1725}
1726
1728 return CI->isTailCall();
1729}
1730
1731// Value is a value that has been passed to us in the location described by VA
1732// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1733// any loads onto Chain.
1735 CCValAssign &VA, SDValue Chain,
1736 SDValue Value) {
1737 // If the argument has been promoted from a smaller type, insert an
1738 // assertion to capture this.
1739 if (VA.getLocInfo() == CCValAssign::SExt)
1741 DAG.getValueType(VA.getValVT()));
1742 else if (VA.getLocInfo() == CCValAssign::ZExt)
1744 DAG.getValueType(VA.getValVT()));
1745
1746 if (VA.isExtInLoc())
1747 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1748 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1749 // If this is a short vector argument loaded from the stack,
1750 // extend from i64 to full vector size and then bitcast.
1751 assert(VA.getLocVT() == MVT::i64);
1752 assert(VA.getValVT().isVector());
1753 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1754 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1755 } else
1756 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1757 return Value;
1758}
1759
1760// Value is a value of type VA.getValVT() that we need to copy into
1761// the location described by VA. Return a copy of Value converted to
1762// VA.getValVT(). The caller is responsible for handling indirect values.
1764 CCValAssign &VA, SDValue Value) {
1765 switch (VA.getLocInfo()) {
1766 case CCValAssign::SExt:
1767 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1768 case CCValAssign::ZExt:
1769 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1770 case CCValAssign::AExt:
1771 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1772 case CCValAssign::BCvt: {
1773 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1774 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1775 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1776 // For an f32 vararg we need to first promote it to an f64 and then
1777 // bitcast it to an i64.
1778 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1779 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1780 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1781 ? MVT::v2i64
1782 : VA.getLocVT();
1783 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1784 // For ELF, this is a short vector argument to be stored to the stack,
1785 // bitcast to v2i64 and then extract first element.
1786 if (BitCastToType == MVT::v2i64)
1787 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1788 DAG.getConstant(0, DL, MVT::i32));
1789 return Value;
1790 }
1791 case CCValAssign::Full:
1792 return Value;
1793 default:
1794 llvm_unreachable("Unhandled getLocInfo()");
1795 }
1796}
1797
1799 SDLoc DL(In);
1800 SDValue Lo, Hi;
1801 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1802 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1803 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1804 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1805 DAG.getConstant(64, DL, MVT::i32)));
1806 } else {
1807 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1808 }
1809
1810 // FIXME: If v2i64 were a legal type, we could use it instead of
1811 // Untyped here. This might enable improved folding.
1812 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1813 MVT::Untyped, Hi, Lo);
1814 return SDValue(Pair, 0);
1815}
1816
1818 SDLoc DL(In);
1819 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1820 DL, MVT::i64, In);
1821 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1822 DL, MVT::i64, In);
1823
1824 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1825 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1826 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1827 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1828 DAG.getConstant(64, DL, MVT::i32));
1829 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1830 } else {
1831 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1832 }
1833}
1834
1836 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1837 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1838 EVT ValueVT = Val.getValueType();
1839 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1840 // Inline assembly operand.
1841 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1842 return true;
1843 }
1844
1845 return false;
1846}
1847
1849 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1850 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1851 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1852 // Inline assembly operand.
1853 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1854 return DAG.getBitcast(ValueVT, Res);
1855 }
1856
1857 return SDValue();
1858}
1859
1861 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1862 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1863 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1865 MachineFrameInfo &MFI = MF.getFrameInfo();
1867 SystemZMachineFunctionInfo *FuncInfo =
1869 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1870 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1871
1872 // Assign locations to all of the incoming arguments.
1874 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1875 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1876 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1877
1878 unsigned NumFixedGPRs = 0;
1879 unsigned NumFixedFPRs = 0;
1880 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1881 SDValue ArgValue;
1882 CCValAssign &VA = ArgLocs[I];
1883 EVT LocVT = VA.getLocVT();
1884 if (VA.isRegLoc()) {
1885 // Arguments passed in registers
1886 const TargetRegisterClass *RC;
1887 switch (LocVT.getSimpleVT().SimpleTy) {
1888 default:
1889 // Integers smaller than i64 should be promoted to i64.
1890 llvm_unreachable("Unexpected argument type");
1891 case MVT::i32:
1892 NumFixedGPRs += 1;
1893 RC = &SystemZ::GR32BitRegClass;
1894 break;
1895 case MVT::i64:
1896 NumFixedGPRs += 1;
1897 RC = &SystemZ::GR64BitRegClass;
1898 break;
1899 case MVT::f32:
1900 NumFixedFPRs += 1;
1901 RC = &SystemZ::FP32BitRegClass;
1902 break;
1903 case MVT::f64:
1904 NumFixedFPRs += 1;
1905 RC = &SystemZ::FP64BitRegClass;
1906 break;
1907 case MVT::f128:
1908 NumFixedFPRs += 2;
1909 RC = &SystemZ::FP128BitRegClass;
1910 break;
1911 case MVT::v16i8:
1912 case MVT::v8i16:
1913 case MVT::v4i32:
1914 case MVT::v2i64:
1915 case MVT::v4f32:
1916 case MVT::v2f64:
1917 RC = &SystemZ::VR128BitRegClass;
1918 break;
1919 }
1920
1921 Register VReg = MRI.createVirtualRegister(RC);
1922 MRI.addLiveIn(VA.getLocReg(), VReg);
1923 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1924 } else {
1925 assert(VA.isMemLoc() && "Argument not register or memory");
1926
1927 // Create the frame index object for this incoming parameter.
1928 // FIXME: Pre-include call frame size in the offset, should not
1929 // need to manually add it here.
1930 int64_t ArgSPOffset = VA.getLocMemOffset();
1931 if (Subtarget.isTargetXPLINK64()) {
1932 auto &XPRegs =
1934 ArgSPOffset += XPRegs.getCallFrameSize();
1935 }
1936 int FI =
1937 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1938
1939 // Create the SelectionDAG nodes corresponding to a load
1940 // from this parameter. Unpromoted ints and floats are
1941 // passed as right-justified 8-byte values.
1942 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1943 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1944 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1945 DAG.getIntPtrConstant(4, DL));
1946 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1948 }
1949
1950 // Convert the value of the argument register into the value that's
1951 // being passed.
1952 if (VA.getLocInfo() == CCValAssign::Indirect) {
1953 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1955 // If the original argument was split (e.g. i128), we need
1956 // to load all parts of it here (using the same address).
1957 unsigned ArgIndex = Ins[I].OrigArgIndex;
1958 assert (Ins[I].PartOffset == 0);
1959 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1960 CCValAssign &PartVA = ArgLocs[I + 1];
1961 unsigned PartOffset = Ins[I + 1].PartOffset;
1962 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1963 DAG.getIntPtrConstant(PartOffset, DL));
1964 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1966 ++I;
1967 }
1968 } else
1969 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1970 }
1971
1972 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1973 // Save the number of non-varargs registers for later use by va_start, etc.
1974 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1975 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1976
1977 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1978 Subtarget.getSpecialRegisters());
1979
1980 // Likewise the address (in the form of a frame index) of where the
1981 // first stack vararg would be. The 1-byte size here is arbitrary.
1982 // FIXME: Pre-include call frame size in the offset, should not
1983 // need to manually add it here.
1984 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1985 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1986 FuncInfo->setVarArgsFrameIndex(FI);
1987 }
1988
1989 if (IsVarArg && Subtarget.isTargetELF()) {
1990 // Save the number of non-varargs registers for later use by va_start, etc.
1991 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1992 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1993
1994 // Likewise the address (in the form of a frame index) of where the
1995 // first stack vararg would be. The 1-byte size here is arbitrary.
1996 int64_t VarArgsOffset = CCInfo.getStackSize();
1997 FuncInfo->setVarArgsFrameIndex(
1998 MFI.CreateFixedObject(1, VarArgsOffset, true));
1999
2000 // ...and a similar frame index for the caller-allocated save area
2001 // that will be used to store the incoming registers.
2002 int64_t RegSaveOffset =
2003 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2004 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2005 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2006
2007 // Store the FPR varargs in the reserved frame slots. (We store the
2008 // GPRs as part of the prologue.)
2009 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2011 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2012 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2013 int FI =
2015 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2017 &SystemZ::FP64BitRegClass);
2018 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2019 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2021 }
2022 // Join the stores, which are independent of one another.
2023 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2024 ArrayRef(&MemOps[NumFixedFPRs],
2025 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2026 }
2027 }
2028
2029 if (Subtarget.isTargetXPLINK64()) {
2030 // Create virtual register for handling incoming "ADA" special register (R5).
2031 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2032 Register ADAvReg = MRI.createVirtualRegister(RC);
2033 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2034 Subtarget.getSpecialRegisters());
2035 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2036 FuncInfo->setADAVirtualRegister(ADAvReg);
2037 }
2038 return Chain;
2039}
2040
2041static bool canUseSiblingCall(const CCState &ArgCCInfo,
2044 // Punt if there are any indirect or stack arguments, or if the call
2045 // needs the callee-saved argument register R6, or if the call uses
2046 // the callee-saved register arguments SwiftSelf and SwiftError.
2047 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2048 CCValAssign &VA = ArgLocs[I];
2049 if (VA.getLocInfo() == CCValAssign::Indirect)
2050 return false;
2051 if (!VA.isRegLoc())
2052 return false;
2053 Register Reg = VA.getLocReg();
2054 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2055 return false;
2056 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2057 return false;
2058 }
2059 return true;
2060}
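// Illustration (hypothetical C-level calls, not taken from this file): a tail
// call such as "long f(long a, long b)" forwarding both arguments to another
// two-argument function keeps them in %r2/%r3 and passes the checks above,
// while a call with five integer arguments needs the callee-saved argument
// register %r6 and is rejected, as is anything passed indirectly or on the
// stack.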
2061
2062 static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
2063 unsigned Offset, bool LoadAdr = false) {
2064 MachineFunction &MF = DAG.getMachineFunction();
2065 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
2066 unsigned ADAvReg = MFI->getADAVirtualRegister();
2067 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2068
2069 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2070 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2071
2072 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2073 if (!LoadAdr)
2074 Result = DAG.getLoad(
2075 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2076 MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
2077
2078 return Result;
2079}
2080
2081// ADA access using Global value
2082// Note: for functions, address of descriptor is returned
2083 static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
2084 EVT PtrVT) {
2085 unsigned ADAtype;
2086 bool LoadAddr = false;
2087 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2088 bool IsFunction =
2089 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2090 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2091
2092 if (IsFunction) {
2093 if (IsInternal) {
2094 ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC;
2095 LoadAddr = true;
2096 } else
2097 ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC;
2098 } else {
2099 ADAtype = SystemZII::MO_ADA_DATA;
2100 }
2101 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2102
2103 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2104}
2105
2106static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2107 SDLoc &DL, SDValue &Chain) {
2108 unsigned ADADelta = 0; // ADA offset in desc.
2109 unsigned EPADelta = 8; // EPA offset in desc.
2110 MachineFunction &MF = DAG.getMachineFunction();
2111 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2112
2113 // XPLink calling convention.
2114 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2115 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2116 G->getGlobal()->hasPrivateLinkage());
2117 if (IsInternal) {
2118 SystemZMachineFunctionInfo *MFI =
2119 MF.getInfo<SystemZMachineFunctionInfo>();
2120 unsigned ADAvReg = MFI->getADAVirtualRegister();
2121 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2122 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2123 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2124 return true;
2125 } else {
2126 SDValue GA = DAG.getTargetGlobalAddress(
2127 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2128 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2129 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2130 }
2131 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2132 SDValue ES = DAG.getTargetExternalSymbol(
2133 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2134 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2135 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2136 } else {
2137 // Function pointer case
2138 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2139 DAG.getConstant(ADADelta, DL, PtrVT));
2140 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2141 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2142 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2143 DAG.getConstant(EPADelta, DL, PtrVT));
2144 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2145 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2146 }
2147 return false;
2148}
2149
2150 SDValue
2151 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
2152 SmallVectorImpl<SDValue> &InVals) const {
2153 SelectionDAG &DAG = CLI.DAG;
2154 SDLoc &DL = CLI.DL;
2155 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2156 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2157 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2158 SDValue Chain = CLI.Chain;
2159 SDValue Callee = CLI.Callee;
2160 bool &IsTailCall = CLI.IsTailCall;
2161 CallingConv::ID CallConv = CLI.CallConv;
2162 bool IsVarArg = CLI.IsVarArg;
2163 MachineFunction &MF = DAG.getMachineFunction();
2164 EVT PtrVT = getPointerTy(MF.getDataLayout());
2165 LLVMContext &Ctx = *DAG.getContext();
2166 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2167
2168 // FIXME: Tail-call support for z/OS (XPLINK64) to be added later.
2169 if (Subtarget.isTargetXPLINK64())
2170 IsTailCall = false;
2171
2172 // Integer args <=32 bits should have an extension attribute.
2173 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2174
2175 // Analyze the operands of the call, assigning locations to each operand.
2176 SmallVector<CCValAssign, 16> ArgLocs;
2177 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2178 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2179
2180 // We don't support GuaranteedTailCallOpt, only automatically-detected
2181 // sibling calls.
2182 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2183 IsTailCall = false;
2184
2185 // Get a count of how many bytes are to be pushed on the stack.
2186 unsigned NumBytes = ArgCCInfo.getStackSize();
2187
2188 // Mark the start of the call.
2189 if (!IsTailCall)
2190 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2191
2192 // Copy argument values to their designated locations.
2193 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
2194 SmallVector<SDValue, 8> MemOpChains;
2195 SDValue StackPtr;
2196 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2197 CCValAssign &VA = ArgLocs[I];
2198 SDValue ArgValue = OutVals[I];
2199
2200 if (VA.getLocInfo() == CCValAssign::Indirect) {
2201 // Store the argument in a stack slot and pass its address.
2202 unsigned ArgIndex = Outs[I].OrigArgIndex;
2203 EVT SlotVT;
2204 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2205 // Allocate the full stack space for a promoted (and split) argument.
2206 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
2207 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
2208 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2209 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2210 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
2211 } else {
2212 SlotVT = Outs[I].VT;
2213 }
2214 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2215 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2216 MemOpChains.push_back(
2217 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2219 // If the original argument was split (e.g. i128), we need
2220 // to store all parts of it here (and pass just one address).
2221 assert (Outs[I].PartOffset == 0);
2222 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2223 SDValue PartValue = OutVals[I + 1];
2224 unsigned PartOffset = Outs[I + 1].PartOffset;
2225 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2226 DAG.getIntPtrConstant(PartOffset, DL));
2227 MemOpChains.push_back(
2228 DAG.getStore(Chain, DL, PartValue, Address,
2229 MachinePointerInfo::getFixedStack(MF, FI)));
2230 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2231 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2232 ++I;
2233 }
2234 ArgValue = SpillSlot;
2235 } else
2236 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2237
2238 if (VA.isRegLoc()) {
2239 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2240 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2241 // and low values.
2242 if (VA.getLocVT() == MVT::i128)
2243 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2244 // Queue up the argument copies and emit them at the end.
2245 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2246 } else {
2247 assert(VA.isMemLoc() && "Argument not register or memory");
2248
2249 // Work out the address of the stack slot. Unpromoted ints and
2250 // floats are passed as right-justified 8-byte values.
2251 if (!StackPtr.getNode())
2252 StackPtr = DAG.getCopyFromReg(Chain, DL,
2253 Regs->getStackPointerRegister(), PtrVT);
2254 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2255 VA.getLocMemOffset();
2256 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2257 Offset += 4;
2258 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2259 DAG.getIntPtrConstant(Offset, DL));
2260
2261 // Emit the store.
2262 MemOpChains.push_back(
2263 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2264
2265 // Although long doubles or vectors are passed through the stack when
2266 // they are vararg (non-fixed arguments), if a long double or vector
2267 // occupies the third and fourth slot of the argument list, GPR3 should
2268 // still shadow the third slot of the argument list.
2269 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2270 SDValue ShadowArgValue =
2271 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2272 DAG.getIntPtrConstant(1, DL));
2273 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2274 }
2275 }
2276 }
2277
2278 // Join the stores, which are independent of one another.
2279 if (!MemOpChains.empty())
2280 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2281
2282 // Accept direct calls by converting symbolic call addresses to the
2283 // associated Target* opcodes. Force %r1 to be used for indirect
2284 // tail calls.
2285 SDValue Glue;
2286
2287 if (Subtarget.isTargetXPLINK64()) {
2288 SDValue ADA;
2289 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2290 if (!IsBRASL) {
2291 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2292 ->getAddressOfCalleeRegister();
2293 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2294 Glue = Chain.getValue(1);
2295 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2296 }
2297 RegsToPass.push_back(std::make_pair(
2298 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2299 } else {
2300 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2301 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2302 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2303 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2304 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2305 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2306 } else if (IsTailCall) {
2307 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2308 Glue = Chain.getValue(1);
2309 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2310 }
2311 }
2312
2313 // Build a sequence of copy-to-reg nodes, chained and glued together.
2314 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2315 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2316 RegsToPass[I].second, Glue);
2317 Glue = Chain.getValue(1);
2318 }
2319
2320 // The first call operand is the chain and the second is the target address.
2321 SmallVector<SDValue, 8> Ops;
2322 Ops.push_back(Chain);
2323 Ops.push_back(Callee);
2324
2325 // Add argument registers to the end of the list so that they are
2326 // known live into the call.
2327 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2328 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2329 RegsToPass[I].second.getValueType()));
2330
2331 // Add a register mask operand representing the call-preserved registers.
2332 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2333 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2334 assert(Mask && "Missing call preserved mask for calling convention");
2335 Ops.push_back(DAG.getRegisterMask(Mask));
2336
2337 // Glue the call to the argument copies, if any.
2338 if (Glue.getNode())
2339 Ops.push_back(Glue);
2340
2341 // Emit the call.
2342 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2343 if (IsTailCall) {
2344 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2345 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2346 return Ret;
2347 }
2348 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2349 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2350 Glue = Chain.getValue(1);
2351
2352 // Mark the end of the call, which is glued to the call itself.
2353 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2354 Glue = Chain.getValue(1);
2355
2356 // Assign locations to each value returned by this call.
2357 SmallVector<CCValAssign, 16> RetLocs;
2358 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2359 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2360
2361 // Copy all of the result registers out of their specified physreg.
2362 for (CCValAssign &VA : RetLocs) {
2363 // Copy the value out, gluing the copy to the end of the call sequence.
2364 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2365 VA.getLocVT(), Glue);
2366 Chain = RetValue.getValue(1);
2367 Glue = RetValue.getValue(2);
2368
2369 // Convert the value of the return register into the value that's
2370 // being returned.
2371 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2372 }
2373
2374 return Chain;
2375}
2376
2377// Generate a call taking the given operands as arguments and returning a
2378 // result of type RetVT.
2379 SDValue SystemZTargetLowering::makeExternalCall(
2380 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2381 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2382 bool DoesNotReturn, bool IsReturnValueUsed) const {
2383 TargetLowering::ArgListTy Args;
2384 Args.reserve(Ops.size());
2385
2386 TargetLowering::ArgListEntry Entry;
2387 for (SDValue Op : Ops) {
2388 Entry.Node = Op;
2389 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2390 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2391 Entry.IsZExt = !Entry.IsSExt;
2392 Args.push_back(Entry);
2393 }
2394
2395 SDValue Callee =
2396 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2397
2398 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2399 TargetLowering::CallLoweringInfo CLI(DAG);
2400 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2401 CLI.setDebugLoc(DL)
2402 .setChain(Chain)
2403 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2404 .setNoReturn(DoesNotReturn)
2405 .setDiscardResult(!IsReturnValueUsed)
2406 .setSExtResult(SignExtend)
2407 .setZExtResult(!SignExtend);
2408 return LowerCallTo(CLI);
2409}
2410
2411 bool
2412 SystemZTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2413 MachineFunction &MF, bool isVarArg,
2414 const SmallVectorImpl<ISD::OutputArg> &Outs,
2415 LLVMContext &Context,
2416 const Type *RetTy) const {
2417 // Special case that we cannot easily detect in RetCC_SystemZ since
2418 // i128 may not be a legal type.
2419 for (auto &Out : Outs)
2420 if (Out.ArgVT == MVT::i128)
2421 return false;
2422
2423 SmallVector<CCValAssign, 16> RetLocs;
2424 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2425 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2426}
2427
2428 SDValue
2429 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2430 bool IsVarArg,
2431 const SmallVectorImpl<ISD::OutputArg> &Outs,
2432 const SmallVectorImpl<SDValue> &OutVals,
2433 const SDLoc &DL, SelectionDAG &DAG) const {
2434 MachineFunction &MF = DAG.getMachineFunction();
2435
2436 // Integer args <=32 bits should have an extension attribute.
2437 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2438
2439 // Assign locations to each returned value.
2440 SmallVector<CCValAssign, 16> RetLocs;
2441 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2442 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2443
2444 // Quick exit for void returns
2445 if (RetLocs.empty())
2446 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2447
2448 if (CallConv == CallingConv::GHC)
2449 report_fatal_error("GHC functions return void only");
2450
2451 // Copy the result values into the output registers.
2452 SDValue Glue;
2453 SmallVector<SDValue, 4> RetOps;
2454 RetOps.push_back(Chain);
2455 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2456 CCValAssign &VA = RetLocs[I];
2457 SDValue RetValue = OutVals[I];
2458
2459 // Make the return register live on exit.
2460 assert(VA.isRegLoc() && "Can only return in registers!");
2461
2462 // Promote the value as required.
2463 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2464
2465 // Chain and glue the copies together.
2466 Register Reg = VA.getLocReg();
2467 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2468 Glue = Chain.getValue(1);
2469 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2470 }
2471
2472 // Update chain and glue.
2473 RetOps[0] = Chain;
2474 if (Glue.getNode())
2475 RetOps.push_back(Glue);
2476
2477 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2478}
2479
2480// Return true if Op is an intrinsic node with chain that returns the CC value
2481// as its only (other) argument. Provide the associated SystemZISD opcode and
2482// the mask of valid CC values if so.
2483static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2484 unsigned &CCValid) {
2485 unsigned Id = Op.getConstantOperandVal(1);
2486 switch (Id) {
2487 case Intrinsic::s390_tbegin:
2488 Opcode = SystemZISD::TBEGIN;
2489 CCValid = SystemZ::CCMASK_TBEGIN;
2490 return true;
2491
2492 case Intrinsic::s390_tbegin_nofloat:
2493 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2494 CCValid = SystemZ::CCMASK_TBEGIN;
2495 return true;
2496
2497 case Intrinsic::s390_tend:
2498 Opcode = SystemZISD::TEND;
2499 CCValid = SystemZ::CCMASK_TEND;
2500 return true;
2501
2502 default:
2503 return false;
2504 }
2505}
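// Illustration: a source-level test such as "__builtin_tbegin(0) == 0" (a
// transactional-execution builtin, shown here only for context) reaches this
// code as an INTRINSIC_W_CHAIN node for Intrinsic::s390_tbegin, so the caller
// can branch directly on the TBEGIN condition code instead of first
// materializing the CC value into a register.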
2506
2507// Return true if Op is an intrinsic node without chain that returns the
2508// CC value as its final argument. Provide the associated SystemZISD
2509// opcode and the mask of valid CC values if so.
2510static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2511 unsigned Id = Op.getConstantOperandVal(0);
2512 switch (Id) {
2513 case Intrinsic::s390_vpkshs:
2514 case Intrinsic::s390_vpksfs:
2515 case Intrinsic::s390_vpksgs:
2516 Opcode = SystemZISD::PACKS_CC;
2517 CCValid = SystemZ::CCMASK_VCMP;
2518 return true;
2519
2520 case Intrinsic::s390_vpklshs:
2521 case Intrinsic::s390_vpklsfs:
2522 case Intrinsic::s390_vpklsgs:
2523 Opcode = SystemZISD::PACKLS_CC;
2524 CCValid = SystemZ::CCMASK_VCMP;
2525 return true;
2526
2527 case Intrinsic::s390_vceqbs:
2528 case Intrinsic::s390_vceqhs:
2529 case Intrinsic::s390_vceqfs:
2530 case Intrinsic::s390_vceqgs:
2531 Opcode = SystemZISD::VICMPES;
2532 CCValid = SystemZ::CCMASK_VCMP;
2533 return true;
2534
2535 case Intrinsic::s390_vchbs:
2536 case Intrinsic::s390_vchhs:
2537 case Intrinsic::s390_vchfs:
2538 case Intrinsic::s390_vchgs:
2539 Opcode = SystemZISD::VICMPHS;
2540 CCValid = SystemZ::CCMASK_VCMP;
2541 return true;
2542
2543 case Intrinsic::s390_vchlbs:
2544 case Intrinsic::s390_vchlhs:
2545 case Intrinsic::s390_vchlfs:
2546 case Intrinsic::s390_vchlgs:
2547 Opcode = SystemZISD::VICMPHLS;
2548 CCValid = SystemZ::CCMASK_VCMP;
2549 return true;
2550
2551 case Intrinsic::s390_vtm:
2552 Opcode = SystemZISD::VTM;
2553 CCValid = SystemZ::CCMASK_VCMP;
2554 return true;
2555
2556 case Intrinsic::s390_vfaebs:
2557 case Intrinsic::s390_vfaehs:
2558 case Intrinsic::s390_vfaefs:
2559 Opcode = SystemZISD::VFAE_CC;
2560 CCValid = SystemZ::CCMASK_ANY;
2561 return true;
2562
2563 case Intrinsic::s390_vfaezbs:
2564 case Intrinsic::s390_vfaezhs:
2565 case Intrinsic::s390_vfaezfs:
2566 Opcode = SystemZISD::VFAEZ_CC;
2567 CCValid = SystemZ::CCMASK_ANY;
2568 return true;
2569
2570 case Intrinsic::s390_vfeebs:
2571 case Intrinsic::s390_vfeehs:
2572 case Intrinsic::s390_vfeefs:
2573 Opcode = SystemZISD::VFEE_CC;
2574 CCValid = SystemZ::CCMASK_ANY;
2575 return true;
2576
2577 case Intrinsic::s390_vfeezbs:
2578 case Intrinsic::s390_vfeezhs:
2579 case Intrinsic::s390_vfeezfs:
2580 Opcode = SystemZISD::VFEEZ_CC;
2581 CCValid = SystemZ::CCMASK_ANY;
2582 return true;
2583
2584 case Intrinsic::s390_vfenebs:
2585 case Intrinsic::s390_vfenehs:
2586 case Intrinsic::s390_vfenefs:
2587 Opcode = SystemZISD::VFENE_CC;
2588 CCValid = SystemZ::CCMASK_ANY;
2589 return true;
2590
2591 case Intrinsic::s390_vfenezbs:
2592 case Intrinsic::s390_vfenezhs:
2593 case Intrinsic::s390_vfenezfs:
2594 Opcode = SystemZISD::VFENEZ_CC;
2595 CCValid = SystemZ::CCMASK_ANY;
2596 return true;
2597
2598 case Intrinsic::s390_vistrbs:
2599 case Intrinsic::s390_vistrhs:
2600 case Intrinsic::s390_vistrfs:
2601 Opcode = SystemZISD::VISTR_CC;
2602 CCValid = SystemZ::CCMASK_ANY;
2603 return true;
2604
2605 case Intrinsic::s390_vstrcbs:
2606 case Intrinsic::s390_vstrchs:
2607 case Intrinsic::s390_vstrcfs:
2608 Opcode = SystemZISD::VSTRC_CC;
2609 CCValid = SystemZ::CCMASK_ANY;
2610 return true;
2611
2612 case Intrinsic::s390_vstrczbs:
2613 case Intrinsic::s390_vstrczhs:
2614 case Intrinsic::s390_vstrczfs:
2615 Opcode = SystemZISD::VSTRCZ_CC;
2616 CCValid = SystemZ::CCMASK_ANY;
2617 return true;
2618
2619 case Intrinsic::s390_vstrsb:
2620 case Intrinsic::s390_vstrsh:
2621 case Intrinsic::s390_vstrsf:
2622 Opcode = SystemZISD::VSTRS_CC;
2623 CCValid = SystemZ::CCMASK_ANY;
2624 return true;
2625
2626 case Intrinsic::s390_vstrszb:
2627 case Intrinsic::s390_vstrszh:
2628 case Intrinsic::s390_vstrszf:
2629 Opcode = SystemZISD::VSTRSZ_CC;
2630 CCValid = SystemZ::CCMASK_ANY;
2631 return true;
2632
2633 case Intrinsic::s390_vfcedbs:
2634 case Intrinsic::s390_vfcesbs:
2635 Opcode = SystemZISD::VFCMPES;
2636 CCValid = SystemZ::CCMASK_VCMP;
2637 return true;
2638
2639 case Intrinsic::s390_vfchdbs:
2640 case Intrinsic::s390_vfchsbs:
2641 Opcode = SystemZISD::VFCMPHS;
2642 CCValid = SystemZ::CCMASK_VCMP;
2643 return true;
2644
2645 case Intrinsic::s390_vfchedbs:
2646 case Intrinsic::s390_vfchesbs:
2647 Opcode = SystemZISD::VFCMPHES;
2648 CCValid = SystemZ::CCMASK_VCMP;
2649 return true;
2650
2651 case Intrinsic::s390_vftcidb:
2652 case Intrinsic::s390_vftcisb:
2653 Opcode = SystemZISD::VFTCI;
2654 CCValid = SystemZ::CCMASK_VCMP;
2655 return true;
2656
2657 case Intrinsic::s390_tdc:
2658 Opcode = SystemZISD::TDC;
2659 CCValid = SystemZ::CCMASK_TDC;
2660 return true;
2661
2662 default:
2663 return false;
2664 }
2665}
2666
2667 // Emit an intrinsic with chain and an explicit CC register result.
2668 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2669 unsigned Opcode) {
2670 // Copy all operands except the intrinsic ID.
2671 unsigned NumOps = Op.getNumOperands();
2672 SmallVector<SDValue, 6> Ops;
2673 Ops.reserve(NumOps - 1);
2674 Ops.push_back(Op.getOperand(0));
2675 for (unsigned I = 2; I < NumOps; ++I)
2676 Ops.push_back(Op.getOperand(I));
2677
2678 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2679 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2680 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2681 SDValue OldChain = SDValue(Op.getNode(), 1);
2682 SDValue NewChain = SDValue(Intr.getNode(), 1);
2683 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2684 return Intr.getNode();
2685}
2686
2687 // Emit an intrinsic with an explicit CC register result.
2688 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2689 unsigned Opcode) {
2690 // Copy all operands except the intrinsic ID.
2691 unsigned NumOps = Op.getNumOperands();
2692 SmallVector<SDValue, 6> Ops;
2693 Ops.reserve(NumOps - 1);
2694 for (unsigned I = 1; I < NumOps; ++I)
2695 Ops.push_back(Op.getOperand(I));
2696
2697 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2698 return Intr.getNode();
2699}
2700
2701// CC is a comparison that will be implemented using an integer or
2702// floating-point comparison. Return the condition code mask for
2703// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2704// unsigned comparisons and clear for signed ones. In the floating-point
2705 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2706 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2707#define CONV(X) \
2708 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2709 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2710 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2711
2712 switch (CC) {
2713 default:
2714 llvm_unreachable("Invalid integer condition!");
2715
2716 CONV(EQ);
2717 CONV(NE);
2718 CONV(GT);
2719 CONV(GE);
2720 CONV(LT);
2721 CONV(LE);
2722
2723 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2724 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2725 }
2726#undef CONV
2727}
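// For example, expanding CONV(GT) above gives:
//   ISD::SETGT  -> CCMASK_CMP_GT
//   ISD::SETOGT -> CCMASK_CMP_GT
//   ISD::SETUGT -> CCMASK_CMP_UO | CCMASK_CMP_GT
// Integer callers (see getCmp below) use CCMASK_CMP_UO only to choose an
// unsigned comparison and then clear it again, while floating-point callers
// keep it as the "unordered" condition bit.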
2728
2729// If C can be converted to a comparison against zero, adjust the operands
2730// as necessary.
2731static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2732 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2733 return;
2734
2735 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2736 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2737 return;
2738
2739 int64_t Value = ConstOp1->getSExtValue();
2740 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2741 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2742 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2743 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2744 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2745 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2746 }
2747}
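// Worked example: for a signed "x > -1" the code above flips the EQ bit in the
// CC mask and replaces the constant, turning the test into "x >= 0":
//   before: Op1 = -1, CCMask = CCMASK_CMP_GT
//   after:  Op1 =  0, CCMask = CCMASK_CMP_GT | CCMASK_CMP_EQ
// which later lowering can implement as a compare against zero or a
// load-and-test.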
2748
2749// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2750// adjust the operands as necessary.
2751static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2752 Comparison &C) {
2753 // For us to make any changes, it must be a comparison between a single-use
2754 // load and a constant.
2755 if (!C.Op0.hasOneUse() ||
2756 C.Op0.getOpcode() != ISD::LOAD ||
2757 C.Op1.getOpcode() != ISD::Constant)
2758 return;
2759
2760 // We must have an 8- or 16-bit load.
2761 auto *Load = cast<LoadSDNode>(C.Op0);
2762 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2763 if ((NumBits != 8 && NumBits != 16) ||
2764 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2765 return;
2766
2767 // The load must be an extending one and the constant must be within the
2768 // range of the unextended value.
2769 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2770 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2771 return;
2772 uint64_t Value = ConstOp1->getZExtValue();
2773 uint64_t Mask = (1 << NumBits) - 1;
2774 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2775 // Make sure that ConstOp1 is in range of C.Op0.
2776 int64_t SignedValue = ConstOp1->getSExtValue();
2777 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2778 return;
2779 if (C.ICmpType != SystemZICMP::SignedOnly) {
2780 // Unsigned comparison between two sign-extended values is equivalent
2781 // to unsigned comparison between two zero-extended values.
2782 Value &= Mask;
2783 } else if (NumBits == 8) {
2784 // Try to treat the comparison as unsigned, so that we can use CLI.
2785 // Adjust CCMask and Value as necessary.
2786 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2787 // Test whether the high bit of the byte is set.
2788 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2789 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2790 // Test whether the high bit of the byte is clear.
2791 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2792 else
2793 // No instruction exists for this combination.
2794 return;
2795 C.ICmpType = SystemZICMP::UnsignedOnly;
2796 }
2797 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2798 if (Value > Mask)
2799 return;
2800 // If the constant is in range, we can use any comparison.
2801 C.ICmpType = SystemZICMP::Any;
2802 } else
2803 return;
2804
2805 // Make sure that the first operand is an i32 of the right extension type.
2806 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2807 ISD::SEXTLOAD :
2808 ISD::ZEXTLOAD);
2809 if (C.Op0.getValueType() != MVT::i32 ||
2810 Load->getExtensionType() != ExtType) {
2811 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2812 Load->getBasePtr(), Load->getPointerInfo(),
2813 Load->getMemoryVT(), Load->getAlign(),
2814 Load->getMemOperand()->getFlags());
2815 // Update the chain uses.
2816 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2817 }
2818
2819 // Make sure that the second operand is an i32 with the right value.
2820 if (C.Op1.getValueType() != MVT::i32 ||
2821 Value != ConstOp1->getZExtValue())
2822 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2823}
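// Illustration (hypothetical IR, not taken from this file): for
//   %v = load i8, ptr %p
//   %c = icmp ult i8 %v, 100
// the load is rewritten as an i32 ZEXTLOAD and the constant kept as 100, so
// the comparison can match the memory-immediate form CLI (compare logical
// immediate) instead of needing a separate load and extend.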
2824
2825// Return true if Op is either an unextended load, or a load suitable
2826// for integer register-memory comparisons of type ICmpType.
2827static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2828 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2829 if (Load) {
2830 // There are no instructions to compare a register with a memory byte.
2831 if (Load->getMemoryVT() == MVT::i8)
2832 return false;
2833 // Otherwise decide on extension type.
2834 switch (Load->getExtensionType()) {
2835 case ISD::NON_EXTLOAD:
2836 return true;
2837 case ISD::SEXTLOAD:
2838 return ICmpType != SystemZICMP::UnsignedOnly;
2839 case ISD::ZEXTLOAD:
2840 return ICmpType != SystemZICMP::SignedOnly;
2841 default:
2842 break;
2843 }
2844 }
2845 return false;
2846}
2847
2848// Return true if it is better to swap the operands of C.
2849static bool shouldSwapCmpOperands(const Comparison &C) {
2850 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2851 if (C.Op0.getValueType() == MVT::i128)
2852 return false;
2853 if (C.Op0.getValueType() == MVT::f128)
2854 return false;
2855
2856 // Always keep a floating-point constant second, since comparisons with
2857 // zero can use LOAD TEST and comparisons with other constants make a
2858 // natural memory operand.
2859 if (isa<ConstantFPSDNode>(C.Op1))
2860 return false;
2861
2862 // Never swap comparisons with zero since there are many ways to optimize
2863 // those later.
2864 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2865 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2866 return false;
2867
2868 // Also keep natural memory operands second if the loaded value is
2869 // only used here. Several comparisons have memory forms.
2870 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2871 return false;
2872
2873 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2874 // In that case we generally prefer the memory to be second.
2875 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2876 // The only exceptions are when the second operand is a constant and
2877 // we can use things like CHHSI.
2878 if (!ConstOp1)
2879 return true;
2880 // The unsigned memory-immediate instructions can handle 16-bit
2881 // unsigned integers.
2882 if (C.ICmpType != SystemZICMP::SignedOnly &&
2883 isUInt<16>(ConstOp1->getZExtValue()))
2884 return false;
2885 // The signed memory-immediate instructions can handle 16-bit
2886 // signed integers.
2887 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2888 isInt<16>(ConstOp1->getSExtValue()))
2889 return false;
2890 return true;
2891 }
2892
2893 // Try to promote the use of CGFR and CLGFR.
2894 unsigned Opcode0 = C.Op0.getOpcode();
2895 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2896 return true;
2897 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2898 return true;
2899 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2900 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2901 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2902 return true;
2903
2904 return false;
2905}
2906
2907// Check whether C tests for equality between X and Y and whether X - Y
2908// or Y - X is also computed. In that case it's better to compare the
2909// result of the subtraction against zero.
2911 Comparison &C) {
2912 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2913 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2914 for (SDNode *N : C.Op0->users()) {
2915 if (N->getOpcode() == ISD::SUB &&
2916 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2917 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2918 // Disable the nsw and nuw flags: the backend needs to handle
2919 // overflow as well during comparison elimination.
2920 N->dropFlags(SDNodeFlags::NoWrap);
2921 C.Op0 = SDValue(N, 0);
2922 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2923 return;
2924 }
2925 }
2926 }
2927}
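// Illustration: in a function that computes both "x - y" and "x == y", the
// equality test is rewritten as a comparison of the existing SUB result with
// zero, so later comparison elimination can reuse the CC already set by the
// subtraction instead of emitting a separate compare.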
2928
2929// Check whether C compares a floating-point value with zero and if that
2930// floating-point value is also negated. In this case we can use the
2931// negation to set CC, so avoiding separate LOAD AND TEST and
2932// LOAD (NEGATIVE/COMPLEMENT) instructions.
2933static void adjustForFNeg(Comparison &C) {
2934 // This optimization is invalid for strict comparisons, since FNEG
2935 // does not raise any exceptions.
2936 if (C.Chain)
2937 return;
2938 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2939 if (C1 && C1->isZero()) {
2940 for (SDNode *N : C.Op0->users()) {
2941 if (N->getOpcode() == ISD::FNEG) {
2942 C.Op0 = SDValue(N, 0);
2943 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2944 return;
2945 }
2946 }
2947 }
2948}
2949
2950// Check whether C compares (shl X, 32) with 0 and whether X is
2951// also sign-extended. In that case it is better to test the result
2952// of the sign extension using LTGFR.
2953//
2954// This case is important because InstCombine transforms a comparison
2955// with (sext (trunc X)) into a comparison with (shl X, 32).
2956static void adjustForLTGFR(Comparison &C) {
2957 // Check for a comparison between (shl X, 32) and 0.
2958 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2959 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2960 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2961 if (C1 && C1->getZExtValue() == 32) {
2962 SDValue ShlOp0 = C.Op0.getOperand(0);
2963 // See whether X has any SIGN_EXTEND_INREG uses.
2964 for (SDNode *N : ShlOp0->users()) {
2965 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2966 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2967 C.Op0 = SDValue(N, 0);
2968 return;
2969 }
2970 }
2971 }
2972 }
2973}
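// Illustration (hypothetical IR): for something along the lines of
//   %t = trunc i64 %x to i32
//   %s = sext i32 %t to i64
//   %c = icmp slt i64 %s, 0
// InstCombine presents the compare as (shl %x, 32) against 0; the rewrite
// above retargets it at the SIGN_EXTEND_INREG node so it can be selected as a
// single load-and-test of the sign-extended value (LTGFR).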
2974
2975// If C compares the truncation of an extending load, try to compare
2976// the untruncated value instead. This exposes more opportunities to
2977// reuse CC.
2978static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2979 Comparison &C) {
2980 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2981 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2982 C.Op1.getOpcode() == ISD::Constant &&
2983 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2984 C.Op1->getAsZExtVal() == 0) {
2985 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2986 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2987 C.Op0.getValueSizeInBits().getFixedValue()) {
2988 unsigned Type = L->getExtensionType();
2989 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2990 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2991 C.Op0 = C.Op0.getOperand(0);
2992 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2993 }
2994 }
2995 }
2996}
2997
2998// Return true if shift operation N has an in-range constant shift value.
2999// Store it in ShiftVal if so.
3000static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3001 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3002 if (!Shift)
3003 return false;
3004
3005 uint64_t Amount = Shift->getZExtValue();
3006 if (Amount >= N.getValueSizeInBits())
3007 return false;
3008
3009 ShiftVal = Amount;
3010 return true;
3011}
3012
3013// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3014// instruction and whether the CC value is descriptive enough to handle
3015// a comparison of type Opcode between the AND result and CmpVal.
3016// CCMask says which comparison result is being tested and BitSize is
3017// the number of bits in the operands. If TEST UNDER MASK can be used,
3018// return the corresponding CC mask, otherwise return 0.
3019static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3020 uint64_t Mask, uint64_t CmpVal,
3021 unsigned ICmpType) {
3022 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3023
3024 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3025 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3026 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3027 return 0;
3028
3029 // Work out the masks for the lowest and highest bits.
3030 uint64_t High = llvm::bit_floor(Mask);
3031 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3032
3033 // Signed ordered comparisons are effectively unsigned if the sign
3034 // bit is dropped.
3035 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3036
3037 // Check for equality comparisons with 0, or the equivalent.
3038 if (CmpVal == 0) {
3039 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3040 return SystemZ::CCMASK_TM_ALL_0;
3041 if (CCMask == SystemZ::CCMASK_CMP_NE)
3042 return SystemZ::CCMASK_TM_SOME_1;
3043 }
3044 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3045 if (CCMask == SystemZ::CCMASK_CMP_LT)
3046 return SystemZ::CCMASK_TM_ALL_0;
3047 if (CCMask == SystemZ::CCMASK_CMP_GE)
3048 return SystemZ::CCMASK_TM_SOME_1;
3049 }
3050 if (EffectivelyUnsigned && CmpVal < Low) {
3051 if (CCMask == SystemZ::CCMASK_CMP_LE)
3052 return SystemZ::CCMASK_TM_ALL_0;
3053 if (CCMask == SystemZ::CCMASK_CMP_GT)
3054 return SystemZ::CCMASK_TM_SOME_1;
3055 }
3056
3057 // Check for equality comparisons with the mask, or the equivalent.
3058 if (CmpVal == Mask) {
3059 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3060 return SystemZ::CCMASK_TM_ALL_1;
3061 if (CCMask == SystemZ::CCMASK_CMP_NE)
3062 return SystemZ::CCMASK_TM_SOME_0;
3063 }
3064 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3065 if (CCMask == SystemZ::CCMASK_CMP_GT)
3066 return SystemZ::CCMASK_TM_ALL_1;
3067 if (CCMask == SystemZ::CCMASK_CMP_LE)
3068 return SystemZ::CCMASK_TM_SOME_0;
3069 }
3070 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3071 if (CCMask == SystemZ::CCMASK_CMP_GE)
3072 return SystemZ::CCMASK_TM_ALL_1;
3073 if (CCMask == SystemZ::CCMASK_CMP_LT)
3074 return SystemZ::CCMASK_TM_SOME_0;
3075 }
3076
3077 // Check for ordered comparisons with the top bit.
3078 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3079 if (CCMask == SystemZ::CCMASK_CMP_LE)
3080 return SystemZ::CCMASK_TM_MSB_0;
3081 if (CCMask == SystemZ::CCMASK_CMP_GT)
3082 return SystemZ::CCMASK_TM_MSB_1;
3083 }
3084 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3085 if (CCMask == SystemZ::CCMASK_CMP_LT)
3086 return SystemZ::CCMASK_TM_MSB_0;
3087 if (CCMask == SystemZ::CCMASK_CMP_GE)
3088 return SystemZ::CCMASK_TM_MSB_1;
3089 }
3090
3091 // If there are just two bits, we can do equality checks for Low and High
3092 // as well.
3093 if (Mask == Low + High) {
3094 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3095 return SystemZ::CCMASK_TM_MIXED_MSB_0;
3096 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3097 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
3098 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3099 return SystemZ::CCMASK_TM_MIXED_MSB_1;
3100 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3101 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
3102 }
3103
3104 // Looks like we've exhausted our options.
3105 return 0;
3106}
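// Worked example (illustrative values): for Mask = 0x6000 we get Low = 0x2000
// and High = 0x4000. A test "(x & 0x6000) != 0" then hits the first block
// (CmpVal == 0, CCMASK_CMP_NE) and returns CCMASK_TM_SOME_1, i.e. a TEST UNDER
// MASK that succeeds when at least one of the tested bits is set.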
3107
3108// See whether C can be implemented as a TEST UNDER MASK instruction.
3109// Update the arguments with the TM version if so.
3110 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
3111 Comparison &C) {
3112 // Use VECTOR TEST UNDER MASK for i128 operations.
3113 if (C.Op0.getValueType() == MVT::i128) {
3114 // We can use VTM for EQ/NE comparisons of x & y against 0.
3115 if (C.Op0.getOpcode() == ISD::AND &&
3116 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3117 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3118 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3119 if (Mask && Mask->getAPIntValue() == 0) {
3120 C.Opcode = SystemZISD::VTM;
3121 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3122 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3123 C.CCValid = SystemZ::CCMASK_VCMP;
3124 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3125 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3126 else
3127 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3128 }
3129 }
3130 return;
3131 }
3132
3133 // Check that we have a comparison with a constant.
3134 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3135 if (!ConstOp1)
3136 return;
3137 uint64_t CmpVal = ConstOp1->getZExtValue();
3138
3139 // Check whether the nonconstant input is an AND with a constant mask.
3140 Comparison NewC(C);
3141 uint64_t MaskVal;
3142 ConstantSDNode *Mask = nullptr;
3143 if (C.Op0.getOpcode() == ISD::AND) {
3144 NewC.Op0 = C.Op0.getOperand(0);
3145 NewC.Op1 = C.Op0.getOperand(1);
3146 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3147 if (!Mask)
3148 return;
3149 MaskVal = Mask->getZExtValue();
3150 } else {
3151 // There is no instruction to compare with a 64-bit immediate
3152 // so use TMHH instead if possible. We need an unsigned ordered
3153 // comparison with an i64 immediate.
3154 if (NewC.Op0.getValueType() != MVT::i64 ||
3155 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3156 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3157 NewC.ICmpType == SystemZICMP::SignedOnly)
3158 return;
3159 // Convert LE and GT comparisons into LT and GE.
3160 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3161 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3162 if (CmpVal == uint64_t(-1))
3163 return;
3164 CmpVal += 1;
3165 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3166 }
3167 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3168 // be masked off without changing the result.
3169 MaskVal = -(CmpVal & -CmpVal);
3170 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3171 }
3172 if (!MaskVal)
3173 return;
3174
3175 // Check whether the combination of mask, comparison value and comparison
3176 // type are suitable.
3177 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3178 unsigned NewCCMask, ShiftVal;
3179 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3180 NewC.Op0.getOpcode() == ISD::SHL &&
3181 isSimpleShift(NewC.Op0, ShiftVal) &&
3182 (MaskVal >> ShiftVal != 0) &&
3183 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3184 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3185 MaskVal >> ShiftVal,
3186 CmpVal >> ShiftVal,
3187 SystemZICMP::Any))) {
3188 NewC.Op0 = NewC.Op0.getOperand(0);
3189 MaskVal >>= ShiftVal;
3190 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3191 NewC.Op0.getOpcode() == ISD::SRL &&
3192 isSimpleShift(NewC.Op0, ShiftVal) &&
3193 (MaskVal << ShiftVal != 0) &&
3194 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3195 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3196 MaskVal << ShiftVal,
3197 CmpVal << ShiftVal,
3198 SystemZICMP::UnsignedOnly))) {
3199 NewC.Op0 = NewC.Op0.getOperand(0);
3200 MaskVal <<= ShiftVal;
3201 } else {
3202 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3203 NewC.ICmpType);
3204 if (!NewCCMask)
3205 return;
3206 }
3207
3208 // Go ahead and make the change.
3209 C.Opcode = SystemZISD::TM;
3210 C.Op0 = NewC.Op0;
3211 if (Mask && Mask->getZExtValue() == MaskVal)
3212 C.Op1 = SDValue(Mask, 0);
3213 else
3214 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3215 C.CCValid = SystemZ::CCMASK_TM;
3216 C.CCMask = NewCCMask;
3217}
3218
3219// Implement i128 comparison in vector registers.
3220static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3221 Comparison &C) {
3222 if (C.Opcode != SystemZISD::ICMP)
3223 return;
3224 if (C.Op0.getValueType() != MVT::i128)
3225 return;
3226
3227 // (In-)Equality comparisons can be implemented via VCEQGS.
3228 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3229 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3230 C.Opcode = SystemZISD::VICMPES;
3231 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3232 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3233 C.CCValid = SystemZ::CCMASK_VCMP;
3234 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3235 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3236 else
3237 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3238 return;
3239 }
3240
3241 // Normalize other comparisons to GT.
3242 bool Swap = false, Invert = false;
3243 switch (C.CCMask) {
3244 case SystemZ::CCMASK_CMP_GT: break;
3245 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3246 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3247 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3248 default: llvm_unreachable("Invalid integer condition!");
3249 }
3250 if (Swap)
3251 std::swap(C.Op0, C.Op1);
3252
3253 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3254 C.Opcode = SystemZISD::UCMP128HI;
3255 else
3256 C.Opcode = SystemZISD::SCMP128HI;
3257 C.CCValid = SystemZ::CCMASK_ANY;
3258 C.CCMask = SystemZ::CCMASK_1;
3259
3260 if (Invert)
3261 C.CCMask ^= C.CCValid;
3262}
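// Illustration: a signed i128 "a < b" is normalized by swapping the operands
// (LT becomes GT on (b, a)) and lowered as SCMP128HI with CCMask = CCMASK_1;
// "a >= b" takes the same swapped path and then inverts the mask.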
3263
3264// See whether the comparison argument contains a redundant AND
3265// and remove it if so. This sometimes happens due to the generic
3266// BRCOND expansion.
3267 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3268 Comparison &C) {
3269 if (C.Op0.getOpcode() != ISD::AND)
3270 return;
3271 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3272 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3273 return;
3274 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3275 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3276 return;
3277
3278 C.Op0 = C.Op0.getOperand(0);
3279}
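// Illustration: generic BRCOND lowering tends to produce "(and %cond, 1)"
// before comparing with zero. If computeKnownBits shows that %cond has no bits
// set outside the mask, the AND contributes nothing and %cond is compared
// directly, keeping the original CC-setting operation visible.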
3280
3281// Return a Comparison that tests the condition-code result of intrinsic
3282// node Call against constant integer CC using comparison code Cond.
3283// Opcode is the opcode of the SystemZISD operation for the intrinsic
3284// and CCValid is the set of possible condition-code results.
3285static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3286 SDValue Call, unsigned CCValid, uint64_t CC,
3287 ISD::CondCode Cond) {
3288 Comparison C(Call, SDValue(), SDValue());
3289 C.Opcode = Opcode;
3290 C.CCValid = CCValid;
3291 if (Cond == ISD::SETEQ)
3292 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3293 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3294 else if (Cond == ISD::SETNE)
3295 // ...and the inverse of that.
3296 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3297 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3298 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3299 // always true for CC>3.
3300 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3301 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3302 // ...and the inverse of that.
3303 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3304 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3305 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3306 // always true for CC>3.
3307 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3308 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3309 // ...and the inverse of that.
3310 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3311 else
3312 llvm_unreachable("Unexpected integer comparison type");
3313 C.CCMask &= CCValid;
3314 return C;
3315}
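// Worked example of the mask arithmetic above: bit 3 stands for CC==0 and bit
// 0 for CC==3. With Cond == ISD::SETEQ and CC == 2 the mask is
// 1 << (3 - 2) = 0b0010 (just CC==2); with Cond == ISD::SETULT and CC == 2 it
// is (~0U << (4 - 2)) & CCValid, i.e. bits 3 and 2, meaning "CC is 0 or 1".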
3316
3317 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3318static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3319 ISD::CondCode Cond, const SDLoc &DL,
3320 SDValue Chain = SDValue(),
3321 bool IsSignaling = false) {
3322 if (CmpOp1.getOpcode() == ISD::Constant) {
3323 assert(!Chain);
3324 unsigned Opcode, CCValid;
3325 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3326 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3327 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3328 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3329 CmpOp1->getAsZExtVal(), Cond);
3330 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3331 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3332 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3333 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3334 CmpOp1->getAsZExtVal(), Cond);
3335 }
3336 Comparison C(CmpOp0, CmpOp1, Chain);
3337 C.CCMask = CCMaskForCondCode(Cond);
3338 if (C.Op0.getValueType().isFloatingPoint()) {
3339 C.CCValid = SystemZ::CCMASK_FCMP;
3340 if (!C.Chain)
3341 C.Opcode = SystemZISD::FCMP;
3342 else if (!IsSignaling)
3343 C.Opcode = SystemZISD::STRICT_FCMP;
3344 else
3345 C.Opcode = SystemZISD::STRICT_FCMPS;
3346 adjustForFNeg(C);
3347 } else {
3348 assert(!C.Chain);
3349 C.CCValid = SystemZ::CCMASK_ICMP;
3350 C.Opcode = SystemZISD::ICMP;
3351 // Choose the type of comparison. Equality and inequality tests can
3352 // use either signed or unsigned comparisons. The choice also doesn't
3353 // matter if both sign bits are known to be clear. In those cases we
3354 // want to give the main isel code the freedom to choose whichever
3355 // form fits best.
3356 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3357 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3358 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3359 C.ICmpType = SystemZICMP::Any;
3360 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3361 C.ICmpType = SystemZICMP::UnsignedOnly;
3362 else
3363 C.ICmpType = SystemZICMP::SignedOnly;
3364 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3365 adjustForRedundantAnd(DAG, DL, C);
3366 adjustZeroCmp(DAG, DL, C);
3367 adjustSubwordCmp(DAG, DL, C);
3368 adjustForSubtraction(DAG, DL, C);
3369 adjustForLTGFR(C);
3370 adjustICmpTruncate(DAG, DL, C);
3371 }
3372
3373 if (shouldSwapCmpOperands(C)) {
3374 std::swap(C.Op0, C.Op1);
3375 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3376 }
3377
3378 adjustForTestUnderMask(DAG, DL, C);
3379 adjustICmp128(DAG, DL, C);
3380 return C;
3381}
3382
3383// Emit the comparison instruction described by C.
3384static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3385 if (!C.Op1.getNode()) {
3386 SDNode *Node;
3387 switch (C.Op0.getOpcode()) {
3388 case ISD::INTRINSIC_W_CHAIN:
3389 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3390 return SDValue(Node, 0);
3391 case ISD::INTRINSIC_WO_CHAIN:
3392 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3393 return SDValue(Node, Node->getNumValues() - 1);
3394 default:
3395 llvm_unreachable("Invalid comparison operands");
3396 }
3397 }
3398 if (C.Opcode == SystemZISD::ICMP)
3399 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3400 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3401 if (C.Opcode == SystemZISD::TM) {
3402 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3403 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3404 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3405 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3406 }
3407 if (C.Opcode == SystemZISD::VICMPES) {
3408 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3409 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3410 return SDValue(Val.getNode(), 1);
3411 }
3412 if (C.Chain) {
3413 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3414 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3415 }
3416 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3417}
3418
3419// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3420// 64 bits. Extend is the extension type to use. Store the high part
3421// in Hi and the low part in Lo.
3422static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3423 SDValue Op0, SDValue Op1, SDValue &Hi,
3424 SDValue &Lo) {
3425 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3426 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3427 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3428 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3429 DAG.getConstant(32, DL, MVT::i64));
3430 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3431 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3432}
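// For illustration, the same split written in plain C for the zero-extending
// case (Extend == ISD::ZERO_EXTEND), with hypothetical 32-bit inputs a and b:
//   uint64_t Prod = (uint64_t)a * (uint64_t)b;
//   uint32_t Hi = (uint32_t)(Prod >> 32);   // what ends up in Hi
//   uint32_t Lo = (uint32_t)Prod;           // what ends up in Lo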
3433
3434// Lower a binary operation that produces two VT results, one in each
3435// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3436// and Opcode performs the GR128 operation. Store the even register result
3437// in Even and the odd register result in Odd.
3438static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3439 unsigned Opcode, SDValue Op0, SDValue Op1,
3440 SDValue &Even, SDValue &Odd) {
3441 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3442 bool Is32Bit = is32Bit(VT);
3443 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3444 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3445}
3446
3447// Return an i32 value that is 1 if the CC value produced by CCReg is
3448// in the mask CCMask and 0 otherwise. CC is known to have a value
3449// in CCValid, so other values can be ignored.
3450static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3451 unsigned CCValid, unsigned CCMask) {
3452 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3453 DAG.getConstant(0, DL, MVT::i32),
3454 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3455 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3456 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3457}
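// Illustration: for an integer equality test the node built here is roughly
//   SELECT_CCMASK 1, 0, CCMASK_ICMP, CCMASK_CMP_EQ, %cc
// i.e. "produce 1 if the CC value is in the mask, otherwise 0".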
3458
3459 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3460// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3461// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3462// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3463 // floating-point comparisons.
3464 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3465 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3466 switch (CC) {
3467 case ISD::SETOEQ:
3468 case ISD::SETEQ:
3469 switch (Mode) {
3470 case CmpMode::Int: return SystemZISD::VICMPE;
3471 case CmpMode::FP: return SystemZISD::VFCMPE;
3472 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3473 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3474 }
3475 llvm_unreachable("Bad mode");
3476
3477 case ISD::SETOGE:
3478 case ISD::SETGE:
3479 switch (Mode) {
3480 case CmpMode::Int: return 0;
3481 case CmpMode::FP: return SystemZISD::VFCMPHE;
3482 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3483 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3484 }
3485 llvm_unreachable("Bad mode");
3486
3487 case ISD::SETOGT:
3488 case ISD::SETGT:
3489 switch (Mode) {
3490 case CmpMode::Int: return SystemZISD::VICMPH;
3491 case CmpMode::FP: return SystemZISD::VFCMPH;
3492 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3493 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3494 }
3495 llvm_unreachable("Bad mode");
3496
3497 case ISD::SETUGT:
3498 switch (Mode) {
3499 case CmpMode::Int: return SystemZISD::VICMPHL;
3500 case CmpMode::FP: return 0;
3501 case CmpMode::StrictFP: return 0;
3502 case CmpMode::SignalingFP: return 0;
3503 }
3504 llvm_unreachable("Bad mode");
3505
3506 default:
3507 return 0;
3508 }
3509}
3510
3511// Return the SystemZISD vector comparison operation for CC or its inverse,
3512// or 0 if neither can be done directly. Indicate in Invert whether the
3513// result is for the inverse of CC. Mode is as above.
3514 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3515 bool &Invert) {
3516 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3517 Invert = false;
3518 return Opcode;
3519 }
3520
3521 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3522 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3523 Invert = true;
3524 return Opcode;
3525 }
3526
3527 return 0;
3528}
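// Illustration: integer SETLE has no direct vector opcode, but its inverse
// SETGT maps to VICMPH, so this returns VICMPH with Invert = true;
// lowerVectorSETCC below then XORs the lanes with all-ones.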
3529
3530// Return a v2f64 that contains the extended form of elements Start and Start+1
3531// of v4f32 value Op. If Chain is nonnull, return the strict form.
3532static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3533 SDValue Op, SDValue Chain) {
3534 int Mask[] = { Start, -1, Start + 1, -1 };
3535 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3536 if (Chain) {
3537 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3538 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3539 }
3540 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3541}
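// Illustration: with Start == 0 the shuffle mask is {0, -1, 1, -1}, which
// moves elements 0 and 1 of the v4f32 into the even lanes; VEXTEND then widens
// those even lanes into the two f64 elements of the result. Start == 2 handles
// the remaining pair.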
3542
3543// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3544// producing a result of type VT. If Chain is nonnull, return the strict form.
3545SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3546 const SDLoc &DL, EVT VT,
3547 SDValue CmpOp0,
3548 SDValue CmpOp1,
3549 SDValue Chain) const {
3550 // There is no hardware support for v4f32 (unless we have the vector
3551 // enhancements facility 1), so extend the vector into two v2f64s
3552 // and compare those.
3553 if (CmpOp0.getValueType() == MVT::v4f32 &&
3554 !Subtarget.hasVectorEnhancements1()) {
3555 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3556 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3557 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3558 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3559 if (Chain) {
3560 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3561 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3562 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3563 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3564 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3565 H1.getValue(1), L1.getValue(1),
3566 HRes.getValue(1), LRes.getValue(1) };
3567 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3568 SDValue Ops[2] = { Res, NewChain };
3569 return DAG.getMergeValues(Ops, DL);
3570 }
3571 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3572 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3573 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3574 }
3575 if (Chain) {
3576 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3577 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3578 }
3579 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3580}
3581
3582// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3583// an integer mask of type VT. If Chain is nonnull, we have a strict
3584// floating-point comparison. If in addition IsSignaling is true, we have
3585// a strict signaling floating-point comparison.
3586SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3587 const SDLoc &DL, EVT VT,
3588 ISD::CondCode CC,
3589 SDValue CmpOp0,
3590 SDValue CmpOp1,
3591 SDValue Chain,
3592 bool IsSignaling) const {
3593 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3594 assert (!Chain || IsFP);
3595 assert (!IsSignaling || Chain);
3596 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3597 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3598 bool Invert = false;
3599 SDValue Cmp;
3600 switch (CC) {
3601 // Handle tests for order using (or (ogt y x) (oge x y)).
3602 case ISD::SETUO:
3603 Invert = true;
3604 [[fallthrough]];
3605 case ISD::SETO: {
3606 assert(IsFP && "Unexpected integer comparison");
3607 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3608 DL, VT, CmpOp1, CmpOp0, Chain);
3609 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3610 DL, VT, CmpOp0, CmpOp1, Chain);
3611 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3612 if (Chain)
3613 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3614 LT.getValue(1), GE.getValue(1));
3615 break;
3616 }
3617
3618 // Handle <> tests using (or (ogt y x) (ogt x y)).
3619 case ISD::SETUEQ:
3620 Invert = true;
3621 [[fallthrough]];
3622 case ISD::SETONE: {
3623 assert(IsFP && "Unexpected integer comparison");
3624 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3625 DL, VT, CmpOp1, CmpOp0, Chain);
3626 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3627 DL, VT, CmpOp0, CmpOp1, Chain);
3628 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3629 if (Chain)
3630 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3631 LT.getValue(1), GT.getValue(1));
3632 break;
3633 }
3634
3635 // Otherwise a single comparison is enough. It doesn't really
3636 // matter whether we try the inversion or the swap first, since
3637 // there are no cases where both work.
3638 default:
3639 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3640 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3641 else {
3642 CC = ISD::getSetCCSwappedOperands(CC);
3643 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3644 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3645 else
3646 llvm_unreachable("Unhandled comparison");
3647 }
3648 if (Chain)
3649 Chain = Cmp.getValue(1);
3650 break;
3651 }
3652 if (Invert) {
3653 SDValue Mask =
3654 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3655 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3656 }
3657 if (Chain && Chain.getNode() != Cmp.getNode()) {
3658 SDValue Ops[2] = { Cmp, Chain };
3659 Cmp = DAG.getMergeValues(Ops, DL);
3660 }
3661 return Cmp;
3662}
3663
3664SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3665 SelectionDAG &DAG) const {
3666 SDValue CmpOp0 = Op.getOperand(0);
3667 SDValue CmpOp1 = Op.getOperand(1);
3668 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3669 SDLoc DL(Op);
3670 EVT VT = Op.getValueType();
3671 if (VT.isVector())
3672 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3673
3674 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3675 SDValue CCReg = emitCmp(DAG, DL, C);
3676 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3677}
3678
3679SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3680 SelectionDAG &DAG,
3681 bool IsSignaling) const {
3682 SDValue Chain = Op.getOperand(0);
3683 SDValue CmpOp0 = Op.getOperand(1);
3684 SDValue CmpOp1 = Op.getOperand(2);
3685 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3686 SDLoc DL(Op);
3687 EVT VT = Op.getNode()->getValueType(0);
3688 if (VT.isVector()) {
3689 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3690 Chain, IsSignaling);
3691 return Res.getValue(Op.getResNo());
3692 }
3693
3694 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3695 SDValue CCReg = emitCmp(DAG, DL, C);
3696 CCReg->setFlags(Op->getFlags());
3697 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3698 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3699 return DAG.getMergeValues(Ops, DL);
3700}
3701
3702SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3703 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3704 SDValue CmpOp0 = Op.getOperand(2);
3705 SDValue CmpOp1 = Op.getOperand(3);
3706 SDValue Dest = Op.getOperand(4);
3707 SDLoc DL(Op);
3708
3709 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3710 SDValue CCReg = emitCmp(DAG, DL, C);
3711 return DAG.getNode(
3712 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3713 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3714 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3715}
3716
3717// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3718// allowing Pos and Neg to be wider than CmpOp.
3719static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3720 return (Neg.getOpcode() == ISD::SUB &&
3721 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3722 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3723 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3724 Pos.getOperand(0) == CmpOp)));
3725}
3726
3727// Return the absolute or negative absolute of Op; IsNegative decides which.
3728 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3729 bool IsNegative) {
3730 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3731 if (IsNegative)
3732 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3733 DAG.getConstant(0, DL, Op.getValueType()), Op);
3734 return Op;
3735}
3736
3737SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3738 SelectionDAG &DAG) const {
3739 SDValue CmpOp0 = Op.getOperand(0);
3740 SDValue CmpOp1 = Op.getOperand(1);
3741 SDValue TrueOp = Op.getOperand(2);
3742 SDValue FalseOp = Op.getOperand(3);
3743 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3744 SDLoc DL(Op);
3745
3746 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3747
3748 // Check for absolute and negative-absolute selections, including those
3749 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3750 // This check supplements the one in DAGCombiner.
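// For example, (select_cc x, 0, x, (sub 0, x), setgt) becomes (abs x),
// while the same selection with setlt yields (sub 0, (abs x)).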
3751 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3752 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3753 C.Op1.getOpcode() == ISD::Constant &&
3754 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3755 C.Op1->getAsZExtVal() == 0) {
3756 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3757 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3758 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3759 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3760 }
3761
3762 SDValue CCReg = emitCmp(DAG, DL, C);
3763 SDValue Ops[] = {TrueOp, FalseOp,
3764 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3765 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3766
3767 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3768}
3769
3770SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3771 SelectionDAG &DAG) const {
3772 SDLoc DL(Node);
3773 const GlobalValue *GV = Node->getGlobal();
3774 int64_t Offset = Node->getOffset();
3775 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3776 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3777
3778 SDValue Result;
3779 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3780 if (isInt<32>(Offset)) {
3781 // Assign anchors at 1<<12 byte boundaries.
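// For example, an offset of 0x12344 uses the anchor 0x12000; the remaining
// 0x344 is halfword-aligned and gets folded into the address below.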
3782 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3783 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3784 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3785
3786 // The offset can be folded into the address if it is aligned to a
3787 // halfword.
3788 Offset -= Anchor;
3789 if (Offset != 0 && (Offset & 1) == 0) {
3790 SDValue Full =
3791 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3792 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3793 Offset = 0;
3794 }
3795 } else {
3796 // Conservatively load a constant offset greater than 32 bits into a
3797 // register below.
3798 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3799 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3800 }
3801 } else if (Subtarget.isTargetELF()) {
3802 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3803 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3804 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3805 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3806 } else if (Subtarget.isTargetzOS()) {
3807 Result = getADAEntry(DAG, GV, DL, PtrVT);
3808 } else
3809 llvm_unreachable("Unexpected Subtarget");
3810
3811 // If there was a non-zero offset that we didn't fold, create an explicit
3812 // addition for it.
3813 if (Offset != 0)
3814 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3815 DAG.getSignedConstant(Offset, DL, PtrVT));
3816
3817 return Result;
3818}
3819
3820SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3821 SelectionDAG &DAG,
3822 unsigned Opcode,
3823 SDValue GOTOffset) const {
3824 SDLoc DL(Node);
3825 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3826 SDValue Chain = DAG.getEntryNode();
3827 SDValue Glue;
3828
3829 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3830 CallingConv::GHC)
3831 report_fatal_error("In GHC calling convention TLS is not supported");
3832
3833 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3834 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3835 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3836 Glue = Chain.getValue(1);
3837 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3838 Glue = Chain.getValue(1);
3839
3840 // The first call operand is the chain and the second is the TLS symbol.
3841 SmallVector<SDValue, 8> Ops;
3842 Ops.push_back(Chain);
3843 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3844 Node->getValueType(0),
3845 0, 0));
3846
3847 // Add argument registers to the end of the list so that they are
3848 // known live into the call.
3849 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3850 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3851
3852 // Add a register mask operand representing the call-preserved registers.
3853 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3854 const uint32_t *Mask =
3855 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3856 assert(Mask && "Missing call preserved mask for calling convention");
3857 Ops.push_back(DAG.getRegisterMask(Mask));
3858
3859 // Glue the call to the argument copies.
3860 Ops.push_back(Glue);
3861
3862 // Emit the call.
3863 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3864 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3865 Glue = Chain.getValue(1);
3866
3867 // Copy the return value from %r2.
3868 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3869}
3870
3871SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3872 SelectionDAG &DAG) const {
3873 SDValue Chain = DAG.getEntryNode();
3874 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3875
3876 // The high part of the thread pointer is in access register 0.
3877 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3878 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3879
3880 // The low part of the thread pointer is in access register 1.
3881 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3882 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3883
3884 // Merge them into a single 64-bit address.
3885 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3886 DAG.getConstant(32, DL, PtrVT));
3887 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3888}
3889
3890SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3891 SelectionDAG &DAG) const {
3892 if (DAG.getTarget().useEmulatedTLS())
3893 return LowerToTLSEmulatedModel(Node, DAG);
3894 SDLoc DL(Node);
3895 const GlobalValue *GV = Node->getGlobal();
3896 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3897 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3898
3899 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3900 CallingConv::GHC)
3901 report_fatal_error("In GHC calling convention TLS is not supported");
3902
3903 SDValue TP = lowerThreadPointer(DL, DAG);
3904
3905 // Get the offset of GA from the thread pointer, based on the TLS model.
3906 SDValue Offset;
3907 switch (model) {
3908 case TLSModel::GeneralDynamic: {
3909 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3910 SystemZConstantPoolValue *CPV =
3911 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3912
3913 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3914 Offset = DAG.getLoad(
3915 PtrVT, DL, DAG.getEntryNode(), Offset,
3916 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3917
3918 // Call __tls_get_offset to retrieve the offset.
3919 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3920 break;
3921 }
3922
3923 case TLSModel::LocalDynamic: {
3924 // Load the GOT offset of the module ID.
3925 SystemZConstantPoolValue *CPV =
3926 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3927
3928 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3929 Offset = DAG.getLoad(
3930 PtrVT, DL, DAG.getEntryNode(), Offset,
3931 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3932
3933 // Call __tls_get_offset to retrieve the module base offset.
3934 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3935
3936 // Note: The SystemZLDCleanupPass will remove redundant computations
3937 // of the module base offset. Count total number of local-dynamic
3938 // accesses to trigger execution of that pass.
3939 SystemZMachineFunctionInfo* MFI =
3940 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3941 MFI->incNumLocalDynamicTLSAccesses();
3942
3943 // Add the per-symbol offset.
3944 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3945
3946 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3947 DTPOffset = DAG.getLoad(
3948 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3949 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3950
3951 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3952 break;
3953 }
3954
3955 case TLSModel::InitialExec: {
3956 // Load the offset from the GOT.
3957 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3958 SystemZII::MO_INDNTPOFF);
3959 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3960 Offset =
3961 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3962 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3963 break;
3964 }
3965
3966 case TLSModel::LocalExec: {
3967 // Force the offset into the constant pool and load it from there.
3968 SystemZConstantPoolValue *CPV =
3969 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3970
3971 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3972 Offset = DAG.getLoad(
3973 PtrVT, DL, DAG.getEntryNode(), Offset,
3974 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3975 break;
3976 }
3977 }
3978
3979 // Add the base and offset together.
3980 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3981}
3982
3983SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3984 SelectionDAG &DAG) const {
3985 SDLoc DL(Node);
3986 const BlockAddress *BA = Node->getBlockAddress();
3987 int64_t Offset = Node->getOffset();
3988 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3989
3990 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3991 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3992 return Result;
3993}
3994
3995SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3996 SelectionDAG &DAG) const {
3997 SDLoc DL(JT);
3998 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3999 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4000
4001 // Use LARL to load the address of the table.
4002 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4003}
4004
4005SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4006 SelectionDAG &DAG) const {
4007 SDLoc DL(CP);
4008 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4009
4010 SDValue Result;
4011 if (CP->isMachineConstantPoolEntry())
4012 Result =
4013 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4014 else
4015 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4016 CP->getOffset());
4017
4018 // Use LARL to load the address of the constant pool entry.
4019 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4020}
4021
4022SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4023 SelectionDAG &DAG) const {
4024 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4025 MachineFunction &MF = DAG.getMachineFunction();
4026 MachineFrameInfo &MFI = MF.getFrameInfo();
4027 MFI.setFrameAddressIsTaken(true);
4028
4029 SDLoc DL(Op);
4030 unsigned Depth = Op.getConstantOperandVal(0);
4031 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4032
4033 // By definition, the frame address is the address of the back chain. (In
4034 // the case of packed stack without backchain, return the address where the
4035 // backchain would have been stored. This will either be an unused space or
4036 // contain a saved register).
4037 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4038 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4039
4040 if (Depth > 0) {
4041 // FIXME The frontend should detect this case.
4042 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4043 report_fatal_error("Unsupported stack frame traversal count");
4044
4045 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4046 while (Depth--) {
4047 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4048 MachinePointerInfo());
4049 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4050 }
4051 }
4052
4053 return BackChain;
4054}
4055
4056SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4057 SelectionDAG &DAG) const {
4058 MachineFunction &MF = DAG.getMachineFunction();
4059 MachineFrameInfo &MFI = MF.getFrameInfo();
4060 MFI.setReturnAddressIsTaken(true);
4061
4062 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4063 return SDValue();
4064
4065 SDLoc DL(Op);
4066 unsigned Depth = Op.getConstantOperandVal(0);
4067 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4068
4069 if (Depth > 0) {
4070 // FIXME The frontend should detect this case.
4071 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4072 report_fatal_error("Unsupported stack frame traversal count");
4073
4074 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4075 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4076 int Offset = TFL->getReturnAddressOffset(MF);
4077 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4078 DAG.getSignedConstant(Offset, DL, PtrVT));
4079 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4080 MachinePointerInfo());
4081 }
4082
4083 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4084 // implicit live-in.
4085 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4086 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
4087 &SystemZ::GR64BitRegClass);
4088 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4089}
4090
4091SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4092 SelectionDAG &DAG) const {
4093 SDLoc DL(Op);
4094 SDValue In = Op.getOperand(0);
4095 EVT InVT = In.getValueType();
4096 EVT ResVT = Op.getValueType();
4097
4098 // Convert loads directly. This is normally done by DAGCombiner,
4099 // but we need this case for bitcasts that are created during lowering
4100 // and which are then lowered themselves.
4101 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4102 if (ISD::isNormalLoad(LoadN)) {
4103 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4104 LoadN->getBasePtr(), LoadN->getMemOperand());
4105 // Update the chain uses.
4106 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4107 return NewLoad;
4108 }
4109
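// An f32 value occupies the high 32 bits of the corresponding 64-bit value,
// so both directions below widen through i64/f64, using the subreg_h32
// subregister when the high-word facility is available and an explicit
// 32-bit shift otherwise.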
4110 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4111 SDValue In64;
4112 if (Subtarget.hasHighWord()) {
4113 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4114 MVT::i64);
4115 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4116 MVT::i64, SDValue(U64, 0), In);
4117 } else {
4118 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4119 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4120 DAG.getConstant(32, DL, MVT::i64));
4121 }
4122 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4123 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4124 DL, MVT::f32, Out64);
4125 }
4126 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4127 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4128 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4129 MVT::f64, SDValue(U64, 0), In);
4130 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4131 if (Subtarget.hasHighWord())
4132 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4133 MVT::i32, Out64);
4134 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4135 DAG.getConstant(32, DL, MVT::i64));
4136 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4137 }
4138 llvm_unreachable("Unexpected bitcast combination");
4139}
4140
4141SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4142 SelectionDAG &DAG) const {
4143
4144 if (Subtarget.isTargetXPLINK64())
4145 return lowerVASTART_XPLINK(Op, DAG);
4146 else
4147 return lowerVASTART_ELF(Op, DAG);
4148}
4149
4150SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4151 SelectionDAG &DAG) const {
4152 MachineFunction &MF = DAG.getMachineFunction();
4153 SystemZMachineFunctionInfo *FuncInfo =
4154 MF.getInfo<SystemZMachineFunctionInfo>();
4155
4156 SDLoc DL(Op);
4157
4158 // vastart just stores the address of the VarArgsFrameIndex slot into the
4159 // memory location argument.
4160 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4161 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4162 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4163 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4164 MachinePointerInfo(SV));
4165}
4166
4167SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4168 SelectionDAG &DAG) const {
4169 MachineFunction &MF = DAG.getMachineFunction();
4170 SystemZMachineFunctionInfo *FuncInfo =
4171 MF.getInfo<SystemZMachineFunctionInfo>();
4172 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4173
4174 SDValue Chain = Op.getOperand(0);
4175 SDValue Addr = Op.getOperand(1);
4176 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4177 SDLoc DL(Op);
4178
4179 // The initial values of each field.
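// (For reference, the ELF s390x va_list is roughly
// struct { long __gpr; long __fpr; void *__overflow_arg_area;
// void *__reg_save_area; }; the four 8-byte stores below fill those fields.)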
4180 const unsigned NumFields = 4;
4181 SDValue Fields[NumFields] = {
4182 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4183 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4184 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4185 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4186 };
4187
4188 // Store each field into its respective slot.
4189 SDValue MemOps[NumFields];
4190 unsigned Offset = 0;
4191 for (unsigned I = 0; I < NumFields; ++I) {
4192 SDValue FieldAddr = Addr;
4193 if (Offset != 0)
4194 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4195 DAG.getIntPtrConstant(Offset, DL));
4196 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4197 MachinePointerInfo(SV, Offset));
4198 Offset += 8;
4199 }
4200 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4201}
4202
4203SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4204 SelectionDAG &DAG) const {
4205 SDValue Chain = Op.getOperand(0);
4206 SDValue DstPtr = Op.getOperand(1);
4207 SDValue SrcPtr = Op.getOperand(2);
4208 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4209 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4210 SDLoc DL(Op);
4211
4212 uint32_t Sz =
4213 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4214 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4215 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4216 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4217 MachinePointerInfo(SrcSV));
4218}
4219
4220SDValue
4221SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4222 SelectionDAG &DAG) const {
4223 if (Subtarget.isTargetXPLINK64())
4224 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4225 else
4226 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4227}
4228
4229SDValue
4230SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4231 SelectionDAG &DAG) const {
4232 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4233 MachineFunction &MF = DAG.getMachineFunction();
4234 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4235 SDValue Chain = Op.getOperand(0);
4236 SDValue Size = Op.getOperand(1);
4237 SDValue Align = Op.getOperand(2);
4238 SDLoc DL(Op);
4239
4240 // If user has set the no alignment function attribute, ignore
4241 // alloca alignments.
4242 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4243
4244 uint64_t StackAlign = TFI->getStackAlignment();
4245 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4246 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4247
4248 SDValue NeededSpace = Size;
4249
4250 // Add extra space for alignment if needed.
4251 EVT PtrVT = getPointerTy(MF.getDataLayout());
4252 if (ExtraAlignSpace)
4253 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4254 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4255
4256 bool IsSigned = false;
4257 bool DoesNotReturn = false;
4258 bool IsReturnValueUsed = false;
4259 EVT VT = Op.getValueType();
4260 SDValue AllocaCall =
4261 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4262 CallingConv::C, IsSigned, DL, DoesNotReturn,
4263 IsReturnValueUsed)
4264 .first;
4265
4266 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4267 // to end of call in order to ensure it isn't broken up from the call
4268 // sequence.
4269 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4270 Register SPReg = Regs.getStackPointerRegister();
4271 Chain = AllocaCall.getValue(1);
4272 SDValue Glue = AllocaCall.getValue(2);
4273 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4274 Chain = NewSPRegNode.getValue(1);
4275
4276 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4277 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4278 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4279
4280 // Dynamically realign if needed.
4281 if (ExtraAlignSpace) {
4282 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4283 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4284 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4285 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4286 }
4287
4288 SDValue Ops[2] = {Result, Chain};
4289 return DAG.getMergeValues(Ops, DL);
4290}
4291
4292SDValue
4293SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4294 SelectionDAG &DAG) const {
4295 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4296 MachineFunction &MF = DAG.getMachineFunction();
4297 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4298 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4299
4300 SDValue Chain = Op.getOperand(0);
4301 SDValue Size = Op.getOperand(1);
4302 SDValue Align = Op.getOperand(2);
4303 SDLoc DL(Op);
4304
4305 // If user has set the no alignment function attribute, ignore
4306 // alloca alignments.
4307 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4308
4309 uint64_t StackAlign = TFI->getStackAlignment();
4310 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4311 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4312
4313 Register SPReg = getStackPointerRegisterToSaveRestore();
4314 SDValue NeededSpace = Size;
4315
4316 // Get a reference to the stack pointer.
4317 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4318
4319 // If we need a backchain, save it now.
4320 SDValue Backchain;
4321 if (StoreBackchain)
4322 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4323 MachinePointerInfo());
4324
4325 // Add extra space for alignment if needed.
4326 if (ExtraAlignSpace)
4327 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4328 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4329
4330 // Get the new stack pointer value.
4331 SDValue NewSP;
4332 if (hasInlineStackProbe(MF)) {
4333 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4334 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4335 Chain = NewSP.getValue(1);
4336 }
4337 else {
4338 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4339 // Copy the new stack pointer back.
4340 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4341 }
4342
4343 // The allocated data lives above the 160 bytes allocated for the standard
4344 // frame, plus any outgoing stack arguments. We don't know how much that
4345 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4346 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4347 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4348
4349 // Dynamically realign if needed.
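// For example, with an 8-byte stack alignment and a 32-byte requested
// alignment, 24 extra bytes were added to NeededSpace above; adding the same
// 24 bytes to Result and masking with ~31 then rounds it up to a 32-byte
// boundary within the reserved space.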
4350 if (RequiredAlign > StackAlign) {
4351 Result =
4352 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4353 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4354 Result =
4355 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4356 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4357 }
4358
4359 if (StoreBackchain)
4360 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4361 MachinePointerInfo());
4362
4363 SDValue Ops[2] = { Result, Chain };
4364 return DAG.getMergeValues(Ops, DL);
4365}
4366
4367SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4368 SDValue Op, SelectionDAG &DAG) const {
4369 SDLoc DL(Op);
4370
4371 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4372}
4373
4374SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4375 SelectionDAG &DAG) const {
4376 EVT VT = Op.getValueType();
4377 SDLoc DL(Op);
4378 SDValue Ops[2];
4379 if (is32Bit(VT))
4380 // Just do a normal 64-bit multiplication and extract the results.
4381 // We define this so that it can be used for constant division.
4382 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4383 Op.getOperand(1), Ops[1], Ops[0]);
4384 else if (Subtarget.hasMiscellaneousExtensions2())
4385 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4386 // the high result in the even register. ISD::SMUL_LOHI is defined to
4387 // return the low half first, so the results are in reverse order.
4388 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4389 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4390 else {
4391 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4392 //
4393 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4394 //
4395 // but using the fact that the upper halves are either all zeros
4396 // or all ones:
4397 //
4398 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4399 //
4400 // and grouping the right terms together since they are quicker than the
4401 // multiplication:
4402 //
4403 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
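// (LH and RH below are the sign bits of LL and RL broadcast across the
// word, i.e. 0 or all-ones, so (lh * rl) is either 0 or -rl, which is
// exactly -(lh & rl); the same holds for (ll * rh).)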
4404 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4405 SDValue LL = Op.getOperand(0);
4406 SDValue RL = Op.getOperand(1);
4407 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4408 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4409 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4410 // the high result in the even register. ISD::SMUL_LOHI is defined to
4411 // return the low half first, so the results are in reverse order.
4412 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4413 LL, RL, Ops[1], Ops[0]);
4414 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4415 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4416 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4417 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4418 }
4419 return DAG.getMergeValues(Ops, DL);
4420}
4421
4422SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4423 SelectionDAG &DAG) const {
4424 EVT VT = Op.getValueType();
4425 SDLoc DL(Op);
4426 SDValue Ops[2];
4427 if (is32Bit(VT))
4428 // Just do a normal 64-bit multiplication and extract the results.
4429 // We define this so that it can be used for constant division.
4430 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4431 Op.getOperand(1), Ops[1], Ops[0]);
4432 else
4433 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4434 // the high result in the even register. ISD::UMUL_LOHI is defined to
4435 // return the low half first, so the results are in reverse order.
4436 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4437 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4438 return DAG.getMergeValues(Ops, DL);
4439}
4440
4441SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4442 SelectionDAG &DAG) const {
4443 SDValue Op0 = Op.getOperand(0);
4444 SDValue Op1 = Op.getOperand(1);
4445 EVT VT = Op.getValueType();
4446 SDLoc DL(Op);
4447
4448 // We use DSGF for 32-bit division. This means the first operand must
4449 // always be 64-bit, and the second operand should be 32-bit whenever
4450 // that is possible, to improve performance.
4451 if (is32Bit(VT))
4452 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4453 else if (DAG.ComputeNumSignBits(Op1) > 32)
4454 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4455
4456 // DSG(F) returns the remainder in the even register and the
4457 // quotient in the odd register.
4458 SDValue Ops[2];
4459 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4460 return DAG.getMergeValues(Ops, DL);
4461}
4462
4463SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4464 SelectionDAG &DAG) const {
4465 EVT VT = Op.getValueType();
4466 SDLoc DL(Op);
4467
4468 // DL(G) returns the remainder in the even register and the
4469 // quotient in the odd register.
4470 SDValue Ops[2];
4471 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4472 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4473 return DAG.getMergeValues(Ops, DL);
4474}
4475
4476SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4477 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4478
4479 // Get the known-zero masks for each operand.
4480 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4481 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4482 DAG.computeKnownBits(Ops[1])};
4483
4484 // See if the upper 32 bits of one operand and the lower 32 bits of the
4485 // other are known zero. They are the low and high operands respectively.
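// A typical match is (or (shl x, 32), (zext i32 y)): the shifted operand
// has known-zero low bits and becomes HighOp, the zero-extended operand
// becomes LowOp.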
4486 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4487 Known[1].Zero.getZExtValue() };
4488 unsigned High, Low;
4489 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4490 High = 1, Low = 0;
4491 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4492 High = 0, Low = 1;
4493 else
4494 return Op;
4495
4496 SDValue LowOp = Ops[Low];
4497 SDValue HighOp = Ops[High];
4498
4499 // If the high part is a constant, we're better off using IILH.
4500 if (HighOp.getOpcode() == ISD::Constant)
4501 return Op;
4502
4503 // If the low part is a constant that is outside the range of LHI,
4504 // then we're better off using IILF.
4505 if (LowOp.getOpcode() == ISD::Constant) {
4506 int64_t Value = int32_t(LowOp->getAsZExtVal());
4507 if (!isInt<16>(Value))
4508 return Op;
4509 }
4510
4511 // Check whether the high part is an AND that doesn't change the
4512 // high 32 bits and just masks out low bits. We can skip it if so.
4513 if (HighOp.getOpcode() == ISD::AND &&
4514 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4515 SDValue HighOp0 = HighOp.getOperand(0);
4516 uint64_t Mask = HighOp.getConstantOperandVal(1);
4517 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4518 HighOp = HighOp0;
4519 }
4520
4521 // Take advantage of the fact that all GR32 operations only change the
4522 // low 32 bits by truncating Low to an i32 and inserting it directly
4523 // using a subreg. The interesting cases are those where the truncation
4524 // can be folded.
4525 SDLoc DL(Op);
4526 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4527 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4528 MVT::i64, HighOp, Low32);
4529}
4530
4531// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4532SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4533 SelectionDAG &DAG) const {
4534 SDNode *N = Op.getNode();
4535 SDValue LHS = N->getOperand(0);
4536 SDValue RHS = N->getOperand(1);
4537 SDLoc DL(N);
4538
4539 if (N->getValueType(0) == MVT::i128) {
4540 unsigned BaseOp = 0;
4541 unsigned FlagOp = 0;
4542 bool IsBorrow = false;
4543 switch (Op.getOpcode()) {
4544 default: llvm_unreachable("Unknown instruction!");
4545 case ISD::UADDO:
4546 BaseOp = ISD::ADD;
4547 FlagOp = SystemZISD::VACC;
4548 break;
4549 case ISD::USUBO:
4550 BaseOp = ISD::SUB;
4551 FlagOp = SystemZISD::VSCBI;
4552 IsBorrow = true;
4553 break;
4554 }
4555 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4556 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4557 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4558 DAG.getValueType(MVT::i1));
4559 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
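// VSCBI produces a carry-style borrow indication (1 means no borrow
// occurred), whereas ISD::USUBO defines its second result as 1 on borrow,
// hence the inversion below for the subtract case.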
4560 if (IsBorrow)
4561 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4562 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4563 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4564 }
4565
4566 unsigned BaseOp = 0;
4567 unsigned CCValid = 0;
4568 unsigned CCMask = 0;
4569
4570 switch (Op.getOpcode()) {
4571 default: llvm_unreachable("Unknown instruction!");
4572 case ISD::SADDO:
4573 BaseOp = SystemZISD::SADDO;
4574 CCValid = SystemZ::CCMASK_ARITH;
4575 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4576 break;
4577 case ISD::SSUBO:
4578 BaseOp = SystemZISD::SSUBO;
4579 CCValid = SystemZ::CCMASK_ARITH;
4580 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4581 break;
4582 case ISD::UADDO:
4583 BaseOp = SystemZISD::UADDO;
4584 CCValid = SystemZ::CCMASK_LOGICAL;
4585 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4586 break;
4587 case ISD::USUBO:
4588 BaseOp = SystemZISD::USUBO;
4589 CCValid = SystemZ::CCMASK_LOGICAL;
4590 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4591 break;
4592 }
4593
4594 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4595 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4596
4597 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4598 if (N->getValueType(1) == MVT::i1)
4599 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4600
4601 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4602}
4603
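// The two helpers below walk back through a chain of UADDO_CARRY/USUBO_CARRY
// nodes and check that the carry ultimately comes from a UADDO/USUBO, i.e.
// that it is a CC value produced by this lowering; otherwise
// lowerUADDSUBO_CARRY falls back to the generic expansion.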
4604static bool isAddCarryChain(SDValue Carry) {
4605 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4606 Carry = Carry.getOperand(2);
4607 return Carry.getOpcode() == ISD::UADDO;
4608}
4609
4610static bool isSubBorrowChain(SDValue Carry) {
4611 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4612 Carry = Carry.getOperand(2);
4613 return Carry.getOpcode() == ISD::USUBO;
4614}
4615
4616// Lower UADDO_CARRY/USUBO_CARRY nodes.
4617SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4618 SelectionDAG &DAG) const {
4619
4620 SDNode *N = Op.getNode();
4621 MVT VT = N->getSimpleValueType(0);
4622
4623 // Let legalize expand this if it isn't a legal type yet.
4624 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4625 return SDValue();
4626
4627 SDValue LHS = N->getOperand(0);
4628 SDValue RHS = N->getOperand(1);
4629 SDValue Carry = Op.getOperand(2);
4630 SDLoc DL(N);
4631
4632 if (VT == MVT::i128) {
4633 unsigned BaseOp = 0;
4634 unsigned FlagOp = 0;
4635 bool IsBorrow = false;
4636 switch (Op.getOpcode()) {
4637 default: llvm_unreachable("Unknown instruction!");
4638 case ISD::UADDO_CARRY:
4639 BaseOp = SystemZISD::VAC;
4640 FlagOp = SystemZISD::VACCC;
4641 break;
4642 case ISD::USUBO_CARRY:
4643 BaseOp = SystemZISD::VSBI;
4644 FlagOp = SystemZISD::VSBCBI;
4645 IsBorrow = true;
4646 break;
4647 }
4648 if (IsBorrow)
4649 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4650 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4651 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4652 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4653 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4654 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4655 DAG.getValueType(MVT::i1));
4656 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4657 if (IsBorrow)
4658 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4659 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4660 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4661 }
4662
4663 unsigned BaseOp = 0;
4664 unsigned CCValid = 0;
4665 unsigned CCMask = 0;
4666
4667 switch (Op.getOpcode()) {
4668 default: llvm_unreachable("Unknown instruction!");
4669 case ISD::UADDO_CARRY:
4670 if (!isAddCarryChain(Carry))
4671 return SDValue();
4672
4673 BaseOp = SystemZISD::ADDCARRY;
4674 CCValid = SystemZ::CCMASK_LOGICAL;
4675 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4676 break;
4677 case ISD::USUBO_CARRY:
4678 if (!isSubBorrowChain(Carry))
4679 return SDValue();
4680
4681 BaseOp = SystemZISD::SUBCARRY;
4682 CCValid = SystemZ::CCMASK_LOGICAL;
4683 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4684 break;
4685 }
4686
4687 // Set the condition code from the carry flag.
4688 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4689 DAG.getConstant(CCValid, DL, MVT::i32),
4690 DAG.getConstant(CCMask, DL, MVT::i32));
4691
4692 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4693 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4694
4695 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4696 if (N->getValueType(1) == MVT::i1)
4697 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4698
4699 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4700}
4701
4702SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4703 SelectionDAG &DAG) const {
4704 EVT VT = Op.getValueType();
4705 SDLoc DL(Op);
4706 Op = Op.getOperand(0);
4707
4708 if (VT.getScalarSizeInBits() == 128) {
4709 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4710 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4711 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4712 DAG.getConstant(0, DL, MVT::i64));
4713 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4714 return Op;
4715 }
4716
4717 // Handle vector types via VPOPCT.
4718 if (VT.isVector()) {
4719 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4720 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4721 switch (VT.getScalarSizeInBits()) {
4722 case 8:
4723 break;
4724 case 16: {
4725 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4726 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4727 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4728 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4729 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4730 break;
4731 }
4732 case 32: {
4733 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4734 DAG.getConstant(0, DL, MVT::i32));
4735 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4736 break;
4737 }
4738 case 64: {
4739 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4740 DAG.getConstant(0, DL, MVT::i32));
4741 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4742 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4743 break;
4744 }
4745 default:
4746 llvm_unreachable("Unexpected type");
4747 }
4748 return Op;
4749 }
4750
4751 // Get the known-zero mask for the operand.
4752 KnownBits Known = DAG.computeKnownBits(Op);
4753 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4754 if (NumSignificantBits == 0)
4755 return DAG.getConstant(0, DL, VT);
4756
4757 // Skip known-zero high parts of the operand.
4758 int64_t OrigBitSize = VT.getSizeInBits();
4759 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4760 BitSize = std::min(BitSize, OrigBitSize);
4761
4762 // The POPCNT instruction counts the number of bits in each byte.
4763 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4764 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4765 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4766
4767 // Add up per-byte counts in a binary tree. All bits of Op at
4768 // position larger than BitSize remain zero throughout.
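// For a 64-bit value this adds copies shifted left by 32, 16 and 8 bits,
// so the top byte ends up holding the sum of all eight per-byte counts,
// which the final shift below extracts.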
4769 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4770 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4771 if (BitSize != OrigBitSize)
4772 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4773 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4774 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4775 }
4776
4777 // Extract overall result from high byte.
4778 if (BitSize > 8)
4779 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4780 DAG.getConstant(BitSize - 8, DL, VT));
4781
4782 return Op;
4783}
4784
4785SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4786 SelectionDAG &DAG) const {
4787 SDLoc DL(Op);
4788 AtomicOrdering FenceOrdering =
4789 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4790 SyncScope::ID FenceSSID =
4791 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4792
4793 // The only fence that needs an instruction is a sequentially-consistent
4794 // cross-thread fence.
4795 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4796 FenceSSID == SyncScope::System) {
4797 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4798 Op.getOperand(0)),
4799 0);
4800 }
4801
4802 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4803 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4804}
4805
4806SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4807 SelectionDAG &DAG) const {
4808 auto *Node = cast<AtomicSDNode>(Op.getNode());
4809 assert(
4810 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4811 "Only custom lowering i128 or f128.");
4812 // Use same code to handle both legal and non-legal i128 types.
4813 SmallVector<SDValue, 2> Results;
4814 LowerOperationWrapper(Node, Results, DAG);
4815 return DAG.getMergeValues(Results, SDLoc(Op));
4816}
4817
4818// Prepare for a Compare And Swap for a subword operation. This needs to be
4819// done in memory with 4 bytes at natural alignment.
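// For example, a halfword at byte offset 2 within its aligned word gets
// AlignedAddr = Addr & -4 and a BitShift congruent to 16 mod 32, so
// rotating the containing word left by BitShift moves the field into the
// top 16 bits of the register.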
4820 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4821 SDValue &AlignedAddr, SDValue &BitShift,
4822 SDValue &NegBitShift) {
4823 EVT PtrVT = Addr.getValueType();
4824 EVT WideVT = MVT::i32;
4825
4826 // Get the address of the containing word.
4827 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4828 DAG.getSignedConstant(-4, DL, PtrVT));
4829
4830 // Get the number of bits that the word must be rotated left in order
4831 // to bring the field to the top bits of a GR32.
4832 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4833 DAG.getConstant(3, DL, PtrVT));
4834 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4835
4836 // Get the complementing shift amount, for rotating a field in the top
4837 // bits back to its proper position.
4838 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4839 DAG.getConstant(0, DL, WideVT), BitShift);
4840
4841}
4842
4843// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4844// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4845SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4846 SelectionDAG &DAG,
4847 unsigned Opcode) const {
4848 auto *Node = cast<AtomicSDNode>(Op.getNode());
4849
4850 // 32-bit operations need no special handling.
4851 EVT NarrowVT = Node->getMemoryVT();
4852 EVT WideVT = MVT::i32;
4853 if (NarrowVT == WideVT)
4854 return Op;
4855
4856 int64_t BitSize = NarrowVT.getSizeInBits();
4857 SDValue ChainIn = Node->getChain();
4858 SDValue Addr = Node->getBasePtr();
4859 SDValue Src2 = Node->getVal();
4860 MachineMemOperand *MMO = Node->getMemOperand();
4861 SDLoc DL(Node);
4862
4863 // Convert atomic subtracts of constants into additions.
4864 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4865 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4866 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4867 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
4868 Src2.getValueType());
4869 }
4870
4871 SDValue AlignedAddr, BitShift, NegBitShift;
4872 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4873
4874 // Extend the source operand to 32 bits and prepare it for the inner loop.
4875 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4876 // operations require the source to be shifted in advance. (This shift
4877 // can be folded if the source is constant.) For AND and NAND, the lower
4878 // bits must be set, while for other opcodes they should be left clear.
4879 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4880 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4881 DAG.getConstant(32 - BitSize, DL, WideVT));
4882 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4883 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4884 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4885 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4886
4887 // Construct the ATOMIC_LOADW_* node.
4888 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4889 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4890 DAG.getConstant(BitSize, DL, WideVT) };
4891 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4892 NarrowVT, MMO);
4893
4894 // Rotate the result of the final CS so that the field is in the lower
4895 // bits of a GR32, then truncate it.
4896 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4897 DAG.getConstant(BitSize, DL, WideVT));
4898 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4899
4900 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4901 return DAG.getMergeValues(RetOps, DL);
4902}
4903
4904// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4905// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4906SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4907 SelectionDAG &DAG) const {
4908 auto *Node = cast<AtomicSDNode>(Op.getNode());
4909 EVT MemVT = Node->getMemoryVT();
4910 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4911 // A full-width operation: negate and use LAA(G).
4912 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4913 assert(Subtarget.hasInterlockedAccess1() &&
4914 "Should have been expanded by AtomicExpand pass.");
4915 SDValue Src2 = Node->getVal();
4916 SDLoc DL(Src2);
4917 SDValue NegSrc2 =
4918 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4919 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4920 Node->getChain(), Node->getBasePtr(), NegSrc2,
4921 Node->getMemOperand());
4922 }
4923
4924 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4925}
4926
4927// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4928SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4929 SelectionDAG &DAG) const {
4930 auto *Node = cast<AtomicSDNode>(Op.getNode());
4931 SDValue ChainIn = Node->getOperand(0);
4932 SDValue Addr = Node->getOperand(1);
4933 SDValue CmpVal = Node->getOperand(2);
4934 SDValue SwapVal = Node->getOperand(3);
4935 MachineMemOperand *MMO = Node->getMemOperand();
4936 SDLoc DL(Node);
4937
4938 if (Node->getMemoryVT() == MVT::i128) {
4939 // Use same code to handle both legal and non-legal i128 types.
4940 SmallVector<SDValue, 3> Results;
4941 LowerOperationWrapper(Node, Results, DAG);
4942 return DAG.getMergeValues(Results, DL);
4943 }
4944
4945 // We have native support for 32-bit and 64-bit compare and swap, but we
4946 // still need to expand extracting the "success" result from the CC.
4947 EVT NarrowVT = Node->getMemoryVT();
4948 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4949 if (NarrowVT == WideVT) {
4950 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4951 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4952 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4953 DL, Tys, Ops, NarrowVT, MMO);
4954 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4955 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4956
4957 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4958 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4959 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4960 return SDValue();
4961 }
4962
4963 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4964 // via a fullword ATOMIC_CMP_SWAPW operation.
4965 int64_t BitSize = NarrowVT.getSizeInBits();
4966
4967 SDValue AlignedAddr, BitShift, NegBitShift;
4968 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4969
4970 // Construct the ATOMIC_CMP_SWAPW node.
4971 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4972 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4973 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4974 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4975 VTList, Ops, NarrowVT, MMO);
4976 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4977 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4978
4979 // emitAtomicCmpSwapW() will zero extend the result (original value).
4980 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4981 DAG.getValueType(NarrowVT));
4982 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4983 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4984 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4985 return SDValue();
4986}
4987
4988 MachineMemOperand::Flags
4989 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4990 // Because of how we convert atomic_load and atomic_store to normal loads and
4991 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4992 // since DAGCombine hasn't been updated to account for atomic, but non
4993 // volatile loads. (See D57601)
4994 if (auto *SI = dyn_cast<StoreInst>(&I))
4995 if (SI->isAtomic())
4996 return MachineMemOperand::MOVolatile;
4997 if (auto *LI = dyn_cast<LoadInst>(&I))
4998 if (LI->isAtomic())
4999 return MachineMemOperand::MOVolatile;
5000 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5001 if (AI->isAtomic())
5002 return MachineMemOperand::MOVolatile;
5003 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5004 if (AI->isAtomic())
5005 return MachineMemOperand::MOVolatile;
5006 return MachineMemOperand::MONone;
5007}
5008
5009SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5010 SelectionDAG &DAG) const {
5011 MachineFunction &MF = DAG.getMachineFunction();
5012 auto *Regs = Subtarget.getSpecialRegisters();
5013 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5014 report_fatal_error("Variable-sized stack allocations are not supported "
5015 "in GHC calling convention");
5016 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5017 Regs->getStackPointerRegister(), Op.getValueType());
5018}
5019
5020SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5021 SelectionDAG &DAG) const {
5022 MachineFunction &MF = DAG.getMachineFunction();
5023 auto *Regs = Subtarget.getSpecialRegisters();
5024 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5025
5026 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5027 report_fatal_error("Variable-sized stack allocations are not supported "
5028 "in GHC calling convention");
5029
5030 SDValue Chain = Op.getOperand(0);
5031 SDValue NewSP = Op.getOperand(1);
5032 SDValue Backchain;
5033 SDLoc DL(Op);
5034
5035 if (StoreBackchain) {
5036 SDValue OldSP = DAG.getCopyFromReg(
5037 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5038 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5039 MachinePointerInfo());
5040 }
5041
5042 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5043
5044 if (StoreBackchain)
5045 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5046 MachinePointerInfo());
5047
5048 return Chain;
5049}
5050
5051SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5052 SelectionDAG &DAG) const {
5053 bool IsData = Op.getConstantOperandVal(4);
5054 if (!IsData)
5055 // Just preserve the chain.
5056 return Op.getOperand(0);
5057
5058 SDLoc DL(Op);
5059 bool IsWrite = Op.getConstantOperandVal(2);
5060 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5061 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5062 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5063 Op.getOperand(1)};
5064 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5065 Node->getVTList(), Ops,
5066 Node->getMemoryVT(), Node->getMemOperand());
5067}
5068
5069// Convert condition code in CCReg to an i32 value.
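// IPM places the 2-bit condition code at bit position SystemZ::IPM_CC (28)
// of its result, so the shift below yields the CC value in the range 0-3.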
5070 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
5071 SDLoc DL(CCReg);
5072 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
5073 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
5074 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
5075}
5076
5077SDValue
5078SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5079 SelectionDAG &DAG) const {
5080 unsigned Opcode, CCValid;
5081 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5082 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5083 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5084 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5085 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5086 return SDValue();
5087 }
5088
5089 return SDValue();
5090}
5091
5092SDValue
5093SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5094 SelectionDAG &DAG) const {
5095 unsigned Opcode, CCValid;
5096 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5097 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5098 if (Op->getNumValues() == 1)
5099 return getCCResult(DAG, SDValue(Node, 0));
5100 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5101 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5102 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5103 }
5104
5105 unsigned Id = Op.getConstantOperandVal(0);
5106 switch (Id) {
5107 case Intrinsic::thread_pointer:
5108 return lowerThreadPointer(SDLoc(Op), DAG);
5109
5110 case Intrinsic::s390_vpdi:
5111 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5112 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5113
5114 case Intrinsic::s390_vperm:
5115 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5116 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5117
5118 case Intrinsic::s390_vuphb:
5119 case Intrinsic::s390_vuphh:
5120 case Intrinsic::s390_vuphf:
5121 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5122 Op.getOperand(1));
5123
5124 case Intrinsic::s390_vuplhb:
5125 case Intrinsic::s390_vuplhh:
5126 case Intrinsic::s390_vuplhf:
5127 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5128 Op.getOperand(1));
5129
5130 case Intrinsic::s390_vuplb:
5131 case Intrinsic::s390_vuplhw:
5132 case Intrinsic::s390_vuplf:
5133 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5134 Op.getOperand(1));
5135
5136 case Intrinsic::s390_vupllb:
5137 case Intrinsic::s390_vupllh:
5138 case Intrinsic::s390_vupllf:
5139 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5140 Op.getOperand(1));
5141
5142 case Intrinsic::s390_vsumb:
5143 case Intrinsic::s390_vsumh:
5144 case Intrinsic::s390_vsumgh:
5145 case Intrinsic::s390_vsumgf:
5146 case Intrinsic::s390_vsumqf:
5147 case Intrinsic::s390_vsumqg:
5148 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5149 Op.getOperand(1), Op.getOperand(2));
5150
5151 case Intrinsic::s390_vaq:
5152 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5153 Op.getOperand(1), Op.getOperand(2));
5154 case Intrinsic::s390_vaccb:
5155 case Intrinsic::s390_vacch:
5156 case Intrinsic::s390_vaccf:
5157 case Intrinsic::s390_vaccg:
5158 case Intrinsic::s390_vaccq:
5159 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5160 Op.getOperand(1), Op.getOperand(2));
5161 case Intrinsic::s390_vacq:
5162 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5163 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5164 case Intrinsic::s390_vacccq:
5165 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5166 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5167
5168 case Intrinsic::s390_vsq:
5169 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5170 Op.getOperand(1), Op.getOperand(2));
5171 case Intrinsic::s390_vscbib:
5172 case Intrinsic::s390_vscbih:
5173 case Intrinsic::s390_vscbif:
5174 case Intrinsic::s390_vscbig:
5175 case Intrinsic::s390_vscbiq:
5176 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5177 Op.getOperand(1), Op.getOperand(2));
5178 case Intrinsic::s390_vsbiq:
5179 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5180 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5181 case Intrinsic::s390_vsbcbiq:
5182 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5183 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5184 }
5185
5186 return SDValue();
5187}
5188
5189namespace {
5190// Says that SystemZISD operation Opcode can be used to perform the equivalent
5191// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5192// Operand is the constant third operand, otherwise it is the number of
5193// bytes in each element of the result.
5194struct Permute {
5195 unsigned Opcode;
5196 unsigned Operand;
5197 unsigned char Bytes[SystemZ::VectorBytes];
5198};
5199}
5200
5201static const Permute PermuteForms[] = {
5202 // VMRHG
5203 { SystemZISD::MERGE_HIGH, 8,
5204 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5205 // VMRHF
5206 { SystemZISD::MERGE_HIGH, 4,
5207 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5208 // VMRHH
5209 { SystemZISD::MERGE_HIGH, 2,
5210 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5211 // VMRHB
5212 { SystemZISD::MERGE_HIGH, 1,
5213 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5214 // VMRLG
5215 { SystemZISD::MERGE_LOW, 8,
5216 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5217 // VMRLF
5218 { SystemZISD::MERGE_LOW, 4,
5219 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5220 // VMRLH
5221 { SystemZISD::MERGE_LOW, 2,
5222 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5223 // VMRLB
5224 { SystemZISD::MERGE_LOW, 1,
5225 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5226 // VPKG
5227 { SystemZISD::PACK, 4,
5228 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5229 // VPKF
5230 { SystemZISD::PACK, 2,
5231 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5232 // VPKH
5233 { SystemZISD::PACK, 1,
5234 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5235 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5236 { SystemZISD::PERMUTE_DWORDS, 4,
5237 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5238 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5239 { SystemZISD::PERMUTE_DWORDS, 1,
5240 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5241};
5242
5243// Called after matching a vector shuffle against a particular pattern.
5244// Both the original shuffle and the pattern have two vector operands.
5245// OpNos[0] is the operand of the original shuffle that should be used for
5246// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5247// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5248// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5249// for operands 0 and 1 of the pattern.
5250static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5251 if (OpNos[0] < 0) {
5252 if (OpNos[1] < 0)
5253 return false;
5254 OpNo0 = OpNo1 = OpNos[1];
5255 } else if (OpNos[1] < 0) {
5256 OpNo0 = OpNo1 = OpNos[0];
5257 } else {
5258 OpNo0 = OpNos[0];
5259 OpNo1 = OpNos[1];
5260 }
5261 return true;
5262}
5263
5264// Bytes is a VPERM-like permute vector, except that -1 is used for
5265// undefined bytes. Return true if the VPERM can be implemented using P.
5266// When returning true set OpNo0 to the VPERM operand that should be
5267// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5268//
5269// For example, if swapping the VPERM operands allows P to match, OpNo0
5270// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5271// operand, but rewriting it to use two duplicated operands allows it to
5272// match P, then OpNo0 and OpNo1 will be the same.
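// For example, with the VMRHG form above and
// Bytes == { 16, 17, ..., 23, 0, 1, ..., 7 }, the pattern only matches once
// the operands are swapped, so OpNo0 == 1 and OpNo1 == 0.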
5273static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5274 unsigned &OpNo0, unsigned &OpNo1) {
5275 int OpNos[] = { -1, -1 };
5276 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5277 int Elt = Bytes[I];
5278 if (Elt >= 0) {
5279 // Make sure that the two permute vectors use the same suboperand
5280 // byte number. Only the operand numbers (the high bits) are
5281 // allowed to differ.
5282 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5283 return false;
5284 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5285 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5286 // Make sure that the operand mappings are consistent with previous
5287 // elements.
5288 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5289 return false;
5290 OpNos[ModelOpNo] = RealOpNo;
5291 }
5292 }
5293 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5294}
5295
5296// As above, but search for a matching permute.
5297static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5298 unsigned &OpNo0, unsigned &OpNo1) {
5299 for (auto &P : PermuteForms)
5300 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5301 return &P;
5302 return nullptr;
5303}
5304
5305// Bytes is a VPERM-like permute vector, except that -1 is used for
5306// undefined bytes. This permute is an operand of an outer permute.
5307// See whether redistributing the -1 bytes gives a shuffle that can be
5308// implemented using P. If so, set Transform to a VPERM-like permute vector
5309// that, when applied to the result of P, gives the original permute in Bytes.
5310 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5311 const Permute &P,
5312 SmallVectorImpl<int> &Transform) {
5313 unsigned To = 0;
5314 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5315 int Elt = Bytes[From];
5316 if (Elt < 0)
5317 // Byte number From of the result is undefined.
5318 Transform[From] = -1;
5319 else {
5320 while (P.Bytes[To] != Elt) {
5321 To += 1;
5322 if (To == SystemZ::VectorBytes)
5323 return false;
5324 }
5325 Transform[From] = To;
5326 }
5327 }
5328 return true;
5329}
5330
5331// As above, but search for a matching permute.
5332static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5333 SmallVectorImpl<int> &Transform) {
5334 for (auto &P : PermuteForms)
5335 if (matchDoublePermute(Bytes, P, Transform))
5336 return &P;
5337 return nullptr;
5338}
5339
5340// Convert the mask of the given shuffle op into a byte-level mask,
5341// as if it had type vNi8.
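// For example, a v4i32 shuffle with mask <0, 4, 1, 5> becomes the byte mask
// { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 }, which is the
// VMRHF form above.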
5342static bool getVPermMask(SDValue ShuffleOp,
5343 SmallVectorImpl<int> &Bytes) {
5344 EVT VT = ShuffleOp.getValueType();
5345 unsigned NumElements = VT.getVectorNumElements();
5346 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5347
5348 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5349 Bytes.resize(NumElements * BytesPerElement, -1);
5350 for (unsigned I = 0; I < NumElements; ++I) {
5351 int Index = VSN->getMaskElt(I);
5352 if (Index >= 0)
5353 for (unsigned J = 0; J < BytesPerElement; ++J)
5354 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5355 }
5356 return true;
5357 }
5358 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5359 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5360 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5361 Bytes.resize(NumElements * BytesPerElement, -1);
5362 for (unsigned I = 0; I < NumElements; ++I)
5363 for (unsigned J = 0; J < BytesPerElement; ++J)
5364 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5365 return true;
5366 }
5367 return false;
5368}
5369
5370// Bytes is a VPERM-like permute vector, except that -1 is used for
5371// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5372// the result come from a contiguous sequence of bytes from one input.
5373// Set Base to the selector for the first byte if so.
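// For example, with BytesPerElement == 4 and Bytes[Start..Start+3] ==
// { 20, 21, 22, 23 }, the bytes are a contiguous word taken from the second
// operand and Base is set to 20.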
5374static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5375 unsigned BytesPerElement, int &Base) {
5376 Base = -1;
5377 for (unsigned I = 0; I < BytesPerElement; ++I) {
5378 if (Bytes[Start + I] >= 0) {
5379 unsigned Elem = Bytes[Start + I];
5380 if (Base < 0) {
5381 Base = Elem - I;
5382 // Make sure the bytes would come from one input operand.
5383 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5384 return false;
5385 } else if (unsigned(Base) != Elem - I)
5386 return false;
5387 }
5388 }
5389 return true;
5390}
5391
5392// Bytes is a VPERM-like permute vector, except that -1 is used for
5393// undefined bytes. Return true if it can be performed using VSLDB.
5394// When returning true, set StartIndex to the shift amount and OpNo0
5395// and OpNo1 to the VPERM operands that should be used as the first
5396// and second shift operand respectively.
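// For example, Bytes == { 4, 5, ..., 19 } is a double-vector shift left by
// 4 bytes, giving StartIndex == 4, OpNo0 == 0 and OpNo1 == 1, which is later
// emitted as something like:
// (SystemZISD::SHL_DOUBLE Ops[0], Ops[1], 4)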
5397 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5398 unsigned &StartIndex, unsigned &OpNo0,
5399 unsigned &OpNo1) {
5400 int OpNos[] = { -1, -1 };
5401 int Shift = -1;
5402 for (unsigned I = 0; I < 16; ++I) {
5403 int Index = Bytes[I];
5404 if (Index >= 0) {
5405 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5406 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5407 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5408 if (Shift < 0)
5409 Shift = ExpectedShift;
5410 else if (Shift != ExpectedShift)
5411 return false;
5412 // Make sure that the operand mappings are consistent with previous
5413 // elements.
5414 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5415 return false;
5416 OpNos[ModelOpNo] = RealOpNo;
5417 }
5418 }
5419 StartIndex = Shift;
5420 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5421}
5422
5423// Create a node that performs P on operands Op0 and Op1, casting the
5424// operands to the appropriate type. The type of the result is determined by P.
5425 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5426 const Permute &P, SDValue Op0, SDValue Op1) {
5427 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5428 // elements of a PACK are twice as wide as the outputs.
5429 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5430 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5431 P.Operand);
5432 // Cast both operands to the appropriate type.
5433 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5434 SystemZ::VectorBytes / InBytes);
5435 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5436 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5437 SDValue Op;
5438 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5439 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5440 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5441 } else if (P.Opcode == SystemZISD::PACK) {
5442 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5443 SystemZ::VectorBytes / P.Operand);
5444 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5445 } else {
5446 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5447 }
5448 return Op;
5449}
5450
5451static bool isZeroVector(SDValue N) {
5452 if (N->getOpcode() == ISD::BITCAST)
5453 N = N->getOperand(0);
5454 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5455 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5456 return Op->getZExtValue() == 0;
5457 return ISD::isBuildVectorAllZeros(N.getNode());
5458}
5459
5460// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5461static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5462 for (unsigned I = 0; I < Num ; I++)
5463 if (isZeroVector(Ops[I]))
5464 return I;
5465 return UINT32_MAX;
5466}
5467
5468// Bytes is a VPERM-like permute vector, except that -1 is used for
5469// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5470// VSLDB or VPERM.
5471 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5472 SDValue *Ops,
5473 const SmallVectorImpl<int> &Bytes) {
5474 for (unsigned I = 0; I < 2; ++I)
5475 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5476
5477 // First see whether VSLDB can be used.
5478 unsigned StartIndex, OpNo0, OpNo1;
5479 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5480 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5481 Ops[OpNo1],
5482 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5483
5484 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5485 // eliminate a zero vector by reusing any zero index in the permute vector.
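// For example, if the first result byte comes from the zero operand, the
// permute mask itself is passed as the first VPERM operand; selector 0 then
// reads the mask's own first byte, which is known to be zero.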
5486 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5487 if (ZeroVecIdx != UINT32_MAX) {
5488 bool MaskFirst = true;
5489 int ZeroIdx = -1;
5490 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5491 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5492 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5493 if (OpNo == ZeroVecIdx && I == 0) {
5494 // If the first byte is zero, use mask as first operand.
5495 ZeroIdx = 0;
5496 break;
5497 }
5498 if (OpNo != ZeroVecIdx && Byte == 0) {
5499 // If mask contains a zero, use it by placing that vector first.
5500 ZeroIdx = I + SystemZ::VectorBytes;
5501 MaskFirst = false;
5502 break;
5503 }
5504 }
5505 if (ZeroIdx != -1) {
5506 SDValue IndexNodes[SystemZ::VectorBytes];
5507 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5508 if (Bytes[I] >= 0) {
5509 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5510 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5511 if (OpNo == ZeroVecIdx)
5512 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5513 else {
5514 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5515 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5516 }
5517 } else
5518 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5519 }
5520 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5521 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5522 if (MaskFirst)
5523 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5524 Mask);
5525 else
5526 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5527 Mask);
5528 }
5529 }
5530
5531 SDValue IndexNodes[SystemZ::VectorBytes];
5532 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5533 if (Bytes[I] >= 0)
5534 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5535 else
5536 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5537 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5538 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5539 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5540}
5541
5542namespace {
5543// Describes a general N-operand vector shuffle.
5544struct GeneralShuffle {
5545 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5546 void addUndef();
5547 bool add(SDValue, unsigned);
5548 SDValue getNode(SelectionDAG &, const SDLoc &);
5549 void tryPrepareForUnpack();
5550 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5551 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5552
5553 // The operands of the shuffle.
5554 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5555
5556 // Index I is -1 if byte I of the result is undefined. Otherwise the
5557 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5558 // Bytes[I] / SystemZ::VectorBytes.
5559 SmallVector<int, SystemZ::VectorBytes> Bytes;
5560
5561 // The type of the shuffle result.
5562 EVT VT;
5563
5564 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5565 unsigned UnpackFromEltSize;
5566};
5567}
5568
5569// Add an extra undefined element to the shuffle.
5570void GeneralShuffle::addUndef() {
5571 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5572 for (unsigned I = 0; I < BytesPerElement; ++I)
5573 Bytes.push_back(-1);
5574}
5575
5576// Add an extra element to the shuffle, taking it from element Elem of Op.
5577// A null Op indicates a vector input whose value will be calculated later;
5578// there is at most one such input per shuffle and it always has the same
5579// type as the result. Aborts and returns false if the source vector elements
5580// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5581// LLVM they become implicitly extended, but this is rare and not optimized.
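// For example, adding element 1 of a v4i32 operand at position OpNo in Ops
// appends the selectors OpNo * SystemZ::VectorBytes + { 4, 5, 6, 7 } to Bytes.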
5582bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5583 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5584
5585 // The source vector can have wider elements than the result,
5586 // either through an explicit TRUNCATE or because of type legalization.
5587 // We want the least significant part.
5588 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5589 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5590
5591 // Return false if the source elements are smaller than their destination
5592 // elements.
5593 if (FromBytesPerElement < BytesPerElement)
5594 return false;
5595
5596 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5597 (FromBytesPerElement - BytesPerElement));
5598
5599 // Look through things like shuffles and bitcasts.
5600 while (Op.getNode()) {
5601 if (Op.getOpcode() == ISD::BITCAST)
5602 Op = Op.getOperand(0);
5603 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5604 // See whether the bytes we need come from a contiguous part of one
5605 // operand.
5606 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5607 if (!getVPermMask(Op, OpBytes))
5608 break;
5609 int NewByte;
5610 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5611 break;
5612 if (NewByte < 0) {
5613 addUndef();
5614 return true;
5615 }
5616 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5617 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5618 } else if (Op.isUndef()) {
5619 addUndef();
5620 return true;
5621 } else
5622 break;
5623 }
5624
5625 // Make sure that the source of the extraction is in Ops.
5626 unsigned OpNo = 0;
5627 for (; OpNo < Ops.size(); ++OpNo)
5628 if (Ops[OpNo] == Op)
5629 break;
5630 if (OpNo == Ops.size())
5631 Ops.push_back(Op);
5632
5633 // Add the element to Bytes.
5634 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5635 for (unsigned I = 0; I < BytesPerElement; ++I)
5636 Bytes.push_back(Base + I);
5637
5638 return true;
5639}
5640
5641// Return SDNodes for the completed shuffle.
5642SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5643 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5644
5645 if (Ops.size() == 0)
5646 return DAG.getUNDEF(VT);
5647
5648 // Use a single unpack if possible as the last operation.
5649 tryPrepareForUnpack();
5650
5651 // Make sure that there are at least two shuffle operands.
5652 if (Ops.size() == 1)
5653 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5654
5655 // Create a tree of shuffles, deferring root node until after the loop.
5656 // Try to redistribute the undefined elements of non-root nodes so that
5657 // the non-root shuffles match something like a pack or merge, then adjust
5658 // the parent node's permute vector to compensate for the new order.
5659 // Among other things, this copes with vectors like <2 x i16> that were
5660 // padded with undefined elements during type legalization.
5661 //
5662 // In the best case this redistribution will lead to the whole tree
5663 // using packs and merges. It should rarely be a loss in other cases.
5664 unsigned Stride = 1;
5665 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5666 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5667 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5668
5669 // Create a mask for just these two operands.
5670 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5671 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5672 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5673 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5674 if (OpNo == I)
5675 NewBytes[J] = Byte;
5676 else if (OpNo == I + Stride)
5677 NewBytes[J] = SystemZ::VectorBytes + Byte;
5678 else
5679 NewBytes[J] = -1;
5680 }
5681 // See if it would be better to reorganize NewBytes to avoid using VPERM.
5682 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5683 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5684 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5685 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5686 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5687 if (NewBytes[J] >= 0) {
5688 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5689 "Invalid double permute");
5690 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5691 } else
5692 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5693 }
5694 } else {
5695 // Just use NewBytes on the operands.
5696 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5697 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5698 if (NewBytes[J] >= 0)
5699 Bytes[J] = I * SystemZ::VectorBytes + J;
5700 }
5701 }
5702 }
5703
5704 // Now we just have 2 inputs. Put the second operand in Ops[1].
5705 if (Stride > 1) {
5706 Ops[1] = Ops[Stride];
5707 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5708 if (Bytes[I] >= int(SystemZ::VectorBytes))
5709 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5710 }
5711
5712 // Look for an instruction that can do the permute without resorting
5713 // to VPERM.
5714 unsigned OpNo0, OpNo1;
5715 SDValue Op;
5716 if (unpackWasPrepared() && Ops[1].isUndef())
5717 Op = Ops[0];
5718 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5719 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5720 else
5721 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5722
5723 Op = insertUnpackIfPrepared(DAG, DL, Op);
5724
5725 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5726}
5727
5728#ifndef NDEBUG
5729static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5730 dbgs() << Msg.c_str() << " { ";
5731 for (unsigned i = 0; i < Bytes.size(); i++)
5732 dbgs() << Bytes[i] << " ";
5733 dbgs() << "}\n";
5734}
5735#endif
5736
5737// If the Bytes vector matches an unpack operation, prepare to do the unpack
5738// after all else by removing the zero vector and the effect of the unpack on
5739// Bytes.
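// For example, with Ops == { V, ZeroVector } and
// Bytes == { 16, 17, 0, 1, 18, 19, 2, 3, ... } (a zero byte pair ahead of
// each byte pair of V), the zero vector is removed, Bytes is rewritten to
// { 0, 1, ..., 7, -1, ... }, and a final UNPACKL_HIGH of element size 2
// performs the zero extension.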
5740void GeneralShuffle::tryPrepareForUnpack() {
5741 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5742 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5743 return;
5744
5745 // Only do this if removing the zero vector reduces the depth, otherwise
5746 // the critical path will increase with the final unpack.
5747 if (Ops.size() > 2 &&
5748 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5749 return;
5750
5751 // Find an unpack that would allow removing the zero vector from Ops.
5752 UnpackFromEltSize = 1;
5753 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5754 bool MatchUnpack = true;
5755 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5756 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5757 unsigned ToEltSize = UnpackFromEltSize * 2;
5758 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5759 if (!IsZextByte)
5760 SrcBytes.push_back(Bytes[Elt]);
5761 if (Bytes[Elt] != -1) {
5762 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5763 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5764 MatchUnpack = false;
5765 break;
5766 }
5767 }
5768 }
5769 if (MatchUnpack) {
5770 if (Ops.size() == 2) {
5771 // Don't use unpack if a single source operand needs rearrangement.
5772 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5773 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5774 UnpackFromEltSize = UINT_MAX;
5775 return;
5776 }
5777 }
5778 break;
5779 }
5780 }
5781 if (UnpackFromEltSize > 4)
5782 return;
5783
5784 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5785 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5786 << ".\n";
5787 dumpBytes(Bytes, "Original Bytes vector:"););
5788
5789 // Apply the unpack in reverse to the Bytes array.
5790 unsigned B = 0;
5791 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5792 Elt += UnpackFromEltSize;
5793 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5794 Bytes[B] = Bytes[Elt];
5795 }
5796 while (B < SystemZ::VectorBytes)
5797 Bytes[B++] = -1;
5798
5799 // Remove the zero vector from Ops
5800 Ops.erase(&Ops[ZeroVecOpNo]);
5801 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5802 if (Bytes[I] >= 0) {
5803 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5804 if (OpNo > ZeroVecOpNo)
5805 Bytes[I] -= SystemZ::VectorBytes;
5806 }
5807
5808 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5809 dbgs() << "\n";);
5810}
5811
5812SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5813 const SDLoc &DL,
5814 SDValue Op) {
5815 if (!unpackWasPrepared())
5816 return Op;
5817 unsigned InBits = UnpackFromEltSize * 8;
5818 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5819 SystemZ::VectorBits / InBits);
5820 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5821 unsigned OutBits = InBits * 2;
5822 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5823 SystemZ::VectorBits / OutBits);
5824 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5825}
5826
5827// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5828 static bool isScalarToVector(SDValue Op) {
5829 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5830 if (!Op.getOperand(I).isUndef())
5831 return false;
5832 return true;
5833}
5834
5835// Return a vector of type VT that contains Value in the first element.
5836// The other elements don't matter.
5837 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5838 SDValue Value) {
5839 // If we have a constant, replicate it to all elements and let the
5840 // BUILD_VECTOR lowering take care of it.
5841 if (Value.getOpcode() == ISD::Constant ||
5842 Value.getOpcode() == ISD::ConstantFP) {
5843 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5844 return DAG.getBuildVector(VT, DL, Ops);
5845 }
5846 if (Value.isUndef())
5847 return DAG.getUNDEF(VT);
5848 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5849}
5850
5851// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5852// element 1. Used for cases in which replication is cheap.
5853 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5854 SDValue Op0, SDValue Op1) {
5855 if (Op0.isUndef()) {
5856 if (Op1.isUndef())
5857 return DAG.getUNDEF(VT);
5858 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5859 }
5860 if (Op1.isUndef())
5861 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5862 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5863 buildScalarToVector(DAG, DL, VT, Op0),
5864 buildScalarToVector(DAG, DL, VT, Op1));
5865}
5866
5867// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5868// vector for them.
5869 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5870 SDValue Op1) {
5871 if (Op0.isUndef() && Op1.isUndef())
5872 return DAG.getUNDEF(MVT::v2i64);
5873 // If one of the two inputs is undefined then replicate the other one,
5874 // in order to avoid using another register unnecessarily.
5875 if (Op0.isUndef())
5876 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5877 else if (Op1.isUndef())
5878 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5879 else {
5880 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5881 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5882 }
5883 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5884}
5885
5886// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5887// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5888// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5889// would benefit from this representation and return it if so.
5890 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5891 BuildVectorSDNode *BVN) {
5892 EVT VT = BVN->getValueType(0);
5893 unsigned NumElements = VT.getVectorNumElements();
5894
5895 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5896 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5897 // need a BUILD_VECTOR, add an additional placeholder operand for that
5898 // BUILD_VECTOR and store its operands in ResidueOps.
5899 GeneralShuffle GS(VT);
5900 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5901 bool FoundOne = false;
5902 for (unsigned I = 0; I < NumElements; ++I) {
5903 SDValue Op = BVN->getOperand(I);
5904 if (Op.getOpcode() == ISD::TRUNCATE)
5905 Op = Op.getOperand(0);
5906 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5907 Op.getOperand(1).getOpcode() == ISD::Constant) {
5908 unsigned Elem = Op.getConstantOperandVal(1);
5909 if (!GS.add(Op.getOperand(0), Elem))
5910 return SDValue();
5911 FoundOne = true;
5912 } else if (Op.isUndef()) {
5913 GS.addUndef();
5914 } else {
5915 if (!GS.add(SDValue(), ResidueOps.size()))
5916 return SDValue();
5917 ResidueOps.push_back(BVN->getOperand(I));
5918 }
5919 }
5920
5921 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5922 if (!FoundOne)
5923 return SDValue();
5924
5925 // Create the BUILD_VECTOR for the remaining elements, if any.
5926 if (!ResidueOps.empty()) {
5927 while (ResidueOps.size() < NumElements)
5928 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5929 for (auto &Op : GS.Ops) {
5930 if (!Op.getNode()) {
5931 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5932 break;
5933 }
5934 }
5935 }
5936 return GS.getNode(DAG, SDLoc(BVN));
5937}
5938
5939bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5940 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5941 return true;
5942 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5943 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5944 return true;
5945 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5946 return true;
5947 return false;
5948}
5949
5950// Combine GPR scalar values Elems into a vector of type VT.
5951SDValue
5952SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5953 SmallVectorImpl<SDValue> &Elems) const {
5954 // See whether there is a single replicated value.
5955 SDValue Single;
5956 unsigned int NumElements = Elems.size();
5957 unsigned int Count = 0;
5958 for (auto Elem : Elems) {
5959 if (!Elem.isUndef()) {
5960 if (!Single.getNode())
5961 Single = Elem;
5962 else if (Elem != Single) {
5963 Single = SDValue();
5964 break;
5965 }
5966 Count += 1;
5967 }
5968 }
5969 // There are three cases here:
5970 //
5971 // - if the only defined element is a loaded one, the best sequence
5972 // is a replicating load.
5973 //
5974 // - otherwise, if the only defined element is an i64 value, we will
5975 // end up with the same VLVGP sequence regardless of whether we short-cut
5976 // for replication or fall through to the later code.
5977 //
5978 // - otherwise, if the only defined element is an i32 or smaller value,
5979 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5980 // This is only a win if the single defined element is used more than once.
5981 // In other cases we're better off using a single VLVGx.
5982 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5983 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5984
5985 // If all elements are loads, use VLREP/VLEs (below).
5986 bool AllLoads = true;
5987 for (auto Elem : Elems)
5988 if (!isVectorElementLoad(Elem)) {
5989 AllLoads = false;
5990 break;
5991 }
5992
5993 // The best way of building a v2i64 from two i64s is to use VLVGP.
5994 if (VT == MVT::v2i64 && !AllLoads)
5995 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5996
5997 // Use a 64-bit merge high to combine two doubles.
5998 if (VT == MVT::v2f64 && !AllLoads)
5999 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6000
6001 // Build v4f32 values directly from the FPRs:
6002 //
6003 // <Axxx> <Bxxx> <Cxxx> <Dxxx>
6004 // V V VMRHF
6005 // <ABxx> <CDxx>
6006 // V VMRHG
6007 // <ABCD>
6008 if (VT == MVT::v4f32 && !AllLoads) {
6009 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6010 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6011 // Avoid unnecessary undefs by reusing the other operand.
6012 if (Op01.isUndef())
6013 Op01 = Op23;
6014 else if (Op23.isUndef())
6015 Op23 = Op01;
6016 // Merging identical replications is a no-op.
6017 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6018 return Op01;
6019 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6020 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6021 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6022 DL, MVT::v2i64, Op01, Op23);
6023 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6024 }
6025
6026 // Collect the constant terms.
6027 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6028 SmallVector<bool, 16> Done(NumElements, false);
6029
6030 unsigned NumConstants = 0;
6031 for (unsigned I = 0; I < NumElements; ++I) {
6032 SDValue Elem = Elems[I];
6033 if (Elem.getOpcode() == ISD::Constant ||
6034 Elem.getOpcode() == ISD::ConstantFP) {
6035 NumConstants += 1;
6036 Constants[I] = Elem;
6037 Done[I] = true;
6038 }
6039 }
6040 // If there was at least one constant, fill in the other elements of
6041 // Constants with undefs to get a full vector constant and use that
6042 // as the starting point.
6043 SDValue Result;
6044 SDValue ReplicatedVal;
6045 if (NumConstants > 0) {
6046 for (unsigned I = 0; I < NumElements; ++I)
6047 if (!Constants[I].getNode())
6048 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6049 Result = DAG.getBuildVector(VT, DL, Constants);
6050 } else {
6051 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6052 // avoid a false dependency on any previous contents of the vector
6053 // register.
6054
6055 // Use a VLREP if at least one element is a load. Make sure to replicate
6056 // the load with the most elements having its value.
6057 std::map<const SDNode*, unsigned> UseCounts;
6058 SDNode *LoadMaxUses = nullptr;
6059 for (unsigned I = 0; I < NumElements; ++I)
6060 if (isVectorElementLoad(Elems[I])) {
6061 SDNode *Ld = Elems[I].getNode();
6062 UseCounts[Ld]++;
6063 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
6064 LoadMaxUses = Ld;
6065 }
6066 if (LoadMaxUses != nullptr) {
6067 ReplicatedVal = SDValue(LoadMaxUses, 0);
6068 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6069 } else {
6070 // Try to use VLVGP.
6071 unsigned I1 = NumElements / 2 - 1;
6072 unsigned I2 = NumElements - 1;
6073 bool Def1 = !Elems[I1].isUndef();
6074 bool Def2 = !Elems[I2].isUndef();
6075 if (Def1 || Def2) {
6076 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6077 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6078 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6079 joinDwords(DAG, DL, Elem1, Elem2));
6080 Done[I1] = true;
6081 Done[I2] = true;
6082 } else
6083 Result = DAG.getUNDEF(VT);
6084 }
6085 }
6086
6087 // Use VLVGx to insert the other elements.
6088 for (unsigned I = 0; I < NumElements; ++I)
6089 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6090 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6091 DAG.getConstant(I, DL, MVT::i32));
6092 return Result;
6093}
6094
6095SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6096 SelectionDAG &DAG) const {
6097 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6098 SDLoc DL(Op);
6099 EVT VT = Op.getValueType();
6100
6101 if (BVN->isConstant()) {
6102 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6103 return Op;
6104
6105 // Fall back to loading it from memory.
6106 return SDValue();
6107 }
6108
6109 // See if we should use shuffles to construct the vector from other vectors.
6110 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6111 return Res;
6112
6113 // Detect SCALAR_TO_VECTOR conversions.
6114 if (isScalarToVector(Op))
6115 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6116
6117 // Otherwise use buildVector to build the vector up from GPRs.
6118 unsigned NumElements = Op.getNumOperands();
6119 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
6120 for (unsigned I = 0; I < NumElements; ++I)
6121 Ops[I] = Op.getOperand(I);
6122 return buildVector(DAG, DL, VT, Ops);
6123}
6124
6125SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6126 SelectionDAG &DAG) const {
6127 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6128 SDLoc DL(Op);
6129 EVT VT = Op.getValueType();
6130 unsigned NumElements = VT.getVectorNumElements();
6131
6132 if (VSN->isSplat()) {
6133 SDValue Op0 = Op.getOperand(0);
6134 unsigned Index = VSN->getSplatIndex();
6135 assert(Index < VT.getVectorNumElements() &&
6136 "Splat index should be defined and in first operand");
6137 // See whether the value we're splatting is directly available as a scalar.
6138 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6139 Op0.getOpcode() == ISD::BUILD_VECTOR)
6140 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6141 // Otherwise keep it as a vector-to-vector operation.
6142 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6143 DAG.getTargetConstant(Index, DL, MVT::i32));
6144 }
6145
6146 GeneralShuffle GS(VT);
6147 for (unsigned I = 0; I < NumElements; ++I) {
6148 int Elt = VSN->getMaskElt(I);
6149 if (Elt < 0)
6150 GS.addUndef();
6151 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6152 unsigned(Elt) % NumElements))
6153 return SDValue();
6154 }
6155 return GS.getNode(DAG, SDLoc(VSN));
6156}
6157
6158SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6159 SelectionDAG &DAG) const {
6160 SDLoc DL(Op);
6161 // Just insert the scalar into element 0 of an undefined vector.
6162 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6163 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6164 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6165}
6166
6167SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6168 SelectionDAG &DAG) const {
6169 // Handle insertions of floating-point values.
6170 SDLoc DL(Op);
6171 SDValue Op0 = Op.getOperand(0);
6172 SDValue Op1 = Op.getOperand(1);
6173 SDValue Op2 = Op.getOperand(2);
6174 EVT VT = Op.getValueType();
6175
6176 // Insertions into constant indices of a v2f64 can be done using VPDI.
6177 // However, if the inserted value is a bitcast or a constant then it's
6178 // better to use GPRs, as below.
6179 if (VT == MVT::v2f64 &&
6180 Op1.getOpcode() != ISD::BITCAST &&
6181 Op1.getOpcode() != ISD::ConstantFP &&
6182 Op2.getOpcode() == ISD::Constant) {
6183 uint64_t Index = Op2->getAsZExtVal();
6184 unsigned Mask = VT.getVectorNumElements() - 1;
6185 if (Index <= Mask)
6186 return Op;
6187 }
6188
6189 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6190 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6191 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6192 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6193 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6194 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6195 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6196}
6197
6198SDValue
6199SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6200 SelectionDAG &DAG) const {
6201 // Handle extractions of floating-point values.
6202 SDLoc DL(Op);
6203 SDValue Op0 = Op.getOperand(0);
6204 SDValue Op1 = Op.getOperand(1);
6205 EVT VT = Op.getValueType();
6206 EVT VecVT = Op0.getValueType();
6207
6208 // Extractions of constant indices can be done directly.
6209 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6210 uint64_t Index = CIndexN->getZExtValue();
6211 unsigned Mask = VecVT.getVectorNumElements() - 1;
6212 if (Index <= Mask)
6213 return Op;
6214 }
6215
6216 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6217 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6218 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6219 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6220 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6221 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6222}
6223
6224SDValue SystemZTargetLowering::
6225lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6226 SDValue PackedOp = Op.getOperand(0);
6227 EVT OutVT = Op.getValueType();
6228 EVT InVT = PackedOp.getValueType();
6229 unsigned ToBits = OutVT.getScalarSizeInBits();
6230 unsigned FromBits = InVT.getScalarSizeInBits();
6231 do {
6232 FromBits *= 2;
6233 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
6234 SystemZ::VectorBits / FromBits);
6235 PackedOp =
6236 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
6237 } while (FromBits != ToBits);
6238 return PackedOp;
6239}
6240
6241// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
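// For example, zero-extending v4i8 (held in the first four bytes of a
// v16i8) to v4i32 uses the shuffle mask
// { 16, 17, 18, 0, 19, 20, 21, 1, 22, 23, 24, 2, 25, 26, 27, 3 },
// where indices >= 16 select bytes of the zero vector.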
6242SDValue SystemZTargetLowering::
6243lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6244 SDValue PackedOp = Op.getOperand(0);
6245 SDLoc DL(Op);
6246 EVT OutVT = Op.getValueType();
6247 EVT InVT = PackedOp.getValueType();
6248 unsigned InNumElts = InVT.getVectorNumElements();
6249 unsigned OutNumElts = OutVT.getVectorNumElements();
6250 unsigned NumInPerOut = InNumElts / OutNumElts;
6251
6252 SDValue ZeroVec =
6253 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6254
6255 SmallVector<int, 16> Mask(InNumElts);
6256 unsigned ZeroVecElt = InNumElts;
6257 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6258 unsigned MaskElt = PackedElt * NumInPerOut;
6259 unsigned End = MaskElt + NumInPerOut - 1;
6260 for (; MaskElt < End; MaskElt++)
6261 Mask[MaskElt] = ZeroVecElt++;
6262 Mask[MaskElt] = PackedElt;
6263 }
6264 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6265 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6266}
6267
6268SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6269 unsigned ByScalar) const {
6270 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6271 SDValue Op0 = Op.getOperand(0);
6272 SDValue Op1 = Op.getOperand(1);
6273 SDLoc DL(Op);
6274 EVT VT = Op.getValueType();
6275 unsigned ElemBitSize = VT.getScalarSizeInBits();
6276
6277 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6278 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6279 APInt SplatBits, SplatUndef;
6280 unsigned SplatBitSize;
6281 bool HasAnyUndefs;
6282 // Check for constant splats. Use ElemBitSize as the minimum element
6283 // width and reject splats that need wider elements.
6284 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6285 ElemBitSize, true) &&
6286 SplatBitSize == ElemBitSize) {
6287 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6288 DL, MVT::i32);
6289 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6290 }
6291 // Check for variable splats.
6292 BitVector UndefElements;
6293 SDValue Splat = BVN->getSplatValue(&UndefElements);
6294 if (Splat) {
6295 // Since i32 is the smallest legal type, we either need a no-op
6296 // or a truncation.
6297 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6298 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6299 }
6300 }
6301
6302 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6303 // and the shift amount is directly available in a GPR.
6304 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6305 if (VSN->isSplat()) {
6306 SDValue VSNOp0 = VSN->getOperand(0);
6307 unsigned Index = VSN->getSplatIndex();
6308 assert(Index < VT.getVectorNumElements() &&
6309 "Splat index should be defined and in first operand");
6310 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6311 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6312 // Since i32 is the smallest legal type, we either need a no-op
6313 // or a truncation.
6314 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6315 VSNOp0.getOperand(Index));
6316 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6317 }
6318 }
6319 }
6320
6321 // Otherwise just treat the current form as legal.
6322 return Op;
6323}
6324
6325 static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) {
6326 SDLoc dl(Op);
6327 SDValue Src = Op.getOperand(0);
6328 MVT DstVT = Op.getSimpleValueType();
6329
6330 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
6331 unsigned SrcAS = N->getSrcAddressSpace();
6332
6333 assert(SrcAS != N->getDestAddressSpace() &&
6334 "addrspacecast must be between different address spaces");
6335
6336 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6337 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
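// For example, casting a ptr32 value to a 64-bit pointer below masks it with
// 0x7fffffff (ptr32 pointers are 31-bit) and then zero-extends the result
// to i64.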
6338 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6339 Op = DAG.getNode(ISD::AND, dl, MVT::i32, Src,
6340 DAG.getConstant(0x7fffffff, dl, MVT::i32));
6341 Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6342 } else if (DstVT == MVT::i32) {
6343 Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src);
6344 Op = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
6345 DAG.getConstant(0x7fffffff, dl, MVT::i32));
6346 Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6347 } else {
6348 report_fatal_error("Bad address space in addrspacecast");
6349 }
6350 return Op;
6351}
6352
6353SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6354 SelectionDAG &DAG) const {
6355 SDLoc DL(Op);
6356 MVT ResultVT = Op.getSimpleValueType();
6357 SDValue Arg = Op.getOperand(0);
6358 unsigned Check = Op.getConstantOperandVal(1);
6359
6360 unsigned TDCMask = 0;
6361 if (Check & fcSNan)
6362 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6363 if (Check & fcQNan)
6364 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6365 if (Check & fcPosInf)
6366 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6367 if (Check & fcNegInf)
6368 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6369 if (Check & fcPosNormal)
6370 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6371 if (Check & fcNegNormal)
6372 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6373 if (Check & fcPosSubnormal)
6374 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6375 if (Check & fcNegSubnormal)
6376 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6377 if (Check & fcPosZero)
6378 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6379 if (Check & fcNegZero)
6380 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6381 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6382
6383 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6384 return getCCResult(DAG, Intr);
6385}
6386
6387SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6388 SelectionDAG &DAG) const {
6389 SDLoc DL(Op);
6390 SDValue Chain = Op.getOperand(0);
6391
6392 // STCKF only supports a memory operand, so we have to use a temporary.
6393 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6394 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6395 MachinePointerInfo MPI =
6396 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6397
6398 // Use STCKF to store the TOD clock into the temporary.
6399 SDValue StoreOps[] = {Chain, StackPtr};
6400 Chain = DAG.getMemIntrinsicNode(
6401 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6402 MPI, MaybeAlign(), MachineMemOperand::MOStore);
6403
6404 // And read it back from there.
6405 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6406}
6407
6408 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6409 SelectionDAG &DAG) const {
6410 switch (Op.getOpcode()) {
6411 case ISD::FRAMEADDR:
6412 return lowerFRAMEADDR(Op, DAG);
6413 case ISD::RETURNADDR:
6414 return lowerRETURNADDR(Op, DAG);
6415 case ISD::BR_CC:
6416 return lowerBR_CC(Op, DAG);
6417 case ISD::SELECT_CC:
6418 return lowerSELECT_CC(Op, DAG);
6419 case ISD::SETCC:
6420 return lowerSETCC(Op, DAG);
6421 case ISD::STRICT_FSETCC:
6422 return lowerSTRICT_FSETCC(Op, DAG, false);
6423 case ISD::STRICT_FSETCCS:
6424 return lowerSTRICT_FSETCC(Op, DAG, true);
6425 case ISD::GlobalAddress:
6426 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6427 case ISD::GlobalTLSAddress:
6428 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6429 case ISD::BlockAddress:
6430 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6431 case ISD::JumpTable:
6432 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6433 case ISD::ConstantPool:
6434 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6435 case ISD::BITCAST:
6436 return lowerBITCAST(Op, DAG);
6437 case ISD::VASTART:
6438 return lowerVASTART(Op, DAG);
6439 case ISD::VACOPY:
6440 return lowerVACOPY(Op, DAG);
6441 case ISD::DYNAMIC_STACKALLOC:
6442 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6443 case ISD::GET_DYNAMIC_AREA_OFFSET:
6444 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6445 case ISD::SMUL_LOHI:
6446 return lowerSMUL_LOHI(Op, DAG);
6447 case ISD::UMUL_LOHI:
6448 return lowerUMUL_LOHI(Op, DAG);
6449 case ISD::SDIVREM:
6450 return lowerSDIVREM(Op, DAG);
6451 case ISD::UDIVREM:
6452 return lowerUDIVREM(Op, DAG);
6453 case ISD::SADDO:
6454 case ISD::SSUBO:
6455 case ISD::UADDO:
6456 case ISD::USUBO:
6457 return lowerXALUO(Op, DAG);
6458 case ISD::UADDO_CARRY:
6459 case ISD::USUBO_CARRY:
6460 return lowerUADDSUBO_CARRY(Op, DAG);
6461 case ISD::OR:
6462 return lowerOR(Op, DAG);
6463 case ISD::CTPOP:
6464 return lowerCTPOP(Op, DAG);
6465 case ISD::VECREDUCE_ADD:
6466 return lowerVECREDUCE_ADD(Op, DAG);
6467 case ISD::ATOMIC_FENCE:
6468 return lowerATOMIC_FENCE(Op, DAG);
6469 case ISD::ATOMIC_SWAP:
6470 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6471 case ISD::ATOMIC_STORE:
6472 case ISD::ATOMIC_LOAD:
6473 return lowerATOMIC_LDST_I128(Op, DAG);
6474 case ISD::ATOMIC_LOAD_ADD:
6475 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6476 case ISD::ATOMIC_LOAD_SUB:
6477 return lowerATOMIC_LOAD_SUB(Op, DAG);
6478 case ISD::ATOMIC_LOAD_AND:
6479 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6480 case ISD::ATOMIC_LOAD_OR:
6481 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6482 case ISD::ATOMIC_LOAD_XOR:
6483 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6484 case ISD::ATOMIC_LOAD_NAND:
6485 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6486 case ISD::ATOMIC_LOAD_MIN:
6487 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6488 case ISD::ATOMIC_LOAD_MAX:
6489 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6490 case ISD::ATOMIC_LOAD_UMIN:
6491 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6492 case ISD::ATOMIC_LOAD_UMAX:
6493 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6494 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6495 return lowerATOMIC_CMP_SWAP(Op, DAG);
6496 case ISD::STACKSAVE:
6497 return lowerSTACKSAVE(Op, DAG);
6498 case ISD::STACKRESTORE:
6499 return lowerSTACKRESTORE(Op, DAG);
6500 case ISD::PREFETCH:
6501 return lowerPREFETCH(Op, DAG);
6502 case ISD::INTRINSIC_W_CHAIN:
6503 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6504 case ISD::INTRINSIC_WO_CHAIN:
6505 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6506 case ISD::BUILD_VECTOR:
6507 return lowerBUILD_VECTOR(Op, DAG);
6508 case ISD::VECTOR_SHUFFLE:
6509 return lowerVECTOR_SHUFFLE(Op, DAG);
6510 case ISD::SCALAR_TO_VECTOR:
6511 return lowerSCALAR_TO_VECTOR(Op, DAG);
6512 case ISD::INSERT_VECTOR_ELT:
6513 return lowerINSERT_VECTOR_ELT(Op, DAG);
6514 case ISD::EXTRACT_VECTOR_ELT:
6515 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6516 case ISD::SIGN_EXTEND_VECTOR_INREG:
6517 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6518 case ISD::ZERO_EXTEND_VECTOR_INREG:
6519 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6520 case ISD::SHL:
6521 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6522 case ISD::SRL:
6523 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6524 case ISD::SRA:
6525 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6526 case ISD::ADDRSPACECAST:
6527 return lowerAddrSpaceCast(Op, DAG);
6528 case ISD::ROTL:
6529 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6530 case ISD::IS_FPCLASS:
6531 return lowerIS_FPCLASS(Op, DAG);
6532 case ISD::GET_ROUNDING:
6533 return lowerGET_ROUNDING(Op, DAG);
6534 case ISD::READCYCLECOUNTER:
6535 return lowerREADCYCLECOUNTER(Op, DAG);
6538 // These operations are legal on our platform, but we cannot actually
6539 // set the operation action to Legal as common code would treat this
6540 // as equivalent to Expand. Instead, we keep the operation action to
6541 // Custom and just leave them unchanged here.
6542 return Op;
6543
6544 default:
6545 llvm_unreachable("Unexpected node to lower");
6546 }
6547}
6548
6549 static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6550 const SDLoc &SL) {
6551 // If i128 is legal, just use a normal bitcast.
6552 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6553 return DAG.getBitcast(MVT::f128, Src);
6554
6555 // Otherwise, f128 must live in FP128, so do a partwise move.
6556 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6557 &SystemZ::FP128BitRegClass);
6558
6559 SDValue Hi, Lo;
6560 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
6561
6562 Hi = DAG.getBitcast(MVT::f64, Hi);
6563 Lo = DAG.getBitcast(MVT::f64, Lo);
6564
6565 SDNode *Pair = DAG.getMachineNode(
6566 SystemZ::REG_SEQUENCE, SL, MVT::f128,
6567 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
6568 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
6569 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
6570 return SDValue(Pair, 0);
6571}
6572
6573 static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6574 const SDLoc &SL) {
6575 // If i128 is legal, just use a normal bitcast.
6576 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6577 return DAG.getBitcast(MVT::i128, Src);
6578
6579 // Otherwise, f128 must live in FP128, so do a partwise move.
6580 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6581 &SystemZ::FP128BitRegClass);
6582
6583 SDValue LoFP =
6584 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
6585 SDValue HiFP =
6586 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
6587 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
6588 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
6589
6590 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
6591}
6592
6593// Lower operations with invalid operand or result types (currently used
6594// only for 128-bit integer types).
6595void
6596 SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6597 SmallVectorImpl<SDValue> &Results,
6598 SelectionDAG &DAG) const {
6599 switch (N->getOpcode()) {
6600 case ISD::ATOMIC_LOAD: {
6601 SDLoc DL(N);
6602 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6603 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6604 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6605 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6606 DL, Tys, Ops, MVT::i128, MMO);
6607
6608 SDValue Lowered = lowerGR128ToI128(DAG, Res);
6609 if (N->getValueType(0) == MVT::f128)
6610 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
6611 Results.push_back(Lowered);
6612 Results.push_back(Res.getValue(1));
6613 break;
6614 }
6615 case ISD::ATOMIC_STORE: {
6616 SDLoc DL(N);
6617 SDVTList Tys = DAG.getVTList(MVT::Other);
6618 SDValue Val = N->getOperand(1);
6619 if (Val.getValueType() == MVT::f128)
6620 Val = expandBitCastF128ToI128(DAG, Val, DL);
6621 Val = lowerI128ToGR128(DAG, Val);
6622
6623 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
6624 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6625 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6626 DL, Tys, Ops, MVT::i128, MMO);
6627 // We have to enforce sequential consistency by performing a
6628 // serialization operation after the store.
6629 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6630 AtomicOrdering::SequentiallyConsistent)
6631 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6632 MVT::Other, Res), 0);
6633 Results.push_back(Res);
6634 break;
6635 }
6636 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6637 SDLoc DL(N);
6638 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6639 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6640 lowerI128ToGR128(DAG, N->getOperand(2)),
6641 lowerI128ToGR128(DAG, N->getOperand(3)) };
6642 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6643 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6644 DL, Tys, Ops, MVT::i128, MMO);
6645 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6646 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6647 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6648 Results.push_back(lowerGR128ToI128(DAG, Res));
6649 Results.push_back(Success);
6650 Results.push_back(Res.getValue(2));
6651 break;
6652 }
6653 case ISD::BITCAST: {
6654 SDValue Src = N->getOperand(0);
6655 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6656 !useSoftFloat()) {
6657 SDLoc DL(N);
6658 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
6659 }
6660 break;
6661 }
6662 default:
6663 llvm_unreachable("Unexpected node to lower");
6664 }
6665}
6666
6667void
6668 SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6669 SmallVectorImpl<SDValue> &Results,
6670 SelectionDAG &DAG) const {
6671 return LowerOperationWrapper(N, Results, DAG);
6672}
6673
6674const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6675#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6676 switch ((SystemZISD::NodeType)Opcode) {
6677 case SystemZISD::FIRST_NUMBER: break;
6678 OPCODE(RET_GLUE);
6679 OPCODE(CALL);
6680 OPCODE(SIBCALL);
6681 OPCODE(TLS_GDCALL);
6682 OPCODE(TLS_LDCALL);
6683 OPCODE(PCREL_WRAPPER);
6684 OPCODE(PCREL_OFFSET);
6685 OPCODE(ICMP);
6686 OPCODE(FCMP);
6687 OPCODE(STRICT_FCMP);
6688 OPCODE(STRICT_FCMPS);
6689 OPCODE(TM);
6690 OPCODE(BR_CCMASK);
6691 OPCODE(SELECT_CCMASK);
6692 OPCODE(ADJDYNALLOC);
6693 OPCODE(PROBED_ALLOCA);
6694 OPCODE(POPCNT);
6695 OPCODE(SMUL_LOHI);
6696 OPCODE(UMUL_LOHI);
6697 OPCODE(SDIVREM);
6698 OPCODE(UDIVREM);
6699 OPCODE(SADDO);
6700 OPCODE(SSUBO);
6701 OPCODE(UADDO);
6702 OPCODE(USUBO);
6703 OPCODE(ADDCARRY);
6704 OPCODE(SUBCARRY);
6705 OPCODE(GET_CCMASK);
6706 OPCODE(MVC);
6707 OPCODE(NC);
6708 OPCODE(OC);
6709 OPCODE(XC);
6710 OPCODE(CLC);
6711 OPCODE(MEMSET_MVC);
6712 OPCODE(STPCPY);
6713 OPCODE(STRCMP);
6714 OPCODE(SEARCH_STRING);
6715 OPCODE(IPM);
6716 OPCODE(TBEGIN);
6717 OPCODE(TBEGIN_NOFLOAT);
6718 OPCODE(TEND);
6719 OPCODE(BYTE_MASK);
6720 OPCODE(ROTATE_MASK);
6721 OPCODE(REPLICATE);
6722 OPCODE(JOIN_DWORDS);
6723 OPCODE(SPLAT);
6724 OPCODE(MERGE_HIGH);
6725 OPCODE(MERGE_LOW);
6726 OPCODE(SHL_DOUBLE);
6727 OPCODE(PERMUTE_DWORDS);
6728 OPCODE(PERMUTE);
6729 OPCODE(PACK);
6730 OPCODE(PACKS_CC);
6731 OPCODE(PACKLS_CC);
6732 OPCODE(UNPACK_HIGH);
6733 OPCODE(UNPACKL_HIGH);
6734 OPCODE(UNPACK_LOW);
6735 OPCODE(UNPACKL_LOW);
6736 OPCODE(VSHL_BY_SCALAR);
6737 OPCODE(VSRL_BY_SCALAR);
6738 OPCODE(VSRA_BY_SCALAR);
6739 OPCODE(VROTL_BY_SCALAR);
6740 OPCODE(VSUM);
6741 OPCODE(VACC);
6742 OPCODE(VSCBI);
6743 OPCODE(VAC);
6744 OPCODE(VSBI);
6745 OPCODE(VACCC);
6746 OPCODE(VSBCBI);
6747 OPCODE(VICMPE);
6748 OPCODE(VICMPH);
6749 OPCODE(VICMPHL);
6750 OPCODE(VICMPES);
6751 OPCODE(VICMPHS);
6752 OPCODE(VICMPHLS);
6753 OPCODE(VFCMPE);
6754 OPCODE(STRICT_VFCMPE);
6755 OPCODE(STRICT_VFCMPES);
6756 OPCODE(VFCMPH);
6757 OPCODE(STRICT_VFCMPH);
6758 OPCODE(STRICT_VFCMPHS);
6759 OPCODE(VFCMPHE);
6760 OPCODE(STRICT_VFCMPHE);
6761 OPCODE(STRICT_VFCMPHES);
6762 OPCODE(VFCMPES);
6763 OPCODE(VFCMPHS);
6764 OPCODE(VFCMPHES);
6765 OPCODE(VFTCI);
6766 OPCODE(VEXTEND);
6767 OPCODE(STRICT_VEXTEND);
6768 OPCODE(VROUND);
6769 OPCODE(STRICT_VROUND);
6770 OPCODE(VTM);
6771 OPCODE(SCMP128HI);
6772 OPCODE(UCMP128HI);
6773 OPCODE(VFAE_CC);
6774 OPCODE(VFAEZ_CC);
6775 OPCODE(VFEE_CC);
6776 OPCODE(VFEEZ_CC);
6777 OPCODE(VFENE_CC);
6778 OPCODE(VFENEZ_CC);
6779 OPCODE(VISTR_CC);
6780 OPCODE(VSTRC_CC);
6781 OPCODE(VSTRCZ_CC);
6782 OPCODE(VSTRS_CC);
6783 OPCODE(VSTRSZ_CC);
6784 OPCODE(TDC);
6785 OPCODE(ATOMIC_SWAPW);
6786 OPCODE(ATOMIC_LOADW_ADD);
6787 OPCODE(ATOMIC_LOADW_SUB);
6788 OPCODE(ATOMIC_LOADW_AND);
6789 OPCODE(ATOMIC_LOADW_OR);
6790 OPCODE(ATOMIC_LOADW_XOR);
6791 OPCODE(ATOMIC_LOADW_NAND);
6792 OPCODE(ATOMIC_LOADW_MIN);
6793 OPCODE(ATOMIC_LOADW_MAX);
6794 OPCODE(ATOMIC_LOADW_UMIN);
6795 OPCODE(ATOMIC_LOADW_UMAX);
6796 OPCODE(ATOMIC_CMP_SWAPW);
6797 OPCODE(ATOMIC_CMP_SWAP);
6798 OPCODE(ATOMIC_LOAD_128);
6799 OPCODE(ATOMIC_STORE_128);
6800 OPCODE(ATOMIC_CMP_SWAP_128);
6801 OPCODE(LRV);
6802 OPCODE(STRV);
6803 OPCODE(VLER);
6804 OPCODE(VSTER);
6805 OPCODE(STCKF);
6806 OPCODE(PREFETCH);
6807 OPCODE(ADA_ENTRY);
6808 }
6809 return nullptr;
6810#undef OPCODE
6811}
6812
6813// Return true if VT is a vector whose elements are a whole number of bytes
6814// in width. Also check for presence of vector support.
6815bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6816 if (!Subtarget.hasVector())
6817 return false;
6818
6819 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6820}
6821
6822// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6823// producing a result of type ResVT. Op is a possibly bitcast version
6824// of the input vector and Index is the index (based on type VecVT) that
6825// should be extracted. Return the new extraction if a simplification
6826// was possible or if Force is true.
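// For example, if Op is a VECTOR_SHUFFLE whose VPERM-style byte mask maps the
// extracted bytes to a contiguous, element-aligned run of bytes in one of its
// inputs, the extraction is redirected to that input (with Index recomputed),
// and a BITCAST back to VecVT is added at the end if needed.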
6827SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6828 EVT VecVT, SDValue Op,
6829 unsigned Index,
6830 DAGCombinerInfo &DCI,
6831 bool Force) const {
6832 SelectionDAG &DAG = DCI.DAG;
6833
6834 // The number of bytes being extracted.
6835 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6836
6837 for (;;) {
6838 unsigned Opcode = Op.getOpcode();
6839 if (Opcode == ISD::BITCAST)
6840 // Look through bitcasts.
6841 Op = Op.getOperand(0);
6842 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6843 canTreatAsByteVector(Op.getValueType())) {
6844 // Get a VPERM-like permute mask and see whether the bytes covered
6845 // by the extracted element are a contiguous sequence from one
6846 // source operand.
6847 SmallVector<int, SystemZ::VectorBytes> Bytes;
6848 if (!getVPermMask(Op, Bytes))
6849 break;
6850 int First;
6851 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6852 BytesPerElement, First))
6853 break;
6854 if (First < 0)
6855 return DAG.getUNDEF(ResVT);
6856 // Make sure the contiguous sequence starts at a multiple of the
6857 // original element size.
6858 unsigned Byte = unsigned(First) % Bytes.size();
6859 if (Byte % BytesPerElement != 0)
6860 break;
6861 // We can get the extracted value directly from an input.
6862 Index = Byte / BytesPerElement;
6863 Op = Op.getOperand(unsigned(First) / Bytes.size());
6864 Force = true;
6865 } else if (Opcode == ISD::BUILD_VECTOR &&
6866 canTreatAsByteVector(Op.getValueType())) {
6867 // We can only optimize this case if the BUILD_VECTOR elements are
6868 // at least as wide as the extracted value.
6869 EVT OpVT = Op.getValueType();
6870 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6871 if (OpBytesPerElement < BytesPerElement)
6872 break;
6873 // Make sure that the least-significant bit of the extracted value
6874 // is the least significant bit of an input.
6875 unsigned End = (Index + 1) * BytesPerElement;
6876 if (End % OpBytesPerElement != 0)
6877 break;
6878 // We're extracting the low part of one operand of the BUILD_VECTOR.
6879 Op = Op.getOperand(End / OpBytesPerElement - 1);
6880 if (!Op.getValueType().isInteger()) {
6881 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6882 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6883 DCI.AddToWorklist(Op.getNode());
6884 }
6885 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6886 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6887 if (VT != ResVT) {
6888 DCI.AddToWorklist(Op.getNode());
6889 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6890 }
6891 return Op;
6892 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6893 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6894 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6895 canTreatAsByteVector(Op.getValueType()) &&
6896 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6897 // Make sure that only the unextended bits are significant.
6898 EVT ExtVT = Op.getValueType();
6899 EVT OpVT = Op.getOperand(0).getValueType();
6900 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6901 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6902 unsigned Byte = Index * BytesPerElement;
6903 unsigned SubByte = Byte % ExtBytesPerElement;
6904 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6905 if (SubByte < MinSubByte ||
6906 SubByte + BytesPerElement > ExtBytesPerElement)
6907 break;
6908 // Get the byte offset of the unextended element
6909 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6910 // ...then add the byte offset relative to that element.
6911 Byte += SubByte - MinSubByte;
6912 if (Byte % BytesPerElement != 0)
6913 break;
6914 Op = Op.getOperand(0);
6915 Index = Byte / BytesPerElement;
6916 Force = true;
6917 } else
6918 break;
6919 }
6920 if (Force) {
6921 if (Op.getValueType() != VecVT) {
6922 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6923 DCI.AddToWorklist(Op.getNode());
6924 }
6925 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6926 DAG.getConstant(Index, DL, MVT::i32));
6927 }
6928 return SDValue();
6929}
6930
6931// Optimize vector operations in scalar value Op on the basis that Op
6932// is truncated to TruncVT.
6933SDValue SystemZTargetLowering::combineTruncateExtract(
6934 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6935 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6936 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6937 // of type TruncVT.
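 // For example, truncating i32 element 2 of X:v4i32 to i8 becomes an extract
 // of byte element (2 + 1) * 4 - 1 = 11 of (bitcast X to v16i8): on this
 // big-endian target, bytes 8..11 hold element 2 and byte 11 is its least
 // significant byte.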
6938 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6939 TruncVT.getSizeInBits() % 8 == 0) {
6940 SDValue Vec = Op.getOperand(0);
6941 EVT VecVT = Vec.getValueType();
6942 if (canTreatAsByteVector(VecVT)) {
6943 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6944 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6945 unsigned TruncBytes = TruncVT.getStoreSize();
6946 if (BytesPerElement % TruncBytes == 0) {
6947 // Calculate the value of Y' in the above description. We are
6948 // splitting the original elements into Scale equal-sized pieces
6949 // and for truncation purposes want the last (least-significant)
6950 // of these pieces for IndexN. This is easiest to do by calculating
6951 // the start index of the following element and then subtracting 1.
6952 unsigned Scale = BytesPerElement / TruncBytes;
6953 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6954
6955 // Defer the creation of the bitcast from X to combineExtract,
6956 // which might be able to optimize the extraction.
6957 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
6958 MVT::getIntegerVT(TruncBytes * 8),
6959 VecVT.getStoreSize() / TruncBytes);
6960 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6961 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6962 }
6963 }
6964 }
6965 }
6966 return SDValue();
6967}
6968
6969SDValue SystemZTargetLowering::combineZERO_EXTEND(
6970 SDNode *N, DAGCombinerInfo &DCI) const {
6971 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6972 SelectionDAG &DAG = DCI.DAG;
6973 SDValue N0 = N->getOperand(0);
6974 EVT VT = N->getValueType(0);
6975 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6976 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6977 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6978 if (TrueOp && FalseOp) {
6979 SDLoc DL(N0);
6980 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6981 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6982 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6983 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6984 // If N0 has multiple uses, change other uses as well.
6985 if (!N0.hasOneUse()) {
6986 SDValue TruncSelect =
6987 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6988 DCI.CombineTo(N0.getNode(), TruncSelect);
6989 }
6990 return NewSelect;
6991 }
6992 }
6993 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6994 // of the result is smaller than the size of X and all the truncated bits
6995 // of X are already zero.
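 // For example, (zext i32 (xor (trunc i16 X:i64), C)) becomes
 // (xor (trunc i32 X), zext(C)) provided bits 16..31 of X are known zero.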
6996 if (N0.getOpcode() == ISD::XOR &&
6997 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6998 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6999 N0.getOperand(1).getOpcode() == ISD::Constant) {
7000 SDValue X = N0.getOperand(0).getOperand(0);
7001 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7002 KnownBits Known = DAG.computeKnownBits(X);
7003 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7004 N0.getValueSizeInBits(),
7005 VT.getSizeInBits());
7006 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7007 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7008 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7009 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7010 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7011 }
7012 }
7013 }
7014
7015 return SDValue();
7016}
7017
7018SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7019 SDNode *N, DAGCombinerInfo &DCI) const {
7020 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7021 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7022 // into (select_cc LHS, RHS, -1, 0, COND)
7023 SelectionDAG &DAG = DCI.DAG;
7024 SDValue N0 = N->getOperand(0);
7025 EVT VT = N->getValueType(0);
7026 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7027 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7028 N0 = N0.getOperand(0);
7029 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7030 SDLoc DL(N0);
7031 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7032 DAG.getAllOnesConstant(DL, VT),
7033 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7034 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7035 }
7036 return SDValue();
7037}
7038
7039SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7040 SDNode *N, DAGCombinerInfo &DCI) const {
7041 // Convert (sext (ashr (shl X, C1), C2)) to
7042 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7043 // cheap as narrower ones.
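 // For example, (sext i64 (sra i32 (shl i32 X, 10), 12)) becomes
 // (sra i64 (shl i64 (anyext X), 42), 44): both shift amounts grow by the
 // 32 extra bits, so the same field ends up sign-extended in the wider type.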
7044 SelectionDAG &DAG = DCI.DAG;
7045 SDValue N0 = N->getOperand(0);
7046 EVT VT = N->getValueType(0);
7047 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7048 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7049 SDValue Inner = N0.getOperand(0);
7050 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7051 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7052 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7053 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7054 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7055 EVT ShiftVT = N0.getOperand(1).getValueType();
7056 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7057 Inner.getOperand(0));
7058 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7059 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7060 ShiftVT));
7061 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7062 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7063 }
7064 }
7065 }
7066
7067 return SDValue();
7068}
7069
7070SDValue SystemZTargetLowering::combineMERGE(
7071 SDNode *N, DAGCombinerInfo &DCI) const {
7072 SelectionDAG &DAG = DCI.DAG;
7073 unsigned Opcode = N->getOpcode();
7074 SDValue Op0 = N->getOperand(0);
7075 SDValue Op1 = N->getOperand(1);
7076 if (Op0.getOpcode() == ISD::BITCAST)
7077 Op0 = Op0.getOperand(0);
7078 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
7079 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7080 // for v4f32.
7081 if (Op1 == N->getOperand(0))
7082 return Op1;
7083 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
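 // The logical unpack reads only X and zero-extends each element it takes
 // from the relevant half to twice its width, which reproduces the
 // interleaving of zeroes and X elements that the merge computed.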
7084 EVT VT = Op1.getValueType();
7085 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7086 if (ElemBytes <= 4) {
7087 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7088 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7089 EVT InVT = VT.changeVectorElementTypeToInteger();
7090 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7091 SystemZ::VectorBytes / ElemBytes / 2);
7092 if (VT != InVT) {
7093 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7094 DCI.AddToWorklist(Op1.getNode());
7095 }
7096 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7097 DCI.AddToWorklist(Op.getNode());
7098 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7099 }
7100 }
7101 return SDValue();
7102}
7103
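// Return true if the (non-chain) uses of the i128 load LD consist only of
// truncations to i64 of its low half and/or of its high half (the latter via
// a single-use SRL by 64), returning those users in LoPart and HiPart.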
7104static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7105 SDNode *&HiPart) {
7106 LoPart = HiPart = nullptr;
7107
7108 // Scan through all users.
7109 for (SDUse &Use : LD->uses()) {
7110 // Skip the uses of the chain.
7111 if (Use.getResNo() != 0)
7112 continue;
7113
7114 // Verify every user is a TRUNCATE to i64 of the low or high half.
7115 SDNode *User = Use.getUser();
7116 bool IsLoPart = true;
7117 if (User->getOpcode() == ISD::SRL &&
7118 User->getOperand(1).getOpcode() == ISD::Constant &&
7119 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7120 User = *User->user_begin();
7121 IsLoPart = false;
7122 }
7123 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7124 return false;
7125
7126 if (IsLoPart) {
7127 if (LoPart)
7128 return false;
7129 LoPart = User;
7130 } else {
7131 if (HiPart)
7132 return false;
7133 HiPart = User;
7134 }
7135 }
7136 return true;
7137}
7138
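// Likewise for an f128 load whose only uses are EXTRACT_SUBREGs of its
// subreg_l64 and/or subreg_h64 halves.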
7139static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7140 SDNode *&HiPart) {
7141 LoPart = HiPart = nullptr;
7142
7143 // Scan through all users.
7144 for (SDUse &Use : LD->uses()) {
7145 // Skip the uses of the chain.
7146 if (Use.getResNo() != 0)
7147 continue;
7148
7149 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7150 SDNode *User = Use.getUser();
7151 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7152 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7153 return false;
7154
7155 switch (User->getConstantOperandVal(1)) {
7156 case SystemZ::subreg_l64:
7157 if (LoPart)
7158 return false;
7159 LoPart = User;
7160 break;
7161 case SystemZ::subreg_h64:
7162 if (HiPart)
7163 return false;
7164 HiPart = User;
7165 break;
7166 default:
7167 return false;
7168 }
7169 }
7170 return true;
7171}
7172
7173SDValue SystemZTargetLowering::combineLOAD(
7174 SDNode *N, DAGCombinerInfo &DCI) const {
7175 SelectionDAG &DAG = DCI.DAG;
7176 EVT LdVT = N->getValueType(0);
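 // If this load uses the 32-bit PTR32 address space but its base pointer
 // still has the 32-bit pointer type, recreate the load with the base
 // address cast to the default (normally 64-bit) pointer type.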
7177 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7178 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7179 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7180 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7181 if (PtrVT != LoadNodeVT) {
7182 SDLoc DL(LN);
7183 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7184 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7185 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7186 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7187 LN->getMemOperand());
7188 }
7189 }
7190 }
7191 SDLoc DL(N);
7192
7193 // Replace a 128-bit load that is used solely to move its value into GPRs
7194 // by separate loads of both halves.
7195 LoadSDNode *LD = cast<LoadSDNode>(N);
7196 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7197 SDNode *LoPart, *HiPart;
7198 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7199 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7200 // Rewrite each extraction as an independent load.
7201 SmallVector<SDValue, 2> ArgChains;
7202 if (HiPart) {
7203 SDValue EltLoad = DAG.getLoad(
7204 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7205 LD->getPointerInfo(), LD->getOriginalAlign(),
7206 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7207
7208 DCI.CombineTo(HiPart, EltLoad, true);
7209 ArgChains.push_back(EltLoad.getValue(1));
7210 }
7211 if (LoPart) {
7212 SDValue EltLoad = DAG.getLoad(
7213 LoPart->getValueType(0), DL, LD->getChain(),
7214 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7215 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
7216 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7217
7218 DCI.CombineTo(LoPart, EltLoad, true);
7219 ArgChains.push_back(EltLoad.getValue(1));
7220 }
7221
7222 // Collect all chains via TokenFactor.
7223 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7224 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7225 DCI.AddToWorklist(Chain.getNode());
7226 return SDValue(N, 0);
7227 }
7228 }
7229
7230 if (LdVT.isVector() || LdVT.isInteger())
7231 return SDValue();
7232 // Transform a scalar load that is REPLICATEd as well as having other
7233 // use(s) to the form where the other use(s) use the first element of the
7234 // REPLICATE instead of the load. Otherwise instruction selection will not
7235 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
7236 // point loads.
7237
7238 SDValue Replicate;
7239 SmallVector<SDNode*, 8> OtherUses;
7240 for (SDUse &Use : N->uses()) {
7241 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7242 if (Replicate)
7243 return SDValue(); // Should never happen
7244 Replicate = SDValue(Use.getUser(), 0);
7245 } else if (Use.getResNo() == 0)
7246 OtherUses.push_back(Use.getUser());
7247 }
7248 if (!Replicate || OtherUses.empty())
7249 return SDValue();
7250
7251 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
7252 Replicate, DAG.getConstant(0, DL, MVT::i32));
7253 // Update uses of the loaded Value while preserving old chains.
7254 for (SDNode *U : OtherUses) {
7255 SmallVector<SDValue, 8> Ops;
7256 for (SDValue Op : U->ops())
7257 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
7258 DAG.UpdateNodeOperands(U, Ops);
7259 }
7260 return SDValue(N, 0);
7261}
7262
7263bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
7264 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
7265 return true;
7266 if (Subtarget.hasVectorEnhancements2())
7267 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
7268 return true;
7269 return false;
7270}
7271
7272static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
7273 if (!VT.isVector() || !VT.isSimple() ||
7274 VT.getSizeInBits() != 128 ||
7275 VT.getScalarSizeInBits() % 8 != 0)
7276 return false;
7277
7278 unsigned NumElts = VT.getVectorNumElements();
7279 for (unsigned i = 0; i < NumElts; ++i) {
7280 if (M[i] < 0) continue; // ignore UNDEF indices
7281 if ((unsigned) M[i] != NumElts - 1 - i)
7282 return false;
7283 }
7284
7285 return true;
7286}
7287
7288static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
7289 for (auto *U : StoredVal->users()) {
7290 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
7291 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
7292 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
7293 continue;
7294 } else if (isa<BuildVectorSDNode>(U)) {
7295 SDValue BuildVector = SDValue(U, 0);
7296 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
7297 isOnlyUsedByStores(BuildVector, DAG))
7298 continue;
7299 }
7300 return false;
7301 }
7302 return true;
7303}
7304
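// Return true if Val is an i128 value assembled from two i64 parts as
// (or (zext Lo), (shl (anyext Hi), 64)), returning the parts in LoPart and
// HiPart.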
7305static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
7306 SDValue &HiPart) {
7307 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7308 return false;
7309
7310 SDValue Op0 = Val.getOperand(0);
7311 SDValue Op1 = Val.getOperand(1);
7312
7313 if (Op0.getOpcode() == ISD::SHL)
7314 std::swap(Op0, Op1);
7315 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7316 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7317 Op1.getConstantOperandVal(1) != 64)
7318 return false;
7319 Op1 = Op1.getOperand(0);
7320
7321 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7322 Op0.getOperand(0).getValueType() != MVT::i64)
7323 return false;
7324 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7325 Op1.getOperand(0).getValueType() != MVT::i64)
7326 return false;
7327
7328 LoPart = Op0.getOperand(0);
7329 HiPart = Op1.getOperand(0);
7330 return true;
7331}
7332
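// Likewise for an f128 value built by a REG_SEQUENCE from its two 64-bit
// halves (subreg_h64 and subreg_l64).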
7333static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7334 SDValue &HiPart) {
7335 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7336 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7337 return false;
7338
7339 if (Val->getNumOperands() != 5 ||
7340 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7341 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7342 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7343 return false;
7344
7345 LoPart = Val->getOperand(1);
7346 HiPart = Val->getOperand(3);
7347 return true;
7348}
7349
7350SDValue SystemZTargetLowering::combineSTORE(
7351 SDNode *N, DAGCombinerInfo &DCI) const {
7352 SelectionDAG &DAG = DCI.DAG;
7353 auto *SN = cast<StoreSDNode>(N);
7354 auto &Op1 = N->getOperand(1);
7355 EVT MemVT = SN->getMemoryVT();
7356
7357 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
7358 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7359 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
7360 if (PtrVT != StoreNodeVT) {
7361 SDLoc DL(SN);
7362 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
7363 SYSTEMZAS::PTR32, 0);
7364 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
7365 SN->getPointerInfo(), SN->getOriginalAlign(),
7366 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7367 }
7368 }
7369
7370 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
7371 // for the extraction to be done on a vMiN value, so that we can use VSTE.
7372 // If X has wider elements then convert it to:
7373 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
7374 if (MemVT.isInteger() && SN->isTruncatingStore()) {
7375 if (SDValue Value =
7376 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
7377 DCI.AddToWorklist(Value.getNode());
7378
7379 // Rewrite the store with the new form of stored value.
7380 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
7381 SN->getBasePtr(), SN->getMemoryVT(),
7382 SN->getMemOperand());
7383 }
7384 }
7385 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
7386 if (!SN->isTruncatingStore() &&
7387 Op1.getOpcode() == ISD::BSWAP &&
7388 Op1.getNode()->hasOneUse() &&
7389 canLoadStoreByteSwapped(Op1.getValueType())) {
7390
7391 SDValue BSwapOp = Op1.getOperand(0);
7392
7393 if (BSwapOp.getValueType() == MVT::i16)
7394 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
7395
7396 SDValue Ops[] = {
7397 N->getOperand(0), BSwapOp, N->getOperand(2)
7398 };
7399
7400 return
7401 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
7402 Ops, MemVT, SN->getMemOperand());
7403 }
7404 // Combine STORE (element-swap) into VSTER
7405 if (!SN->isTruncatingStore() &&
7406 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7407 Op1.getNode()->hasOneUse() &&
7408 Subtarget.hasVectorEnhancements2()) {
7409 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7410 ArrayRef<int> ShuffleMask = SVN->getMask();
7411 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7412 SDValue Ops[] = {
7413 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7414 };
7415
7416 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
7417 DAG.getVTList(MVT::Other),
7418 Ops, MemVT, SN->getMemOperand());
7419 }
7420 }
7421
7422 // Combine STORE (READCYCLECOUNTER) into STCKF.
7423 if (!SN->isTruncatingStore() &&
7424 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7425 Op1.hasOneUse() &&
7426 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7427 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7428 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7429 DAG.getVTList(MVT::Other),
7430 Ops, MemVT, SN->getMemOperand());
7431 }
7432
7433 // Transform a store of a 128-bit value moved from parts into two stores.
7434 if (SN->isSimple() && ISD::isNormalStore(SN)) {
7435 SDValue LoPart, HiPart;
7436 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7437 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
7438 SDLoc DL(SN);
7439 SDValue Chain0 =
7440 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7441 SN->getPointerInfo(), SN->getOriginalAlign(),
7442 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7443 SDValue Chain1 =
7444 DAG.getStore(SN->getChain(), DL, LoPart,
7445 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7446 TypeSize::getFixed(8)),
7447 SN->getPointerInfo().getWithOffset(8),
7448 SN->getOriginalAlign(),
7449 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7450
7451 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7452 }
7453 }
7454
7455 // Replicate a reg or immediate with VREP instead of scalar multiply or
7456 // immediate load. It seems best to do this during the first DAGCombine as
7457 // it is straightforward to handle the zero-extend node in the initial
7458 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
7459 // extracting an i16 element from a v16i8 vector).
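 // For example, storing the i64 constant 0x00ab00ab00ab00ab becomes a store
 // of a v4i16 splat of 0x00ab, so that instruction selection can materialize
 // the value with a vector replicate instead of loading the full 64-bit
 // immediate.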
7460 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7461 isOnlyUsedByStores(Op1, DAG)) {
7462 SDValue Word = SDValue();
7463 EVT WordVT;
7464
7465 // Find a replicated immediate; if one is found, return it in Word and
7466 // its type in WordVT.
7467 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7468 // Some constants are better handled with a scalar store.
7469 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7470 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7471 return;
7472
7473 APInt Val = C->getAPIntValue();
7474 // Truncate Val in case of a truncating store.
7475 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
7476 assert(SN->isTruncatingStore() &&
7477 "Non-truncating store and immediate value does not fit?");
7478 Val = Val.trunc(TotBytes * 8);
7479 }
7480
7481 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
7482 if (VCI.isVectorConstantLegal(Subtarget) &&
7483 VCI.Opcode == SystemZISD::REPLICATE) {
7484 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7485 WordVT = VCI.VecVT.getScalarType();
7486 }
7487 };
7488
7489 // Find a replicated register; if one is found, return it in Word and its
7490 // type in WordVT.
7491 auto FindReplicatedReg = [&](SDValue MulOp) {
7492 EVT MulVT = MulOp.getValueType();
7493 if (MulOp->getOpcode() == ISD::MUL &&
7494 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7495 // Find a zero extended value and its type.
7496 SDValue LHS = MulOp->getOperand(0);
7497 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7498 WordVT = LHS->getOperand(0).getValueType();
7499 else if (LHS->getOpcode() == ISD::AssertZext)
7500 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7501 else
7502 return;
7503 // Find a replicating constant, e.g. 0x00010001.
7504 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7505 SystemZVectorConstantInfo VCI(
7506 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7507 if (VCI.isVectorConstantLegal(Subtarget) &&
7508 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7509 WordVT == VCI.VecVT.getScalarType())
7510 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7511 }
7512 }
7513 };
7514
7515 if (isa<BuildVectorSDNode>(Op1) &&
7516 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7517 SDValue SplatVal = Op1->getOperand(0);
7518 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7519 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7520 else
7521 FindReplicatedReg(SplatVal);
7522 } else {
7523 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7524 FindReplicatedImm(C, MemVT.getStoreSize());
7525 else
7526 FindReplicatedReg(Op1);
7527 }
7528
7529 if (Word != SDValue()) {
7530 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7531 "Bad type handling");
7532 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7533 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7534 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7535 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7536 SN->getBasePtr(), SN->getMemOperand());
7537 }
7538 }
7539
7540 return SDValue();
7541}
7542
7543SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7544 SDNode *N, DAGCombinerInfo &DCI) const {
7545 SelectionDAG &DAG = DCI.DAG;
7546 // Combine element-swap (LOAD) into VLER
7547 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7548 N->getOperand(0).hasOneUse() &&
7549 Subtarget.hasVectorEnhancements2()) {
7550 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7551 ArrayRef<int> ShuffleMask = SVN->getMask();
7552 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7553 SDValue Load = N->getOperand(0);
7554 LoadSDNode *LD = cast<LoadSDNode>(Load);
7555
7556 // Create the element-swapping load.
7557 SDValue Ops[] = {
7558 LD->getChain(), // Chain
7559 LD->getBasePtr() // Ptr
7560 };
7561 SDValue ESLoad =
7562 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7563 DAG.getVTList(LD->getValueType(0), MVT::Other),
7564 Ops, LD->getMemoryVT(), LD->getMemOperand());
7565
7566 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7567 // by the load dead.
7568 DCI.CombineTo(N, ESLoad);
7569
7570 // Next, combine the load away, we give it a bogus result value but a real
7571 // chain result. The result value is dead because the shuffle is dead.
7572 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7573
7574 // Return N so it doesn't get rechecked!
7575 return SDValue(N, 0);
7576 }
7577 }
7578
7579 return SDValue();
7580}
7581
7582SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7583 SDNode *N, DAGCombinerInfo &DCI) const {
7584 SelectionDAG &DAG = DCI.DAG;
7585
7586 if (!Subtarget.hasVector())
7587 return SDValue();
7588
7589 // Look through bitcasts that retain the number of vector elements.
7590 SDValue Op = N->getOperand(0);
7591 if (Op.getOpcode() == ISD::BITCAST &&
7592 Op.getValueType().isVector() &&
7593 Op.getOperand(0).getValueType().isVector() &&
7594 Op.getValueType().getVectorNumElements() ==
7595 Op.getOperand(0).getValueType().getVectorNumElements())
7596 Op = Op.getOperand(0);
7597
7598 // Pull BSWAP out of a vector extraction.
7599 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7600 EVT VecVT = Op.getValueType();
7601 EVT EltVT = VecVT.getVectorElementType();
7602 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7603 Op.getOperand(0), N->getOperand(1));
7604 DCI.AddToWorklist(Op.getNode());
7605 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7606 if (EltVT != N->getValueType(0)) {
7607 DCI.AddToWorklist(Op.getNode());
7608 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7609 }
7610 return Op;
7611 }
7612
7613 // Try to simplify a vector extraction.
7614 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7615 SDValue Op0 = N->getOperand(0);
7616 EVT VecVT = Op0.getValueType();
7617 if (canTreatAsByteVector(VecVT))
7618 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7619 IndexN->getZExtValue(), DCI, false);
7620 }
7621 return SDValue();
7622}
7623
7624SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7625 SDNode *N, DAGCombinerInfo &DCI) const {
7626 SelectionDAG &DAG = DCI.DAG;
7627 // (join_dwords X, X) == (replicate X)
7628 if (N->getOperand(0) == N->getOperand(1))
7629 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7630 N->getOperand(0));
7631 return SDValue();
7632}
7633
7634static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7635 SDValue Chain1 = N1->getOperand(0);
7636 SDValue Chain2 = N2->getOperand(0);
7637
7638 // Trivial case: both nodes take the same chain.
7639 if (Chain1 == Chain2)
7640 return Chain1;
7641
7642 // FIXME - we could handle more complex cases via TokenFactor,
7643 // assuming we can verify that this would not create a cycle.
7644 return SDValue();
7645}
7646
7647SDValue SystemZTargetLowering::combineFP_ROUND(
7648 SDNode *N, DAGCombinerInfo &DCI) const {
7649
7650 if (!Subtarget.hasVector())
7651 return SDValue();
7652
7653 // (fpround (extract_vector_elt X 0))
7654 // (fpround (extract_vector_elt X 1)) ->
7655 // (extract_vector_elt (VROUND X) 0)
7656 // (extract_vector_elt (VROUND X) 2)
7657 //
7658 // This is a special case since the target doesn't really support v2f32s.
7659 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7660 SelectionDAG &DAG = DCI.DAG;
7661 SDValue Op0 = N->getOperand(OpNo);
7662 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7663 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7664 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7665 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7666 Op0.getConstantOperandVal(1) == 0) {
7667 SDValue Vec = Op0.getOperand(0);
7668 for (auto *U : Vec->users()) {
7669 if (U != Op0.getNode() && U->hasOneUse() &&
7670 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7671 U->getOperand(0) == Vec &&
7672 U->getOperand(1).getOpcode() == ISD::Constant &&
7673 U->getConstantOperandVal(1) == 1) {
7674 SDValue OtherRound = SDValue(*U->user_begin(), 0);
7675 if (OtherRound.getOpcode() == N->getOpcode() &&
7676 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7677 OtherRound.getValueType() == MVT::f32) {
7678 SDValue VRound, Chain;
7679 if (N->isStrictFPOpcode()) {
7680 Chain = MergeInputChains(N, OtherRound.getNode());
7681 if (!Chain)
7682 continue;
7683 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7684 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7685 Chain = VRound.getValue(1);
7686 } else
7687 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7688 MVT::v4f32, Vec);
7689 DCI.AddToWorklist(VRound.getNode());
7690 SDValue Extract1 =
7691 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7692 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7693 DCI.AddToWorklist(Extract1.getNode());
7694 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7695 if (Chain)
7696 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7697 SDValue Extract0 =
7698 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7699 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7700 if (Chain)
7701 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7702 N->getVTList(), Extract0, Chain);
7703 return Extract0;
7704 }
7705 }
7706 }
7707 }
7708 return SDValue();
7709}
7710
7711SDValue SystemZTargetLowering::combineFP_EXTEND(
7712 SDNode *N, DAGCombinerInfo &DCI) const {
7713
7714 if (!Subtarget.hasVector())
7715 return SDValue();
7716
7717 // (fpextend (extract_vector_elt X 0))
7718 // (fpextend (extract_vector_elt X 2)) ->
7719 // (extract_vector_elt (VEXTEND X) 0)
7720 // (extract_vector_elt (VEXTEND X) 1)
7721 //
7722 // This is a special case since the target doesn't really support v2f32s.
7723 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7724 SelectionDAG &DAG = DCI.DAG;
7725 SDValue Op0 = N->getOperand(OpNo);
7726 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7727 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7728 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7729 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7730 Op0.getConstantOperandVal(1) == 0) {
7731 SDValue Vec = Op0.getOperand(0);
7732 for (auto *U : Vec->users()) {
7733 if (U != Op0.getNode() && U->hasOneUse() &&
7734 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7735 U->getOperand(0) == Vec &&
7736 U->getOperand(1).getOpcode() == ISD::Constant &&
7737 U->getConstantOperandVal(1) == 2) {
7738 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
7739 if (OtherExtend.getOpcode() == N->getOpcode() &&
7740 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7741 OtherExtend.getValueType() == MVT::f64) {
7742 SDValue VExtend, Chain;
7743 if (N->isStrictFPOpcode()) {
7744 Chain = MergeInputChains(N, OtherExtend.getNode());
7745 if (!Chain)
7746 continue;
7747 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7748 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7749 Chain = VExtend.getValue(1);
7750 } else
7751 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7752 MVT::v2f64, Vec);
7753 DCI.AddToWorklist(VExtend.getNode());
7754 SDValue Extract1 =
7755 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7756 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7757 DCI.AddToWorklist(Extract1.getNode());
7758 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7759 if (Chain)
7760 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7761 SDValue Extract0 =
7762 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7763 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7764 if (Chain)
7765 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7766 N->getVTList(), Extract0, Chain);
7767 return Extract0;
7768 }
7769 }
7770 }
7771 }
7772 return SDValue();
7773}
7774
7775SDValue SystemZTargetLowering::combineINT_TO_FP(
7776 SDNode *N, DAGCombinerInfo &DCI) const {
7777 if (DCI.Level != BeforeLegalizeTypes)
7778 return SDValue();
7779 SelectionDAG &DAG = DCI.DAG;
7780 LLVMContext &Ctx = *DAG.getContext();
7781 unsigned Opcode = N->getOpcode();
7782 EVT OutVT = N->getValueType(0);
7783 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7784 SDValue Op = N->getOperand(0);
7785 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7786 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7787
7788 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7789 // v2f64 = uint_to_fp v2i16
7790 // =>
7791 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7792 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7793 OutScalarBits <= 64) {
7794 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7795 EVT ExtVT = EVT::getVectorVT(
7796 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7797 unsigned ExtOpcode =
7798 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
7799 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7800 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7801 }
7802 return SDValue();
7803}
7804
7805SDValue SystemZTargetLowering::combineBSWAP(
7806 SDNode *N, DAGCombinerInfo &DCI) const {
7807 SelectionDAG &DAG = DCI.DAG;
7808 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7809 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7810 N->getOperand(0).hasOneUse() &&
7811 canLoadStoreByteSwapped(N->getValueType(0))) {
7812 SDValue Load = N->getOperand(0);
7813 LoadSDNode *LD = cast<LoadSDNode>(Load);
7814
7815 // Create the byte-swapping load.
7816 SDValue Ops[] = {
7817 LD->getChain(), // Chain
7818 LD->getBasePtr() // Ptr
7819 };
7820 EVT LoadVT = N->getValueType(0);
7821 if (LoadVT == MVT::i16)
7822 LoadVT = MVT::i32;
7823 SDValue BSLoad =
7824 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7825 DAG.getVTList(LoadVT, MVT::Other),
7826 Ops, LD->getMemoryVT(), LD->getMemOperand());
7827
7828 // If this is an i16 load, insert the truncate.
7829 SDValue ResVal = BSLoad;
7830 if (N->getValueType(0) == MVT::i16)
7831 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7832
7833 // First, combine the bswap away. This makes the value produced by the
7834 // load dead.
7835 DCI.CombineTo(N, ResVal);
7836
7837 // Next, combine the load away, we give it a bogus result value but a real
7838 // chain result. The result value is dead because the bswap is dead.
7839 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7840
7841 // Return N so it doesn't get rechecked!
7842 return SDValue(N, 0);
7843 }
7844
7845 // Look through bitcasts that retain the number of vector elements.
7846 SDValue Op = N->getOperand(0);
7847 if (Op.getOpcode() == ISD::BITCAST &&
7848 Op.getValueType().isVector() &&
7849 Op.getOperand(0).getValueType().isVector() &&
7850 Op.getValueType().getVectorNumElements() ==
7851 Op.getOperand(0).getValueType().getVectorNumElements())
7852 Op = Op.getOperand(0);
7853
7854 // Push BSWAP into a vector insertion if at least one side then simplifies.
7855 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7856 SDValue Vec = Op.getOperand(0);
7857 SDValue Elt = Op.getOperand(1);
7858 SDValue Idx = Op.getOperand(2);
7859
7860 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7861 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7862 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7863 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7864 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7865 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7866 EVT VecVT = N->getValueType(0);
7867 EVT EltVT = N->getValueType(0).getVectorElementType();
7868 if (VecVT != Vec.getValueType()) {
7869 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7870 DCI.AddToWorklist(Vec.getNode());
7871 }
7872 if (EltVT != Elt.getValueType()) {
7873 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7874 DCI.AddToWorklist(Elt.getNode());
7875 }
7876 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7877 DCI.AddToWorklist(Vec.getNode());
7878 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7879 DCI.AddToWorklist(Elt.getNode());
7880 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7881 Vec, Elt, Idx);
7882 }
7883 }
7884
7885 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7886 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7887 if (SV && Op.hasOneUse()) {
7888 SDValue Op0 = Op.getOperand(0);
7889 SDValue Op1 = Op.getOperand(1);
7890
7891 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7892 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7893 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7894 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7895 EVT VecVT = N->getValueType(0);
7896 if (VecVT != Op0.getValueType()) {
7897 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7898 DCI.AddToWorklist(Op0.getNode());
7899 }
7900 if (VecVT != Op1.getValueType()) {
7901 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7902 DCI.AddToWorklist(Op1.getNode());
7903 }
7904 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7905 DCI.AddToWorklist(Op0.getNode());
7906 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7907 DCI.AddToWorklist(Op1.getNode());
7908 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7909 }
7910 }
7911
7912 return SDValue();
7913}
7914
7915static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7916 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7917 // set by the CCReg instruction using the CCValid / CCMask masks.
7918 // If the CCReg instruction is itself an ICMP testing the condition
7919 // code set by some other instruction, see whether we can directly
7920 // use that condition code.
7921
7922 // Verify that we have an ICMP against some constant.
7923 if (CCValid != SystemZ::CCMASK_ICMP)
7924 return false;
7925 auto *ICmp = CCReg.getNode();
7926 if (ICmp->getOpcode() != SystemZISD::ICMP)
7927 return false;
7928 auto *CompareLHS = ICmp->getOperand(0).getNode();
7929 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7930 if (!CompareRHS)
7931 return false;
7932
7933 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7934 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7935 // Verify that we have an appropriate mask for an EQ or NE comparison.
7936 bool Invert = false;
7937 if (CCMask == SystemZ::CCMASK_CMP_NE)
7938 Invert = !Invert;
7939 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7940 return false;
7941
7942 // Verify that the ICMP compares against one of the select values.
7943 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7944 if (!TrueVal)
7945 return false;
7946 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7947 if (!FalseVal)
7948 return false;
7949 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7950 Invert = !Invert;
7951 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7952 return false;
7953
7954 // Compute the effective CC mask for the new branch or select.
7955 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7956 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7957 if (!NewCCValid || !NewCCMask)
7958 return false;
7959 CCValid = NewCCValid->getZExtValue();
7960 CCMask = NewCCMask->getZExtValue();
7961 if (Invert)
7962 CCMask ^= CCValid;
7963
7964 // Return the updated CCReg link.
7965 CCReg = CompareLHS->getOperand(4);
7966 return true;
7967 }
7968
7969 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
7970 if (CompareLHS->getOpcode() == ISD::SRA) {
7971 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7972 if (!SRACount || SRACount->getZExtValue() != 30)
7973 return false;
7974 auto *SHL = CompareLHS->getOperand(0).getNode();
7975 if (SHL->getOpcode() != ISD::SHL)
7976 return false;
7977 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7978 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7979 return false;
7980 auto *IPM = SHL->getOperand(0).getNode();
7981 if (IPM->getOpcode() != SystemZISD::IPM)
7982 return false;
7983
7984 // Avoid introducing CC spills (because SRA would clobber CC).
7985 if (!CompareLHS->hasOneUse())
7986 return false;
7987 // Verify that the ICMP compares against zero.
7988 if (CompareRHS->getZExtValue() != 0)
7989 return false;
7990
7991 // Compute the effective CC mask for the new branch or select.
7992 CCMask = SystemZ::reverseCCMask(CCMask);
7993
7994 // Return the updated CCReg link.
7995 CCReg = IPM->getOperand(0);
7996 return true;
7997 }
7998
7999 return false;
8000}
8001
8002SDValue SystemZTargetLowering::combineBR_CCMASK(
8003 SDNode *N, DAGCombinerInfo &DCI) const {
8004 SelectionDAG &DAG = DCI.DAG;
8005
8006 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8007 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8008 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8009 if (!CCValid || !CCMask)
8010 return SDValue();
8011
8012 int CCValidVal = CCValid->getZExtValue();
8013 int CCMaskVal = CCMask->getZExtValue();
8014 SDValue Chain = N->getOperand(0);
8015 SDValue CCReg = N->getOperand(4);
8016
8017 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
8018 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
8019 Chain,
8020 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8021 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8022 N->getOperand(3), CCReg);
8023 return SDValue();
8024}
8025
8026SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8027 SDNode *N, DAGCombinerInfo &DCI) const {
8028 SelectionDAG &DAG = DCI.DAG;
8029
8030 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8031 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8032 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8033 if (!CCValid || !CCMask)
8034 return SDValue();
8035
8036 int CCValidVal = CCValid->getZExtValue();
8037 int CCMaskVal = CCMask->getZExtValue();
8038 SDValue CCReg = N->getOperand(4);
8039
8040 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
8041 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
8042 N->getOperand(0), N->getOperand(1),
8043 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8044 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8045 CCReg);
8046 return SDValue();
8047}
8048
8049
8050SDValue SystemZTargetLowering::combineGET_CCMASK(
8051 SDNode *N, DAGCombinerInfo &DCI) const {
8052
8053 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
8054 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8055 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8056 if (!CCValid || !CCMask)
8057 return SDValue();
8058 int CCValidVal = CCValid->getZExtValue();
8059 int CCMaskVal = CCMask->getZExtValue();
8060
8061 SDValue Select = N->getOperand(0);
8062 if (Select->getOpcode() == ISD::TRUNCATE)
8063 Select = Select->getOperand(0);
8064 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
8065 return SDValue();
8066
8067 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
8068 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
8069 if (!SelectCCValid || !SelectCCMask)
8070 return SDValue();
8071 int SelectCCValidVal = SelectCCValid->getZExtValue();
8072 int SelectCCMaskVal = SelectCCMask->getZExtValue();
8073
8074 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
8075 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
8076 if (!TrueVal || !FalseVal)
8077 return SDValue();
8078 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
8079 ;
8080 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
8081 SelectCCMaskVal ^= SelectCCValidVal;
8082 else
8083 return SDValue();
8084
8085 if (SelectCCValidVal & ~CCValidVal)
8086 return SDValue();
8087 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
8088 return SDValue();
8089
8090 return Select->getOperand(4);
8091}
8092
8093SDValue SystemZTargetLowering::combineIntDIVREM(
8094 SDNode *N, DAGCombinerInfo &DCI) const {
8095 SelectionDAG &DAG = DCI.DAG;
8096 EVT VT = N->getValueType(0);
8097 // In the case where the divisor is a vector of constants, a cheaper
8098 // sequence of instructions can replace the divide. BuildSDIV is called to
8099 // do this during DAG combining, but it only succeeds when it can build a
8100 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
8101 // since it is not Legal but Custom it can only happen before
8102 // legalization. Therefore we must scalarize this early, before the first
8103 // DAG combine. For widened vectors, this is already the result of type legalization.
8104 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
8105 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
8106 return DAG.UnrollVectorOp(N);
8107 return SDValue();
8108}
8109
8110SDValue SystemZTargetLowering::combineINTRINSIC(
8111 SDNode *N, DAGCombinerInfo &DCI) const {
8112 SelectionDAG &DAG = DCI.DAG;
8113
8114 unsigned Id = N->getConstantOperandVal(1);
8115 switch (Id) {
8116 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
8117 // or larger is simply a vector load.
8118 case Intrinsic::s390_vll:
8119 case Intrinsic::s390_vlrl:
8120 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
8121 if (C->getZExtValue() >= 15)
8122 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
8123 N->getOperand(3), MachinePointerInfo());
8124 break;
8125 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
8126 case Intrinsic::s390_vstl:
8127 case Intrinsic::s390_vstrl:
8128 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
8129 if (C->getZExtValue() >= 15)
8130 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
8131 N->getOperand(4), MachinePointerInfo());
8132 break;
8133 }
8134
8135 return SDValue();
8136}
8137
8138SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
8139 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
8140 return N->getOperand(0);
8141 return N;
8142}
8143
8144SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
8145 DAGCombinerInfo &DCI) const {
8146 switch(N->getOpcode()) {
8147 default: break;
8148 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
8149 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
8150 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
8151 case SystemZISD::MERGE_HIGH:
8152 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
8153 case ISD::LOAD: return combineLOAD(N, DCI);
8154 case ISD::STORE: return combineSTORE(N, DCI);
8155 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
8156 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
8157 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
8158 case ISD::STRICT_FP_ROUND:
8159 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
8160 case ISD::STRICT_FP_EXTEND:
8161 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
8162 case ISD::SINT_TO_FP:
8163 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
8164 case ISD::BSWAP: return combineBSWAP(N, DCI);
8165 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
8166 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
8167 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
8168 case ISD::SDIV:
8169 case ISD::UDIV:
8170 case ISD::SREM:
8171 case ISD::UREM: return combineIntDIVREM(N, DCI);
8172 case ISD::INTRINSIC_W_CHAIN:
8173 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
8174 }
8175
8176 return SDValue();
8177}
8178
8179// Return the demanded elements for the OpNo source operand of Op. DemandedElts
8180// are for Op.
8181static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
8182 unsigned OpNo) {
8183 EVT VT = Op.getValueType();
8184 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
8185 APInt SrcDemE;
8186 unsigned Opcode = Op.getOpcode();
8187 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8188 unsigned Id = Op.getConstantOperandVal(0);
8189 switch (Id) {
8190 case Intrinsic::s390_vpksh: // PACKS
8191 case Intrinsic::s390_vpksf:
8192 case Intrinsic::s390_vpksg:
8193 case Intrinsic::s390_vpkshs: // PACKS_CC
8194 case Intrinsic::s390_vpksfs:
8195 case Intrinsic::s390_vpksgs:
8196 case Intrinsic::s390_vpklsh: // PACKLS
8197 case Intrinsic::s390_vpklsf:
8198 case Intrinsic::s390_vpklsg:
8199 case Intrinsic::s390_vpklshs: // PACKLS_CC
8200 case Intrinsic::s390_vpklsfs:
8201 case Intrinsic::s390_vpklsgs:
8202 // VECTOR PACK truncates the elements of two source vectors into one.
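 // Result elements 0..N/2-1 come from operand 1 and elements N/2..N-1 from
 // operand 2, so for operand 2 the demanded-element mask is shifted down by
 // N/2 before being truncated to the source element count.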
8203 SrcDemE = DemandedElts;
8204 if (OpNo == 2)
8205 SrcDemE.lshrInPlace(NumElts / 2);
8206 SrcDemE = SrcDemE.trunc(NumElts / 2);
8207 break;
8208 // VECTOR UNPACK extends half the elements of the source vector.
8209 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8210 case Intrinsic::s390_vuphh:
8211 case Intrinsic::s390_vuphf:
8212 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8213 case Intrinsic::s390_vuplhh:
8214 case Intrinsic::s390_vuplhf:
8215 SrcDemE = APInt(NumElts * 2, 0);
8216 SrcDemE.insertBits(DemandedElts, 0);
8217 break;
8218 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8219 case Intrinsic::s390_vuplhw:
8220 case Intrinsic::s390_vuplf:
8221 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8222 case Intrinsic::s390_vupllh:
8223 case Intrinsic::s390_vupllf:
8224 SrcDemE = APInt(NumElts * 2, 0);
8225 SrcDemE.insertBits(DemandedElts, NumElts);
8226 break;
8227 case Intrinsic::s390_vpdi: {
8228 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
8229 SrcDemE = APInt(NumElts, 0);
8230 if (!DemandedElts[OpNo - 1])
8231 break;
8232 unsigned Mask = Op.getConstantOperandVal(3);
8233 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
8234 // Demand input element 0 or 1, given by the mask bit value.
8235 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
8236 break;
8237 }
8238 case Intrinsic::s390_vsldb: {
8239 // VECTOR SHIFT LEFT DOUBLE BY BYTE
8240 assert(VT == MVT::v16i8 && "Unexpected type.");
8241 unsigned FirstIdx = Op.getConstantOperandVal(3);
8242 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
8243 unsigned NumSrc0Els = 16 - FirstIdx;
8244 SrcDemE = APInt(NumElts, 0);
8245 if (OpNo == 1) {
8246 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
8247 SrcDemE.insertBits(DemEls, FirstIdx);
8248 } else {
8249 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
8250 SrcDemE.insertBits(DemEls, 0);
8251 }
8252 break;
8253 }
8254 case Intrinsic::s390_vperm:
8255 SrcDemE = APInt::getAllOnes(NumElts);
8256 break;
8257 default:
8258 llvm_unreachable("Unhandled intrinsic.");
8259 break;
8260 }
8261 } else {
8262 switch (Opcode) {
8263 case SystemZISD::JOIN_DWORDS:
8264 // Scalar operand.
8265 SrcDemE = APInt(1, 1);
8266 break;
8267 case SystemZISD::SELECT_CCMASK:
8268 SrcDemE = DemandedElts;
8269 break;
8270 default:
8271 llvm_unreachable("Unhandled opcode.");
8272 break;
8273 }
8274 }
8275 return SrcDemE;
8276}
8277
8278static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
8279 const APInt &DemandedElts,
8280 const SelectionDAG &DAG, unsigned Depth,
8281 unsigned OpNo) {
8282 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8283 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8284 KnownBits LHSKnown =
8285 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8286 KnownBits RHSKnown =
8287 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8288 Known = LHSKnown.intersectWith(RHSKnown);
8289}
8290
8291void
8292SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
8293 KnownBits &Known,
8294 const APInt &DemandedElts,
8295 const SelectionDAG &DAG,
8296 unsigned Depth) const {
8297 Known.resetAll();
8298
8299 // Intrinsic CC result is returned in the two low bits.
8300 unsigned tmp0, tmp1; // not used
8301 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
8302 Known.Zero.setBitsFrom(2);
8303 return;
8304 }
8305 EVT VT = Op.getValueType();
8306 if (Op.getResNo() != 0 || VT == MVT::Untyped)
8307 return;
8308 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
8309 "KnownBits does not match VT in bitwidth");
8310 assert ((!VT.isVector() ||
8311 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
8312 "DemandedElts does not match VT number of elements");
8313 unsigned BitWidth = Known.getBitWidth();
8314 unsigned Opcode = Op.getOpcode();
8315 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8316 bool IsLogical = false;
8317 unsigned Id = Op.getConstantOperandVal(0);
8318 switch (Id) {
8319 case Intrinsic::s390_vpksh: // PACKS
8320 case Intrinsic::s390_vpksf:
8321 case Intrinsic::s390_vpksg:
8322 case Intrinsic::s390_vpkshs: // PACKS_CC
8323 case Intrinsic::s390_vpksfs:
8324 case Intrinsic::s390_vpksgs:
8325 case Intrinsic::s390_vpklsh: // PACKLS
8326 case Intrinsic::s390_vpklsf:
8327 case Intrinsic::s390_vpklsg:
8328 case Intrinsic::s390_vpklshs: // PACKLS_CC
8329 case Intrinsic::s390_vpklsfs:
8330 case Intrinsic::s390_vpklsgs:
8331 case Intrinsic::s390_vpdi:
8332 case Intrinsic::s390_vsldb:
8333 case Intrinsic::s390_vperm:
8334 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
8335 break;
8336 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8337 case Intrinsic::s390_vuplhh:
8338 case Intrinsic::s390_vuplhf:
8339 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8340 case Intrinsic::s390_vupllh:
8341 case Intrinsic::s390_vupllf:
8342 IsLogical = true;
8343 [[fallthrough]];
8344 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8345 case Intrinsic::s390_vuphh:
8346 case Intrinsic::s390_vuphf:
8347 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8348 case Intrinsic::s390_vuplhw:
8349 case Intrinsic::s390_vuplf: {
8350 SDValue SrcOp = Op.getOperand(1);
8351 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
8352 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
8353 if (IsLogical) {
8354 Known = Known.zext(BitWidth);
8355 } else
8356 Known = Known.sext(BitWidth);
8357 break;
8358 }
8359 default:
8360 break;
8361 }
8362 } else {
8363 switch (Opcode) {
8366 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
8367 break;
8368 case SystemZISD::REPLICATE: {
8369 SDValue SrcOp = Op.getOperand(0);
8370 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
8371 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
8372 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
8373 break;
8374 }
8375 default:
8376 break;
8377 }
8378 }
8379
8380 // Known has the width of the source operand(s). Adjust if needed to match
8381 // the passed bitwidth.
8382 if (Known.getBitWidth() != BitWidth)
8383 Known = Known.anyextOrTrunc(BitWidth);
8384}
8385
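// Helper for ComputeNumSignBitsForTargetNode: take the smaller sign-bit count
// of the two source operands and adjust it for the narrowing performed by the
// PACK family of operations.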
8386static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
8387 const SelectionDAG &DAG, unsigned Depth,
8388 unsigned OpNo) {
8389 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8390 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8391 if (LHS == 1) return 1; // Early out.
8392 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8393 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8394 if (RHS == 1) return 1; // Early out.
8395 unsigned Common = std::min(LHS, RHS);
8396 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
8397 EVT VT = Op.getValueType();
8398 unsigned VTBits = VT.getScalarSizeInBits();
8399 if (SrcBitWidth > VTBits) { // PACK
8400 unsigned SrcExtraBits = SrcBitWidth - VTBits;
8401 if (Common > SrcExtraBits)
8402 return (Common - SrcExtraBits);
8403 return 1;
8404 }
8405 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
8406 return Common;
8407}
8408
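// Return a conservative lower bound on the number of sign bits in a
// SystemZ-specific node or intrinsic result, e.g. the vector pack and unpack
// intrinsics handled below.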
8409unsigned
8410SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
8411 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8412 unsigned Depth) const {
8413 if (Op.getResNo() != 0)
8414 return 1;
8415 unsigned Opcode = Op.getOpcode();
8416 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8417 unsigned Id = Op.getConstantOperandVal(0);
8418 switch (Id) {
8419 case Intrinsic::s390_vpksh: // PACKS
8420 case Intrinsic::s390_vpksf:
8421 case Intrinsic::s390_vpksg:
8422 case Intrinsic::s390_vpkshs: // PACKS_CC
8423 case Intrinsic::s390_vpksfs:
8424 case Intrinsic::s390_vpksgs:
8425 case Intrinsic::s390_vpklsh: // PACKLS
8426 case Intrinsic::s390_vpklsf:
8427 case Intrinsic::s390_vpklsg:
8428 case Intrinsic::s390_vpklshs: // PACKLS_CC
8429 case Intrinsic::s390_vpklsfs:
8430 case Intrinsic::s390_vpklsgs:
8431 case Intrinsic::s390_vpdi:
8432 case Intrinsic::s390_vsldb:
8433 case Intrinsic::s390_vperm:
8434 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8435 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8436 case Intrinsic::s390_vuphh:
8437 case Intrinsic::s390_vuphf:
8438 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8439 case Intrinsic::s390_vuplhw:
8440 case Intrinsic::s390_vuplf: {
8441 SDValue PackedOp = Op.getOperand(1);
8442 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8443 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8444 EVT VT = Op.getValueType();
8445 unsigned VTBits = VT.getScalarSizeInBits();
8446 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8447 return Tmp;
8448 }
8449 default:
8450 break;
8451 }
8452 } else {
8453 switch (Opcode) {
8455 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8456 default:
8457 break;
8458 }
8459 }
8460
8461 return 1;
8462}
8463
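// Report SystemZ-specific nodes whose results are never undef or poison, so
// that callers of isGuaranteedNotToBeUndefOrPoison can treat them as
// well-defined values.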
8466 const APInt &DemandedElts, const SelectionDAG &DAG,
8467 bool PoisonOnly, unsigned Depth) const {
8468 switch (Op->getOpcode()) {
8471 return true;
8472 }
8473 return false;
8474}
8475
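// Return the interval, in bytes, at which the stack should be probed: the
// "stack-probe-size" function attribute (default 4096) rounded down to the
// stack alignment. For example, with 8-byte alignment a requested size of
// 4097 becomes 4096, and any value smaller than the alignment falls back to
// the alignment itself.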
8476unsigned
8477SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
8478 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8479 unsigned StackAlign = TFI->getStackAlignment();
8480 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
8481 "Unexpected stack alignment");
8482 // The default stack probe size is 4096 if the function has no
8483 // stack-probe-size attribute.
8484 unsigned StackProbeSize =
8485 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8486 // Round down to the stack alignment.
8487 StackProbeSize &= ~(StackAlign - 1);
8488 return StackProbeSize ? StackProbeSize : StackAlign;
8489}
8490
8491//===----------------------------------------------------------------------===//
8492// Custom insertion
8493//===----------------------------------------------------------------------===//
8494
8495 // Force base value Base into a register before MI. Return the register.
8496static Register forceReg(MachineInstr &MI, MachineOperand &Base,
8497 const SystemZInstrInfo *TII) {
8498 MachineBasicBlock *MBB = MI.getParent();
8499 MachineFunction &MF = *MBB->getParent();
8500 MachineRegisterInfo &MRI = MF.getRegInfo();
8501
8502 if (Base.isReg()) {
8503 // Copy Base into a new virtual register to help register coalescing in
8504 // cases with multiple uses.
8505 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8506 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8507 .add(Base);
8508 return Reg;
8509 }
8510
8511 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8512 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8513 .add(Base)
8514 .addImm(0)
8515 .addReg(0);
8516 return Reg;
8517}
8518
8519// The CC operand of MI might be missing a kill marker because there
8520// were multiple uses of CC, and ISel didn't know which to mark.
8521// Figure out whether MI should have had a kill marker.
8522static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8523 // Scan forward through BB for a use/def of CC.
8524 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8525 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8526 const MachineInstr& mi = *miI;
8527 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
8528 return false;
8529 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
8530 break; // Should have kill-flag - update below.
8531 }
8532
8533 // If we hit the end of the block, check whether CC is live into a
8534 // successor.
8535 if (miI == MBB->end()) {
8536 for (const MachineBasicBlock *Succ : MBB->successors())
8537 if (Succ->isLiveIn(SystemZ::CC))
8538 return false;
8539 }
8540
8541 return true;
8542}
8543
8544// Return true if it is OK for this Select pseudo-opcode to be cascaded
8545// together with other Select pseudo-opcodes into a single basic-block with
8546// a conditional jump around it.
8548 switch (MI.getOpcode()) {
8549 case SystemZ::Select32:
8550 case SystemZ::Select64:
8551 case SystemZ::Select128:
8552 case SystemZ::SelectF32:
8553 case SystemZ::SelectF64:
8554 case SystemZ::SelectF128:
8555 case SystemZ::SelectVR32:
8556 case SystemZ::SelectVR64:
8557 case SystemZ::SelectVR128:
8558 return true;
8559
8560 default:
8561 return false;
8562 }
8563}
8564
8565// Helper function, which inserts PHI functions into SinkMBB:
8566// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8567// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8569 MachineBasicBlock *TrueMBB,
8570 MachineBasicBlock *FalseMBB,
8571 MachineBasicBlock *SinkMBB) {
8572 MachineFunction *MF = TrueMBB->getParent();
8574
8575 MachineInstr *FirstMI = Selects.front();
8576 unsigned CCValid = FirstMI->getOperand(3).getImm();
8577 unsigned CCMask = FirstMI->getOperand(4).getImm();
8578
8579 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8580
8581 // As we are creating the PHIs, we have to be careful if there is more than
8582 // one. Later Selects may reference the results of earlier Selects, but later
8583 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8584 // That also means that PHI construction must work forward from earlier to
8585 // later, and that the code must maintain a mapping from each earlier PHI's
8586 // destination register to the registers that went into the PHI.
8588
8589 for (auto *MI : Selects) {
8590 Register DestReg = MI->getOperand(0).getReg();
8591 Register TrueReg = MI->getOperand(1).getReg();
8592 Register FalseReg = MI->getOperand(2).getReg();
8593
8594 // If this Select we are generating uses the opposite condition from
8595 // the jump we generated, then we have to swap the operands for the
8596 // PHI that is going to be generated.
8597 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8598 std::swap(TrueReg, FalseReg);
8599
8600 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
8601 TrueReg = It->second.first;
8602
8603 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
8604 FalseReg = It->second.second;
8605
8606 DebugLoc DL = MI->getDebugLoc();
8607 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8608 .addReg(TrueReg).addMBB(TrueMBB)
8609 .addReg(FalseReg).addMBB(FalseMBB);
8610
8611 // Add this PHI to the rewrite table.
8612 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8613 }
8614
8616}
8617
8619SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8620 MachineBasicBlock *BB) const {
8621 MachineFunction &MF = *BB->getParent();
8622 MachineFrameInfo &MFI = MF.getFrameInfo();
8623 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8624 assert(TFL->hasReservedCallFrame(MF) &&
8625 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8626 (void)TFL;
8627 // Get the MaxCallFrameSize value and erase MI since it serves no further
8628 // purpose as the call frame is statically reserved in the prolog. Set
8629 // AdjustsStack as MI is *not* mapped as a frame instruction.
8630 uint32_t NumBytes = MI.getOperand(0).getImm();
8631 if (NumBytes > MFI.getMaxCallFrameSize())
8632 MFI.setMaxCallFrameSize(NumBytes);
8633 MFI.setAdjustsStack(true);
8634
8635 MI.eraseFromParent();
8636 return BB;
8637}
8638
8639// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8641SystemZTargetLowering::emitSelect(MachineInstr &MI,
8642 MachineBasicBlock *MBB) const {
8643 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8644 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8645
8646 unsigned CCValid = MI.getOperand(3).getImm();
8647 unsigned CCMask = MI.getOperand(4).getImm();
8648
8649 // If we have a sequence of Select* pseudo instructions using the
8650 // same condition code value, we want to expand all of them into
8651 // a single pair of basic blocks using the same condition.
8652 SmallVector<MachineInstr*, 8> Selects;
8653 SmallVector<MachineInstr*, 8> DbgValues;
8654 Selects.push_back(&MI);
8655 unsigned Count = 0;
8656 for (MachineInstr &NextMI : llvm::make_range(
8657 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8658 if (isSelectPseudo(NextMI)) {
8659 assert(NextMI.getOperand(3).getImm() == CCValid &&
8660 "Bad CCValid operands since CC was not redefined.");
8661 if (NextMI.getOperand(4).getImm() == CCMask ||
8662 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8663 Selects.push_back(&NextMI);
8664 continue;
8665 }
8666 break;
8667 }
8668 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8669 NextMI.usesCustomInsertionHook())
8670 break;
8671 bool User = false;
8672 for (auto *SelMI : Selects)
8673 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8674 User = true;
8675 break;
8676 }
8677 if (NextMI.isDebugInstr()) {
8678 if (User) {
8679 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8680 DbgValues.push_back(&NextMI);
8681 }
8682 } else if (User || ++Count > 20)
8683 break;
8684 }
8685
8686 MachineInstr *LastMI = Selects.back();
8687 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8688 checkCCKill(*LastMI, MBB));
8689 MachineBasicBlock *StartMBB = MBB;
8691 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8692
8693 // Unless CC was killed in the last Select instruction, mark it as
8694 // live-in to both FalseMBB and JoinMBB.
8695 if (!CCKilled) {
8696 FalseMBB->addLiveIn(SystemZ::CC);
8697 JoinMBB->addLiveIn(SystemZ::CC);
8698 }
8699
8700 // StartMBB:
8701 // BRC CCMask, JoinMBB
8702 // # fallthrough to FalseMBB
8703 MBB = StartMBB;
8704 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8705 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8706 MBB->addSuccessor(JoinMBB);
8707 MBB->addSuccessor(FalseMBB);
8708
8709 // FalseMBB:
8710 // # fallthrough to JoinMBB
8711 MBB = FalseMBB;
8712 MBB->addSuccessor(JoinMBB);
8713
8714 // JoinMBB:
8715 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8716 // ...
8717 MBB = JoinMBB;
8718 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8719 for (auto *SelMI : Selects)
8720 SelMI->eraseFromParent();
8721
8723 for (auto *DbgMI : DbgValues)
8724 MBB->splice(InsertPos, StartMBB, DbgMI);
8725
8726 return JoinMBB;
8727}
8728
8729// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8730// StoreOpcode is the store to use and Invert says whether the store should
8731// happen when the condition is false rather than true. If a STORE ON
8732// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
8733MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8735 unsigned StoreOpcode,
8736 unsigned STOCOpcode,
8737 bool Invert) const {
8738 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8739
8740 Register SrcReg = MI.getOperand(0).getReg();
8741 MachineOperand Base = MI.getOperand(1);
8742 int64_t Disp = MI.getOperand(2).getImm();
8743 Register IndexReg = MI.getOperand(3).getReg();
8744 unsigned CCValid = MI.getOperand(4).getImm();
8745 unsigned CCMask = MI.getOperand(5).getImm();
8746 DebugLoc DL = MI.getDebugLoc();
8747
8748 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8749
8750 // ISel pattern matching also adds a load memory operand of the same
8751 // address, so take special care to find the storing memory operand.
8752 MachineMemOperand *MMO = nullptr;
8753 for (auto *I : MI.memoperands())
8754 if (I->isStore()) {
8755 MMO = I;
8756 break;
8757 }
8758
8759 // Use STOCOpcode if possible. We could use different store patterns in
8760 // order to avoid matching the index register, but the performance trade-offs
8761 // might be more complicated in that case.
8762 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8763 if (Invert)
8764 CCMask ^= CCValid;
8765
8766 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8767 .addReg(SrcReg)
8768 .add(Base)
8769 .addImm(Disp)
8770 .addImm(CCValid)
8771 .addImm(CCMask)
8772 .addMemOperand(MMO);
8773
8774 MI.eraseFromParent();
8775 return MBB;
8776 }
8777
8778 // Get the condition needed to branch around the store.
8779 if (!Invert)
8780 CCMask ^= CCValid;
8781
8782 MachineBasicBlock *StartMBB = MBB;
8784 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8785
8786 // Unless CC was killed in the CondStore instruction, mark it as
8787 // live-in to both FalseMBB and JoinMBB.
8788 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
8789 !checkCCKill(MI, JoinMBB)) {
8790 FalseMBB->addLiveIn(SystemZ::CC);
8791 JoinMBB->addLiveIn(SystemZ::CC);
8792 }
8793
8794 // StartMBB:
8795 // BRC CCMask, JoinMBB
8796 // # fallthrough to FalseMBB
8797 MBB = StartMBB;
8798 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8799 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8800 MBB->addSuccessor(JoinMBB);
8801 MBB->addSuccessor(FalseMBB);
8802
8803 // FalseMBB:
8804 // store %SrcReg, %Disp(%Index,%Base)
8805 // # fallthrough to JoinMBB
8806 MBB = FalseMBB;
8807 BuildMI(MBB, DL, TII->get(StoreOpcode))
8808 .addReg(SrcReg)
8809 .add(Base)
8810 .addImm(Disp)
8811 .addReg(IndexReg)
8812 .addMemOperand(MMO);
8813 MBB->addSuccessor(JoinMBB);
8814
8815 MI.eraseFromParent();
8816 return JoinMBB;
8817}
8818
8819// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8821SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8823 bool Unsigned) const {
8824 MachineFunction &MF = *MBB->getParent();
8825 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8827
8828 // Synthetic instruction to compare 128-bit values.
8829 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8830 Register Op0 = MI.getOperand(0).getReg();
8831 Register Op1 = MI.getOperand(1).getReg();
8832
8833 MachineBasicBlock *StartMBB = MBB;
8835 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8836
8837 // StartMBB:
8838 //
8839 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8840 // Swap the inputs to get:
8841 // CC 1 if high(Op0) > high(Op1)
8842 // CC 2 if high(Op0) < high(Op1)
8843 // CC 0 if high(Op0) == high(Op1)
8844 //
8845 // If CC != 0, we're done, so jump over the next instruction.
8846 //
8847 // VEC[L]G Op1, Op0
8848 // JNE JoinMBB
8849 // # fallthrough to HiEqMBB
8850 MBB = StartMBB;
8851 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8852 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8853 .addReg(Op1).addReg(Op0);
8854 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8856 MBB->addSuccessor(JoinMBB);
8857 MBB->addSuccessor(HiEqMBB);
8858
8859 // HiEqMBB:
8860 //
8861 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8862 // Since we already know the high parts are equal, the CC
8863 // result will only depend on the low parts:
8864 // CC 1 if low(Op0) > low(Op1)
8865 // CC 3 if low(Op0) <= low(Op1)
8866 //
8867 // VCHLGS Tmp, Op0, Op1
8868 // # fallthrough to JoinMBB
8869 MBB = HiEqMBB;
8870 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8871 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8872 .addReg(Op0).addReg(Op1);
8873 MBB->addSuccessor(JoinMBB);
8874
8875 // Mark CC as live-in to JoinMBB.
8876 JoinMBB->addLiveIn(SystemZ::CC);
8877
8878 MI.eraseFromParent();
8879 return JoinMBB;
8880}
8881
8882// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8883// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8884// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8885// whether the field should be inverted after performing BinOpcode (e.g. for
8886// NAND).
8887MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8888 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8889 bool Invert) const {
8890 MachineFunction &MF = *MBB->getParent();
8891 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8893
8894 // Extract the operands. Base can be a register or a frame index.
8895 // Src2 can be a register or immediate.
8896 Register Dest = MI.getOperand(0).getReg();
8897 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8898 int64_t Disp = MI.getOperand(2).getImm();
8899 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8900 Register BitShift = MI.getOperand(4).getReg();
8901 Register NegBitShift = MI.getOperand(5).getReg();
8902 unsigned BitSize = MI.getOperand(6).getImm();
8903 DebugLoc DL = MI.getDebugLoc();
8904
8905 // Get the right opcodes for the displacement.
8906 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8907 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8908 assert(LOpcode && CSOpcode && "Displacement out of range");
8909
8910 // Create virtual registers for temporary results.
8911 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8912 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8913 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8914 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8915 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8916
8917 // Insert a basic block for the main loop.
8918 MachineBasicBlock *StartMBB = MBB;
8920 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8921
8922 // StartMBB:
8923 // ...
8924 // %OrigVal = L Disp(%Base)
8925 // # fall through to LoopMBB
8926 MBB = StartMBB;
8927 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8928 MBB->addSuccessor(LoopMBB);
8929
8930 // LoopMBB:
8931 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8932 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8933 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8934 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8935 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8936 // JNE LoopMBB
8937 // # fall through to DoneMBB
8938 MBB = LoopMBB;
8939 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8940 .addReg(OrigVal).addMBB(StartMBB)
8941 .addReg(Dest).addMBB(LoopMBB);
8942 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8943 .addReg(OldVal).addReg(BitShift).addImm(0);
8944 if (Invert) {
8945 // Perform the operation normally and then invert every bit of the field.
8946 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8947 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8948 // XILF with the upper BitSize bits set.
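// For example, with BitSize == 8 the immediate is -1U << 24 == 0xff000000,
// so only the rotated field in the high byte is inverted.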
8949 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8950 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8951 } else if (BinOpcode)
8952 // A simple binary operation.
8953 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8954 .addReg(RotatedOldVal)
8955 .add(Src2);
8956 else
8957 // Use RISBG to rotate Src2 into position and use it to replace the
8958 // field in RotatedOldVal.
8959 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8960 .addReg(RotatedOldVal).addReg(Src2.getReg())
8961 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8962 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8963 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8964 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8965 .addReg(OldVal)
8966 .addReg(NewVal)
8967 .add(Base)
8968 .addImm(Disp);
8969 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8971 MBB->addSuccessor(LoopMBB);
8972 MBB->addSuccessor(DoneMBB);
8973
8974 MI.eraseFromParent();
8975 return DoneMBB;
8976}
8977
8978// Implement EmitInstrWithCustomInserter for subword pseudo
8979// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8980// instruction that should be used to compare the current field with the
8981// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8982// for when the current field should be kept.
8983MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8984 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8985 unsigned KeepOldMask) const {
8986 MachineFunction &MF = *MBB->getParent();
8987 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8989
8990 // Extract the operands. Base can be a register or a frame index.
8991 Register Dest = MI.getOperand(0).getReg();
8992 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8993 int64_t Disp = MI.getOperand(2).getImm();
8994 Register Src2 = MI.getOperand(3).getReg();
8995 Register BitShift = MI.getOperand(4).getReg();
8996 Register NegBitShift = MI.getOperand(5).getReg();
8997 unsigned BitSize = MI.getOperand(6).getImm();
8998 DebugLoc DL = MI.getDebugLoc();
8999
9000 // Get the right opcodes for the displacement.
9001 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
9002 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
9003 assert(LOpcode && CSOpcode && "Displacement out of range");
9004
9005 // Create virtual registers for temporary results.
9006 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9007 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9008 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9009 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9010 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9011 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9012
9013 // Insert 3 basic blocks for the loop.
9014 MachineBasicBlock *StartMBB = MBB;
9016 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9017 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
9018 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
9019
9020 // StartMBB:
9021 // ...
9022 // %OrigVal = L Disp(%Base)
9023 // # fall through to LoopMBB
9024 MBB = StartMBB;
9025 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
9026 MBB->addSuccessor(LoopMBB);
9027
9028 // LoopMBB:
9029 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
9030 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
9031 // CompareOpcode %RotatedOldVal, %Src2
9032 // BRC KeepOldMask, UpdateMBB
9033 MBB = LoopMBB;
9034 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
9035 .addReg(OrigVal).addMBB(StartMBB)
9036 .addReg(Dest).addMBB(UpdateMBB);
9037 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
9038 .addReg(OldVal).addReg(BitShift).addImm(0);
9039 BuildMI(MBB, DL, TII->get(CompareOpcode))
9040 .addReg(RotatedOldVal).addReg(Src2);
9041 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9042 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
9043 MBB->addSuccessor(UpdateMBB);
9044 MBB->addSuccessor(UseAltMBB);
9045
9046 // UseAltMBB:
9047 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
9048 // # fall through to UpdateMBB
9049 MBB = UseAltMBB;
9050 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
9051 .addReg(RotatedOldVal).addReg(Src2)
9052 .addImm(32).addImm(31 + BitSize).addImm(0);
9053 MBB->addSuccessor(UpdateMBB);
9054
9055 // UpdateMBB:
9056 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
9057 // [ %RotatedAltVal, UseAltMBB ]
9058 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
9059 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
9060 // JNE LoopMBB
9061 // # fall through to DoneMBB
9062 MBB = UpdateMBB;
9063 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
9064 .addReg(RotatedOldVal).addMBB(LoopMBB)
9065 .addReg(RotatedAltVal).addMBB(UseAltMBB);
9066 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
9067 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
9068 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
9069 .addReg(OldVal)
9070 .addReg(NewVal)
9071 .add(Base)
9072 .addImm(Disp);
9073 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9075 MBB->addSuccessor(LoopMBB);
9076 MBB->addSuccessor(DoneMBB);
9077
9078 MI.eraseFromParent();
9079 return DoneMBB;
9080}
9081
9082// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
9083// instruction MI.
9085SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
9086 MachineBasicBlock *MBB) const {
9087 MachineFunction &MF = *MBB->getParent();
9088 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9090
9091 // Extract the operands. Base can be a register or a frame index.
9092 Register Dest = MI.getOperand(0).getReg();
9093 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
9094 int64_t Disp = MI.getOperand(2).getImm();
9095 Register CmpVal = MI.getOperand(3).getReg();
9096 Register OrigSwapVal = MI.getOperand(4).getReg();
9097 Register BitShift = MI.getOperand(5).getReg();
9098 Register NegBitShift = MI.getOperand(6).getReg();
9099 int64_t BitSize = MI.getOperand(7).getImm();
9100 DebugLoc DL = MI.getDebugLoc();
9101
9102 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
9103
9104 // Get the right opcodes for the displacement and zero-extension.
9105 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
9106 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
9107 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
9108 assert(LOpcode && CSOpcode && "Displacement out of range");
9109
9110 // Create virtual registers for temporary results.
9111 Register OrigOldVal = MRI.createVirtualRegister(RC);
9112 Register OldVal = MRI.createVirtualRegister(RC);
9113 Register SwapVal = MRI.createVirtualRegister(RC);
9114 Register StoreVal = MRI.createVirtualRegister(RC);
9115 Register OldValRot = MRI.createVirtualRegister(RC);
9116 Register RetryOldVal = MRI.createVirtualRegister(RC);
9117 Register RetrySwapVal = MRI.createVirtualRegister(RC);
9118
9119 // Insert 2 basic blocks for the loop.
9120 MachineBasicBlock *StartMBB = MBB;
9122 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9123 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
9124
9125 // StartMBB:
9126 // ...
9127 // %OrigOldVal = L Disp(%Base)
9128 // # fall through to LoopMBB
9129 MBB = StartMBB;
9130 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
9131 .add(Base)
9132 .addImm(Disp)
9133 .addReg(0);
9134 MBB->addSuccessor(LoopMBB);
9135
9136 // LoopMBB:
9137 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
9138 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
9139 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
9140 // ^^ The low BitSize bits contain the field
9141 // of interest.
9142 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
9143 // ^^ Replace the upper 32-BitSize bits of the
9144 // swap value with those that we loaded and rotated.
9145 // %Dest = LL[CH] %OldValRot
9146 // CR %Dest, %CmpVal
9147 // JNE DoneMBB
9148 // # Fall through to SetMBB
9149 MBB = LoopMBB;
9150 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
9151 .addReg(OrigOldVal).addMBB(StartMBB)
9152 .addReg(RetryOldVal).addMBB(SetMBB);
9153 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
9154 .addReg(OrigSwapVal).addMBB(StartMBB)
9155 .addReg(RetrySwapVal).addMBB(SetMBB);
9156 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
9157 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
9158 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
9159 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
9160 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
9161 .addReg(OldValRot);
9162 BuildMI(MBB, DL, TII->get(SystemZ::CR))
9163 .addReg(Dest).addReg(CmpVal);
9164 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9167 MBB->addSuccessor(DoneMBB);
9168 MBB->addSuccessor(SetMBB);
9169
9170 // SetMBB:
9171 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
9172 // ^^ Rotate the new field to its proper position.
9173 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
9174 // JNE LoopMBB
9175 // # fall through to ExitMBB
9176 MBB = SetMBB;
9177 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
9178 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
9179 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
9180 .addReg(OldVal)
9181 .addReg(StoreVal)
9182 .add(Base)
9183 .addImm(Disp);
9184 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9186 MBB->addSuccessor(LoopMBB);
9187 MBB->addSuccessor(DoneMBB);
9188
9189 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
9190 // to the block after the loop. At this point, CC may have been defined
9191 // either by the CR in LoopMBB or by the CS in SetMBB.
9192 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
9193 DoneMBB->addLiveIn(SystemZ::CC);
9194
9195 MI.eraseFromParent();
9196 return DoneMBB;
9197}
9198
9199// Emit a move from two GR64s to a GR128.
9201SystemZTargetLowering::emitPair128(MachineInstr &MI,
9202 MachineBasicBlock *MBB) const {
9203 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9204 const DebugLoc &DL = MI.getDebugLoc();
9205
9206 Register Dest = MI.getOperand(0).getReg();
9207 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
9208 .add(MI.getOperand(1))
9209 .addImm(SystemZ::subreg_h64)
9210 .add(MI.getOperand(2))
9211 .addImm(SystemZ::subreg_l64);
9212 MI.eraseFromParent();
9213 return MBB;
9214}
9215
9216// Emit an extension from a GR64 to a GR128. ClearEven is true
9217// if the high register of the GR128 value must be cleared or false if
9218// it's "don't care".
9219MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
9221 bool ClearEven) const {
9222 MachineFunction &MF = *MBB->getParent();
9223 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9225 DebugLoc DL = MI.getDebugLoc();
9226
9227 Register Dest = MI.getOperand(0).getReg();
9228 Register Src = MI.getOperand(1).getReg();
9229 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
9230
9231 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
9232 if (ClearEven) {
9233 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
9234 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9235
9236 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
9237 .addImm(0);
9238 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
9239 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
9240 In128 = NewIn128;
9241 }
9242 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
9243 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
9244
9245 MI.eraseFromParent();
9246 return MBB;
9247}
9248
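// Expand an MVC/NC/OC/XC/CLC or memset pseudo. Small, known lengths are
// emitted as straight-line memory instructions; longer or variable lengths
// are lowered to a loop of 256-byte operations, with any remainder handled
// by EXRL (register form) or by trailing straight-line code (immediate form).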
9250SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
9252 unsigned Opcode, bool IsMemset) const {
9253 MachineFunction &MF = *MBB->getParent();
9254 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9256 DebugLoc DL = MI.getDebugLoc();
9257
9258 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
9259 uint64_t DestDisp = MI.getOperand(1).getImm();
9260 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
9261 uint64_t SrcDisp;
9262
9263 // Fold the displacement Disp if it is out of range.
9264 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
9265 if (!isUInt<12>(Disp)) {
9266 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9267 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
9268 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
9269 .add(Base).addImm(Disp).addReg(0);
9270 Base = MachineOperand::CreateReg(Reg, false);
9271 Disp = 0;
9272 }
9273 };
9274
9275 if (!IsMemset) {
9276 SrcBase = earlyUseOperand(MI.getOperand(2));
9277 SrcDisp = MI.getOperand(3).getImm();
9278 } else {
9279 SrcBase = DestBase;
9280 SrcDisp = DestDisp++;
9281 foldDisplIfNeeded(DestBase, DestDisp);
9282 }
9283
9284 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
9285 bool IsImmForm = LengthMO.isImm();
9286 bool IsRegForm = !IsImmForm;
9287
9288 // Build and insert one Opcode of Length, with special treatment for memset.
9289 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
9291 MachineOperand DBase, uint64_t DDisp,
9293 unsigned Length) -> void {
9294 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
9295 if (IsMemset) {
9296 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
9297 if (ByteMO.isImm())
9298 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
9299 .add(SBase).addImm(SDisp).add(ByteMO);
9300 else
9301 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
9302 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
9303 if (--Length == 0)
9304 return;
9305 }
9306 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
9307 .add(DBase).addImm(DDisp).addImm(Length)
9308 .add(SBase).addImm(SDisp)
9309 .setMemRefs(MI.memoperands());
9310 };
9311
9312 bool NeedsLoop = false;
9313 uint64_t ImmLength = 0;
9314 Register LenAdjReg = SystemZ::NoRegister;
9315 if (IsImmForm) {
9316 ImmLength = LengthMO.getImm();
9317 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
9318 if (ImmLength == 0) {
9319 MI.eraseFromParent();
9320 return MBB;
9321 }
9322 if (Opcode == SystemZ::CLC) {
9323 if (ImmLength > 3 * 256)
9324 // A two-CLC sequence is a clear win over a loop, not least because
9325 // it needs only one branch. A three-CLC sequence needs the same
9326 // number of branches as a loop (i.e. 2), but is shorter. That
9327 // brings us to lengths greater than 768 bytes. It seems relatively
9328 // likely that a difference will be found within the first 768 bytes,
9329 // so we just optimize for the smallest number of branch
9330 // instructions, in order to avoid polluting the prediction buffer
9331 // too much.
9332 NeedsLoop = true;
9333 } else if (ImmLength > 6 * 256)
9334 // The heuristic we use is to prefer loops for anything that would
9335 // require 7 or more MVCs. With these kinds of sizes there isn't much
9336 // to choose between straight-line code and looping code, since the
9337 // time will be dominated by the MVCs themselves.
9338 NeedsLoop = true;
9339 } else {
9340 NeedsLoop = true;
9341 LenAdjReg = LengthMO.getReg();
9342 }
9343
9344 // When generating more than one CLC, all but the last will need to
9345 // branch to the end when a difference is found.
9346 MachineBasicBlock *EndMBB =
9347 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
9349 : nullptr);
9350
9351 if (NeedsLoop) {
9352 Register StartCountReg =
9353 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9354 if (IsImmForm) {
9355 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
9356 ImmLength &= 255;
9357 } else {
9358 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
9359 .addReg(LenAdjReg)
9360 .addReg(0)
9361 .addImm(8);
9362 }
9363
9364 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
9365 auto loadZeroAddress = [&]() -> MachineOperand {
9366 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9367 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
9368 return MachineOperand::CreateReg(Reg, false);
9369 };
9370 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
9371 DestBase = loadZeroAddress();
9372 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
9373 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
9374
9375 MachineBasicBlock *StartMBB = nullptr;
9376 MachineBasicBlock *LoopMBB = nullptr;
9377 MachineBasicBlock *NextMBB = nullptr;
9378 MachineBasicBlock *DoneMBB = nullptr;
9379 MachineBasicBlock *AllDoneMBB = nullptr;
9380
9381 Register StartSrcReg = forceReg(MI, SrcBase, TII);
9382 Register StartDestReg =
9383 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
9384
9385 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
9386 Register ThisSrcReg = MRI.createVirtualRegister(RC);
9387 Register ThisDestReg =
9388 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
9389 Register NextSrcReg = MRI.createVirtualRegister(RC);
9390 Register NextDestReg =
9391 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
9392 RC = &SystemZ::GR64BitRegClass;
9393 Register ThisCountReg = MRI.createVirtualRegister(RC);
9394 Register NextCountReg = MRI.createVirtualRegister(RC);
9395
9396 if (IsRegForm) {
9397 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9398 StartMBB = SystemZ::emitBlockAfter(MBB);
9399 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9400 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9401 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
9402
9403 // MBB:
9404 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
9405 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9406 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
9407 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9409 .addMBB(AllDoneMBB);
9410 MBB->addSuccessor(AllDoneMBB);
9411 if (!IsMemset)
9412 MBB->addSuccessor(StartMBB);
9413 else {
9414 // MemsetOneCheckMBB:
9415 // # Jump to MemsetOneMBB for a memset of length 1, or
9416 // # fall thru to StartMBB.
9417 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
9418 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
9419 MBB->addSuccessor(MemsetOneCheckMBB);
9420 MBB = MemsetOneCheckMBB;
9421 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9422 .addReg(LenAdjReg).addImm(-1);
9423 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9425 .addMBB(MemsetOneMBB);
9426 MBB->addSuccessor(MemsetOneMBB, {10, 100});
9427 MBB->addSuccessor(StartMBB, {90, 100});
9428
9429 // MemsetOneMBB:
9430 // # Jump back to AllDoneMBB after a single MVI or STC.
9431 MBB = MemsetOneMBB;
9432 insertMemMemOp(MBB, MBB->end(),
9433 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9434 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9435 1);
9436 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9437 MBB->addSuccessor(AllDoneMBB);
9438 }
9439
9440 // StartMBB:
9441 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9442 MBB = StartMBB;
9443 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9444 .addReg(StartCountReg).addImm(0);
9445 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9447 .addMBB(DoneMBB);
9448 MBB->addSuccessor(DoneMBB);
9449 MBB->addSuccessor(LoopMBB);
9450 }
9451 else {
9452 StartMBB = MBB;
9453 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9454 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9455 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9456
9457 // StartMBB:
9458 // # fall through to LoopMBB
9459 MBB->addSuccessor(LoopMBB);
9460
9461 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9462 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9463 if (EndMBB && !ImmLength)
9464 // If the loop handled the whole CLC range, DoneMBB will be empty with
9465 // CC live-through into EndMBB, so add it as live-in.
9466 DoneMBB->addLiveIn(SystemZ::CC);
9467 }
9468
9469 // LoopMBB:
9470 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9471 // [ %NextDestReg, NextMBB ]
9472 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9473 // [ %NextSrcReg, NextMBB ]
9474 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9475 // [ %NextCountReg, NextMBB ]
9476 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9477 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9478 // ( JLH EndMBB )
9479 //
9480 // The prefetch is used only for MVC. The JLH is used only for CLC.
9481 MBB = LoopMBB;
9482 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9483 .addReg(StartDestReg).addMBB(StartMBB)
9484 .addReg(NextDestReg).addMBB(NextMBB);
9485 if (!HaveSingleBase)
9486 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9487 .addReg(StartSrcReg).addMBB(StartMBB)
9488 .addReg(NextSrcReg).addMBB(NextMBB);
9489 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9490 .addReg(StartCountReg).addMBB(StartMBB)
9491 .addReg(NextCountReg).addMBB(NextMBB);
9492 if (Opcode == SystemZ::MVC)
9493 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9495 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9496 insertMemMemOp(MBB, MBB->end(),
9497 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9498 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9499 if (EndMBB) {
9500 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9502 .addMBB(EndMBB);
9503 MBB->addSuccessor(EndMBB);
9504 MBB->addSuccessor(NextMBB);
9505 }
9506
9507 // NextMBB:
9508 // %NextDestReg = LA 256(%ThisDestReg)
9509 // %NextSrcReg = LA 256(%ThisSrcReg)
9510 // %NextCountReg = AGHI %ThisCountReg, -1
9511 // CGHI %NextCountReg, 0
9512 // JLH LoopMBB
9513 // # fall through to DoneMBB
9514 //
9515 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9516 MBB = NextMBB;
9517 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9518 .addReg(ThisDestReg).addImm(256).addReg(0);
9519 if (!HaveSingleBase)
9520 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9521 .addReg(ThisSrcReg).addImm(256).addReg(0);
9522 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9523 .addReg(ThisCountReg).addImm(-1);
9524 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9525 .addReg(NextCountReg).addImm(0);
9526 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9528 .addMBB(LoopMBB);
9529 MBB->addSuccessor(LoopMBB);
9530 MBB->addSuccessor(DoneMBB);
9531
9532 MBB = DoneMBB;
9533 if (IsRegForm) {
9534 // DoneMBB:
9535 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9536 // # Use EXecute Relative Long for the remainder of the bytes. The target
9537 // instruction of the EXRL will have a length field of 1 since 0 is an
9538 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9539 // 0xff) + 1.
9540 // # Fall through to AllDoneMBB.
9541 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9542 Register RemDestReg = HaveSingleBase ? RemSrcReg
9543 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9544 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9545 .addReg(StartDestReg).addMBB(StartMBB)
9546 .addReg(NextDestReg).addMBB(NextMBB);
9547 if (!HaveSingleBase)
9548 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9549 .addReg(StartSrcReg).addMBB(StartMBB)
9550 .addReg(NextSrcReg).addMBB(NextMBB);
9551 if (IsMemset)
9552 insertMemMemOp(MBB, MBB->end(),
9553 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9554 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9555 MachineInstrBuilder EXRL_MIB =
9556 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9557 .addImm(Opcode)
9558 .addReg(LenAdjReg)
9559 .addReg(RemDestReg).addImm(DestDisp)
9560 .addReg(RemSrcReg).addImm(SrcDisp);
9561 MBB->addSuccessor(AllDoneMBB);
9562 MBB = AllDoneMBB;
9563 if (Opcode != SystemZ::MVC) {
9564 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9565 if (EndMBB)
9566 MBB->addLiveIn(SystemZ::CC);
9567 }
9568 }
9570 }
9571
9572 // Handle any remaining bytes with straight-line code.
9573 while (ImmLength > 0) {
9574 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9575 // The previous iteration might have created out-of-range displacements.
9576 // Apply them using LA/LAY if so.
9577 foldDisplIfNeeded(DestBase, DestDisp);
9578 foldDisplIfNeeded(SrcBase, SrcDisp);
9579 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9580 DestDisp += ThisLength;
9581 SrcDisp += ThisLength;
9582 ImmLength -= ThisLength;
9583 // If there's another CLC to go, branch to the end if a difference
9584 // was found.
9585 if (EndMBB && ImmLength > 0) {
9587 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9589 .addMBB(EndMBB);
9590 MBB->addSuccessor(EndMBB);
9591 MBB->addSuccessor(NextMBB);
9592 MBB = NextMBB;
9593 }
9594 }
9595 if (EndMBB) {
9596 MBB->addSuccessor(EndMBB);
9597 MBB = EndMBB;
9598 MBB->addLiveIn(SystemZ::CC);
9599 }
9600
9601 MI.eraseFromParent();
9602 return MBB;
9603}
9604
9605// Decompose string pseudo-instruction MI into a loop that continually performs
9606// Opcode until CC != 3.
9607MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9608 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9609 MachineFunction &MF = *MBB->getParent();
9610 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9612 DebugLoc DL = MI.getDebugLoc();
9613
9614 uint64_t End1Reg = MI.getOperand(0).getReg();
9615 uint64_t Start1Reg = MI.getOperand(1).getReg();
9616 uint64_t Start2Reg = MI.getOperand(2).getReg();
9617 uint64_t CharReg = MI.getOperand(3).getReg();
9618
9619 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9620 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9621 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9622 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9623
9624 MachineBasicBlock *StartMBB = MBB;
9626 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9627
9628 // StartMBB:
9629 // # fall through to LoopMBB
9630 MBB->addSuccessor(LoopMBB);
9631
9632 // LoopMBB:
9633 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9634 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9635 // R0L = %CharReg
9636 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9637 // JO LoopMBB
9638 // # fall through to DoneMBB
9639 //
9640 // The load of R0L can be hoisted by post-RA LICM.
9641 MBB = LoopMBB;
9642
9643 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9644 .addReg(Start1Reg).addMBB(StartMBB)
9645 .addReg(End1Reg).addMBB(LoopMBB);
9646 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9647 .addReg(Start2Reg).addMBB(StartMBB)
9648 .addReg(End2Reg).addMBB(LoopMBB);
9649 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9650 BuildMI(MBB, DL, TII->get(Opcode))
9651 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9652 .addReg(This1Reg).addReg(This2Reg);
9653 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9655 MBB->addSuccessor(LoopMBB);
9656 MBB->addSuccessor(DoneMBB);
9657
9658 DoneMBB->addLiveIn(SystemZ::CC);
9659
9660 MI.eraseFromParent();
9661 return DoneMBB;
9662}
9663
9664// Update TBEGIN instruction with final opcode and register clobbers.
9665MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9666 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9667 bool NoFloat) const {
9668 MachineFunction &MF = *MBB->getParent();
9669 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9670 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9671
9672 // Update opcode.
9673 MI.setDesc(TII->get(Opcode));
9674
9675 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9676 // Make sure to add the corresponding GRSM bits if they are missing.
9677 uint64_t Control = MI.getOperand(2).getImm();
9678 static const unsigned GPRControlBit[16] = {
9679 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9680 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9681 };
9682 Control |= GPRControlBit[15];
9683 if (TFI->hasFP(MF))
9684 Control |= GPRControlBit[11];
9685 MI.getOperand(2).setImm(Control);
9686
9687 // Add GPR clobbers.
9688 for (int I = 0; I < 16; I++) {
9689 if ((Control & GPRControlBit[I]) == 0) {
9690 unsigned Reg = SystemZMC::GR64Regs[I];
9691 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9692 }
9693 }
9694
9695 // Add FPR/VR clobbers.
9696 if (!NoFloat && (Control & 4) != 0) {
9697 if (Subtarget.hasVector()) {
9698 for (unsigned Reg : SystemZMC::VR128Regs) {
9699 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9700 }
9701 } else {
9702 for (unsigned Reg : SystemZMC::FP64Regs) {
9703 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9704 }
9705 }
9706 }
9707
9708 return MBB;
9709}
9710
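// Replace an LT*BRCompare_Pseudo with the corresponding load-and-test
// instruction, giving it a result register so that the value def is modeled
// explicitly.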
9711MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9712 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9713 MachineFunction &MF = *MBB->getParent();
9715 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9716 DebugLoc DL = MI.getDebugLoc();
9717
9718 Register SrcReg = MI.getOperand(0).getReg();
9719
9720 // Create new virtual register of the same class as source.
9721 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9722 Register DstReg = MRI->createVirtualRegister(RC);
9723
9724 // Replace pseudo with a normal load-and-test that models the def as
9725 // well.
9726 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9727 .addReg(SrcReg)
9728 .setMIFlags(MI.getFlags());
9729 MI.eraseFromParent();
9730
9731 return MBB;
9732}
9733
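// Expand PROBED_ALLOCA: grow the stack in ProbeSize-byte steps, probing each
// step with a volatile memory compare so that the guard page is touched as
// the stack pointer advances, then handle any remaining bytes.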
9734MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9736 MachineFunction &MF = *MBB->getParent();
9738 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9739 DebugLoc DL = MI.getDebugLoc();
9740 const unsigned ProbeSize = getStackProbeSize(MF);
9741 Register DstReg = MI.getOperand(0).getReg();
9742 Register SizeReg = MI.getOperand(2).getReg();
9743
9744 MachineBasicBlock *StartMBB = MBB;
9746 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9747 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9748 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9749 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9750
9753
9754 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9755 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9756
9757 // LoopTestMBB
9758 // BRC TailTestMBB
9759 // # fallthrough to LoopBodyMBB
9760 StartMBB->addSuccessor(LoopTestMBB);
9761 MBB = LoopTestMBB;
9762 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9763 .addReg(SizeReg)
9764 .addMBB(StartMBB)
9765 .addReg(IncReg)
9766 .addMBB(LoopBodyMBB);
9767 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9768 .addReg(PHIReg)
9769 .addImm(ProbeSize);
9770 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9772 .addMBB(TailTestMBB);
9773 MBB->addSuccessor(LoopBodyMBB);
9774 MBB->addSuccessor(TailTestMBB);
9775
9776 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9777 // J LoopTestMBB
9778 MBB = LoopBodyMBB;
9779 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9780 .addReg(PHIReg)
9781 .addImm(ProbeSize);
9782 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9783 .addReg(SystemZ::R15D)
9784 .addImm(ProbeSize);
9785 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9786 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9787 .setMemRefs(VolLdMMO);
9788 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9789 MBB->addSuccessor(LoopTestMBB);
9790
9791 // TailTestMBB
9792 // BRC DoneMBB
9793 // # fallthrough to TailMBB
9794 MBB = TailTestMBB;
9795 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9796 .addReg(PHIReg)
9797 .addImm(0);
9798 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9800 .addMBB(DoneMBB);
9801 MBB->addSuccessor(TailMBB);
9802 MBB->addSuccessor(DoneMBB);
9803
9804 // TailMBB
9805 // # fallthrough to DoneMBB
9806 MBB = TailMBB;
9807 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9808 .addReg(SystemZ::R15D)
9809 .addReg(PHIReg);
9810 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9811 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9812 .setMemRefs(VolLdMMO);
9813 MBB->addSuccessor(DoneMBB);
9814
9815 // DoneMBB
9816 MBB = DoneMBB;
9817 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9818 .addReg(SystemZ::R15D);
9819
9820 MI.eraseFromParent();
9821 return DoneMBB;
9822}
9823
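// Compute the address of the backchain slot relative to the given stack
// pointer value, using the backchain offset from the ELF frame lowering.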
9824SDValue SystemZTargetLowering::
9825getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9827 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9828 SDLoc DL(SP);
9829 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9830 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9831}
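// Dispatch a pseudo instruction that was selected with the custom-inserter
// flag to the appropriate emit* helper above.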
9832
9833MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
9834 MachineInstr &MI, MachineBasicBlock *MBB) const {
9835 switch (MI.getOpcode()) {
9836 case SystemZ::ADJCALLSTACKDOWN:
9837 case SystemZ::ADJCALLSTACKUP:
9838 return emitAdjCallStack(MI, MBB);
9839
9840 case SystemZ::Select32:
9841 case SystemZ::Select64:
9842 case SystemZ::Select128:
9843 case SystemZ::SelectF32:
9844 case SystemZ::SelectF64:
9845 case SystemZ::SelectF128:
9846 case SystemZ::SelectVR32:
9847 case SystemZ::SelectVR64:
9848 case SystemZ::SelectVR128:
9849 return emitSelect(MI, MBB);
9850
9851 case SystemZ::CondStore8Mux:
9852 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9853 case SystemZ::CondStore8MuxInv:
9854 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9855 case SystemZ::CondStore16Mux:
9856 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9857 case SystemZ::CondStore16MuxInv:
9858 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9859 case SystemZ::CondStore32Mux:
9860 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9861 case SystemZ::CondStore32MuxInv:
9862 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9863 case SystemZ::CondStore8:
9864 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9865 case SystemZ::CondStore8Inv:
9866 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9867 case SystemZ::CondStore16:
9868 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9869 case SystemZ::CondStore16Inv:
9870 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9871 case SystemZ::CondStore32:
9872 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9873 case SystemZ::CondStore32Inv:
9874 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9875 case SystemZ::CondStore64:
9876 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9877 case SystemZ::CondStore64Inv:
9878 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9879 case SystemZ::CondStoreF32:
9880 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9881 case SystemZ::CondStoreF32Inv:
9882 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9883 case SystemZ::CondStoreF64:
9884 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9885 case SystemZ::CondStoreF64Inv:
9886 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9887
9888 case SystemZ::SCmp128Hi:
9889 return emitICmp128Hi(MI, MBB, false);
9890 case SystemZ::UCmp128Hi:
9891 return emitICmp128Hi(MI, MBB, true);
9892
9893 case SystemZ::PAIR128:
9894 return emitPair128(MI, MBB);
9895 case SystemZ::AEXT128:
9896 return emitExt128(MI, MBB, false);
9897 case SystemZ::ZEXT128:
9898 return emitExt128(MI, MBB, true);
9899
9900 case SystemZ::ATOMIC_SWAPW:
9901 return emitAtomicLoadBinary(MI, MBB, 0);
9902
9903 case SystemZ::ATOMIC_LOADW_AR:
9904 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9905 case SystemZ::ATOMIC_LOADW_AFI:
9906 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9907
9908 case SystemZ::ATOMIC_LOADW_SR:
9909 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9910
9911 case SystemZ::ATOMIC_LOADW_NR:
9912 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9913 case SystemZ::ATOMIC_LOADW_NILH:
9914 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9915
9916 case SystemZ::ATOMIC_LOADW_OR:
9917 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9918 case SystemZ::ATOMIC_LOADW_OILH:
9919 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9920
9921 case SystemZ::ATOMIC_LOADW_XR:
9922 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9923 case SystemZ::ATOMIC_LOADW_XILF:
9924 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9925
9926 case SystemZ::ATOMIC_LOADW_NRi:
9927 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9928 case SystemZ::ATOMIC_LOADW_NILHi:
9929 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9930
9931 case SystemZ::ATOMIC_LOADW_MIN:
9932 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9933 case SystemZ::ATOMIC_LOADW_MAX:
9934 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9935 case SystemZ::ATOMIC_LOADW_UMIN:
9936 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9937 case SystemZ::ATOMIC_LOADW_UMAX:
9938 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9939
9940 case SystemZ::ATOMIC_CMP_SWAPW:
9941 return emitAtomicCmpSwapW(MI, MBB);
9942 case SystemZ::MVCImm:
9943 case SystemZ::MVCReg:
9944 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9945 case SystemZ::NCImm:
9946 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9947 case SystemZ::OCImm:
9948 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9949 case SystemZ::XCImm:
9950 case SystemZ::XCReg:
9951 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9952 case SystemZ::CLCImm:
9953 case SystemZ::CLCReg:
9954 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9955 case SystemZ::MemsetImmImm:
9956 case SystemZ::MemsetImmReg:
9957 case SystemZ::MemsetRegImm:
9958 case SystemZ::MemsetRegReg:
9959 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9960 case SystemZ::CLSTLoop:
9961 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9962 case SystemZ::MVSTLoop:
9963 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9964 case SystemZ::SRSTLoop:
9965 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9966 case SystemZ::TBEGIN:
9967 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9968 case SystemZ::TBEGIN_nofloat:
9969 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9970 case SystemZ::TBEGINC:
9971 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9972 case SystemZ::LTEBRCompare_Pseudo:
9973 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9974 case SystemZ::LTDBRCompare_Pseudo:
9975 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9976 case SystemZ::LTXBRCompare_Pseudo:
9977 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9978
9979 case SystemZ::PROBED_ALLOCA:
9980 return emitProbedAlloca(MI, MBB);
9981 case SystemZ::EH_SjLj_SetJmp:
9982 return emitEHSjLjSetJmp(MI, MBB);
9983 case SystemZ::EH_SjLj_LongJmp:
9984 return emitEHSjLjLongJmp(MI, MBB);
9985
9986 case TargetOpcode::STACKMAP:
9987 case TargetOpcode::PATCHPOINT:
9988 return emitPatchPoint(MI, MBB);
9989
9990 default:
9991 llvm_unreachable("Unexpected instr type to insert");
9992 }
9993}
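// Annotation (not part of the upstream source): in the emitCondStore calls
// above, the arguments are (plain store opcode, store-on-condition opcode,
// Invert). CondStore32/CondStore64 pass ST/STOC and STG/STOCG so a single
// STOC(G) can be used when available; the sub-word and FP variants pass 0
// for the second opcode since no store-on-condition form exists for them,
// in which case the store is presumably emitted under a conditional branch
// instead. The *Inv pseudos set Invert so the store happens on the opposite
// condition.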
9994
9995// This is only used by the isel schedulers, and is needed only to prevent
9996// compiler from crashing when list-ilp is used.
9997const TargetRegisterClass *
9998SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9999 if (VT == MVT::Untyped)
10000 return &SystemZ::ADDR128BitRegClass;
10001 return TargetLowering::getRepRegClassFor(VT);
10002}
10003
10004SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
10005 SelectionDAG &DAG) const {
10006 SDLoc dl(Op);
10007 /*
10008 The rounding method is in FPC Byte 3 bits 6-7, and has the following
10009 settings:
10010 00 Round to nearest
10011 01 Round to 0
10012 10 Round to +inf
10013 11 Round to -inf
10014
10015 FLT_ROUNDS, on the other hand, expects the following:
10016 -1 Undefined
10017 0 Round to 0
10018 1 Round to nearest
10019 2 Round to +inf
10020 3 Round to -inf
10021 */
10022
10023 // Save FPC to register.
10024 SDValue Chain = Op.getOperand(0);
10025 SDValue EFPC(
10026 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
10027 Chain = EFPC.getValue(1);
10028
10029 // Transform as necessary
10030 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
10031 DAG.getConstant(3, dl, MVT::i32));
10032 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
10033 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
10034 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
10035 DAG.getConstant(1, dl, MVT::i32)));
10036
10037 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
10038 DAG.getConstant(1, dl, MVT::i32));
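// Annotation (not part of the upstream source): checking the transform
// against the table in the block comment above, with CWD1 holding the two
// FPC rounding bits:
//   FPC 00 (to nearest) -> (0 ^ 0) ^ 1 = 1  (FLT_ROUNDS: to nearest)
//   FPC 01 (to 0)       -> (1 ^ 0) ^ 1 = 0  (FLT_ROUNDS: to 0)
//   FPC 10 (to +inf)    -> (2 ^ 1) ^ 1 = 2  (FLT_ROUNDS: to +inf)
//   FPC 11 (to -inf)    -> (3 ^ 1) ^ 1 = 3  (FLT_ROUNDS: to -inf)
// so RetVal matches the FLT_ROUNDS encoding for every FPC setting.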
10039 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
10040
10041 return DAG.getMergeValues({RetVal, Chain}, dl);
10042}
10043
10044SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
10045 SelectionDAG &DAG) const {
10046 EVT VT = Op.getValueType();
10047 Op = Op.getOperand(0);
10048 EVT OpVT = Op.getValueType();
10049
10050 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
10051
10052 SDLoc DL(Op);
10053
10054 // load a 0 vector for the third operand of VSUM.
10055 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
10056
10057 // execute VSUM.
10058 switch (OpVT.getScalarSizeInBits()) {
10059 case 8:
10060 case 16:
10061 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
10062 [[fallthrough]];
10063 case 32:
10064 case 64:
10065 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
10066 DAG.getBitcast(Op.getValueType(), Zero));
10067 break;
10068 case 128:
10069 break; // VSUM over v1i128 should not happen and would be a noop
10070 default:
10071 llvm_unreachable("Unexpected scalar size.");
10072 }
10073 // Cast to original vector type, retrieve last element.
10074 return DAG.getNode(
10075 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
10076 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
10077}
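// Annotation (not part of the upstream source): as a concrete sketch, an
//   i16 (vecreduce_add (v8i16 %v))
// takes the 8/16-bit path above: VSUM of %v with the zero vector yields
// v4i32 partial sums, the fallthrough VSUM then folds those into one i128
// total, and since the SystemZ vector layout is big-endian, bitcasting the
// i128 back to v8i16 leaves the low-order bits in the last element, so
// element NumElements-1 (here 7) is extracted as the i16 result.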
10078
10079// Only consider a function fully internal as long as it has local linkage
10080// and is not used in any other way than acting as the called function at
10081// call sites.
10082bool SystemZTargetLowering::isFullyInternal(const Function *Fn) const {
10083 if (!Fn->hasLocalLinkage())
10084 return false;
10085 for (const User *U : Fn->users()) {
10086 if (auto *CB = dyn_cast<CallBase>(U)) {
10087 if (CB->getCalledFunction() != Fn)
10088 return false;
10089 } else
10090 return false;
10091 }
10092 return true;
10093}
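// Annotation (not part of the upstream source): for example,
//   define internal i32 @f(i32 %x) { ... }
// is fully internal if every use of @f is a direct call such as
//   %r = call i32 @f(i32 %y)
// but not if its address escapes, e.g. via
//   store ptr @f, ptr %slot
// since @f could then be reached through indirect call sites that this
// check cannot see and that are expected to follow the standard ABI.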
10094
10095static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
10096 FunctionType *FT = F->getFunctionType();
10097 const AttributeList &Attrs = F->getAttributes();
10098 if (Attrs.hasRetAttrs())
10099 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
10100 OS << *F->getReturnType() << " @" << F->getName() << "(";
10101 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
10102 if (I)
10103 OS << ", ";
10104 OS << *FT->getParamType(I);
10105 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
10106 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
10107 if (ArgAttrs.hasAttribute(A))
10108 OS << " " << Attribute::getNameFromAttrKind(A);
10109 }
10110 OS << ")\n";
10111}
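// Annotation (not part of the upstream source): for a function declared as
//   define zeroext i32 @foo(i16 signext %a, i64 %b)
// this helper would print roughly
//   zeroext i32 @foo(i16 signext, i64)
// i.e. the return attributes and type, the name, and each parameter type
// followed by any sign/zero/no-extension attribute it carries.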
10112
10113void SystemZTargetLowering::
10114verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
10115 const Function *F, SDValue Callee) const {
10116 bool IsInternal = false;
10117 const Function *CalleeFn = nullptr;
10118 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
10119 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
10120 IsInternal = isFullyInternal(CalleeFn);
10121 if (!verifyNarrowIntegerArgs(Outs, IsInternal)) {
10122 errs() << "ERROR: Missing extension attribute of passed "
10123 << "value in call to function:\n" << "Callee: ";
10124 if (CalleeFn != nullptr)
10125 printFunctionArgExts(CalleeFn, errs());
10126 else
10127 errs() << "-\n";
10128 errs() << "Caller: ";
10129 printFunctionArgExts(F, errs());
10130 llvm_unreachable("");
10131 }
10132}
10133
10134void SystemZTargetLowering::
10135verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
10136 const Function *F) const {
10137 if (!verifyNarrowIntegerArgs(Outs, isFullyInternal(F))) {
10138 errs() << "ERROR: Missing extension attribute of returned "
10139 << "value from function:\n";
10140 printFunctionArgExts(F, errs());
10141 llvm_unreachable("");
10142 }
10143}
10144
10145// Verify that narrow integer arguments are extended as required by the ABI.
10146// Return false if an error is found.
10147bool SystemZTargetLowering::
10148verifyNarrowIntegerArgs(const SmallVectorImpl<ISD::OutputArg> &Outs,
10149 bool IsInternal) const {
10150 if (IsInternal || !Subtarget.isTargetELF())
10151 return true;
10152
10153 // Temporarily only do the check when explicitly requested, until it can be
10154 // enabled by default.
10156 return true;
10157
10160 return true;
10161 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
10162 return true;
10163
10164 for (unsigned i = 0; i < Outs.size(); ++i) {
10165 MVT VT = Outs[i].VT;
10166 ISD::ArgFlagsTy Flags = Outs[i].Flags;
10167 if (VT.isInteger()) {
10168 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
10169 "Unexpected integer argument VT.");
10170 if (VT == MVT::i32 &&
10171 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
10172 return false;
10173 }
10174 }
10175
10176 return true;
10177}
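// Annotation (not part of the upstream source): as an example of what the
// check accepts, with verification enabled an outgoing i32 argument or
// return value must carry one of signext/zeroext/noext, so
//   call void @g(i32 signext %v)
// is fine, while
//   call void @g(i32 %v)
// triggers the "Missing extension attribute" error above, reflecting the
// SystemZ ELF ABI rule that integer values narrower than 64 bits are
// passed and returned sign- or zero-extended to 64 bits.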
unsigned const MachineRegisterInfo * MRI
#define Success
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr Register SPReg
static bool isSelectPseudo(MachineInstr &MI)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:322
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
@ Add
*p = old + v
Definition: Instructions.h:720
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
BinOp getOperation() const
Definition: Instructions.h:805
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
Definition: Attributes.cpp:314
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
The address of a basic block.
Definition: Constants.h:893
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:595
bool hasLocalLinkage() const
Definition: GlobalValue.h:528
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:801
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:760
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:827
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:712
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:890
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:873
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void reserve(size_type N)
Definition: SmallVector.h:663
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:470
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:684
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
iterator end() const
Definition: StringRef.h:118
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
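
As an illustration of the IR Type queries listed above, here is a small, self-contained helper; the function name and the 64-bit cutoff are hypothetical, chosen only for the example.

#include "llvm/IR/Type.h"
using namespace llvm;

// Hypothetical predicate: accept only scalar integer or floating-point types
// of at most 64 bits.
static bool isSmallScalar(Type *Ty) {
  if (Ty->isVectorTy())
    return false;                      // reject vectors outright
  if (!Ty->isIntegerTy() && !Ty->isFloatingPointTy())
    return false;                      // only integer or FP scalars
  return Ty->getPrimitiveSizeInBits().getFixedValue() <= 64;
}
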
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
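
The Value use-list accessors above compose as in this small, hypothetical helper (illustrative only); users() yields one entry per use, so the loop below counts uses of V.

#include "llvm/IR/Value.h"
using namespace llvm;

// Count the uses of V, taking the cheap hasOneUse() fast path first.
static unsigned countUses(const Value *V) {
  if (V->hasOneUse())
    return 1;
  unsigned N = 0;
  for (const User *U : V->users()) {
    (void)U;  // only counting; the user itself is not inspected
    ++N;
  }
  return N;
}
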
int getNumOccurrences() const
Definition: CommandLine.h:399
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:460
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1325
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:451
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same as [SU]ADDO, but for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition: ISDOpcodes.h:931
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:450
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:439
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:440
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:882
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:958
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1398
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
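
A minimal sketch of how these ISD::CondCode helpers are typically used; the function below is hypothetical and exists only to demonstrate the calls.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// For a comparison (X op Y) of type VT, derive the operand-swapped and the
// logically inverted condition codes.
static void deriveConditions(ISD::CondCode CC, EVT VT) {
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC); // (Y op' X)
  ISD::CondCode Inverse = ISD::getSetCCInverse(CC, VT);     // !(X op Y)
  (void)Swapped;
  (void)Inverse;
}
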
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1606
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1586
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
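
These SDNode predicates usually appear as guards in target DAG combines. A small, hypothetical helper using isNormalLoad (illustrative only):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True if N is a plain load a combine could usually fold: non-extending,
// unindexed, and not volatile.
static bool isFoldableLoad(const SDNode *N) {
  return ISD::isNormalLoad(N) && !cast<LoadSDNode>(N)->isVolatile();
}
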
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:210
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:355
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:257
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:583
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
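
The arithmetic helpers above are straightforward; here is a self-contained sketch, with the results of each call noted as worked examples in the comments.

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;

static void bitMathDemo() {
  uint32_t V = 20;                             // 0b10100
  bool Pow2 = isPowerOf2_32(V);                // false: 20 is not a power of two
  uint32_t Ceil = bit_ceil(V);                 // 32: smallest power of two >= 20
  int Trailing = countr_zero(V);               // 2 trailing zero bits
  int64_t Disp = SignExtend64<20>(0xFFFFF);    // -1: sign-extend a 20-bit field
  (void)Pow2; (void)Ceil; (void)Trailing; (void)Disp;
}
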
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
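
A small, hypothetical predicate showing typical EVT queries (illustrative only; scalable vectors are deliberately rejected before asking for a fixed size).

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// True for fixed 128-bit vectors whose elements are i32 or i64.
static bool isWideIntVector(EVT VT) {
  if (!VT.isVector() || VT.isScalableVector() || !VT.isInteger())
    return false;
  EVT EltVT = VT.getVectorElementType();
  return VT.getSizeInBits().getFixedValue() == 128 &&
         (EltVT == MVT::i32 || EltVT == MVT::i64);
}
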
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:178
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:137
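
The KnownBits interface above composes as in this self-contained, hypothetical helper; it assumes the incoming bit width is at most 64 so that the zero extension is valid.

#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

// Merge what is known about a value on two incoming paths, then widen the
// result to 64 bits (zero extension marks the new high bits as known zero).
static KnownBits mergeAndWiden(const KnownBits &A, const KnownBits &B) {
  assert(A.getBitWidth() == B.getBitWidth() && "bit widths must match");
  KnownBits Merged = A.intersectWith(B); // keep only facts true on both paths
  return Merged.zext(64);
}
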
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
const uint32_t * getNoPreservedMask() const override
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
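
The CallLoweringInfo setters above are designed to chain; the fragment below is a hedged sketch of how a lowering routine inside a TargetLowering subclass might drive LowerCallTo(). It is not standalone: DAG, DL, Chain, Callee, RetTy, Args and SignExtendResult are assumed to exist in the surrounding code.

// Illustrative fragment only.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
    .setChain(Chain)
    .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
    .setSExtResult(SignExtendResult)
    .setNoReturn(false);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// CallResult.first is the call's return value; CallResult.second is the
// updated chain.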