LLVM 23.0.0git
SystemZISelLowering.cpp
Go to the documentation of this file.
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
17#include "llvm/ADT/SmallSet.h"
22#include "llvm/IR/GlobalAlias.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/IntrinsicsS390.h"
26#include "llvm/IR/Module.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39// Temporarily let this be disabled by default until all known problems
40// related to argument extensions are fixed.
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
71} // end anonymous namespace
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85// Return a version of MachineOperand that can be safely used before the
86// final use.
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM, STI), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v8f16, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
128 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
129 }
130
131 if (Subtarget.hasVector())
132 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
133 }
134
135 // Compute derived properties from the register classes
136 computeRegisterProperties(Subtarget.getRegisterInfo());
137
138 // Set up special registers.
139 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
140
141 // TODO: It may be better to default to latency-oriented scheduling, however
142 // LLVM's current latency-oriented scheduler can't handle physreg definitions
143 // such as SystemZ has with CC, so set this to the register-pressure
144 // scheduler, because it can.
146
149
151
152 // Instructions are strings of 2-byte aligned 2-byte values.
154 // For performance reasons we prefer 16-byte alignment.
156
157 // Handle operations that are handled in a similar way for all types.
158 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
159 I <= MVT::LAST_FP_VALUETYPE;
160 ++I) {
162 if (isTypeLegal(VT)) {
163 // Lower SET_CC into an IPM-based sequence.
167
168 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
170
171 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
174 }
175 }
176
177 // Expand jump table branches as address arithmetic followed by an
178 // indirect jump.
180
181 // Expand BRCOND into a BR_CC (see above).
183
184 // Handle integer types except i128.
185 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
186 I <= MVT::LAST_INTEGER_VALUETYPE;
187 ++I) {
189 if (isTypeLegal(VT) && VT != MVT::i128) {
191
192 // Expand individual DIV and REMs into DIVREMs.
199
200 // Support addition/subtraction with overflow.
203
204 // Support addition/subtraction with carry.
207
208 // Support carry in as value rather than glue.
211
212 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
213 // available, or if the operand is constant.
215
216 // Use POPCNT on z196 and above.
217 if (Subtarget.hasPopulationCount())
219 else
221
222 // No special instructions for these.
225
226 // Use *MUL_LOHI where possible instead of MULH*.
231
232 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
233 // unsigned on z10 (only z196 and above have native support for
234 // unsigned conversions).
241 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
242 auto OpAction =
243 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
244 setOperationAction(Op, VT, OpAction);
245 }
246 }
247 }
248
249 // Handle i128 if legal.
250 if (isTypeLegal(MVT::i128)) {
251 // No special instructions for these.
258
259 // We may be able to use VSLDB/VSLD/VSRD for these.
262
263 // No special instructions for these before z17.
264 if (!Subtarget.hasVectorEnhancements3()) {
274 } else {
275 // Even if we do have a legal 128-bit multiply, we do not
276 // want 64-bit multiply-high operations to use it.
279 }
280
281 // Support addition/subtraction with carry.
286
287 // Use VPOPCT and add up partial results.
289
290 // Additional instructions available with z17.
291 if (Subtarget.hasVectorEnhancements3()) {
292 setOperationAction(ISD::ABS, MVT::i128, Legal);
293
295 MVT::i128, Legal);
296 }
297 }
298
299 // These need custom handling in order to handle the f16 conversions.
308
309 // Type legalization will convert 8- and 16-bit atomic operations into
310 // forms that operate on i32s (but still keeping the original memory VT).
311 // Lower them into full i32 operations.
323
324 // Whether or not i128 is a legal type, we need to custom lower
325 // the atomic operations in order to exploit SystemZ instructions.
330
331 // Mark sign/zero extending atomic loads as legal, which will make
332 // DAGCombiner fold extensions into atomic loads if possible.
334 {MVT::i8, MVT::i16, MVT::i32}, Legal);
336 {MVT::i8, MVT::i16}, Legal);
338 MVT::i8, Legal);
339
340 // We can use the CC result of compare-and-swap to implement
341 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
345
347
348 // Traps are legal, as we will convert them to "j .+2".
349 setOperationAction(ISD::TRAP, MVT::Other, Legal);
350
351 // We have native support for a 64-bit CTLZ, via FLOGR.
355
356 // On z17 we have native support for a 64-bit CTTZ.
357 if (Subtarget.hasMiscellaneousExtensions4()) {
361 }
362
363 // On z15 we have native support for a 64-bit CTPOP.
364 if (Subtarget.hasMiscellaneousExtensions3()) {
367 }
368
369 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
371
372 // Expand 128 bit shifts without using a libcall.
376
377 // Also expand 256 bit shifts if i128 is a legal type.
378 if (isTypeLegal(MVT::i128)) {
382 }
383
384 // Handle bitcast from fp128 to i128.
385 if (!isTypeLegal(MVT::i128))
387
388 // We have native instructions for i8, i16 and i32 extensions, but not i1.
390 for (MVT VT : MVT::integer_valuetypes()) {
394 }
395
396 // Handle the various types of symbolic address.
402
403 // We need to handle dynamic allocations specially because of the
404 // 160-byte area at the bottom of the stack.
407
410
411 // Handle prefetches with PFD or PFDRL.
413
414 // Handle readcyclecounter with STCKF.
416
418 // Assume by default that all vector operations need to be expanded.
419 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
420 if (getOperationAction(Opcode, VT) == Legal)
421 setOperationAction(Opcode, VT, Expand);
422
423 // Likewise all truncating stores and extending loads.
424 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
425 setTruncStoreAction(VT, InnerVT, Expand);
428 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
429 }
430
431 if (isTypeLegal(VT)) {
432 // These operations are legal for anything that can be stored in a
433 // vector register, even if there is no native support for the format
434 // as such. In particular, we can do these for v4f32 even though there
435 // are no specific instructions for that format.
441
442 // Likewise, except that we need to replace the nodes with something
443 // more specific.
446 }
447 }
448
449 // Handle integer vector types.
451 if (isTypeLegal(VT)) {
452 // These operations have direct equivalents.
457 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
461 }
462 if (Subtarget.hasVectorEnhancements3() &&
463 VT != MVT::v16i8 && VT != MVT::v8i16) {
468 }
473 if (Subtarget.hasVectorEnhancements1())
475 else
479
480 // Convert a GPR scalar to a vector by inserting it into element 0.
482
483 // Use a series of unpacks for extensions.
486
487 // Detect shifts/rotates by a scalar amount and convert them into
488 // V*_BY_SCALAR.
493
494 // Add ISD::VECREDUCE_ADD as custom in order to implement
495 // it with VZERO+VSUM
497
498 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
499 // and inverting the result as necessary.
501
503 Legal);
504 }
505 }
506
507 if (Subtarget.hasVector()) {
508 // There should be no need to check for float types other than v2f64
509 // since <2 x f32> isn't a legal type.
518
527 }
528
529 if (Subtarget.hasVectorEnhancements2()) {
538
547 }
548
549 // Handle floating-point types.
550 if (!useSoftFloat()) {
551 // Promote all f16 operations to float, with some exceptions below.
552 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
553 setOperationAction(Opc, MVT::f16, Promote);
555 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
556 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
557 setTruncStoreAction(VT, MVT::f16, Expand);
558 }
560 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
564
565 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
566 setOperationAction(Op, MVT::f16, Legal);
567 }
568
569 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
570 I <= MVT::LAST_FP_VALUETYPE;
571 ++I) {
573 if (isTypeLegal(VT) && VT != MVT::f16) {
574 // We can use FI for FRINT.
576
577 // We can use the extended form of FI for other rounding operations.
578 if (Subtarget.hasFPExtension()) {
585 }
586
587 // No special instructions for these.
593
594 // Special treatment.
596
597 // Handle constrained floating-point operations.
606 if (Subtarget.hasFPExtension()) {
613 }
614
615 // Extension from f16 needs libcall.
618 }
619 }
620
621 // Handle floating-point vector types.
622 if (Subtarget.hasVector()) {
623 // Scalar-to-vector conversion is just a subreg.
627
628 // Some insertions and extractions can be done directly but others
629 // need to go via integers.
636
637 // These operations have direct equivalents.
638 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
639 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
640 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
641 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
642 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
643 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
644 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
645 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
646 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
649 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
653
654 // Handle constrained floating-point operations.
668
673 if (Subtarget.hasVectorEnhancements1()) {
676 }
677 }
678
679 // The vector enhancements facility 1 has instructions for these.
680 if (Subtarget.hasVectorEnhancements1()) {
681 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
682 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
683 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
684 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
685 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
686 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
687 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
688 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
689 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
692 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
696
697 for (MVT Type : {MVT::f64, MVT::v2f64, MVT::f32, MVT::v4f32, MVT::f128}) {
704 }
705
706 // Handle constrained floating-point operations.
720 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
721 MVT::v4f32, MVT::v2f64 }) {
726 }
727 }
728
729 // We only have fused f128 multiply-addition on vector registers.
730 if (!Subtarget.hasVectorEnhancements1()) {
733 }
734
735 // We don't have a copysign instruction on vector registers.
736 if (Subtarget.hasVectorEnhancements1())
738
739 // Needed so that we don't try to implement f128 constant loads using
740 // a load-and-extend of a f80 constant (in cases where the constant
741 // would fit in an f80).
742 for (MVT VT : MVT::fp_valuetypes())
743 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
744
745 // We don't have extending load instructions on vector registers.
746 if (Subtarget.hasVectorEnhancements1()) {
747 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
748 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
749 }
750
751 // Floating-point truncation and stores need to be done separately.
752 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
753 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
754 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
755
756 // We have 64-bit FPR<->GPR moves, but need special handling for
757 // 32-bit forms.
758 if (!Subtarget.hasVector()) {
761 }
762
763 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
764 // structure, but VAEND is a no-op.
768
769 if (Subtarget.isTargetzOS()) {
770 // Handle address space casts between mixed sized pointers.
773 }
774
776
777 // Codes for which we want to perform some z-specific combinations.
781 ISD::LOAD,
794 ISD::SRL,
795 ISD::SRA,
796 ISD::MUL,
797 ISD::SDIV,
798 ISD::UDIV,
799 ISD::SREM,
800 ISD::UREM,
803
804 // Handle intrinsics.
807
808 // We're not using SJLJ for exception handling, but they're implemented
809 // solely to support use of __builtin_setjmp / __builtin_longjmp.
812
813 // We want to use MVC in preference to even a single load/store pair.
814 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
816
817 // Same with memmove.
818 MaxStoresPerMemmove = Subtarget.hasVector() ? 2 : 0;
820
821 // The main memset sequence is a byte store followed by an MVC.
822 // Two STC or MV..I stores win over that, but the kind of fused stores
823 // generated by target-independent code don't when the byte value is
824 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
825 // than "STC;MVC". Handle the choice in target-specific code instead.
826 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
828
829 // Default to having -disable-strictnode-mutation on
830 IsStrictFPEnabled = true;
831}
832
834 return Subtarget.hasSoftFloat();
835}
836
838 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
839 unsigned &NumIntermediates, MVT &RegisterVT) const {
840 // Pass fp16 vectors in VR(s).
841 if (Subtarget.hasVector() && VT.isVectorOf(MVT::f16)) {
842 IntermediateVT = RegisterVT = MVT::v8f16;
843 return NumIntermediates =
845 }
847 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
848}
849
852 EVT VT) const {
853 // 128-bit single-element vector types are passed like other vectors,
854 // not like their element type.
855 if (Subtarget.hasVector() && VT.isVector() && VT.getSizeInBits() == 128 &&
856 VT.getVectorNumElements() == 1)
857 return MVT::v16i8;
858 // Pass fp16 vectors in VR(s).
859 if (Subtarget.hasVector() && VT.isVectorOf(MVT::f16))
860 return MVT::v8f16;
861 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
862}
863
865 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
866 // Pass fp16 vectors in VR(s).
867 if (Subtarget.hasVector() && VT.isVectorOf(MVT::f16))
869 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
870}
871
873 LLVMContext &, EVT VT) const {
874 if (!VT.isVector())
875 return MVT::i32;
877}
878
880 const MachineFunction &MF, EVT VT) const {
881 if (useSoftFloat())
882 return false;
883
884 VT = VT.getScalarType();
885
886 if (!VT.isSimple())
887 return false;
888
889 switch (VT.getSimpleVT().SimpleTy) {
890 case MVT::f32:
891 case MVT::f64:
892 return true;
893 case MVT::f128:
894 return Subtarget.hasVectorEnhancements1();
895 default:
896 break;
897 }
898
899 return false;
900}
901
902// Return true if the constant can be generated with a vector instruction,
903// such as VGM, VGMB or VREPI.
905 const SystemZSubtarget &Subtarget) {
906 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
907 if (!Subtarget.hasVector() ||
908 (isFP128 && !Subtarget.hasVectorEnhancements1()))
909 return false;
910
911 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
912 // preferred way of creating all-zero and all-one vectors so give it
913 // priority over other methods below.
914 unsigned Mask = 0;
915 unsigned I = 0;
916 for (; I < SystemZ::VectorBytes; ++I) {
917 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
918 if (Byte == 0xff)
919 Mask |= 1ULL << I;
920 else if (Byte != 0)
921 break;
922 }
923 if (I == SystemZ::VectorBytes) {
924 Opcode = SystemZISD::BYTE_MASK;
925 OpVals.push_back(Mask);
927 return true;
928 }
929
930 if (SplatBitSize > 64)
931 return false;
932
933 auto TryValue = [&](uint64_t Value) -> bool {
934 // Try VECTOR REPLICATE IMMEDIATE
935 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
936 if (isInt<16>(SignedValue)) {
937 OpVals.push_back(((unsigned) SignedValue));
938 Opcode = SystemZISD::REPLICATE;
940 SystemZ::VectorBits / SplatBitSize);
941 return true;
942 }
943 // Try VECTOR GENERATE MASK
944 unsigned Start, End;
945 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
946 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
947 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
948 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
949 OpVals.push_back(Start - (64 - SplatBitSize));
950 OpVals.push_back(End - (64 - SplatBitSize));
951 Opcode = SystemZISD::ROTATE_MASK;
953 SystemZ::VectorBits / SplatBitSize);
954 return true;
955 }
956 return false;
957 };
958
959 // First try assuming that any undefined bits above the highest set bit
960 // and below the lowest set bit are 1s. This increases the likelihood of
961 // being able to use a sign-extended element value in VECTOR REPLICATE
962 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
963 uint64_t SplatBitsZ = SplatBits.getZExtValue();
964 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
965 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
966 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
967 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
968 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
969 if (TryValue(SplatBitsZ | Upper | Lower))
970 return true;
971
972 // Now try assuming that any undefined bits between the first and
973 // last defined set bits are set. This increases the chances of
974 // using a non-wraparound mask.
975 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
976 return TryValue(SplatBitsZ | Middle);
977}
978
980 if (IntImm.isSingleWord()) {
981 IntBits = APInt(128, IntImm.getZExtValue());
982 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
983 } else
984 IntBits = IntImm;
985 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
986
987 // Find the smallest splat.
988 SplatBits = IntImm;
989 unsigned Width = SplatBits.getBitWidth();
990 while (Width > 8) {
991 unsigned HalfSize = Width / 2;
992 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
993 APInt LowValue = SplatBits.trunc(HalfSize);
994
995 // If the two halves do not match, stop here.
996 if (HighValue != LowValue || 8 > HalfSize)
997 break;
998
999 SplatBits = HighValue;
1000 Width = HalfSize;
1001 }
1002 SplatUndef = 0;
1003 SplatBitSize = Width;
1004}
1005
1007 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
1008 bool HasAnyUndefs;
1009
1010 // Get IntBits by finding the 128 bit splat.
1011 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
1012 true);
1013
1014 // Get SplatBits by finding the 8 bit or greater splat.
1015 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
1016 true);
1017}
1018
1020 bool ForCodeSize) const {
1021 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
1022 if (Imm.isZero() || Imm.isNegZero())
1023 return true;
1024
1025 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
1026}
1027
1030 MachineBasicBlock *MBB) const {
1031 DebugLoc DL = MI.getDebugLoc();
1032 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1033 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1034
1035 MachineFunction *MF = MBB->getParent();
1036 MachineRegisterInfo &MRI = MF->getRegInfo();
1037
1038 const BasicBlock *BB = MBB->getBasicBlock();
1039 MachineFunction::iterator I = ++MBB->getIterator();
1040
1041 Register DstReg = MI.getOperand(0).getReg();
1042 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1043 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1044 (void)TRI;
1045 Register MainDstReg = MRI.createVirtualRegister(RC);
1046 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1047
1048 MVT PVT = getPointerTy(MF->getDataLayout());
1049 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1050 // For v = setjmp(buf), we generate.
1051 // Algorithm:
1052 //
1053 // ---------
1054 // | thisMBB |
1055 // ---------
1056 // |
1057 // ------------------------
1058 // | |
1059 // ---------- ---------------
1060 // | mainMBB | | restoreMBB |
1061 // | v = 0 | | v = 1 |
1062 // ---------- ---------------
1063 // | |
1064 // -------------------------
1065 // |
1066 // -----------------------------
1067 // | sinkMBB |
1068 // | phi(v_mainMBB,v_restoreMBB) |
1069 // -----------------------------
1070 // thisMBB:
1071 // buf[FPOffset] = Frame Pointer if hasFP.
1072 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1073 // buf[BCOffset] = Backchain value if building with -mbackchain.
1074 // buf[SPOffset] = Stack Pointer.
1075 // buf[LPOffset] = We never write this slot with R13, gcc stores R13 always.
1076 // SjLjSetup restoreMBB
1077 // mainMBB:
1078 // v_main = 0
1079 // sinkMBB:
1080 // v = phi(v_main, v_restore)
1081 // restoreMBB:
1082 // v_restore = 1
1083
1084 MachineBasicBlock *ThisMBB = MBB;
1085 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1086 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1087 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1088
1089 MF->insert(I, MainMBB);
1090 MF->insert(I, SinkMBB);
1091 MF->push_back(RestoreMBB);
1092 RestoreMBB->setMachineBlockAddressTaken();
1093
1095
1096 // Transfer the remainder of BB and its successor edges to sinkMBB.
1097 SinkMBB->splice(SinkMBB->begin(), MBB,
1098 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1100
1101 // thisMBB:
1102 const int64_t FPOffset = 0; // Slot 1.
1103 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1104 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1105 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1106
1107 // Buf address.
1108 Register BufReg = MI.getOperand(1).getReg();
1109
1110 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1111 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1112
1113 // Prepare IP for longjmp.
1114 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1115 .addMBB(RestoreMBB);
1116 // Store IP for return from jmp, slot 2, offset = 1.
1117 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1118 .addReg(LabelReg)
1119 .addReg(BufReg)
1120 .addImm(LabelOffset)
1121 .addReg(0);
1122
1123 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1124 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1125 if (HasFP) {
1126 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1127 .addReg(SpecialRegs->getFramePointerRegister())
1128 .addReg(BufReg)
1129 .addImm(FPOffset)
1130 .addReg(0);
1131 }
1132
1133 // Store SP.
1134 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1135 .addReg(SpecialRegs->getStackPointerRegister())
1136 .addReg(BufReg)
1137 .addImm(SPOffset)
1138 .addReg(0);
1139
1140 // Slot 3(Offset = 2) Backchain value (if building with -mbackchain).
1141 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1142 if (BackChain) {
1143 Register BCReg = MRI.createVirtualRegister(PtrRC);
1144 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1145 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1146 .addReg(SpecialRegs->getStackPointerRegister())
1147 .addImm(TFL->getBackchainOffset(*MF))
1148 .addReg(0);
1149
1150 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1151 .addReg(BCReg)
1152 .addReg(BufReg)
1153 .addImm(BCOffset)
1154 .addReg(0);
1155 }
1156
1157 // Setup.
1158 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1159 .addMBB(RestoreMBB);
1160
1161 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1162 MIB.addRegMask(RegInfo->getNoPreservedMask());
1163
1164 ThisMBB->addSuccessor(MainMBB);
1165 ThisMBB->addSuccessor(RestoreMBB);
1166
1167 // mainMBB:
1168 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1169 MainMBB->addSuccessor(SinkMBB);
1170
1171 // sinkMBB:
1172 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1173 .addReg(MainDstReg)
1174 .addMBB(MainMBB)
1175 .addReg(RestoreDstReg)
1176 .addMBB(RestoreMBB);
1177
1178 // restoreMBB.
1179 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1180 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1181 RestoreMBB->addSuccessor(SinkMBB);
1182
1183 MI.eraseFromParent();
1184
1185 return SinkMBB;
1186}
1187
1190 MachineBasicBlock *MBB) const {
1191
1192 DebugLoc DL = MI.getDebugLoc();
1193 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1194
1195 MachineFunction *MF = MBB->getParent();
1196 MachineRegisterInfo &MRI = MF->getRegInfo();
1197
1198 MVT PVT = getPointerTy(MF->getDataLayout());
1199 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1200 Register BufReg = MI.getOperand(0).getReg();
1201 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1202 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1203
1204 Register Tmp = MRI.createVirtualRegister(RC);
1205 Register BCReg = MRI.createVirtualRegister(RC);
1206
1208
1209 const int64_t FPOffset = 0;
1210 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1211 const int64_t BCOffset = 2 * PVT.getStoreSize();
1212 const int64_t SPOffset = 3 * PVT.getStoreSize();
1213 const int64_t LPOffset = 4 * PVT.getStoreSize();
1214
1215 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1216 .addReg(BufReg)
1217 .addImm(LabelOffset)
1218 .addReg(0);
1219
1220 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1221 SpecialRegs->getFramePointerRegister())
1222 .addReg(BufReg)
1223 .addImm(FPOffset)
1224 .addReg(0);
1225
1226 // Restore R13 even though LLVM's setjmp lowering never stores it: gcc
1227 // always stores R13 in __builtin_setjmp, and code may mix a gcc setjmp
1228 // with an LLVM longjmp.
1229 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1230 .addReg(BufReg)
1231 .addImm(LPOffset)
1232 .addReg(0);
1233
1234 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1235 if (BackChain) {
1236 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1237 .addReg(BufReg)
1238 .addImm(BCOffset)
1239 .addReg(0);
1240 }
1241
1242 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1243 SpecialRegs->getStackPointerRegister())
1244 .addReg(BufReg)
1245 .addImm(SPOffset)
1246 .addReg(0);
1247
1248 if (BackChain) {
1249 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1250 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1251 .addReg(BCReg)
1252 .addReg(SpecialRegs->getStackPointerRegister())
1253 .addImm(TFL->getBackchainOffset(*MF))
1254 .addReg(0);
1255 }
1256
1257 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1258
1259 MI.eraseFromParent();
1260 return MBB;
1261}
1262
1263/// Returns true if stack probing through inline assembly is requested.
1265 // If the function specifically requests inline stack probes, emit them.
1266 if (MF.getFunction().hasFnAttribute("probe-stack"))
1267 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1268 "inline-asm";
1269 return false;
1270}
1271
1276
1281
1284 const AtomicRMWInst *RMW) const {
1285 // Don't expand subword operations as they require special treatment.
1286 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1288
1289 // Don't expand if there is a target instruction available.
1290 if (Subtarget.hasInterlockedAccess1() &&
1291 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1298
1300}
1301
1303 // We can use CGFI or CLGFI.
1304 return isInt<32>(Imm) || isUInt<32>(Imm);
1305}
1306
1308 // We can use ALGFI or SLGFI.
1309 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1310}
1311
1313 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1314 // Unaligned accesses should never be slower than the expanded version.
1315 // We check specifically for aligned accesses in the few cases where
1316 // they are required.
1317 if (Fast)
1318 *Fast = 1;
1319 return true;
1320}
1321
1323 EVT VT = Y.getValueType();
1324
1325 // We can use NC(G)RK for types in GPRs ...
1326 if (VT == MVT::i32 || VT == MVT::i64)
1327 return Subtarget.hasMiscellaneousExtensions3();
1328
1329 // ... or VNC for types in VRs.
1330 if (VT.isVector() || VT == MVT::i128)
1331 return Subtarget.hasVector();
1332
1333 return false;
1334}
1335
1336// Information about the addressing mode for a memory access.
1338 // True if a long displacement is supported.
1340
1341 // True if use of index register is supported.
1343
1344 AddressingMode(bool LongDispl, bool IdxReg) :
1345 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1346};
1347
1348// Return the desired addressing mode for a Load which has only one use (in
1349// the same block) which is a Store.
1351 Type *Ty) {
1352 // With vector support a Load->Store combination may be combined to either
1353 // an MVC or vector operations and it seems to work best to allow the
1354 // vector addressing mode.
1355 if (HasVector)
1356 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1357
1358 // Otherwise only the MVC case is special.
1359 bool MVC = Ty->isIntegerTy(8);
1360 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1361}
1362
1363// Return the addressing mode which seems most desirable given an LLVM
1364// Instruction pointer.
1365static AddressingMode
1368 switch (II->getIntrinsicID()) {
1369 default: break;
1370 case Intrinsic::memset:
1371 case Intrinsic::memmove:
1372 case Intrinsic::memcpy:
1373 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1374 }
1375 }
1376
1377 if (isa<LoadInst>(I) && I->hasOneUse()) {
1378 auto *SingleUser = cast<Instruction>(*I->user_begin());
1379 if (SingleUser->getParent() == I->getParent()) {
1380 if (isa<ICmpInst>(SingleUser)) {
1381 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1382 if (C->getBitWidth() <= 64 &&
1383 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1384 // Comparison of memory with 16 bit signed / unsigned immediate
1385 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1386 } else if (isa<StoreInst>(SingleUser))
1387 // Load->Store
1388 return getLoadStoreAddrMode(HasVector, I->getType());
1389 }
1390 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1391 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1392 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1393 // Load->Store
1394 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1395 }
1396
1397 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1398
1399 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1400 // dependencies (LDE only supports small offsets).
1401 // * Utilize the vector registers to hold floating point
1402 // values (vector load / store instructions only support small
1403 // offsets).
1404
1405 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1406 I->getOperand(0)->getType());
1407 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1408 bool IsVectorAccess = MemAccessTy->isVectorTy();
1409
1410 // A store of an extracted vector element will be combined into a VSTE type
1411 // instruction.
1412 if (!IsVectorAccess && isa<StoreInst>(I)) {
1413 Value *DataOp = I->getOperand(0);
1414 if (isa<ExtractElementInst>(DataOp))
1415 IsVectorAccess = true;
1416 }
1417
1418 // A load which gets inserted into a vector element will be combined into a
1419 // VLE type instruction.
1420 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1421 User *LoadUser = *I->user_begin();
1422 if (isa<InsertElementInst>(LoadUser))
1423 IsVectorAccess = true;
1424 }
1425
1426 if (IsFPAccess || IsVectorAccess)
1427 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1428 }
1429
1430 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1431}
1432
1434 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1435 // Punt on globals for now, although they can be used in limited
1436 // RELATIVE LONG cases.
1437 if (AM.BaseGV)
1438 return false;
1439
1440 // Require a 20-bit signed offset.
1441 if (!isInt<20>(AM.BaseOffs))
1442 return false;
1443
1444 bool RequireD12 =
1445 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1446 AddressingMode SupportedAM(!RequireD12, true);
1447 if (I != nullptr)
1448 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1449
1450 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1451 return false;
1452
1453 if (!SupportedAM.IndexReg)
1454 // No indexing allowed.
1455 return AM.Scale == 0;
1456 else
1457 // Indexing is OK but no scale factor can be applied.
1458 return AM.Scale == 0 || AM.Scale == 1;
1459}
1460
1462 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1463 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1464 const AttributeList &FuncAttributes, EVT *LargestVT) const {
1465
1466 assert(Limit != ~0U &&
1467 "Expected EmitTargetCodeForMemXXX() to handle AlwaysInline cases.");
1468
1469 if (Op.isZeroMemset())
1470 return false; // Memset zero: Use XC.
1471
1472 const int MVCFastLen = 16;
1473 // Use MVC up to 16 bytes. Small memset uses STC/MVI for first byte.
1474 if ((Op.isMemset() ? Op.size() - 1 : Op.size()) <= MVCFastLen)
1475 return false;
1476
1477 // Avoid unaligned VL/VST:s.
1478 if (!Op.isAligned(Align(8)) || (Op.size() >= 25 && Op.size() <= 31))
1479 return false;
1480
1482 Context, MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, LargestVT);
1483}
1484
1486 LLVMContext &Context, const MemOp &Op,
1487 const AttributeList &FuncAttributes) const {
1488 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1489}
1490
1491bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1492 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1493 return false;
1494 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1495 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1496 return FromBits > ToBits;
1497}
1498
1500 if (!FromVT.isInteger() || !ToVT.isInteger())
1501 return false;
1502 unsigned FromBits = FromVT.getFixedSizeInBits();
1503 unsigned ToBits = ToVT.getFixedSizeInBits();
1504 return FromBits > ToBits;
1505}
1506
1507//===----------------------------------------------------------------------===//
1508// Inline asm support
1509//===----------------------------------------------------------------------===//
1510
1513 if (Constraint.size() == 1) {
1514 switch (Constraint[0]) {
1515 case 'a': // Address register
1516 case 'd': // Data register (equivalent to 'r')
1517 case 'f': // Floating-point register
1518 case 'h': // High-part register
1519 case 'r': // General-purpose register
1520 case 'v': // Vector register
1521 return C_RegisterClass;
1522
1523 case 'Q': // Memory with base and unsigned 12-bit displacement
1524 case 'R': // Likewise, plus an index
1525 case 'S': // Memory with base and signed 20-bit displacement
1526 case 'T': // Likewise, plus an index
1527 case 'm': // Equivalent to 'T'.
1528 return C_Memory;
1529
1530 case 'I': // Unsigned 8-bit constant
1531 case 'J': // Unsigned 12-bit constant
1532 case 'K': // Signed 16-bit constant
1533 case 'L': // Signed 20-bit displacement (on all targets we support)
1534 case 'M': // 0x7fffffff
1535 return C_Immediate;
1536
1537 default:
1538 break;
1539 }
1540 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1541 switch (Constraint[1]) {
1542 case 'Q': // Address with base and unsigned 12-bit displacement
1543 case 'R': // Likewise, plus an index
1544 case 'S': // Address with base and signed 20-bit displacement
1545 case 'T': // Likewise, plus an index
1546 return C_Address;
1547
1548 default:
1549 break;
1550 }
1551 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1552 if (StringRef("{@cc}").compare(Constraint) == 0)
1553 return C_Other;
1554 }
1555 return TargetLowering::getConstraintType(Constraint);
1556}
1557
1560 AsmOperandInfo &Info, const char *Constraint) const {
1562 Value *CallOperandVal = Info.CallOperandVal;
1563 // If we don't have a value, we can't do a match,
1564 // but allow it at the lowest weight.
1565 if (!CallOperandVal)
1566 return CW_Default;
1567 Type *type = CallOperandVal->getType();
1568 // Look at the constraint type.
1569 switch (*Constraint) {
1570 default:
1571 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1572 break;
1573
1574 case 'a': // Address register
1575 case 'd': // Data register (equivalent to 'r')
1576 case 'h': // High-part register
1577 case 'r': // General-purpose register
1578 Weight =
1579 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1580 break;
1581
1582 case 'f': // Floating-point register
1583 if (!useSoftFloat())
1584 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1585 break;
1586
1587 case 'v': // Vector register
1588 if (Subtarget.hasVector())
1589 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1590 : CW_Default;
1591 break;
1592
1593 case 'I': // Unsigned 8-bit constant
1594 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1595 if (isUInt<8>(C->getZExtValue()))
1596 Weight = CW_Constant;
1597 break;
1598
1599 case 'J': // Unsigned 12-bit constant
1600 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1601 if (isUInt<12>(C->getZExtValue()))
1602 Weight = CW_Constant;
1603 break;
1604
1605 case 'K': // Signed 16-bit constant
1606 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1607 if (isInt<16>(C->getSExtValue()))
1608 Weight = CW_Constant;
1609 break;
1610
1611 case 'L': // Signed 20-bit displacement (on all targets we support)
1612 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1613 if (isInt<20>(C->getSExtValue()))
1614 Weight = CW_Constant;
1615 break;
1616
1617 case 'M': // 0x7fffffff
1618 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1619 if (C->getZExtValue() == 0x7fffffff)
1620 Weight = CW_Constant;
1621 break;
1622 }
1623 return Weight;
1624}
1625
1626// Parse a "{tNNN}" register constraint for which the register type "t"
1627// has already been verified. MC is the class associated with "t" and
1628// Map maps 0-based register numbers to LLVM register numbers.
1629static std::pair<unsigned, const TargetRegisterClass *>
1631 const unsigned *Map, unsigned Size) {
1632 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1633 if (isdigit(Constraint[2])) {
1634 unsigned Index;
1635 bool Failed =
1636 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1637 if (!Failed && Index < Size && Map[Index])
1638 return std::make_pair(Map[Index], RC);
1639 }
1640 return std::make_pair(0U, nullptr);
1641}
1642
1643std::pair<unsigned, const TargetRegisterClass *>
1645 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1646 if (Constraint.size() == 1) {
1647 // GCC Constraint Letters
1648 switch (Constraint[0]) {
1649 default: break;
1650 case 'd': // Data register (equivalent to 'r')
1651 case 'r': // General-purpose register
1652 if (VT.getSizeInBits() == 64)
1653 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1654 else if (VT.getSizeInBits() == 128)
1655 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1656 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1657
1658 case 'a': // Address register
1659 if (VT == MVT::i64)
1660 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1661 else if (VT == MVT::i128)
1662 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1663 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1664
1665 case 'h': // High-part register (an LLVM extension)
1666 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1667
1668 case 'f': // Floating-point register
1669 if (!useSoftFloat()) {
1670 if (VT.getSizeInBits() == 16)
1671 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1672 else if (VT.getSizeInBits() == 64)
1673 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1674 else if (VT.getSizeInBits() == 128)
1675 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1676 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1677 }
1678 break;
1679
1680 case 'v': // Vector register
1681 if (Subtarget.hasVector()) {
1682 if (VT.getSizeInBits() == 16)
1683 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1684 if (VT.getSizeInBits() == 32)
1685 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1686 if (VT.getSizeInBits() == 64)
1687 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1688 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1689 }
1690 break;
1691 }
1692 }
1693 if (Constraint.starts_with("{")) {
1694
1695 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1696 // to check the size on.
1697 auto getVTSizeInBits = [&VT]() {
1698 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1699 };
1700
1701 // We need to override the default register parsing for GPRs and FPRs
1702 // because the interpretation depends on VT. The internal names of
1703 // the registers are also different from the external names
1704 // (F0D and F0S instead of F0, etc.).
1705 if (Constraint[1] == 'r') {
1706 if (getVTSizeInBits() == 32)
1707 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1709 if (getVTSizeInBits() == 128)
1710 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1712 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1714 }
1715 if (Constraint[1] == 'f') {
1716 if (useSoftFloat())
1717 return std::make_pair(
1718 0u, static_cast<const TargetRegisterClass *>(nullptr));
1719 if (getVTSizeInBits() == 16)
1720 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1722 if (getVTSizeInBits() == 32)
1723 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1725 if (getVTSizeInBits() == 128)
1726 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1728 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1730 }
1731 if (Constraint[1] == 'v') {
1732 if (!Subtarget.hasVector())
1733 return std::make_pair(
1734 0u, static_cast<const TargetRegisterClass *>(nullptr));
1735 if (getVTSizeInBits() == 16)
1736 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1738 if (getVTSizeInBits() == 32)
1739 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1741 if (getVTSizeInBits() == 64)
1742 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1744 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1746 }
1747 if (Constraint[1] == '@') {
1748 if (StringRef("{@cc}").compare(Constraint) == 0)
1749 return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
1750 }
1751 }
1752 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1753}
1754
1755// FIXME? Maybe this could be a TableGen attribute on some registers and
1756// this table could be generated automatically from RegInfo.
1759 const MachineFunction &MF) const {
1760 Register Reg =
1762 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1763 : SystemZ::NoRegister)
1764 .Case("r15",
1765 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1766 .Default(Register());
1767
1768 return Reg;
1769}
1770
1772 const Constant *PersonalityFn) const {
1773 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1774}
1775
1777 const Constant *PersonalityFn) const {
1778 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1779}
1780
1781// Convert condition code in CCReg to an i32 value.
1783 SDLoc DL(CCReg);
1784 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1785 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1786 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1787}
1788
1789// Lower @cc targets via setcc.
1791 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1792 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1793 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1794 return SDValue();
1795
1796 // Check that return type is valid.
1797 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1798 OpInfo.ConstraintVT.getSizeInBits() < 8)
1799 report_fatal_error("Glue output operand is of invalid type");
1800
1801 if (Glue.getNode()) {
1802 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1803 Chain = Glue.getValue(1);
1804 } else
1805 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1806 return getCCResult(DAG, Glue);
1807}
1808
1810 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1811 SelectionDAG &DAG) const {
1812 // Only support length 1 constraints for now.
1813 if (Constraint.size() == 1) {
1814 switch (Constraint[0]) {
1815 case 'I': // Unsigned 8-bit constant
1816 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1817 if (isUInt<8>(C->getZExtValue()))
1818 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1819 Op.getValueType()));
1820 return;
1821
1822 case 'J': // Unsigned 12-bit constant
1823 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1824 if (isUInt<12>(C->getZExtValue()))
1825 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1826 Op.getValueType()));
1827 return;
1828
1829 case 'K': // Signed 16-bit constant
1830 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1831 if (isInt<16>(C->getSExtValue()))
1832 Ops.push_back(DAG.getSignedTargetConstant(
1833 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1834 return;
1835
1836 case 'L': // Signed 20-bit displacement (on all targets we support)
1837 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1838 if (isInt<20>(C->getSExtValue()))
1839 Ops.push_back(DAG.getSignedTargetConstant(
1840 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1841 return;
1842
1843 case 'M': // 0x7fffffff
1844 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1845 if (C->getZExtValue() == 0x7fffffff)
1846 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1847 Op.getValueType()));
1848 return;
1849 }
1850 }
1852}
1853
1854//===----------------------------------------------------------------------===//
1855// Calling conventions
1856//===----------------------------------------------------------------------===//
1857
1858#include "SystemZGenCallingConv.inc"
1859
1861 CallingConv::ID) const {
1862 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1863 SystemZ::R14D, 0 };
1864 return ScratchRegs;
1865}
1866
1868 Type *ToType) const {
1869 return isTruncateFree(FromType, ToType);
1870}
1871
1873 return CI->isTailCall();
1874}
1875
1876// Value is a value that has been passed to us in the location described by VA
1877// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1878// any loads onto Chain.
1880 CCValAssign &VA, SDValue Chain,
1881 SDValue Value) {
1882 // If the argument has been promoted from a smaller type, insert an
1883 // assertion to capture this.
1884 if (VA.getLocInfo() == CCValAssign::SExt)
1886 DAG.getValueType(VA.getValVT()));
1887 else if (VA.getLocInfo() == CCValAssign::ZExt)
1889 DAG.getValueType(VA.getValVT()));
1890
1891 if (VA.isExtInLoc())
1892 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1893 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1894 // If this is a short vector argument loaded from the stack,
1895 // extend from i64 to full vector size and then bitcast.
1896 assert(VA.getLocVT() == MVT::i64);
1897 assert(VA.getValVT().isVector());
1898 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1899 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1900 } else
1901 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1902 return Value;
1903}
1904
1905// Value is a value of type VA.getValVT() that we need to copy into
1906// the location described by VA. Return a copy of Value converted to
1907 // VA.getLocVT(). The caller is responsible for handling indirect values.
1909 CCValAssign &VA, SDValue Value) {
1910 switch (VA.getLocInfo()) {
1911 case CCValAssign::SExt:
1912 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1913 case CCValAssign::ZExt:
1914 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1915 case CCValAssign::AExt:
1916 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1917 case CCValAssign::BCvt: {
1918 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1919 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1920 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1921 // For an f32 vararg we need to first promote it to an f64 and then
1922 // bitcast it to an i64.
1923 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1924 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1925 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1926 ? MVT::v2i64
1927 : VA.getLocVT();
1928 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1929 // For ELF, this is a short vector argument to be stored to the stack,
1930 // bitcast to v2i64 and then extract first element.
1931 if (BitCastToType == MVT::v2i64)
1932 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1933 DAG.getConstant(0, DL, MVT::i32));
1934 return Value;
1935 }
1936 case CCValAssign::Full:
1937 return Value;
1938 default:
1939 llvm_unreachable("Unhandled getLocInfo()");
1940 }
1941}
1942
1944 SDLoc DL(In);
1945 SDValue Lo, Hi;
1946 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1947 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1948 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1949 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1950 DAG.getConstant(64, DL, MVT::i32)));
1951 } else {
1952 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1953 }
1954
1955 // FIXME: If v2i64 were a legal type, we could use it instead of
1956 // Untyped here. This might enable improved folding.
1957 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1958 MVT::Untyped, Hi, Lo);
1959 return SDValue(Pair, 0);
1960}
1961
1963 SDLoc DL(In);
1964 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1965 DL, MVT::i64, In);
1966 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1967 DL, MVT::i64, In);
1968
1969 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1970 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1971 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1972 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1973 DAG.getConstant(64, DL, MVT::i32));
1974 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1975 } else {
1976 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1977 }
1978}
1979
1981 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1982 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1983 EVT ValueVT = Val.getValueType();
1984 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1985 // Inline assembly operand.
1986 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1987 return true;
1988 }
1989
1990 return false;
1991}
1992
1994 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1995 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1996 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1997 // Inline assembly operand.
1998 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1999 return DAG.getBitcast(ValueVT, Res);
2000 }
2001
2002 return SDValue();
2003}
2004
2005// The first part of a split stack argument is at index I in Args (and
2006// ArgLocs). Return the type of a part and the number of them by reference.
2007template <class ArgTy>
2009 SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
2010 MVT &PartVT, unsigned &NumParts) {
2011 if (!Args[I].Flags.isSplit())
2012 return false;
2013 assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
2014 "ArgLocs havoc.");
2015 PartVT = ArgLocs[I].getValVT();
2016 NumParts = 1;
2017 for (unsigned PartIdx = I + 1;; ++PartIdx) {
2018 assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
2019 assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
2020 ++NumParts;
2021 if (Args[PartIdx].Flags.isSplitEnd())
2022 break;
2023 }
2024 return true;
2025}
2026
2028 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2029 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2030 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2032 MachineFrameInfo &MFI = MF.getFrameInfo();
2033 MachineRegisterInfo &MRI = MF.getRegInfo();
2034 SystemZMachineFunctionInfo *FuncInfo =
2036 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
2037 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2038
2039 // Assign locations to all of the incoming arguments.
2041 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2042 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
2043 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
2044
2045 unsigned NumFixedGPRs = 0;
2046 unsigned NumFixedFPRs = 0;
2047 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2048 SDValue ArgValue;
2049 CCValAssign &VA = ArgLocs[I];
2050 EVT LocVT = VA.getLocVT();
2051 if (VA.isRegLoc()) {
2052 // Arguments passed in registers
2053 const TargetRegisterClass *RC;
2054 switch (LocVT.getSimpleVT().SimpleTy) {
2055 default:
2056 // Integers smaller than i64 should be promoted to i64.
2057 llvm_unreachable("Unexpected argument type");
2058 case MVT::i32:
2059 NumFixedGPRs += 1;
2060 RC = &SystemZ::GR32BitRegClass;
2061 break;
2062 case MVT::i64:
2063 NumFixedGPRs += 1;
2064 RC = &SystemZ::GR64BitRegClass;
2065 break;
2066 case MVT::f16:
2067 NumFixedFPRs += 1;
2068 RC = &SystemZ::FP16BitRegClass;
2069 break;
2070 case MVT::f32:
2071 NumFixedFPRs += 1;
2072 RC = &SystemZ::FP32BitRegClass;
2073 break;
2074 case MVT::f64:
2075 NumFixedFPRs += 1;
2076 RC = &SystemZ::FP64BitRegClass;
2077 break;
2078 case MVT::f128:
2079 NumFixedFPRs += 2;
2080 RC = &SystemZ::FP128BitRegClass;
2081 break;
2082 case MVT::v16i8:
2083 case MVT::v8i16:
2084 case MVT::v4i32:
2085 case MVT::v2i64:
2086 case MVT::v8f16:
2087 case MVT::v4f32:
2088 case MVT::v2f64:
2089 RC = &SystemZ::VR128BitRegClass;
2090 break;
2091 }
2092
2093 Register VReg = MRI.createVirtualRegister(RC);
2094 MRI.addLiveIn(VA.getLocReg(), VReg);
2095 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2096 } else {
2097 assert(VA.isMemLoc() && "Argument not register or memory");
2098
2099 // Create the frame index object for this incoming parameter.
2100 // FIXME: Pre-include call frame size in the offset, should not
2101 // need to manually add it here.
2102 int64_t ArgSPOffset = VA.getLocMemOffset();
2103 if (Subtarget.isTargetXPLINK64()) {
2104 auto &XPRegs =
2105 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2106 ArgSPOffset += XPRegs.getCallFrameSize();
2107 }
2108 int FI =
2109 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2110
2111 // Create the SelectionDAG nodes corresponding to a load
2112 // from this parameter. Unpromoted ints and floats are
2113 // passed as right-justified 8-byte values.
2114 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2115 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2116 VA.getLocVT() == MVT::f16) {
2117 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2118 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2119 DAG.getIntPtrConstant(SlotOffs, DL));
2120 }
2121 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2123 }
2124
2125 // Convert the value of the argument register into the value that's
2126 // being passed.
2127 if (VA.getLocInfo() == CCValAssign::Indirect) {
2128 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2130 // If the original argument was split (e.g. i128), we need
2131 // to load all parts of it here (using the same address).
2132 MVT PartVT;
2133 unsigned NumParts;
2134 if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
2135 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2136 ++I;
2137 CCValAssign &PartVA = ArgLocs[I];
2138 unsigned PartOffset = Ins[I].PartOffset;
2139 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2140 DAG.getIntPtrConstant(PartOffset, DL));
2141 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2143 assert(PartOffset && "Offset should be non-zero.");
2144 }
2145 }
2146 } else
2147 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2148 }
2149
2150 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2151 // Save the number of non-varargs registers for later use by va_start, etc.
2152 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2153 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2154
2155 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2156 Subtarget.getSpecialRegisters());
2157
2158 // Likewise the address (in the form of a frame index) of where the
2159 // first stack vararg would be. The 1-byte size here is arbitrary.
2160 // FIXME: Pre-include call frame size in the offset, should not
2161 // need to manually add it here.
2162 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2163 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2164 FuncInfo->setVarArgsFrameIndex(FI);
2165 }
2166
2167 if (IsVarArg && Subtarget.isTargetELF()) {
2168 // Save the number of non-varargs registers for later use by va_start, etc.
2169 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2170 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2171
2172 // Likewise the address (in the form of a frame index) of where the
2173 // first stack vararg would be. The 1-byte size here is arbitrary.
2174 int64_t VarArgsOffset = CCInfo.getStackSize();
2175 FuncInfo->setVarArgsFrameIndex(
2176 MFI.CreateFixedObject(1, VarArgsOffset, true));
2177
2178 // ...and a similar frame index for the caller-allocated save area
2179 // that will be used to store the incoming registers.
2180 int64_t RegSaveOffset =
2181 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2182 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2183 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2184
2185 // Store the FPR varargs in the reserved frame slots. (We store the
2186 // GPRs as part of the prologue.)
2187 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2189 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2190 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2191 int FI =
2193 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2195 &SystemZ::FP64BitRegClass);
2196 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2197 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2199 }
2200 // Join the stores, which are independent of one another.
2201 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2202 ArrayRef(&MemOps[NumFixedFPRs],
2203 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2204 }
2205 }
2206
2207 if (Subtarget.isTargetXPLINK64()) {
2208 // Create virtual register for handling incoming "ADA" special register (R5)
2209 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2210 Register ADAvReg = MRI.createVirtualRegister(RC);
2211 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2212 Subtarget.getSpecialRegisters());
2213 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2214 FuncInfo->setADAVirtualRegister(ADAvReg);
2215 }
2216 return Chain;
2217}
2218
2219static bool canUseSiblingCall(const CCState &ArgCCInfo,
2222 // Punt if there are any indirect or stack arguments, or if the call
2223 // needs the callee-saved argument register R6, or if the call uses
2224 // the callee-saved register arguments SwiftSelf and SwiftError.
2225 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2226 CCValAssign &VA = ArgLocs[I];
2228 return false;
2229 if (!VA.isRegLoc())
2230 return false;
2231 Register Reg = VA.getLocReg();
2232 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2233 return false;
2234 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2235 return false;
2236 }
2237 return true;
2238}
2239
2241 unsigned Offset, bool LoadAdr = false) {
2244 Register ADAvReg = MFI->getADAVirtualRegister();
2246
2247 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2248 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2249
2250 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2251 if (!LoadAdr)
2252 Result = DAG.getLoad(
2253 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2255
2256 return Result;
2257}
2258
2259// ADA access using Global value
2260// Note: for functions, address of descriptor is returned
// The global GV is classified as function / non-function (looking through a
// GlobalAlias to its aliasee object) and as internal (internal or private
// linkage) / external.  The classification selects the target flag ADAtype
// used for the target global address, and LoadAddr is set for internal
// functions.  The result is produced by the offset-based getADAEntry with
// offset 0.
// NOTE(review): the embedded numbering skips 2261, 2272, 2275 and 2277, so
// the start of the signature and the three ADAtype assignments are not
// visible in this listing.
2262 EVT PtrVT) {
2263 unsigned ADAtype;
2264 bool LoadAddr = false;
2265 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2266 bool IsFunction =
2267 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2268 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2269
2270 if (IsFunction) {
2271 if (IsInternal) {
2273 LoadAddr = true;
2274 } else
2276 } else {
2278 }
2279 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2280
2281 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2282}
2283
// Rewrite Callee and compute ADA for a call, both passed by reference.
// Three callee shapes are handled:
//  - a global address: for internal/private linkage, ADA is copied from the
//    function's ADA virtual register, Callee becomes a PCREL_WRAPPER around
//    the target global address, and the function returns true; otherwise
//    ADA and Callee are read through getADAEntry at offsets ADADelta and
//    EPADelta;
//  - an external symbol: ADA and Callee are read through getADAEntry;
//  - anything else (function pointer): ADA and Callee are loaded from
//    Callee + ADADelta and Callee + EPADelta.
// Returns false in every case except the internal-global one.
// NOTE(review): the embedded numbering skips 2288-2289, 2296-2297, 2304,
// 2310, 2319 and 2323, so the definitions of PtrVT, MFI, GA and ES and the
// tails of the two getLoad calls are not visible in this listing.
2284static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2285 SDLoc &DL, SDValue &Chain) {
2286 unsigned ADADelta = 0; // ADA offset in desc.
2287 unsigned EPADelta = 8; // EPA offset in desc.
2290
2291 // XPLink calling convention.
2292 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2293 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2294 G->getGlobal()->hasPrivateLinkage());
2295 if (IsInternal) {
2298 Register ADAvReg = MFI->getADAVirtualRegister();
2299 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2300 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2301 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2302 return true;
2303 } else {
2305 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2306 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2307 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2308 }
2309 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2311 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2312 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2313 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2314 } else {
2315 // Function pointer case
2316 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2317 DAG.getConstant(ADADelta, DL, PtrVT));
2318 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2320 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2321 DAG.getConstant(EPADelta, DL, PtrVT));
2322 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2324 }
2325 return false;
2326}
2327
// Lower an outgoing call described by CLI into DAG nodes.  The visible steps
// are: disable tail calls where unsupported, assign locations to the
// outgoing arguments, spill indirect arguments to stack temporaries, store
// stack arguments, resolve the callee, copy register arguments with glued
// CopyToReg nodes, emit SIBCALL or CALL, and finally copy the returned
// values out of their registers into InVals.  For a tail call the SIBCALL
// node is returned; otherwise the chain after the result copies is returned.
// NOTE(review): the embedded numbering skips 2329, 2333, 2335, 2341, 2354,
// 2371, 2391, 2437, 2498 and 2533, so the function-name line and the
// declarations of Outs, Ins, MF, ArgLocs, RegsToPass, Ops and RetLocs are
// not visible in this listing.
2328SDValue
2330 SmallVectorImpl<SDValue> &InVals) const {
2331 SelectionDAG &DAG = CLI.DAG;
2332 SDLoc &DL = CLI.DL;
2334 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2336 SDValue Chain = CLI.Chain;
2337 SDValue Callee = CLI.Callee;
// IsTailCall is a reference into CLI, so clearing it below is visible to the
// caller.
2338 bool &IsTailCall = CLI.IsTailCall;
2339 CallingConv::ID CallConv = CLI.CallConv;
2340 bool IsVarArg = CLI.IsVarArg;
2342 EVT PtrVT = getPointerTy(MF.getDataLayout());
2343 LLVMContext &Ctx = *DAG.getContext();
2344 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2345
2346 // FIXME: z/OS support to be added in later.
2347 if (Subtarget.isTargetXPLINK64())
2348 IsTailCall = false;
2349
2350 // Integer args <=32 bits should have an extension attribute.
2351 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2352
2353 // Analyze the operands of the call, assigning locations to each operand.
2355 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2356 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2357
2358 // We don't support GuaranteedTailCallOpt, only automatically-detected
2359 // sibling calls.
2360 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2361 IsTailCall = false;
2362
2363 // Get a count of how many bytes are to be pushed on the stack.
2364 unsigned NumBytes = ArgCCInfo.getStackSize();
2365
2366 // Mark the start of the call.
2367 if (!IsTailCall)
2368 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2369
2370 // Copy argument values to their designated locations.
2372 SmallVector<SDValue, 8> MemOpChains;
2373 SDValue StackPtr;
2374 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2375 CCValAssign &VA = ArgLocs[I];
2376 SDValue ArgValue = OutVals[I];
2377
2378 if (VA.getLocInfo() == CCValAssign::Indirect) {
2379 // Store the argument in a stack slot and pass its address.
2380 EVT SlotVT;
2381 MVT PartVT;
2382 unsigned NumParts = 1;
// For a split argument the slot is sized to hold all NumParts parts;
// otherwise it uses the argument's own type.
2383 if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
2384 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
2385 else
2386 SlotVT = Outs[I].VT;
2387 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2388 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2389
2390 MachinePointerInfo StackPtrInfo =
2392 MemOpChains.push_back(
2393 DAG.getStore(Chain, DL, ArgValue, SpillSlot, StackPtrInfo));
2394 // If the original argument was split (e.g. i128), we need
2395 // to store all parts of it here (and pass just one address).
2396 assert(Outs[I].PartOffset == 0);
// Note that this inner loop advances the outer index I past the extra parts.
2397 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2398 ++I;
2399 SDValue PartValue = OutVals[I];
2400 unsigned PartOffset = Outs[I].PartOffset;
2401 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2402 DAG.getIntPtrConstant(PartOffset, DL));
2403 MemOpChains.push_back(
2404 DAG.getStore(Chain, DL, PartValue, Address,
2405 StackPtrInfo.getWithOffset(PartOffset)));
2406 assert(PartOffset && "Offset should be non-zero.");
2407 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2408 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2409 }
2410 ArgValue = SpillSlot;
2411 } else
2412 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2413
2414 if (VA.isRegLoc()) {
2415 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2416 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2417 // and low values.
2418 if (VA.getLocVT() == MVT::i128)
2419 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2420 // Queue up the argument copies and emit them at the end.
2421 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2422 } else {
2423 assert(VA.isMemLoc() && "Argument not register or memory");
2424
2425 // Work out the address of the stack slot. Unpromoted ints and
2426 // floats are passed as right-justified 8-byte values.
// The stack pointer copy is created lazily, on the first memory argument.
2427 if (!StackPtr.getNode())
2428 StackPtr = DAG.getCopyFromReg(Chain, DL,
2429 Regs->getStackPointerRegister(), PtrVT);
2430 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2431 VA.getLocMemOffset();
// Right-justify within the 8-byte slot: +4 for 4-byte values, +6 for f16.
2432 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2433 Offset += 4;
2434 else if (VA.getLocVT() == MVT::f16)
2435 Offset += 6;
2436 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2438
2439 // Emit the store.
2440 MemOpChains.push_back(
2441 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2442
2443 // Although long doubles or vectors are passed through the stack when
2444 // they are vararg (non-fixed arguments), if a long double or vector
2445 // occupies the third and fourth slot of the argument list GPR3 should
2446 // still shadow the third slot of the argument list.
2447 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2448 SDValue ShadowArgValue =
2449 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2450 DAG.getIntPtrConstant(1, DL));
2451 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2452 }
2453 }
2454 }
2455
2456 // Join the stores, which are independent of one another.
2457 if (!MemOpChains.empty())
2458 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2459
2460 // Accept direct calls by converting symbolic call addresses to the
2461 // associated Target* opcodes. Force %r1 to be used for indirect
2462 // tail calls.
2463 SDValue Glue;
2464
2465 if (Subtarget.isTargetXPLINK64()) {
2466 SDValue ADA;
2467 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
// When the helper did not produce a direct callee, the callee address is
// copied into the dedicated address-of-callee register.
2468 if (!IsBRASL) {
2469 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2470 ->getAddressOfCalleeRegister();
2471 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2472 Glue = Chain.getValue(1);
2473 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2474 }
// The ADA value is passed like any other register argument.
2475 RegsToPass.push_back(std::make_pair(
2476 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2477 } else {
2478 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2479 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2480 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2481 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2482 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2483 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2484 } else if (IsTailCall) {
2485 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2486 Glue = Chain.getValue(1);
2487 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2488 }
2489 }
2490
2491 // Build a sequence of copy-to-reg nodes, chained and glued together.
2492 for (const auto &[Reg, N] : RegsToPass) {
2493 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2494 Glue = Chain.getValue(1);
2495 }
2496
2497 // The first call operand is the chain and the second is the target address.
2499 Ops.push_back(Chain);
2500 Ops.push_back(Callee);
2501
2502 // Add argument registers to the end of the list so that they are
2503 // known live into the call.
2504 for (const auto &[Reg, N] : RegsToPass)
2505 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2506
2507 // Add a register mask operand representing the call-preserved registers.
2508 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2509 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2510 assert(Mask && "Missing call preserved mask for calling convention");
2511 Ops.push_back(DAG.getRegisterMask(Mask));
2512
2513 // Glue the call to the argument copies, if any.
2514 if (Glue.getNode())
2515 Ops.push_back(Glue);
2516
2517 // Emit the call.
2518 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// A sibling call returns here: no CALLSEQ_END and no result copies follow.
2519 if (IsTailCall) {
2520 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2521 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2522 return Ret;
2523 }
2524 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2525 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2526 Glue = Chain.getValue(1);
2527
2528 // Mark the end of the call, which is glued to the call itself.
2529 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2530 Glue = Chain.getValue(1);
2531
2532 // Assign locations to each value returned by this call.
2534 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2535 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2536
2537 // Copy all of the result registers out of their specified physreg.
2538 for (CCValAssign &VA : RetLocs) {
2539 // Copy the value out, gluing the copy to the end of the call sequence.
2540 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2541 VA.getLocVT(), Glue);
2542 Chain = RetValue.getValue(1);
2543 Glue = RetValue.getValue(2);
2544
2545 // Convert the value of the return register into the value that's
2546 // being returned.
2547 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2548 }
2549
2550 return Chain;
2551}
2552
2553// Generate a call taking the given operands as arguments and returning a
2554// result of type RetVT.
// Each operand in Ops becomes one argument entry whose sign/zero-extension
// flags are derived from shouldSignExtendTypeInLibCall and IsSigned; exactly
// one of the two flags is set per entry.  The callee is the external symbol
// CalleeName, and the return value gets the same sign/zero-extension
// treatment.  The result is whatever LowerCallTo returns for the assembled
// call description.
// NOTE(review): the embedded numbering skips 2555, 2559, 2563 and 2574, so
// the function-name line and the declarations of Args, Entry and CLI are not
// visible in this listing.
2556 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2557 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2558 bool DoesNotReturn, bool IsReturnValueUsed) const {
2560 Args.reserve(Ops.size());
2561
2562 for (SDValue Op : Ops) {
2564 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2565 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2566 Entry.IsZExt = !Entry.IsSExt;
2567 Args.push_back(Entry);
2568 }
2569
2570 SDValue Callee =
2571 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2572
2573 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2575 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2576 CLI.setDebugLoc(DL)
2577 .setChain(Chain)
2578 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2579 .setNoReturn(DoesNotReturn)
2580 .setDiscardResult(!IsReturnValueUsed)
2581 .setSExtResult(SignExtend)
2582 .setZExtResult(!SignExtend);
2583 return LowerCallTo(CLI);
2584}
2585
// Report whether the given return values can be lowered with RetCC_SystemZ.
// Any scalar integer wider than 64 bits is rejected up front; otherwise the
// answer comes from CCState::CheckReturn.
// NOTE(review): the embedded numbering skips 2586 and 2596, so the
// function-name line and the declaration of RetLocs are not visible in this
// listing.
2587 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2588 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2589 const Type *RetTy) const {
2590 // Special case that we cannot easily detect in RetCC_SystemZ since
2591 // i128 may not be a legal type.
2592 for (auto &Out : Outs)
2593 if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
2594 return false;
2595
2597 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2598 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2599}
2600
// Lower a function return.  Return-value locations are assigned with
// RetCC_SystemZ; a return with no locations produces a bare RET_GLUE on
// Chain.  Otherwise each value is converted to its location type and copied
// into its return register with chained, glued CopyToReg nodes, and the
// final RET_GLUE node takes the chain, the return registers and the glue as
// operands.  A non-void return under the GHC calling convention is a fatal
// error.
// NOTE(review): the embedded numbering skips 2602, 2604, 2607, 2613 and
// 2626, so the function-name line, part of the parameter list and the
// declarations of MF, RetLocs and RetOps are not visible in this listing.
2601SDValue
2603 bool IsVarArg,
2605 const SmallVectorImpl<SDValue> &OutVals,
2606 const SDLoc &DL, SelectionDAG &DAG) const {
2608
2609 // Integer args <=32 bits should have an extension attribute.
2610 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2611
2612 // Assign locations to each returned value.
2614 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2615 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2616
2617 // Quick exit for void returns
2618 if (RetLocs.empty())
2619 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2620
2621 if (CallConv == CallingConv::GHC)
2622 report_fatal_error("GHC functions return void only");
2623
2624 // Copy the result values into the output registers.
2625 SDValue Glue;
// Slot 0 is a placeholder for the chain; it is overwritten after the loop.
2627 RetOps.push_back(Chain);
2628 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2629 CCValAssign &VA = RetLocs[I];
2630 SDValue RetValue = OutVals[I];
2631
2632 // Make the return register live on exit.
2633 assert(VA.isRegLoc() && "Can only return in registers!");
2634
2635 // Promote the value as required.
2636 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2637
2638 // Chain and glue the copies together.
2639 Register Reg = VA.getLocReg();
2640 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2641 Glue = Chain.getValue(1);
2642 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2643 }
2644
2645 // Update chain and glue.
2646 RetOps[0] = Chain;
2647 if (Glue.getNode())
2648 RetOps.push_back(Glue);
2649
2650 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2651}
2652
2653// Return true if Op is an intrinsic node with chain that returns the CC value
2654// as its only (other) argument. Provide the associated SystemZISD opcode and
2655// the mask of valid CC values if so.
2656static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2657 unsigned &CCValid) {
2658 unsigned Id = Op.getConstantOperandVal(1);
2659 switch (Id) {
2660 case Intrinsic::s390_tbegin:
2661 Opcode = SystemZISD::TBEGIN;
2662 CCValid = SystemZ::CCMASK_TBEGIN;
2663 return true;
2664
2665 case Intrinsic::s390_tbegin_nofloat:
2666 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2667 CCValid = SystemZ::CCMASK_TBEGIN;
2668 return true;
2669
2670 case Intrinsic::s390_tend:
2671 Opcode = SystemZISD::TEND;
2672 CCValid = SystemZ::CCMASK_TEND;
2673 return true;
2674
2675 default:
2676 return false;
2677 }
2678}
2679
2680// Return true if Op is an intrinsic node without chain that returns the
2681// CC value as its final argument. Provide the associated SystemZISD
2682// opcode and the mask of valid CC values if so.
// The intrinsic ID is read from operand 0.  Each group of case labels maps a
// family of intrinsics onto one SystemZISD opcode; unrecognised IDs return
// false without writing Opcode or CCValid.
// NOTE(review): the embedded numbering skips 2778, so no CCValid assignment
// is visible for the s390_vistr* group in this listing — confirm the value
// against the upstream source before relying on it.
2683static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2684 unsigned Id = Op.getConstantOperandVal(0);
2685 switch (Id) {
2686 case Intrinsic::s390_vpkshs:
2687 case Intrinsic::s390_vpksfs:
2688 case Intrinsic::s390_vpksgs:
2689 Opcode = SystemZISD::PACKS_CC;
2690 CCValid = SystemZ::CCMASK_VCMP;
2691 return true;
2692
2693 case Intrinsic::s390_vpklshs:
2694 case Intrinsic::s390_vpklsfs:
2695 case Intrinsic::s390_vpklsgs:
2696 Opcode = SystemZISD::PACKLS_CC;
2697 CCValid = SystemZ::CCMASK_VCMP;
2698 return true;
2699
2700 case Intrinsic::s390_vceqbs:
2701 case Intrinsic::s390_vceqhs:
2702 case Intrinsic::s390_vceqfs:
2703 case Intrinsic::s390_vceqgs:
2704 case Intrinsic::s390_vceqqs:
2705 Opcode = SystemZISD::VICMPES;
2706 CCValid = SystemZ::CCMASK_VCMP;
2707 return true;
2708
2709 case Intrinsic::s390_vchbs:
2710 case Intrinsic::s390_vchhs:
2711 case Intrinsic::s390_vchfs:
2712 case Intrinsic::s390_vchgs:
2713 case Intrinsic::s390_vchqs:
2714 Opcode = SystemZISD::VICMPHS;
2715 CCValid = SystemZ::CCMASK_VCMP;
2716 return true;
2717
2718 case Intrinsic::s390_vchlbs:
2719 case Intrinsic::s390_vchlhs:
2720 case Intrinsic::s390_vchlfs:
2721 case Intrinsic::s390_vchlgs:
2722 case Intrinsic::s390_vchlqs:
2723 Opcode = SystemZISD::VICMPHLS;
2724 CCValid = SystemZ::CCMASK_VCMP;
2725 return true;
2726
2727 case Intrinsic::s390_vtm:
2728 Opcode = SystemZISD::VTM;
2729 CCValid = SystemZ::CCMASK_VCMP;
2730 return true;
2731
2732 case Intrinsic::s390_vfaebs:
2733 case Intrinsic::s390_vfaehs:
2734 case Intrinsic::s390_vfaefs:
2735 Opcode = SystemZISD::VFAE_CC;
2736 CCValid = SystemZ::CCMASK_ANY;
2737 return true;
2738
2739 case Intrinsic::s390_vfaezbs:
2740 case Intrinsic::s390_vfaezhs:
2741 case Intrinsic::s390_vfaezfs:
2742 Opcode = SystemZISD::VFAEZ_CC;
2743 CCValid = SystemZ::CCMASK_ANY;
2744 return true;
2745
2746 case Intrinsic::s390_vfeebs:
2747 case Intrinsic::s390_vfeehs:
2748 case Intrinsic::s390_vfeefs:
2749 Opcode = SystemZISD::VFEE_CC;
2750 CCValid = SystemZ::CCMASK_ANY;
2751 return true;
2752
2753 case Intrinsic::s390_vfeezbs:
2754 case Intrinsic::s390_vfeezhs:
2755 case Intrinsic::s390_vfeezfs:
2756 Opcode = SystemZISD::VFEEZ_CC;
2757 CCValid = SystemZ::CCMASK_ANY;
2758 return true;
2759
2760 case Intrinsic::s390_vfenebs:
2761 case Intrinsic::s390_vfenehs:
2762 case Intrinsic::s390_vfenefs:
2763 Opcode = SystemZISD::VFENE_CC;
2764 CCValid = SystemZ::CCMASK_ANY;
2765 return true;
2766
2767 case Intrinsic::s390_vfenezbs:
2768 case Intrinsic::s390_vfenezhs:
2769 case Intrinsic::s390_vfenezfs:
2770 Opcode = SystemZISD::VFENEZ_CC;
2771 CCValid = SystemZ::CCMASK_ANY;
2772 return true;
2773
2774 case Intrinsic::s390_vistrbs:
2775 case Intrinsic::s390_vistrhs:
2776 case Intrinsic::s390_vistrfs:
2777 Opcode = SystemZISD::VISTR_CC;
2779 return true;
2780
2781 case Intrinsic::s390_vstrcbs:
2782 case Intrinsic::s390_vstrchs:
2783 case Intrinsic::s390_vstrcfs:
2784 Opcode = SystemZISD::VSTRC_CC;
2785 CCValid = SystemZ::CCMASK_ANY;
2786 return true;
2787
2788 case Intrinsic::s390_vstrczbs:
2789 case Intrinsic::s390_vstrczhs:
2790 case Intrinsic::s390_vstrczfs:
2791 Opcode = SystemZISD::VSTRCZ_CC;
2792 CCValid = SystemZ::CCMASK_ANY;
2793 return true;
2794
2795 case Intrinsic::s390_vstrsb:
2796 case Intrinsic::s390_vstrsh:
2797 case Intrinsic::s390_vstrsf:
2798 Opcode = SystemZISD::VSTRS_CC;
2799 CCValid = SystemZ::CCMASK_ANY;
2800 return true;
2801
2802 case Intrinsic::s390_vstrszb:
2803 case Intrinsic::s390_vstrszh:
2804 case Intrinsic::s390_vstrszf:
2805 Opcode = SystemZISD::VSTRSZ_CC;
2806 CCValid = SystemZ::CCMASK_ANY;
2807 return true;
2808
2809 case Intrinsic::s390_vfcedbs:
2810 case Intrinsic::s390_vfcesbs:
2811 Opcode = SystemZISD::VFCMPES;
2812 CCValid = SystemZ::CCMASK_VCMP;
2813 return true;
2814
2815 case Intrinsic::s390_vfchdbs:
2816 case Intrinsic::s390_vfchsbs:
2817 Opcode = SystemZISD::VFCMPHS;
2818 CCValid = SystemZ::CCMASK_VCMP;
2819 return true;
2820
2821 case Intrinsic::s390_vfchedbs:
2822 case Intrinsic::s390_vfchesbs:
2823 Opcode = SystemZISD::VFCMPHES;
2824 CCValid = SystemZ::CCMASK_VCMP;
2825 return true;
2826
2827 case Intrinsic::s390_vftcidb:
2828 case Intrinsic::s390_vftcisb:
2829 Opcode = SystemZISD::VFTCI;
2830 CCValid = SystemZ::CCMASK_VCMP;
2831 return true;
2832
2833 case Intrinsic::s390_tdc:
2834 Opcode = SystemZISD::TDC;
2835 CCValid = SystemZ::CCMASK_TDC;
2836 return true;
2837
2838 default:
2839 return false;
2840 }
2841}
2842
2843// Emit an intrinsic with chain and an explicit CC register result.
// The new node has opcode Opcode, result types (i32, Other) and the
// operands of Op minus the intrinsic ID at index 1.  All uses of Op's chain
// result (value 1) are redirected to the new node's chain, and the new node
// is returned.
// NOTE(review): the embedded numbering skips 2844 and 2848, so the start of
// the signature and the declaration of Ops are not visible in this listing.
2845 unsigned Opcode) {
2846 // Copy all operands except the intrinsic ID.
2847 unsigned NumOps = Op.getNumOperands();
2849 Ops.reserve(NumOps - 1);
2850 Ops.push_back(Op.getOperand(0));
2851 for (unsigned I = 2; I < NumOps; ++I)
2852 Ops.push_back(Op.getOperand(I));
2853
2854 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2855 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2856 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2857 SDValue OldChain = SDValue(Op.getNode(), 1);
2858 SDValue NewChain = SDValue(Intr.getNode(), 1);
2859 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2860 return Intr.getNode();
2861}
2862
2863// Emit an intrinsic with an explicit CC register result.
// The new node has opcode Opcode, the same value-type list as Op, and the
// operands of Op minus the intrinsic ID at index 0.  An f16 operand is
// converted to f32 first; the assertion documents that this is only expected
// for operand 1 of s390_tdc.
// NOTE(review): the embedded numbering skips 2864 and 2869, so the start of
// the signature and the declaration of Ops are not visible in this listing.
2865 unsigned Opcode) {
2866 // Copy all operands except the intrinsic ID.
2867 SDLoc DL(Op);
2868 unsigned NumOps = Op.getNumOperands();
2870 Ops.reserve(NumOps - 1);
2871 for (unsigned I = 1; I < NumOps; ++I) {
2872 SDValue CurrOper = Op.getOperand(I);
2873 if (CurrOper.getValueType() == MVT::f16) {
2874 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2875 "Unhandled intrinsic with f16 operand.");
2876 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2877 }
2878 Ops.push_back(CurrOper);
2879 }
2880
2881 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2882 return Intr.getNode();
2883}
2884
2885// CC is a comparison that will be implemented using an integer or
2886// floating-point comparison. Return the condition code mask for
2887// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2888// unsigned comparisons and clear for signed ones. In the floating-point
2889// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
// The local CONV(X) macro expands to three case labels for condition X: the
// plain SETX and ordered SETOX forms both yield CCMASK_CMP_X, while the
// SETUX form additionally ORs in CCMASK_CMP_UO.
// NOTE(review): the embedded numbering skips 2890 and 2908, so the function
// signature and one case of the switch are not visible in this listing.
2891#define CONV(X) \
2892 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2893 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2894 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2895
2896 switch (CC) {
2897 default:
2898 llvm_unreachable("Invalid integer condition!");
2899
2900 CONV(EQ);
2901 CONV(NE);
2902 CONV(GT);
2903 CONV(GE);
2904 CONV(LT);
2905 CONV(LE);
2906
2907 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2909 }
2910#undef CONV
2911}
2912
2913// If C can be converted to a comparison against zero, adjust the operands
2914// as necessary.
2915static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2916 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2917 return;
2918
2919 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2920 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2921 return;
2922
2923 int64_t Value = ConstOp1->getSExtValue();
2924 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2925 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2926 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2927 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2928 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2929 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2930 }
2931}
2932
2933// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2934// adjust the operands as necessary.
// On success C.Op0 is an i32 extending load of the matching extension kind,
// C.Op1 is an i32 constant holding the (possibly masked or adjusted) value,
// and C.ICmpType / C.CCMask may have been updated.  Any early return leaves
// C as it was on entry.
// NOTE(review): the embedded numbering skips 2991-2992, so the remainder of
// the ExtType initialiser is not visible in this listing.
2935static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2936 Comparison &C) {
2937 // For us to make any changes, it must be a comparison between a single-use
2938 // load and a constant.
2939 if (!C.Op0.hasOneUse() ||
2940 C.Op0.getOpcode() != ISD::LOAD ||
2941 C.Op1.getOpcode() != ISD::Constant)
2942 return;
2943
2944 // We must have an 8- or 16-bit load.
2945 auto *Load = cast<LoadSDNode>(C.Op0);
2946 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2947 if ((NumBits != 8 && NumBits != 16) ||
2948 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2949 return;
2950
2951 // The load must be an extending one and the constant must be within the
2952 // range of the unextended value.
// NOTE(review): C.Op1 was already checked to be ISD::Constant above, so the
// null test on ConstOp1 looks redundant — confirm.
2953 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2954 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2955 return;
2956 uint64_t Value = ConstOp1->getZExtValue();
// NumBits is 8 or 16 here, so Mask is 0xff or 0xffff.
2957 uint64_t Mask = (1 << NumBits) - 1;
2958 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2959 // Make sure that ConstOp1 is in range of C.Op0.
2960 int64_t SignedValue = ConstOp1->getSExtValue();
// Adding 2^(NumBits-1) biases the signed range onto [0, Mask]; anything
// larger is outside the range of a sign-extended NumBits-bit value.
2961 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2962 return;
2963 if (C.ICmpType != SystemZICMP::SignedOnly) {
2964 // Unsigned comparison between two sign-extended values is equivalent
2965 // to unsigned comparison between two zero-extended values.
2966 Value &= Mask;
2967 } else if (NumBits == 8) {
2968 // Try to treat the comparison as unsigned, so that we can use CLI.
2969 // Adjust CCMask and Value as necessary.
2970 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2971 // Test whether the high bit of the byte is set.
2972 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2973 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2974 // Test whether the high bit of the byte is clear.
2975 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2976 else
2977 // No instruction exists for this combination.
2978 return;
2979 C.ICmpType = SystemZICMP::UnsignedOnly;
2980 }
2981 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2982 if (Value > Mask)
2983 return;
2984 // If the constant is in range, we can use any comparison.
2985 C.ICmpType = SystemZICMP::Any;
2986 } else
2987 return;
2988
2989 // Make sure that the first operand is an i32 of the right extension type.
2990 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2993 if (C.Op0.getValueType() != MVT::i32 ||
2994 Load->getExtensionType() != ExtType) {
2995 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2996 Load->getBasePtr(), Load->getPointerInfo(),
2997 Load->getMemoryVT(), Load->getAlign(),
2998 Load->getMemOperand()->getFlags());
2999 // Update the chain uses.
3000 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
3001 }
3002
3003 // Make sure that the second operand is an i32 with the right value.
3004 if (C.Op1.getValueType() != MVT::i32 ||
3005 Value != ConstOp1->getZExtValue())
3006 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
3007}
3008
3009// Return true if Op is either an unextended load, or a load suitable
3010// for integer register-memory comparisons of type ICmpType.
3011static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
3012 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
3013 if (Load) {
3014 // There are no instructions to compare a register with a memory byte.
3015 if (Load->getMemoryVT() == MVT::i8)
3016 return false;
3017 // Otherwise decide on extension type.
3018 switch (Load->getExtensionType()) {
3019 case ISD::NON_EXTLOAD:
3020 return true;
3021 case ISD::SEXTLOAD:
3022 return ICmpType != SystemZICMP::UnsignedOnly;
3023 case ISD::ZEXTLOAD:
3024 return ICmpType != SystemZICMP::SignedOnly;
3025 default:
3026 break;
3027 }
3028 }
3029 return false;
3030}
3031
3032// Return true if it is better to swap the operands of C.
3033static bool shouldSwapCmpOperands(const Comparison &C) {
3034 // If one side of the compare is a load of the stackguard reference value,
3035 // then that load should be Op1.
3036 if (C.Op0.isMachineOpcode() &&
3037 (C.Op0.getMachineOpcode() == SystemZ::LOAD_STACK_GUARD))
3038 return true;
3039
3040 // Leave i128 and f128 comparisons alone, since they have no memory forms.
3041 if (C.Op0.getValueType() == MVT::i128)
3042 return false;
3043 if (C.Op0.getValueType() == MVT::f128)
3044 return false;
3045
3046 // Always keep a floating-point constant second, since comparisons with
3047 // zero can use LOAD TEST and comparisons with other constants make a
3048 // natural memory operand.
3049 if (isa<ConstantFPSDNode>(C.Op1))
3050 return false;
3051
3052 // Never swap comparisons with zero since there are many ways to optimize
3053 // those later.
3054 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3055 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
3056 return false;
3057
3058 // Also keep natural memory operands second if the loaded value is
3059 // only used here. Several comparisons have memory forms.
3060 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
3061 return false;
3062
3063 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3064 // In that case we generally prefer the memory to be second.
3065 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3066 // The only exceptions are when the second operand is a constant and
3067 // we can use things like CHHSI.
3068 if (!ConstOp1)
3069 return true;
3070 // The unsigned memory-immediate instructions can handle 16-bit
3071 // unsigned integers.
3072 if (C.ICmpType != SystemZICMP::SignedOnly &&
3073 isUInt<16>(ConstOp1->getZExtValue()))
3074 return false;
3075 // The signed memory-immediate instructions can handle 16-bit
3076 // signed integers.
3077 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3078 isInt<16>(ConstOp1->getSExtValue()))
3079 return false;
3080 return true;
3081 }
3082
3083 // Try to promote the use of CGFR and CLGFR.
3084 unsigned Opcode0 = C.Op0.getOpcode();
3085 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3086 return true;
3087 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3088 return true;
3089 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3090 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3091 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3092 return true;
3093
3094 return false;
3095}
3096
3097// Check whether C tests for equality between X and Y and whether X - Y
3098// or Y - X is also computed. In that case it's better to compare the
3099// result of the subtraction against zero.
// Only EQ and NE masks are considered.  The first matching SUB user of
// C.Op0 wins: its no-wrap flags are dropped, C.Op0 becomes the subtraction
// result and C.Op1 becomes a zero constant of the subtraction's type.
// NOTE(review): the embedded numbering skips 3100, so the start of the
// signature is not visible in this listing.
3101 Comparison &C) {
3102 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3103 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3104 for (SDNode *N : C.Op0->users()) {
3105 if (N->getOpcode() == ISD::SUB &&
3106 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3107 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3108 // Disable the nsw and nuw flags: the backend needs to handle
3109 // overflow as well during comparison elimination.
3110 N->dropFlags(SDNodeFlags::NoWrap);
3111 C.Op0 = SDValue(N, 0);
3112 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3113 return;
3114 }
3115 }
3116 }
3117}
3118
3119// Check whether C compares a floating-point value with zero and if that
3120// floating-point value is also negated. In this case we can use the
3121// negation to set CC, so avoiding separate LOAD AND TEST and
3122// LOAD (NEGATIVE/COMPLEMENT) instructions.
3123static void adjustForFNeg(Comparison &C) {
3124 // This optimization is invalid for strict comparisons, since FNEG
3125 // does not raise any exceptions.
3126 if (C.Chain)
3127 return;
3128 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3129 if (C1 && C1->isZero()) {
3130 for (SDNode *N : C.Op0->users()) {
3131 if (N->getOpcode() == ISD::FNEG) {
3132 C.Op0 = SDValue(N, 0);
3133 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3134 return;
3135 }
3136 }
3137 }
3138}
3139
3140// Check whether C compares (shl X, 32) with 0 and whether X is
3141// also sign-extended. In that case it is better to test the result
3142// of the sign extension using LTGFR.
3143//
3144// This case is important because InstCombine transforms a comparison
3145// with (sext (trunc X)) into a comparison with (shl X, 32).
3146static void adjustForLTGFR(Comparison &C) {
3147 // Check for a comparison between (shl X, 32) and 0.
3148 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3149 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3150 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3151 if (C1 && C1->getZExtValue() == 32) {
3152 SDValue ShlOp0 = C.Op0.getOperand(0);
3153 // See whether X has any SIGN_EXTEND_INREG uses.
3154 for (SDNode *N : ShlOp0->users()) {
3155 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3156 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3157 C.Op0 = SDValue(N, 0);
3158 return;
3159 }
3160 }
3161 }
3162 }
3163}
3164
3165// If C compares the truncation of an extending load, try to compare
3166// the untruncated value instead. This exposes more opportunities to
3167// reuse CC.
3168static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3169 Comparison &C) {
3170 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3171 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3172 C.Op1.getOpcode() == ISD::Constant &&
3173 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3174 C.Op1->getAsZExtVal() == 0) {
3175 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3176 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3177 C.Op0.getValueSizeInBits().getFixedValue()) {
3178 unsigned Type = L->getExtensionType();
3179 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3180 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3181 C.Op0 = C.Op0.getOperand(0);
3182 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3183 }
3184 }
3185 }
3186}
3187
3188// Adjust C if it is a check of the stack guard against a stack guard
3189// instance stored on the stack. Specifically, this checks that:
3190// - The operands are a load from a stack slot and a load of the stack guard
3191// - The original opcode is ICMP
3192// - ICmpType is compatible with unsigned comparison.
// When every check passes, C is rewritten in place to a CMP_STACKGUARD whose
// Op1 is null; otherwise C is left unchanged.
// NOTE(review): the listing line that carries this function's name and its
// leading parameters is not present here.
3194 Comparison &C) {
3195
3196 // Opcode must be ICMP.
3197 if (C.Opcode != SystemZISD::ICMP)
3198 return;
3199 // ICmpType must be Unsigned or Any.
3200 if (C.ICmpType == SystemZICMP::SignedOnly)
3201 return;
3202 // Op0 must be a normal load whose operand 1 is a FrameIndex node.
3203 if (!(ISD::isNormalLoad(C.Op0.getNode()) &&
3204 dyn_cast<FrameIndexSDNode>(C.Op0.getOperand(1))))
3205 return;
3206 // Op1 must be a LOAD_STACK_GUARD machine node.
3207 if (!C.Op1.isMachineOpcode() ||
3208 C.Op1.getMachineOpcode() != SystemZ::LOAD_STACK_GUARD)
3209 return;
3210
3211 // At this point we are sure that this is a proper CMP_STACKGUARD
3212 // case, update the opcode to reflect this. Op1 is cleared because
 // emitCmp builds the CMP_STACKGUARD node from Op0 alone.
3213 C.Opcode = SystemZISD::CMP_STACKGUARD;
3214 C.Op1 = SDValue();
3215}
3216
3217// Return true if shift operation N has an in-range constant shift value.
3218// Store it in ShiftVal if so.
3219static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3220 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3221 if (!Shift)
3222 return false;
3223
3224 uint64_t Amount = Shift->getZExtValue();
3225 if (Amount >= N.getValueSizeInBits())
3226 return false;
3227
3228 ShiftVal = Amount;
3229 return true;
3230}
3231
3232// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3233// instruction and whether the CC value is descriptive enough to handle
3234// a comparison of type ICmpType between the AND result and CmpVal.
3235// CCMask says which comparison result is being tested and BitSize is
3236// the number of bits in the operands. If TEST UNDER MASK can be used,
3237// return the corresponding CC mask, otherwise return 0.
// NOTE(review): in this listing the declaration of High and the statement
// that followed each `if (CCMask == ...)` test below are not present;
// presumably each one returned a TEST UNDER MASK condition mask -- confirm
// against the upstream source before relying on this copy.
3238static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3239 uint64_t Mask, uint64_t CmpVal,
3240 unsigned ICmpType) {
3241 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3242
3243 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3244 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3245 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3246 return 0;
3247
3248 // Work out the masks for the lowest and highest bits.
 // Low is the single lowest set bit of Mask (Mask is nonzero, see the
 // assertion above).
3250 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3251
3252 // Signed ordered comparisons are effectively unsigned if the sign
3253 // bit is dropped.
3254 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3255
3256 // Check for equality comparisons with 0, or the equivalent.
3257 if (CmpVal == 0) {
3258 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3260 if (CCMask == SystemZ::CCMASK_CMP_NE)
3262 }
 // (X & Mask) < CmpVal with 0 < CmpVal <= Low can only hold when no mask
 // bit is set, so it is equivalent to a comparison with 0.
3263 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3264 if (CCMask == SystemZ::CCMASK_CMP_LT)
3266 if (CCMask == SystemZ::CCMASK_CMP_GE)
3268 }
 // Likewise for <= / > with CmpVal < Low.
3269 if (EffectivelyUnsigned && CmpVal < Low) {
3270 if (CCMask == SystemZ::CCMASK_CMP_LE)
3272 if (CCMask == SystemZ::CCMASK_CMP_GT)
3274 }
3275
3276 // Check for equality comparisons with the mask, or the equivalent.
3277 if (CmpVal == Mask) {
3278 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3280 if (CCMask == SystemZ::CCMASK_CMP_NE)
3282 }
 // Values in [Mask - Low, Mask) or (Mask - Low, Mask] separate "all mask
 // bits set" from every other possible AND result.
3283 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3284 if (CCMask == SystemZ::CCMASK_CMP_GT)
3286 if (CCMask == SystemZ::CCMASK_CMP_LE)
3288 }
3289 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3290 if (CCMask == SystemZ::CCMASK_CMP_GE)
3292 if (CCMask == SystemZ::CCMASK_CMP_LT)
3294 }
3295
3296 // Check for ordered comparisons with the top bit.
3297 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3298 if (CCMask == SystemZ::CCMASK_CMP_LE)
3300 if (CCMask == SystemZ::CCMASK_CMP_GT)
3302 }
3303 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3304 if (CCMask == SystemZ::CCMASK_CMP_LT)
3306 if (CCMask == SystemZ::CCMASK_CMP_GE)
3308 }
3309
3310 // If there are just two bits, we can do equality checks for Low and High
3311 // as well.
3312 if (Mask == Low + High) {
3313 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3315 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3317 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3319 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3321 }
3322
3323 // Looks like we've exhausted our options.
3324 return 0;
3325}
3326
3327// See whether C can be implemented as a TEST UNDER MASK instruction.
3328// Update the arguments with the TM version if so. C is only modified when
// the transformation succeeds.
// NOTE(review): the listing line carrying this function's name and leading
// parameters, and the line that closed the SRL condition below, are not
// present here.
3330 Comparison &C) {
3331 // Use VECTOR TEST UNDER MASK for i128 operations.
3332 if (C.Op0.getValueType() == MVT::i128) {
3333 // We can use VTM for EQ/NE comparisons of x & y against 0.
3334 if (C.Op0.getOpcode() == ISD::AND &&
3335 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3336 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3337 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3338 if (Mask && Mask->getAPIntValue() == 0) {
3339 C.Opcode = SystemZISD::VTM;
 // Op1 is computed first because it reads the operands of the AND
 // that is still held in C.Op0.
3340 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3341 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3342 C.CCValid = SystemZ::CCMASK_VCMP;
3343 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3344 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3345 else
3346 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3347 }
3348 }
 // Nothing below applies to i128 operands.
3349 return;
3350 }
3351
3352 // Check that we have a comparison with a constant.
3353 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3354 if (!ConstOp1)
3355 return;
3356 uint64_t CmpVal = ConstOp1->getZExtValue();
3357
3358 // Check whether the nonconstant input is an AND with a constant mask.
 // All tentative changes are made to the copy NewC; C itself is updated
 // only at the very end.
3359 Comparison NewC(C);
3360 uint64_t MaskVal;
3361 ConstantSDNode *Mask = nullptr;
3362 if (C.Op0.getOpcode() == ISD::AND) {
3363 NewC.Op0 = C.Op0.getOperand(0);
3364 NewC.Op1 = C.Op0.getOperand(1);
3365 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3366 if (!Mask)
3367 return;
3368 MaskVal = Mask->getZExtValue();
3369 } else {
3370 // There is no instruction to compare with a 64-bit immediate
3371 // so use TMHH instead if possible. We need an unsigned ordered
3372 // comparison with an i64 immediate.
3373 if (NewC.Op0.getValueType() != MVT::i64 ||
3374 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3375 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3376 NewC.ICmpType == SystemZICMP::SignedOnly)
3377 return;
3378 // Convert LE and GT comparisons into LT and GE.
3379 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3380 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
 // CmpVal + 1 would wrap around to 0.
3381 if (CmpVal == uint64_t(-1))
3382 return;
3383 CmpVal += 1;
3384 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3385 }
3386 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3387 // be masked off without changing the result. CmpVal & -CmpVal isolates
 // the lowest set bit of CmpVal; negating it gives a mask of that bit and
 // all bits above it (or 0 when CmpVal is 0, which is rejected below).
3388 MaskVal = -(CmpVal & -CmpVal);
3389 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3390 }
3391 if (!MaskVal)
3392 return;
3393
3394 // Check whether the combination of mask, comparison value and comparison
3395 // type are suitable. If Op0 is a shift by an in-range constant, first try
 // to test the unshifted value using a mask and comparison value shifted
 // the opposite way, provided the shifted mask is nonzero and no set bit
 // of CmpVal is lost by the shift.
3396 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3397 unsigned NewCCMask, ShiftVal;
3398 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3399 NewC.Op0.getOpcode() == ISD::SHL &&
3400 isSimpleShift(NewC.Op0, ShiftVal) &&
3401 (MaskVal >> ShiftVal != 0) &&
3402 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3403 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3404 MaskVal >> ShiftVal,
3405 CmpVal >> ShiftVal,
3406 SystemZICMP::Any))) {
3407 NewC.Op0 = NewC.Op0.getOperand(0);
3408 MaskVal >>= ShiftVal;
3409 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3410 NewC.Op0.getOpcode() == ISD::SRL &&
3411 isSimpleShift(NewC.Op0, ShiftVal) &&
3412 (MaskVal << ShiftVal != 0) &&
3413 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3414 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3415 MaskVal << ShiftVal,
3416 CmpVal << ShiftVal,
3418 NewC.Op0 = NewC.Op0.getOperand(0);
3419 MaskVal <<= ShiftVal;
3420 } else {
 // No usable shift: test Op0 directly with the original mask.
3421 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3422 NewC.ICmpType);
3423 if (!NewCCMask)
3424 return;
3425 }
3426
3427 // Go ahead and make the change.
3428 C.Opcode = SystemZISD::TM;
3429 C.Op0 = NewC.Op0;
 // Reuse the existing mask constant when its value was not adjusted;
 // otherwise create a new constant of Op0's type.
3430 if (Mask && Mask->getZExtValue() == MaskVal)
3431 C.Op1 = SDValue(Mask, 0);
3432 else
3433 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3434 C.CCValid = SystemZ::CCMASK_TM;
3435 C.CCMask = NewCCMask;
3436}
3437
3438// Implement i128 comparison in vector registers. Only ICMP comparisons
// whose Op0 has type i128 are touched; C is rewritten in place.
// NOTE(review): one listing line (between the CCValid assignment and the
// `if (!CmpEq)` test in the reduction case) is not present here; presumably
// it set C.CCMask from CmpNull -- confirm against the upstream source.
3439static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3440 Comparison &C) {
3441 if (C.Opcode != SystemZISD::ICMP)
3442 return;
3443 if (C.Op0.getValueType() != MVT::i128)
3444 return;
3445
3446 // Recognize vector comparison reductions: an EQ/NE test of a vector
 // comparison result (viewed through bitcasts) against all-zeros or
 // all-ones.
3447 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3448 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3449 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3450 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3451 bool CmpNull = isNullConstant(C.Op1);
3452 SDValue Src = peekThroughBitcasts(C.Op0);
 // A single-use bitwise NOT swaps the all-zeros and all-ones cases.
3453 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3454 Src = Src.getOperand(0);
3455 CmpNull = !CmpNull;
3456 }
 // Map a single-use vector comparison to its CC-setting counterpart.
3457 unsigned Opcode = 0;
3458 if (Src.hasOneUse()) {
3459 switch (Src.getOpcode()) {
3460 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3461 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3462 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3463 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3464 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3465 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3466 default: break;
3467 }
3468 }
3469 if (Opcode) {
3470 C.Opcode = Opcode;
3471 C.Op0 = Src->getOperand(0);
3472 C.Op1 = Src->getOperand(1);
3473 C.CCValid = SystemZ::CCMASK_VCMP;
 // An NE test selects the complementary set of valid CC values.
3475 if (!CmpEq)
3476 C.CCMask ^= C.CCValid;
3477 return;
3478 }
3479 }
3480
3481 // Everything below here is not useful if we have native i128 compares.
3482 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3483 return;
3484
3485 // (In-)Equality comparisons can be implemented via VCEQGS: compare the
 // operands as v2i64 and test whether all elements compared equal.
3486 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3487 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3488 C.Opcode = SystemZISD::VICMPES;
3489 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3490 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3491 C.CCValid = SystemZ::CCMASK_VCMP;
3492 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3493 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3494 else
3495 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3496 return;
3497 }
3498
3499 // Normalize other comparisons to GT: LT swaps the operands, LE inverts
 // the tested condition, and GE does both.
3500 bool Swap = false, Invert = false;
3501 switch (C.CCMask) {
3502 case SystemZ::CCMASK_CMP_GT: break;
3503 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3504 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3505 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3506 default: llvm_unreachable("Invalid integer condition!");
3507 }
3508 if (Swap)
3509 std::swap(C.Op0, C.Op1);
3510
 // Pick the unsigned or signed 128-bit "high" comparison; the GT outcome
 // is tested via CCMASK_1.
3511 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3512 C.Opcode = SystemZISD::UCMP128HI;
3513 else
3514 C.Opcode = SystemZISD::SCMP128HI;
3515 C.CCValid = SystemZ::CCMASK_ANY;
3516 C.CCMask = SystemZ::CCMASK_1;
3517
3518 if (Invert)
3519 C.CCMask ^= C.CCValid;
3520}
3521
3522// See whether the comparison argument contains a redundant AND
3523// and remove it if so. This sometimes happens due to the generic
3524// BRCOND expansion. The AND is redundant when every bit that it clears is
// already known to be zero in its first operand.
// NOTE(review): the listing line carrying this function's name and leading
// parameters is not present here.
3526 Comparison &C) {
3527 if (C.Op0.getOpcode() != ISD::AND)
3528 return;
 // Only constant masks of at most 64 bits are handled, since the check
 // below works on uint64_t values.
3529 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3530 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3531 return;
3532 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
 // Bail out if some bit that is not known to be zero lies outside the mask.
3533 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3534 return;
3535
 // The AND has no effect: compare its first operand directly.
3536 C.Op0 = C.Op0.getOperand(0);
3537}
3538
3539// Return a Comparison that tests the condition-code result of intrinsic
3540// node Call against constant integer CC using comparison code Cond.
3541// Opcode is the opcode of the SystemZISD operation for the intrinsic
3542// and CCValid is the set of possible condition-code results.
// The returned Comparison has null Op1 and Chain; only Op0 (the call),
// Opcode, CCValid and CCMask are filled in.
// NOTE(review): the listing line that completed the parameter list (the
// declaration of Cond) is not present here.
3543static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3544 SDValue Call, unsigned CCValid, uint64_t CC,
3546 Comparison C(Call, SDValue(), SDValue());
3547 C.Opcode = Opcode;
3548 C.CCValid = CCValid;
 // In the masks below, CC value N (0..3) corresponds to bit (3 - N).
3549 if (Cond == ISD::SETEQ)
3550 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3551 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3552 else if (Cond == ISD::SETNE)
3553 // ...and the inverse of that.
3554 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3555 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3556 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3557 // always true for CC>3.
3558 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3559 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3560 // ...and the inverse of that.
3561 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3562 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3563 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3564 // always true for CC>3.
3565 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3566 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3567 // ...and the inverse of that.
3568 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3569 else
3570 llvm_unreachable("Unexpected integer comparison type");
 // Drop every bit that does not correspond to a CC value the intrinsic can
 // produce (this also trims the wide masks built above).
3571 C.CCMask &= CCValid;
3572 return C;
3573}
3574
3575// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
// Chain is non-null for strict floating-point comparisons; in that case
// IsSignaling selects STRICT_FCMPS instead of STRICT_FCMP. The returned
// Comparison describes the opcode, operands and CC masks to use.
// NOTE(review): several listing lines inside this function (one at the end of
// the floating-point branch, one among the integer adjustments and two
// before the final adjustICmp128 call / return) are not present here.
3576static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3577 ISD::CondCode Cond, const SDLoc &DL,
3578 SDValue Chain = SDValue(),
3579 bool IsSignaling = false) {
 // A comparison of an intrinsic's CC result with a constant can test the
 // CC produced by the intrinsic directly.
3580 if (CmpOp1.getOpcode() == ISD::Constant) {
3581 assert(!Chain);
3582 unsigned Opcode, CCValid;
 // Chained intrinsics: CmpOp0 must be result 0 and that result must have
 // exactly one use.
3583 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3584 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3585 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3586 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3587 CmpOp1->getAsZExtVal(), Cond);
 // Unchained intrinsics: CmpOp0 must be the last result of the node.
3588 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3589 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3590 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3591 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3592 CmpOp1->getAsZExtVal(), Cond);
3593 }
3594 Comparison C(CmpOp0, CmpOp1, Chain);
3595 C.CCMask = CCMaskForCondCode(Cond);
3596 if (C.Op0.getValueType().isFloatingPoint()) {
3597 C.CCValid = SystemZ::CCMASK_FCMP;
 // Non-strict, strict quiet, or strict signaling comparison.
3598 if (!C.Chain)
3599 C.Opcode = SystemZISD::FCMP;
3600 else if (!IsSignaling)
3601 C.Opcode = SystemZISD::STRICT_FCMP;
3602 else
3603 C.Opcode = SystemZISD::STRICT_FCMPS;
3605 } else {
3606 assert(!C.Chain);
3607 C.CCValid = SystemZ::CCMASK_ICMP;
3608 C.Opcode = SystemZISD::ICMP;
3609 // Choose the type of comparison. Equality and inequality tests can
3610 // use either signed or unsigned comparisons. The choice also doesn't
3611 // matter if both sign bits are known to be clear. In those cases we
3612 // want to give the main isel code the freedom to choose whichever
3613 // form fits best.
3614 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3615 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3616 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3617 C.ICmpType = SystemZICMP::Any;
 // NOTE(review): presumably CCMaskForCondCode sets the UO bit for unsigned
 // integer conditions, which is what distinguishes them here -- confirm.
3618 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3619 C.ICmpType = SystemZICMP::UnsignedOnly;
3620 else
3621 C.ICmpType = SystemZICMP::SignedOnly;
 // The UO bit is cleared from the mask once the comparison type is chosen.
3622 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3623 adjustForRedundantAnd(DAG, DL, C);
3624 adjustZeroCmp(DAG, DL, C);
3625 adjustSubwordCmp(DAG, DL, C);
3626 adjustForSubtraction(DAG, DL, C);
3628 adjustICmpTruncate(DAG, DL, C);
3629 }
3630
 // Swapping the operands requires reversing the condition mask as well.
3631 if (shouldSwapCmpOperands(C)) {
3632 std::swap(C.Op0, C.Op1);
3633 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3634 }
3635
3637 adjustICmp128(DAG, DL, C);
3639 return C;
3640}
3641
3642// Emit the comparison instruction described by C and return the value that
// carries the resulting condition code.
// NOTE(review): the two `case` labels of the switch below and the line that
// completed the RegisterOnly expression are not present in this listing.
3643static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
 // A null Op1 marks a comparison whose CC comes from a single node: either
 // a stack-guard compare or a CC-setting intrinsic held in Op0.
3644 if (!C.Op1.getNode()) {
3645 if (C.Opcode == SystemZISD::CMP_STACKGUARD)
3646 return DAG.getNode(SystemZISD::CMP_STACKGUARD, DL, MVT::i32, C.Op0);
3647 SDNode *Node;
3648 switch (C.Op0.getOpcode()) {
 // With-chain intrinsic form: the emitted node's result 0 is returned.
3650 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3651 return SDValue(Node, 0);
 // Chainless intrinsic form: the emitted node's last result is returned.
3653 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3654 return SDValue(Node, Node->getNumValues() - 1);
3655 default:
3656 llvm_unreachable("Invalid comparison operands");
3657 }
3658 }
 // ICMP carries the comparison type as an extra target-constant operand.
3659 if (C.Opcode == SystemZISD::ICMP)
3660 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3661 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
 // TM carries a RegisterOnly flag derived from the MIXED_MSB bits of the
 // tested mask.
3662 if (C.Opcode == SystemZISD::TM) {
3663 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3665 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3666 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3667 }
 // CC-setting vector comparisons produce the integer vector result as
 // value 0 and the CC as value 1; only the CC is returned.
3668 if (C.Opcode == SystemZISD::VICMPES ||
3669 C.Opcode == SystemZISD::VICMPHS ||
3670 C.Opcode == SystemZISD::VICMPHLS ||
3671 C.Opcode == SystemZISD::VFCMPES ||
3672 C.Opcode == SystemZISD::VFCMPHS ||
3673 C.Opcode == SystemZISD::VFCMPHES) {
3674 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3675 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3676 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3677 return SDValue(Val.getNode(), 1);
3678 }
 // Strict floating-point comparisons take the chain as first operand and
 // produce an output chain as a second result.
3679 if (C.Chain) {
3680 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3681 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3682 }
3683 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3684}
3685
3686// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3687// 64 bits. Extend is the extension type to use. Store the high part
3688// in Hi and the low part in Lo.
3689static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3690 SDValue Op0, SDValue Op1, SDValue &Hi,
3691 SDValue &Lo) {
3692 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3693 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3694 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3695 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3696 DAG.getConstant(32, DL, MVT::i64));
3697 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3698 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3699}
3700
3701// Lower a binary operation that produces two VT results, one in each
3702// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3703// and Opcode performs the GR128 operation. Store the even register result
3704// in Even and the odd register result in Odd.
3705static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3706 unsigned Opcode, SDValue Op0, SDValue Op1,
3707 SDValue &Even, SDValue &Odd) {
3708 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3709 bool Is32Bit = is32Bit(VT);
3710 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3711 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3712}
3713
3714// Return an i32 value that is 1 if the CC value produced by CCReg is
3715// in the mask CCMask and 0 otherwise. CC is known to have a value
3716// in CCValid, so other values can be ignored.
3717static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3718 unsigned CCValid, unsigned CCMask) {
3719 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3720 DAG.getConstant(0, DL, MVT::i32),
3721 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3722 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3723 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3724}
3725
3726// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3727// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3728// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3729// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3730// floating-point comparisons.
// NOTE(review): the listing lines between this comment and the switch (where
// this function's signature would be) are not present here.
3733 switch (CC) {
 // Equality exists for every mode.
3734 case ISD::SETOEQ:
3735 case ISD::SETEQ:
3736 switch (Mode) {
3737 case CmpMode::Int: return SystemZISD::VICMPE;
3738 case CmpMode::FP: return SystemZISD::VFCMPE;
3739 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3740 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3741 }
3742 llvm_unreachable("Bad mode");
3743
 // Greater-or-equal is only available for the floating-point modes.
3744 case ISD::SETOGE:
3745 case ISD::SETGE:
3746 switch (Mode) {
3747 case CmpMode::Int: return 0;
3748 case CmpMode::FP: return SystemZISD::VFCMPHE;
3749 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3750 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3751 }
3752 llvm_unreachable("Bad mode");
3753
 // (Signed) greater-than exists for every mode.
3754 case ISD::SETOGT:
3755 case ISD::SETGT:
3756 switch (Mode) {
3757 case CmpMode::Int: return SystemZISD::VICMPH;
3758 case CmpMode::FP: return SystemZISD::VFCMPH;
3759 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3760 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3761 }
3762 llvm_unreachable("Bad mode");
3763
 // Unsigned greater-than is only available for integers.
3764 case ISD::SETUGT:
3765 switch (Mode) {
3766 case CmpMode::Int: return SystemZISD::VICMPHL;
3767 case CmpMode::FP: return 0;
3768 case CmpMode::StrictFP: return 0;
3769 case CmpMode::SignalingFP: return 0;
3770 }
3771 llvm_unreachable("Bad mode");
3772
 // No other condition code maps directly to a single operation.
3773 default:
3774 return 0;
3775 }
3776}
3777
3778// Return the SystemZISD vector comparison operation for CC or its inverse,
3779// or 0 if neither can be done directly. Indicate in Invert whether the
3780// result is for the inverse of CC. Mode is as above. Invert is only written
// when a nonzero opcode is returned.
// NOTE(review): the listing line carrying this function's name and leading
// parameters is not present here.
3782 bool &Invert) {
 // First try CC itself.
3783 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3784 Invert = false;
3785 return Opcode;
3786 }
3787
 // Then try the inverse condition. The type passed to getSetCCInverse only
 // needs to say whether the comparison is an integer or floating-point one.
3788 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3789 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3790 Invert = true;
3791 return Opcode;
3792 }
3793
3794 return 0;
3795}
3796
3797// Return a v2f64 that contains the extended form of elements Start and Start+1
3798// of v4f32 value Op. If Chain is nonnull, return the strict form.
3799static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3800 SDValue Op, SDValue Chain) {
3801 int Mask[] = { Start, -1, Start + 1, -1 };
3802 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3803 if (Chain) {
3804 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3805 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3806 }
3807 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3808}
3809
3810// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3811// producing a result of type VT. If Chain is nonnull, return the strict form.
3812SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3813 const SDLoc &DL, EVT VT,
3814 SDValue CmpOp0,
3815 SDValue CmpOp1,
3816 SDValue Chain) const {
3817 // There is no hardware support for v4f32 (unless we have the vector
3818 // enhancements facility 1), so extend the vector into two v2f64s
3819 // and compare those.
3820 if (CmpOp0.getValueType() == MVT::v4f32 &&
3821 !Subtarget.hasVectorEnhancements1()) {
3822 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3823 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3824 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3825 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3826 if (Chain) {
3827 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3828 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3829 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3830 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3831 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3832 H1.getValue(1), L1.getValue(1),
3833 HRes.getValue(1), LRes.getValue(1) };
3834 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3835 SDValue Ops[2] = { Res, NewChain };
3836 return DAG.getMergeValues(Ops, DL);
3837 }
3838 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3839 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3840 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3841 }
3842 if (Chain) {
3843 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3844 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3845 }
3846 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3847}
3848
3849// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3850// an integer mask of type VT. If Chain is nonnull, we have a strict
3851// floating-point comparison. If in addition IsSignaling is true, we have
3852// a strict signaling floating-point comparison. For strict comparisons the
// returned value also provides the output chain as its second result.
// NOTE(review): two listing lines in the default case (the end of the
// sign-bit condition, and a statement at the start of the fallback `else`)
// are not present here.
3853SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3854 const SDLoc &DL, EVT VT,
3855 ISD::CondCode CC,
3856 SDValue CmpOp0,
3857 SDValue CmpOp1,
3858 SDValue Chain,
3859 bool IsSignaling) const {
3860 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
 // A chain implies floating point, and signaling implies a chain.
3861 assert (!Chain || IsFP);
3862 assert (!IsSignaling || Chain);
3863 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3864 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
 // Invert records whether the computed mask must be complemented at the end.
3865 bool Invert = false;
3866 SDValue Cmp;
3867 switch (CC) {
3868 // Handle tests for order using (or (ogt y x) (oge x y)).
3869 case ISD::SETUO:
3870 Invert = true;
3871 [[fallthrough]];
3872 case ISD::SETO: {
3873 assert(IsFP && "Unexpected integer comparison");
3874 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3875 DL, VT, CmpOp1, CmpOp0, Chain);
3876 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3877 DL, VT, CmpOp0, CmpOp1, Chain);
3878 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
 // Join the output chains of both strict comparisons.
3879 if (Chain)
3880 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3881 LT.getValue(1), GE.getValue(1));
3882 break;
3883 }
3884
3885 // Handle <> tests using (or (ogt y x) (ogt x y)).
3886 case ISD::SETUEQ:
3887 Invert = true;
3888 [[fallthrough]];
3889 case ISD::SETONE: {
3890 assert(IsFP && "Unexpected integer comparison");
3891 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3892 DL, VT, CmpOp1, CmpOp0, Chain);
3893 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3894 DL, VT, CmpOp0, CmpOp1, Chain);
3895 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
 // Join the output chains of both strict comparisons.
3896 if (Chain)
3897 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3898 LT.getValue(1), GT.getValue(1));
3899 break;
3900 }
3901
3902 // Otherwise a single comparison is enough. It doesn't really
3903 // matter whether we try the inversion or the swap first, since
3904 // there are no cases where both work.
3905 default:
3906 // Optimize sign-bit comparisons to signed compares: an EQ/NE test of
 // (and X, sign-mask splat) becomes a signed GE/LT test of X.
3907 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3909 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3910 APInt Mask;
3911 if (CmpOp0.getOpcode() == ISD::AND
3912 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3913 && Mask == APInt::getSignMask(EltSize)) {
3914 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3915 CmpOp0 = CmpOp0.getOperand(0);
3916 }
3917 }
 // Try the operands in their original order first.
3918 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3919 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3920 else {
 // Fallback: the comparison is built with the operands swapped.
3922 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3923 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3924 else
3925 llvm_unreachable("Unhandled comparison");
3926 }
3927 if (Chain)
3928 Chain = Cmp.getValue(1);
3929 break;
3930 }
 // Complement the mask by XORing it with an all-ones vector.
3931 if (Invert) {
3932 SDValue Mask =
3933 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3934 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3935 }
 // If the chain is not already a result of the node that defines Cmp,
 // return the mask and the chain together as a merged value.
3936 if (Chain && Chain.getNode() != Cmp.getNode()) {
3937 SDValue Ops[2] = { Cmp, Chain };
3938 Cmp = DAG.getMergeValues(Ops, DL);
3939 }
3940 return Cmp;
3941}
3942
3943SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3944 SelectionDAG &DAG) const {
3945 SDValue CmpOp0 = Op.getOperand(0);
3946 SDValue CmpOp1 = Op.getOperand(1);
3947 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3948 SDLoc DL(Op);
3949 EVT VT = Op.getValueType();
3950 if (VT.isVector())
3951 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3952
3953 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3954 SDValue CCReg = emitCmp(DAG, DL, C);
3955 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3956}
3957
3958SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3959 SelectionDAG &DAG,
3960 bool IsSignaling) const {
3961 SDValue Chain = Op.getOperand(0);
3962 SDValue CmpOp0 = Op.getOperand(1);
3963 SDValue CmpOp1 = Op.getOperand(2);
3964 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3965 SDLoc DL(Op);
3966 EVT VT = Op.getNode()->getValueType(0);
3967 if (VT.isVector()) {
3968 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3969 Chain, IsSignaling);
3970 return Res.getValue(Op.getResNo());
3971 }
3972
3973 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3974 SDValue CCReg = emitCmp(DAG, DL, C);
3975 CCReg->setFlags(Op->getFlags());
3976 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3977 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3978 return DAG.getMergeValues(Ops, DL);
3979}
3980
3981SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3982 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3983 SDValue CmpOp0 = Op.getOperand(2);
3984 SDValue CmpOp1 = Op.getOperand(3);
3985 SDValue Dest = Op.getOperand(4);
3986 SDLoc DL(Op);
3987
3988 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3989 SDValue CCReg = emitCmp(DAG, DL, C);
3990 return DAG.getNode(
3991 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3992 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3993 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3994}
3995
3996// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3997// allowing Pos and Neg to be wider than CmpOp.
3998static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3999 return (Neg.getOpcode() == ISD::SUB &&
4000 Neg.getOperand(0).getOpcode() == ISD::Constant &&
4001 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
4002 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
4003 Pos.getOperand(0) == CmpOp)));
4004}
4005
4006// Return the absolute or negative absolute of Op; IsNegative decides which.
// NOTE(review): the listing line carrying this function's name and leading
// parameters is not present here.
4008 bool IsNegative) {
4009 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
 // The negative absolute value is formed as (0 - abs(Op)).
4010 if (IsNegative)
4011 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
4012 DAG.getConstant(0, DL, Op.getValueType()), Op);
4013 return Op;
4014}
4015
// Build an i128 select between TrueOp and FalseOp as
// (TrueOp & Mask) | (FalseOp & ~Mask), where Mask is the result of a vector
// integer comparison node (VICMPE, VICMPH or VICMPHL) of C.Op0 and C.Op1.
// C is passed by value, so the normalization below does not affect the
// caller's copy.
// NOTE(review): the listing line carrying this function's name and leading
// parameters, and the two `case` labels of the switch, are not present here.
4017 Comparison C, SDValue TrueOp, SDValue FalseOp) {
4018 EVT VT = MVT::i128;
4019 unsigned Op;
4020
 // Turn NE/GE/LE into EQ/LT/GT by swapping the selected values and
 // complementing the condition mask.
4021 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
4022 C.CCMask == SystemZ::CCMASK_CMP_GE ||
4023 C.CCMask == SystemZ::CCMASK_CMP_LE) {
4024 std::swap(TrueOp, FalseOp);
4025 C.CCMask ^= C.CCValid;
4026 }
 // Turn LT into GT by swapping the compared operands.
4027 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
4028 std::swap(C.Op0, C.Op1);
4029 C.CCMask = SystemZ::CCMASK_CMP_GT;
4030 }
4031 switch (C.CCMask) {
4033 Op = SystemZISD::VICMPE;
4034 break;
 // Unsigned-only comparisons use VICMPHL, all others VICMPH.
4036 if (C.ICmpType == SystemZICMP::UnsignedOnly)
4037 Op = SystemZISD::VICMPHL;
4038 else
4039 Op = SystemZISD::VICMPH;
4040 break;
4041 default:
4042 llvm_unreachable("Unhandled comparison");
4043 break;
4044 }
4045
 // Blend the two values with the comparison mask.
4046 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
4047 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
4048 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
4049 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
4050}
4051
4052SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
4053 SelectionDAG &DAG) const {
4054 SDValue CmpOp0 = Op.getOperand(0);
4055 SDValue CmpOp1 = Op.getOperand(1);
4056 SDValue TrueOp = Op.getOperand(2);
4057 SDValue FalseOp = Op.getOperand(3);
4058 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4059 SDLoc DL(Op);
4060
4061 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
4062 // legalizer, as it will be handled according to the type of the resulting
4063 // value. Extend them here if needed.
4064 if (CmpOp0.getSimpleValueType() == MVT::f16) {
4065 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
4066 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
4067 }
4068
4069 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
4070
4071 // Check for absolute and negative-absolute selections, including those
4072 // where the comparison value is sign-extended (for LPGFR and LNGFR).
4073 // This check supplements the one in DAGCombiner.
4074 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
4075 C.CCMask != SystemZ::CCMASK_CMP_NE &&
4076 C.Op1.getOpcode() == ISD::Constant &&
4077 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
4078 C.Op1->getAsZExtVal() == 0) {
4079 if (isAbsolute(C.Op0, TrueOp, FalseOp))
4080 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
4081 if (isAbsolute(C.Op0, FalseOp, TrueOp))
4082 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
4083 }
4084
4085 if (Subtarget.hasVectorEnhancements3() &&
4086 C.Opcode == SystemZISD::ICMP &&
4087 C.Op0.getValueType() == MVT::i128 &&
4088 TrueOp.getValueType() == MVT::i128) {
4089 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
4090 }
4091
4092 SDValue CCReg = emitCmp(DAG, DL, C);
4093 SDValue Ops[] = {TrueOp, FalseOp,
4094 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
4095 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
4096
4097 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
4098}
4099
// Lower a global address node to a value of pointer type. Symbols accepted
// by Subtarget.isPC32DBLSymbol() are wrapped in PCREL_WRAPPER (optionally
// with a PCREL_OFFSET); other ELF symbols are loaded through a MO_GOT
// reference; z/OS uses getADAEntry(). Any offset that was not folded is
// added explicitly at the end.
// NOTE(review): listing lines 4106, 4108 and 4135 are absent from this
// excerpt. `CM` and `Result` are used below without a visible declaration,
// and the getLoad call at listing line 4134 is cut off — confirm the missing
// lines against the upstream file before editing.
4100SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4101 SelectionDAG &DAG) const {
4102 SDLoc DL(Node);
4103 const GlobalValue *GV = Node->getGlobal();
4104 int64_t Offset = Node->getOffset();
4105 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4107
4109 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4110 if (isInt<32>(Offset)) {
4111 // Assign anchors at 1<<12 byte boundaries.
// Anchor is Offset with its low 12 bits cleared.
4112 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4113 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
4114 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4115
4116 // The offset can be folded into the address if it is aligned to a
4117 // halfword.
// After this subtraction Offset holds only the part below the anchor.
4118 Offset -= Anchor;
4119 if (Offset != 0 && (Offset & 1) == 0) {
4120 SDValue Full =
4121 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
4122 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
// Fully folded: nothing is left for the explicit ADD below.
4123 Offset = 0;
4124 }
4125 } else {
4126 // Conservatively load a constant offset greater than 32 bits into a
4127 // register below.
4128 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
4129 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4130 }
4131 } else if (Subtarget.isTargetELF()) {
// Reference the symbol with the MO_GOT flag and load the address from there;
// Offset is left untouched and is added after the load.
4132 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4133 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4134 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4136 } else if (Subtarget.isTargetzOS()) {
4137 Result = getADAEntry(DAG, GV, DL, PtrVT);
4138 } else
4139 llvm_unreachable("Unexpected Subtarget");
4140
4141 // If there was a non-zero offset that we didn't fold, create an explicit
4142 // addition for it.
4143 if (Offset != 0)
4144 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4145 DAG.getSignedConstant(Offset, DL, PtrVT));
4146
4147 return Result;
4148}
4149
// Emit the call-like node `Opcode` that invokes __tls_get_offset for Node's
// global. GOTOffset is copied into %r2 and the GOT address into %r12 before
// the call; the function returns the value copied out of %r2 afterwards.
// NOTE(review): listing lines 4159-4160 and 4171 are absent from this
// excerpt. The condition guarding the report_fatal_error call and the
// declaration of `Ops` are therefore not visible — confirm against the
// upstream file before editing.
4150SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4151 SelectionDAG &DAG,
4152 unsigned Opcode,
4153 SDValue GOTOffset) const {
4154 SDLoc DL(Node);
4155 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4156 SDValue Chain = DAG.getEntryNode();
// Starts out empty; it is threaded through the register copies below so
// that they stay glued to the call.
4157 SDValue Glue;
4158
// NOTE(review): presumably guarded by a check for the GHC calling
// convention, going by the message text — the condition is not visible here.
4161 report_fatal_error("In GHC calling convention TLS is not supported");
4162
4163 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4164 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
4165 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4166 Glue = Chain.getValue(1);
4167 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4168 Glue = Chain.getValue(1);
4169
4170 // The first call operand is the chain and the second is the TLS symbol.
4172 Ops.push_back(Chain);
4173 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4174 Node->getValueType(0),
4175 0, 0));
4176
4177 // Add argument registers to the end of the list so that they are
4178 // known live into the call.
4179 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4180 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4181
4182 // Add a register mask operand representing the call-preserved registers.
// The mask is requested for CallingConv::C.
4183 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4184 const uint32_t *Mask =
4185 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4186 assert(Mask && "Missing call preserved mask for calling convention");
4187 Ops.push_back(DAG.getRegisterMask(Mask));
4188
4189 // Glue the call to the argument copies.
4190 Ops.push_back(Glue);
4191
4192 // Emit the call.
// The node produces a chain (result 0) and a glue value (result 1).
4193 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4194 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4195 Glue = Chain.getValue(1);
4196
4197 // Copy the return value from %r2.
4198 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4199}
4200
4201SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4202 SelectionDAG &DAG) const {
4203 SDValue Chain = DAG.getEntryNode();
4204 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4205
4206 // The high part of the thread pointer is in access register 0.
4207 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4208 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4209
4210 // The low part of the thread pointer is in access register 1.
4211 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4212 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4213
4214 // Merge them into a single 64-bit address.
4215 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4216 DAG.getConstant(32, DL, PtrVT));
4217 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4218}
4219
// Lower the address of a thread-local global. With emulated TLS the generic
// LowerToTLSEmulatedModel() is used. Otherwise the result is the thread
// pointer plus an offset that is computed according to the TLS model
// reported for the global.
// NOTE(review): a considerable number of listing lines are absent from this
// excerpt (among them the declaration of `Offset`, two `case` labels, the
// right-hand sides of the `CPV` initialisations and the trailing arguments
// of every getLoad / getTargetGlobalAddress call). The notes below mark each
// gap; confirm all of them against the upstream file before editing.
4220SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4221 SelectionDAG &DAG) const {
4222 if (DAG.getTarget().useEmulatedTLS())
4223 return LowerToTLSEmulatedModel(Node, DAG);
4224 SDLoc DL(Node);
4225 const GlobalValue *GV = Node->getGlobal();
4226 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4227 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4228
// NOTE(review): the condition guarding this fatal error (listing lines
// 4229-4230) is not visible; presumably a GHC calling-convention check.
4231 report_fatal_error("In GHC calling convention TLS is not supported");
4232
4233 SDValue TP = lowerThreadPointer(DL, DAG);
4234
4235 // Get the offset of GA from the thread pointer, based on the TLS model.
// NOTE(review): the declaration of `Offset` (listing line 4236) is not
// visible.
4237 switch (model) {
// NOTE(review): the `case` label opening this arm (listing line 4238) is not
// visible; the TLS_GDCALL opcode below suggests the general-dynamic model.
4239 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4240 SystemZConstantPoolValue *CPV =
// NOTE(review): initialiser of CPV (listing line 4241) is not visible.
4242
4243 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4244 Offset = DAG.getLoad(
4245 PtrVT, DL, DAG.getEntryNode(), Offset,
// NOTE(review): remaining getLoad arguments (listing line 4246) not visible.
4247
4248 // Call __tls_get_offset to retrieve the offset.
4249 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4250 break;
4251 }
4252
// NOTE(review): the `case` label opening this arm (listing line 4253) is not
// visible; the TLS_LDCALL opcode below suggests the local-dynamic model.
4254 // Load the GOT offset of the module ID.
4255 SystemZConstantPoolValue *CPV =
// NOTE(review): initialiser of CPV (listing line 4256) is not visible.
4257
4258 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4259 Offset = DAG.getLoad(
4260 PtrVT, DL, DAG.getEntryNode(), Offset,
// NOTE(review): remaining getLoad arguments (listing line 4261) not visible.
4262
4263 // Call __tls_get_offset to retrieve the module base offset.
4264 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4265
4266 // Note: The SystemZLDCleanupPass will remove redundant computations
4267 // of the module base offset. Count total number of local-dynamic
4268 // accesses to trigger execution of that pass.
4269 SystemZMachineFunctionInfo* MFI =
4270 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
// NOTE(review): the statement that updates the counter through MFI (listing
// line 4271) is not visible.
4272
4273 // Add the per-symbol offset.
// NOTE(review): the statement at listing line 4274 is not visible; CPV is
// presumably re-assigned there before being used again below.
4275
4276 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4277 DTPOffset = DAG.getLoad(
4278 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
// NOTE(review): remaining getLoad arguments (listing line 4279) not visible.
4280
4281 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4282 break;
4283 }
4284
4285 case TLSModel::InitialExec: {
4286 // Load the offset from the GOT.
4287 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
// NOTE(review): the target-flag argument (listing line 4288) is not visible.
4289 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4290 Offset =
4291 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
// NOTE(review): remaining getLoad arguments (listing line 4292) not visible.
4293 break;
4294 }
4295
4296 case TLSModel::LocalExec: {
4297 // Force the offset into the constant pool and load it from there.
4298 SystemZConstantPoolValue *CPV =
// NOTE(review): initialiser of CPV (listing line 4299) is not visible.
4300
4301 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4302 Offset = DAG.getLoad(
4303 PtrVT, DL, DAG.getEntryNode(), Offset,
// NOTE(review): remaining getLoad arguments (listing line 4304) not visible.
4305 break;
4306 }
4307 }
4308
4309 // Add the base and offset together.
4310 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4311}
4312
4313SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4314 SelectionDAG &DAG) const {
4315 SDLoc DL(Node);
4316 const BlockAddress *BA = Node->getBlockAddress();
4317 int64_t Offset = Node->getOffset();
4318 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4319
4320 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4321 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4322 return Result;
4323}
4324
4325SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4326 SelectionDAG &DAG) const {
4327 SDLoc DL(JT);
4328 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4329 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4330
4331 // Use LARL to load the address of the table.
4332 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4333}
4334
// Lower a constant pool node: build the matching target constant pool node
// (machine-specific value or plain IR constant, keeping the entry's
// alignment) and wrap it in a PCREL_WRAPPER.
// NOTE(review): listing lines 4340-4341 are absent from this excerpt, so the
// declaration of `Result` and the `if` that selects between the two
// assignments below are not visible. The first arm reads
// CP->getMachineCPVal(), so the condition presumably tests for a machine
// constant pool entry — confirm against the upstream file.
4335SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4336 SelectionDAG &DAG) const {
4337 SDLoc DL(CP);
4338 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4339
// Machine-specific value: no offset is passed along.
4342 Result =
4343 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4344 else
// Plain constant: the entry's offset is forwarded as well.
4345 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4346 CP->getOffset());
4347
4348 // Use LARL to load the address of the constant pool entry.
4349 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4350}
4351
4352SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4353 SelectionDAG &DAG) const {
4354 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4355 MachineFunction &MF = DAG.getMachineFunction();
4356 MachineFrameInfo &MFI = MF.getFrameInfo();
4357 MFI.setFrameAddressIsTaken(true);
4358
4359 SDLoc DL(Op);
4360 unsigned Depth = Op.getConstantOperandVal(0);
4361 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4362
4363 // By definition, the frame address is the address of the back chain. (In
4364 // the case of packed stack without backchain, return the address where the
4365 // backchain would have been stored. This will either be an unused space or
4366 // contain a saved register).
4367 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4368 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4369
4370 if (Depth > 0) {
4371 // FIXME The frontend should detect this case.
4372 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4373 report_fatal_error("Unsupported stack frame traversal count");
4374
4375 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4376 while (Depth--) {
4377 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4378 MachinePointerInfo());
4379 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4380 }
4381 }
4382
4383 return BackChain;
4384}
4385
// Lower a RETURNADDR node. For a non-zero depth the return address is loaded
// from the frame located by lowerFRAMEADDR(); for depth 0 it is read from
// the link register, which is registered as a live-in.
// NOTE(review): listing line 4413 is absent from this excerpt, so the
// declaration of `LinkReg` and the start of the call whose last argument is
// `&SystemZ::GR64BitRegClass` are not visible — confirm against the upstream
// file before editing.
4386SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4387 SelectionDAG &DAG) const {
4388 MachineFunction &MF = DAG.getMachineFunction();
4389 MachineFrameInfo &MFI = MF.getFrameInfo();
4390 MFI.setReturnAddressIsTaken(true);
4391
4392 SDLoc DL(Op);
// Operand 0 is the number of frames to walk up.
4393 unsigned Depth = Op.getConstantOperandVal(0);
4394 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4395
4396 if (Depth > 0) {
4397 // FIXME The frontend should detect this case.
4398 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4399 report_fatal_error("Unsupported stack frame traversal count");
4400
// Locate the requested frame, then load from it at the return-address
// offset supplied by the frame lowering.
4401 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4402 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4403 int Offset = TFL->getReturnAddressOffset(MF);
4404 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4405 DAG.getSignedConstant(Offset, DL, PtrVT));
4406 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4407 MachinePointerInfo());
4408 }
4409
4410 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4411 // implicit live-in.
4412 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4414 &SystemZ::GR64BitRegClass);
4415 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4416}
4417
4418SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4419 SelectionDAG &DAG) const {
4420 SDLoc DL(Op);
4421 SDValue In = Op.getOperand(0);
4422 EVT InVT = In.getValueType();
4423 EVT ResVT = Op.getValueType();
4424
4425 // Convert loads directly. This is normally done by DAGCombiner,
4426 // but we need this case for bitcasts that are created during lowering
4427 // and which are then lowered themselves.
4428 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4429 if (ISD::isNormalLoad(LoadN)) {
4430 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4431 LoadN->getBasePtr(), LoadN->getMemOperand());
4432 // Update the chain uses.
4433 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4434 return NewLoad;
4435 }
4436
4437 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4438 SDValue In64;
4439 if (Subtarget.hasHighWord()) {
4440 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4441 MVT::i64);
4442 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4443 MVT::i64, SDValue(U64, 0), In);
4444 } else {
4445 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4446 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4447 DAG.getConstant(32, DL, MVT::i64));
4448 }
4449 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4450 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4451 DL, MVT::f32, Out64);
4452 }
4453 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4454 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4455 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4456 MVT::f64, SDValue(U64, 0), In);
4457 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4458 if (Subtarget.hasHighWord())
4459 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4460 MVT::i32, Out64);
4461 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4462 DAG.getConstant(32, DL, MVT::i64));
4463 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4464 }
4465 llvm_unreachable("Unexpected bitcast combination");
4466}
4467
4468SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4469 SelectionDAG &DAG) const {
4470
4471 if (Subtarget.isTargetXPLINK64())
4472 return lowerVASTART_XPLINK(Op, DAG);
4473 else
4474 return lowerVASTART_ELF(Op, DAG);
4475}
4476
4477SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4478 SelectionDAG &DAG) const {
4479 MachineFunction &MF = DAG.getMachineFunction();
4480 SystemZMachineFunctionInfo *FuncInfo =
4481 MF.getInfo<SystemZMachineFunctionInfo>();
4482
4483 SDLoc DL(Op);
4484
4485 // vastart just stores the address of the VarArgsFrameIndex slot into the
4486 // memory location argument.
4487 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4488 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4489 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4490 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4491 MachinePointerInfo(SV));
4492}
4493
// ELF flavour of VASTART: write four pointer-typed fields to consecutive
// 8-byte slots starting at the address operand, and return a TokenFactor
// that joins the four stores.
// NOTE(review): listing line 4522 is absent from this excerpt, so the second
// operand of the ADD that forms FieldAddr is not visible; it is presumably a
// constant built from `Offset` — confirm against the upstream file.
4494SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4495 SelectionDAG &DAG) const {
4496 MachineFunction &MF = DAG.getMachineFunction();
4497 SystemZMachineFunctionInfo *FuncInfo =
4498 MF.getInfo<SystemZMachineFunctionInfo>();
4499 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4500
// Operands read here: chain, destination address, source value.
4501 SDValue Chain = Op.getOperand(0);
4502 SDValue Addr = Op.getOperand(1);
4503 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4504 SDLoc DL(Op);
4505
4506 // The initial values of each field.
// In order: first vararg GPR, first vararg FPR, the varargs frame index and
// the register save frame index.
4507 const unsigned NumFields = 4;
4508 SDValue Fields[NumFields] = {
4509 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4510 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4511 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4512 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4513 };
4514
4515 // Store each field into its respective slot.
// Every store hangs off the incoming chain, so the stores are independent of
// each other.
4516 SDValue MemOps[NumFields];
4517 unsigned Offset = 0;
4518 for (unsigned I = 0; I < NumFields; ++I) {
4519 SDValue FieldAddr = Addr;
// The first field lives at Addr itself; later ones need an explicit add.
4520 if (Offset != 0)
4521 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4523 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4524 MachinePointerInfo(SV, Offset));
// Fields are spaced 8 bytes apart.
4525 Offset += 8;
4526 }
4527 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4528}
4529
4530SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4531 SelectionDAG &DAG) const {
4532 SDValue Chain = Op.getOperand(0);
4533 SDValue DstPtr = Op.getOperand(1);
4534 SDValue SrcPtr = Op.getOperand(2);
4535 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4536 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4537 SDLoc DL(Op);
4538
4539 uint32_t Sz =
4540 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4541 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4542 Align(8), Align(8), /*isVolatile*/ false,
4543 /*AlwaysInline*/ false,
4544 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4545 MachinePointerInfo(SrcSV));
4546}
4547
4548SDValue
4549SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4550 SelectionDAG &DAG) const {
4551 if (Subtarget.isTargetXPLINK64())
4552 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4553 else
4554 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4555}
4556
4557SDValue
4558SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4559 SelectionDAG &DAG) const {
4560 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4561 MachineFunction &MF = DAG.getMachineFunction();
4562 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4563 SDValue Chain = Op.getOperand(0);
4564 SDValue Size = Op.getOperand(1);
4565 SDValue Align = Op.getOperand(2);
4566 SDLoc DL(Op);
4567
4568 // If user has set the no alignment function attribute, ignore
4569 // alloca alignments.
4570 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4571
4572 uint64_t StackAlign = TFI->getStackAlignment();
4573 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4574 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4575
4576 SDValue NeededSpace = Size;
4577
4578 // Add extra space for alignment if needed.
4579 EVT PtrVT = getPointerTy(MF.getDataLayout());
4580 if (ExtraAlignSpace)
4581 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4582 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4583
4584 bool IsSigned = false;
4585 bool DoesNotReturn = false;
4586 bool IsReturnValueUsed = false;
4587 EVT VT = Op.getValueType();
4588 SDValue AllocaCall =
4589 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4590 CallingConv::C, IsSigned, DL, DoesNotReturn,
4591 IsReturnValueUsed)
4592 .first;
4593
4594 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4595 // to end of call in order to ensure it isn't broken up from the call
4596 // sequence.
4597 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4598 Register SPReg = Regs.getStackPointerRegister();
4599 Chain = AllocaCall.getValue(1);
4600 SDValue Glue = AllocaCall.getValue(2);
4601 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4602 Chain = NewSPRegNode.getValue(1);
4603
4604 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4605 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4606 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4607
4608 // Dynamically realign if needed.
4609 if (ExtraAlignSpace) {
4610 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4611 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4612 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4613 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4614 }
4615
4616 SDValue Ops[2] = {Result, Chain};
4617 return DAG.getMergeValues(Ops, DL);
4618}
4619
// ELF flavour of DYNAMIC_STACKALLOC. Lowers the stack pointer by the
// requested size (plus padding for over-alignment), either through a
// PROBED_ALLOCA node or a plain subtract, re-stores the back chain if the
// subtarget keeps one, and returns the merged pair {address, chain}.
// NOTE(review): listing line 4641 is absent from this excerpt, so the
// declaration of `SPReg` is not visible — confirm against the upstream file
// before editing.
4620SDValue
4621SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4622 SelectionDAG &DAG) const {
4623 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4624 MachineFunction &MF = DAG.getMachineFunction();
4625 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4626 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4627
// Operands read here: chain, allocation size, requested alignment.
4628 SDValue Chain = Op.getOperand(0);
4629 SDValue Size = Op.getOperand(1);
4630 SDValue Align = Op.getOperand(2);
4631 SDLoc DL(Op);
4632
4633 // If user has set the no alignment function attribute, ignore
4634 // alloca alignments.
4635 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4636
// ExtraAlignSpace is non-zero only when the request exceeds the default
// stack alignment.
4637 uint64_t StackAlign = TFI->getStackAlignment();
4638 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4639 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4640
4642 SDValue NeededSpace = Size;
4643
4644 // Get a reference to the stack pointer.
4645 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4646
4647 // If we need a backchain, save it now.
// The value is loaded through the old stack pointer and stored again
// through the new one near the end of the function.
4648 SDValue Backchain;
4649 if (StoreBackchain)
4650 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4651 MachinePointerInfo());
4652
4653 // Add extra space for alignment if needed.
4654 if (ExtraAlignSpace)
4655 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4656 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4657
4658 // Get the new stack pointer value.
4659 SDValue NewSP;
4660 if (hasInlineStackProbe(MF)) {
// PROBED_ALLOCA yields the new stack pointer and a chain; no explicit copy
// to SPReg is emitted on this path.
4661 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4662 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4663 Chain = NewSP.getValue(1);
4664 }
4665 else {
4666 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4667 // Copy the new stack pointer back.
4668 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4669 }
4670
4671 // The allocated data lives above the 160 bytes allocated for the standard
4672 // frame, plus any outgoing stack arguments. We don't know how much that
4673 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4674 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4675 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4676
4677 // Dynamically realign if needed.
// Round the address up to a multiple of RequiredAlign.
4678 if (RequiredAlign > StackAlign) {
4679 Result =
4680 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4681 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4682 Result =
4683 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4684 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4685 }
4686
// Re-establish the back chain at the new stack pointer.
4687 if (StoreBackchain)
4688 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4689 MachinePointerInfo());
4690
4691 SDValue Ops[2] = { Result, Chain };
4692 return DAG.getMergeValues(Ops, DL);
4693}
4694
4695SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4696 SDValue Op, SelectionDAG &DAG) const {
4697 SDLoc DL(Op);
4698
4699 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4700}
4701
4702SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4703 SelectionDAG &DAG,
4704 unsigned Opcode) const {
4705 EVT VT = Op.getValueType();
4706 SDLoc DL(Op);
4707 SDValue Even, Odd;
4708
4709 // This custom expander is only used on z17 and later for 64-bit types.
4710 assert(!is32Bit(VT));
4711 assert(Subtarget.hasMiscellaneousExtensions2());
4712
4713 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4714 // the high result in the even register. Return the latter.
4715 lowerGR128Binary(DAG, DL, VT, Opcode,
4716 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4717 return Even;
4718}
4719
4720SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4721 SelectionDAG &DAG) const {
4722 EVT VT = Op.getValueType();
4723 SDLoc DL(Op);
4724 SDValue Ops[2];
4725 if (is32Bit(VT))
4726 // Just do a normal 64-bit multiplication and extract the results.
4727 // We define this so that it can be used for constant division.
4728 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4729 Op.getOperand(1), Ops[1], Ops[0]);
4730 else if (Subtarget.hasMiscellaneousExtensions2())
4731 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4732 // the high result in the even register. ISD::SMUL_LOHI is defined to
4733 // return the low half first, so the results are in reverse order.
4734 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4735 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4736 else {
4737 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4738 //
4739 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4740 //
4741 // but using the fact that the upper halves are either all zeros
4742 // or all ones:
4743 //
4744 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4745 //
4746 // and grouping the right terms together since they are quicker than the
4747 // multiplication:
4748 //
4749 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4750 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4751 SDValue LL = Op.getOperand(0);
4752 SDValue RL = Op.getOperand(1);
4753 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4754 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4755 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4756 // the high result in the even register. ISD::SMUL_LOHI is defined to
4757 // return the low half first, so the results are in reverse order.
4758 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4759 LL, RL, Ops[1], Ops[0]);
4760 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4761 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4762 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4763 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4764 }
4765 return DAG.getMergeValues(Ops, DL);
4766}
4767
4768SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4769 SelectionDAG &DAG) const {
4770 EVT VT = Op.getValueType();
4771 SDLoc DL(Op);
4772 SDValue Ops[2];
4773 if (is32Bit(VT))
4774 // Just do a normal 64-bit multiplication and extract the results.
4775 // We define this so that it can be used for constant division.
4776 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4777 Op.getOperand(1), Ops[1], Ops[0]);
4778 else
4779 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4780 // the high result in the even register. ISD::UMUL_LOHI is defined to
4781 // return the low half first, so the results are in reverse order.
4782 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4783 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4784 return DAG.getMergeValues(Ops, DL);
4785}
4786
4787SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4788 SelectionDAG &DAG) const {
4789 SDValue Op0 = Op.getOperand(0);
4790 SDValue Op1 = Op.getOperand(1);
4791 EVT VT = Op.getValueType();
4792 SDLoc DL(Op);
4793
4794 // We use DSGF for 32-bit division. This means the first operand must
4795 // always be 64-bit, and the second operand should be 32-bit whenever
4796 // that is possible, to improve performance.
4797 if (is32Bit(VT))
4798 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4799 else if (DAG.ComputeNumSignBits(Op1) > 32)
4800 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4801
4802 // DSG(F) returns the remainder in the even register and the
4803 // quotient in the odd register.
4804 SDValue Ops[2];
4805 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4806 return DAG.getMergeValues(Ops, DL);
4807}
4808
4809SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4810 SelectionDAG &DAG) const {
4811 EVT VT = Op.getValueType();
4812 SDLoc DL(Op);
4813
4814 // DL(G) returns the remainder in the even register and the
4815 // quotient in the odd register.
4816 SDValue Ops[2];
4817 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4818 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4819 return DAG.getMergeValues(Ops, DL);
4820}
4821
4822SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4823 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4824
4825 // Get the known-zero masks for each operand.
4826 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4827 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4828 DAG.computeKnownBits(Ops[1])};
4829
4830 // See if the upper 32 bits of one operand and the lower 32 bits of the
4831 // other are known zero. They are the low and high operands respectively.
4832 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4833 Known[1].Zero.getZExtValue() };
4834 unsigned High, Low;
4835 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4836 High = 1, Low = 0;
4837 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4838 High = 0, Low = 1;
4839 else
4840 return Op;
4841
4842 SDValue LowOp = Ops[Low];
4843 SDValue HighOp = Ops[High];
4844
4845 // If the high part is a constant, we're better off using IILH.
4846 if (HighOp.getOpcode() == ISD::Constant)
4847 return Op;
4848
4849 // If the low part is a constant that is outside the range of LHI,
4850 // then we're better off using IILF.
4851 if (LowOp.getOpcode() == ISD::Constant) {
4852 int64_t Value = int32_t(LowOp->getAsZExtVal());
4853 if (!isInt<16>(Value))
4854 return Op;
4855 }
4856
4857 // Check whether the high part is an AND that doesn't change the
4858 // high 32 bits and just masks out low bits. We can skip it if so.
4859 if (HighOp.getOpcode() == ISD::AND &&
4860 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4861 SDValue HighOp0 = HighOp.getOperand(0);
4862 uint64_t Mask = HighOp.getConstantOperandVal(1);
4863 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4864 HighOp = HighOp0;
4865 }
4866
4867 // Take advantage of the fact that all GR32 operations only change the
4868 // low 32 bits by truncating Low to an i32 and inserting it directly
4869 // using a subreg. The interesting cases are those where the truncation
4870 // can be folded.
4871 SDLoc DL(Op);
4872 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4873 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4874 MVT::i64, HighOp, Low32);
4875}
4876
4877// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4878SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4879 SelectionDAG &DAG) const {
4880 SDNode *N = Op.getNode();
4881 SDValue LHS = N->getOperand(0);
4882 SDValue RHS = N->getOperand(1);
4883 SDLoc DL(N);
4884
4885 if (N->getValueType(0) == MVT::i128) {
4886 unsigned BaseOp = 0;
4887 unsigned FlagOp = 0;
4888 bool IsBorrow = false;
4889 switch (Op.getOpcode()) {
4890 default: llvm_unreachable("Unknown instruction!");
4891 case ISD::UADDO:
4892 BaseOp = ISD::ADD;
4893 FlagOp = SystemZISD::VACC;
4894 break;
4895 case ISD::USUBO:
4896 BaseOp = ISD::SUB;
4897 FlagOp = SystemZISD::VSCBI;
4898 IsBorrow = true;
4899 break;
4900 }
4901 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4902 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4903 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4904 DAG.getValueType(MVT::i1));
4905 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4906 if (IsBorrow)
4907 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4908 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4909 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4910 }
4911
4912 unsigned BaseOp = 0;
4913 unsigned CCValid = 0;
4914 unsigned CCMask = 0;
4915
4916 switch (Op.getOpcode()) {
4917 default: llvm_unreachable("Unknown instruction!");
4918 case ISD::SADDO:
4919 BaseOp = SystemZISD::SADDO;
4920 CCValid = SystemZ::CCMASK_ARITH;
4922 break;
4923 case ISD::SSUBO:
4924 BaseOp = SystemZISD::SSUBO;
4925 CCValid = SystemZ::CCMASK_ARITH;
4927 break;
4928 case ISD::UADDO:
4929 BaseOp = SystemZISD::UADDO;
4930 CCValid = SystemZ::CCMASK_LOGICAL;
4932 break;
4933 case ISD::USUBO:
4934 BaseOp = SystemZISD::USUBO;
4935 CCValid = SystemZ::CCMASK_LOGICAL;
4937 break;
4938 }
4939
4940 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4941 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4942
4943 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4944 if (N->getValueType(1) == MVT::i1)
4945 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4946
4947 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4948}
4949
4950static bool isAddCarryChain(SDValue Carry) {
4951 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4952 Carry->getValueType(0) != MVT::i128)
4953 Carry = Carry.getOperand(2);
4954 return Carry.getOpcode() == ISD::UADDO &&
4955 Carry->getValueType(0) != MVT::i128;
4956}
4957
4958static bool isSubBorrowChain(SDValue Carry) {
4959 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4960 Carry->getValueType(0) != MVT::i128)
4961 Carry = Carry.getOperand(2);
4962 return Carry.getOpcode() == ISD::USUBO &&
4963 Carry->getValueType(0) != MVT::i128;
4964}
4965
4966// Lower UADDO_CARRY/USUBO_CARRY nodes.
4967SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4968 SelectionDAG &DAG) const {
4969
4970 SDNode *N = Op.getNode();
4971 MVT VT = N->getSimpleValueType(0);
4972
4973 // Let legalize expand this if it isn't a legal type yet.
4974 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4975 return SDValue();
4976
4977 SDValue LHS = N->getOperand(0);
4978 SDValue RHS = N->getOperand(1);
4979 SDValue Carry = Op.getOperand(2);
4980 SDLoc DL(N);
4981
4982 if (VT == MVT::i128) {
4983 unsigned BaseOp = 0;
4984 unsigned FlagOp = 0;
4985 bool IsBorrow = false;
4986 switch (Op.getOpcode()) {
4987 default: llvm_unreachable("Unknown instruction!");
4988 case ISD::UADDO_CARRY:
4989 BaseOp = SystemZISD::VAC;
4990 FlagOp = SystemZISD::VACCC;
4991 break;
4992 case ISD::USUBO_CARRY:
4993 BaseOp = SystemZISD::VSBI;
4994 FlagOp = SystemZISD::VSBCBI;
4995 IsBorrow = true;
4996 break;
4997 }
4998 if (IsBorrow)
4999 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
5000 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
5001 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
5002 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
5003 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
5004 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
5005 DAG.getValueType(MVT::i1));
5006 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
5007 if (IsBorrow)
5008 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
5009 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
5010 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
5011 }
5012
5013 unsigned BaseOp = 0;
5014 unsigned CCValid = 0;
5015 unsigned CCMask = 0;
5016
5017 switch (Op.getOpcode()) {
5018 default: llvm_unreachable("Unknown instruction!");
5019 case ISD::UADDO_CARRY:
5020 if (!isAddCarryChain(Carry))
5021 return SDValue();
5022
5023 BaseOp = SystemZISD::ADDCARRY;
5024 CCValid = SystemZ::CCMASK_LOGICAL;
5026 break;
5027 case ISD::USUBO_CARRY:
5028 if (!isSubBorrowChain(Carry))
5029 return SDValue();
5030
5031 BaseOp = SystemZISD::SUBCARRY;
5032 CCValid = SystemZ::CCMASK_LOGICAL;
5034 break;
5035 }
5036
5037 // Set the condition code from the carry flag.
5038 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
5039 DAG.getConstant(CCValid, DL, MVT::i32),
5040 DAG.getConstant(CCMask, DL, MVT::i32));
5041
5042 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5043 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
5044
5045 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
5046 if (N->getValueType(1) == MVT::i1)
5047 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
5048
5049 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
5050}
5051
// Lower CTPOP. Three paths are taken depending on the result type:
//  - a 128-bit scalar size is handled as a v2i64 CTPOP followed by a VSUM;
//  - vector types use the byte-wise POPCNT node on v16i8 and then combine
//    the per-byte counts according to the element width;
//  - other scalar types use POPCNT on i64 and add up the per-byte counts
//    with a shift/add tree, skipping high parts that are known to be zero.
5052 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
5053 SelectionDAG &DAG) const {
5054 EVT VT = Op.getValueType();
5055 SDLoc DL(Op);
5056 Op = Op.getOperand(0);
5057
// 128-bit case: count each doubleword, then sum the two counts with VSUM
// (second VSUM operand is an all-zero vector).
5058 if (VT.getScalarSizeInBits() == 128) {
5059 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
5060 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
5061 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
5062 DAG.getConstant(0, DL, MVT::i64));
5063 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5064 return Op;
5065 }
5066
5067 // Handle vector types via VPOPCT.
5068 if (VT.isVector()) {
5069 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
5070 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
5071 switch (VT.getScalarSizeInBits()) {
5072 case 8:
5073 break;
5074 case 16: {
// Add the two byte counts of each halfword: shift left by 8, add, then
// shift right by 8 so the sum ends up in the low byte.
5075 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5076 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
5077 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
5078 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5079 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
5080 break;
5081 }
5082 case 32: {
5083 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5084 DAG.getConstant(0, DL, MVT::i32));
5085 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5086 break;
5087 }
5088 case 64: {
// Two VSUM steps: bytes to v4i32 first, then to the final 64-bit elements.
5089 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5090 DAG.getConstant(0, DL, MVT::i32));
5091 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
5092 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5093 break;
5094 }
5095 default:
5096 llvm_unreachable("Unexpected type");
5097 }
5098 return Op;
5099 }
5100
5101 // Get the known-zero mask for the operand.
5102 KnownBits Known = DAG.computeKnownBits(Op);
5103 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
// If the operand is known to be zero, so is its population count.
5104 if (NumSignificantBits == 0)
5105 return DAG.getConstant(0, DL, VT);
5106
5107 // Skip known-zero high parts of the operand.
// BitSize is the number of possibly-set bits rounded up to a power of two,
// capped at the width of VT.
5108 int64_t OrigBitSize = VT.getSizeInBits();
5109 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
5110 BitSize = std::min(BitSize, OrigBitSize);
5111
5112 // The POPCNT instruction counts the number of bits in each byte.
5113 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
5114 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
5115 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5116
5117 // Add up per-byte counts in a binary tree. All bits of Op at
5118 // position larger than BitSize remain zero throughout.
5119 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5120 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
// When only the low BitSize bits are in use, mask the shifted value so
// nothing is carried into the bits above BitSize.
5121 if (BitSize != OrigBitSize)
5122 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
5123 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
5124 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5125 }
5126
5127 // Extract overall result from high byte.
5128 if (BitSize > 8)
5129 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5130 DAG.getConstant(BitSize - 8, DL, VT));
5131
5132 return Op;
5133 }
5134
5135SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5136 SelectionDAG &DAG) const {
5137 SDLoc DL(Op);
5138 AtomicOrdering FenceOrdering =
5139 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5140 SyncScope::ID FenceSSID =
5141 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5142
5143 // The only fence that needs an instruction is a sequentially-consistent
5144 // cross-thread fence.
5145 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5146 FenceSSID == SyncScope::System) {
5147 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5148 Op.getOperand(0)),
5149 0);
5150 }
5151
5152 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5153 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5154}
5155
5156SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5157 SelectionDAG &DAG) const {
5158 EVT RegVT = Op.getValueType();
5159 if (RegVT.getSizeInBits() == 128)
5160 return lowerATOMIC_LDST_I128(Op, DAG);
5161 return lowerLoadF16(Op, DAG);
5162}
5163
5164SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5165 SelectionDAG &DAG) const {
5166 auto *Node = cast<AtomicSDNode>(Op.getNode());
5167 if (Node->getMemoryVT().getSizeInBits() == 128)
5168 return lowerATOMIC_LDST_I128(Op, DAG);
5169 return lowerStoreF16(Op, DAG);
5170}
5171
5172SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5173 SelectionDAG &DAG) const {
5174 auto *Node = cast<AtomicSDNode>(Op.getNode());
5175 assert(
5176 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5177 "Only custom lowering i128 or f128.");
5178 // Use same code to handle both legal and non-legal i128 types.
5180 LowerOperationWrapper(Node, Results, DAG);
5181 return DAG.getMergeValues(Results, SDLoc(Op));
5182}
5183
5184// Prepare for a Compare And Swap for a subword operation. This needs to be
5185// done in memory with 4 bytes at natural alignment.
5187 SDValue &AlignedAddr, SDValue &BitShift,
5188 SDValue &NegBitShift) {
5189 EVT PtrVT = Addr.getValueType();
5190 EVT WideVT = MVT::i32;
5191
5192 // Get the address of the containing word.
5193 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5194 DAG.getSignedConstant(-4, DL, PtrVT));
5195
5196 // Get the number of bits that the word must be rotated left in order
5197 // to bring the field to the top bits of a GR32.
5198 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5199 DAG.getConstant(3, DL, PtrVT));
5200 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5201
5202 // Get the complementing shift amount, for rotating a field in the top
5203 // bits back to its proper position.
5204 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5205 DAG.getConstant(0, DL, WideVT), BitShift);
5206
5207}
5208
5209 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5210 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
// Returns Op unchanged for the 32-bit case; otherwise returns the merged
// pair (loaded value as i32, output chain).
5211 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5212 SelectionDAG &DAG,
5213 unsigned Opcode) const {
5214 auto *Node = cast<AtomicSDNode>(Op.getNode());
5215
5216 // 32-bit operations need no special handling.
5217 EVT NarrowVT = Node->getMemoryVT();
5218 EVT WideVT = MVT::i32;
5219 if (NarrowVT == WideVT)
5220 return Op;
5221
5222 int64_t BitSize = NarrowVT.getSizeInBits();
5223 SDValue ChainIn = Node->getChain();
5224 SDValue Addr = Node->getBasePtr();
5225 SDValue Src2 = Node->getVal();
5226 MachineMemOperand *MMO = Node->getMemOperand();
5227 SDLoc DL(Node);
5228
5229 // Convert atomic subtracts of constants into additions.
5230 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5231 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5232 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5233 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5234 Src2.getValueType());
5235 }
5236
// Compute the address of the containing aligned word and the rotate
// amounts that move the subword field to and from the top of that word.
5237 SDValue AlignedAddr, BitShift, NegBitShift;
5238 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5239
5240 // Extend the source operand to 32 bits and prepare it for the inner loop.
5241 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5242 // operations require the source to be shifted in advance. (This shift
5243 // can be folded if the source is constant.) For AND and NAND, the lower
5244 // bits must be set, while for other opcodes they should be left clear.
5245 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5246 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5247 DAG.getConstant(32 - BitSize, DL, WideVT));
5248 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5249 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5250 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5251 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
5252
5253 // Construct the ATOMIC_LOADW_* node.
5254 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5255 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5256 DAG.getConstant(BitSize, DL, WideVT) };
5257 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5258 NarrowVT, MMO);
5259
5260 // Rotate the result of the final CS so that the field is in the lower
5261 // bits of a GR32. (No truncate is emitted here; the result stays WideVT.)
5262 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5263 DAG.getConstant(BitSize, DL, WideVT));
5264 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5265
// Return the rotated value together with the chain result of the atomic
// node.
5266 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5267 return DAG.getMergeValues(RetOps, DL);
5268 }
5269
5270// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5271// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5272SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5273 SelectionDAG &DAG) const {
5274 auto *Node = cast<AtomicSDNode>(Op.getNode());
5275 EVT MemVT = Node->getMemoryVT();
5276 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5277 // A full-width operation: negate and use LAA(G).
5278 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5279 assert(Subtarget.hasInterlockedAccess1() &&
5280 "Should have been expanded by AtomicExpand pass.");
5281 SDValue Src2 = Node->getVal();
5282 SDLoc DL(Src2);
5283 SDValue NegSrc2 =
5284 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5285 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5286 Node->getChain(), Node->getBasePtr(), NegSrc2,
5287 Node->getMemOperand());
5288 }
5289
5290 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5291}
5292
5293// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5294SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5295 SelectionDAG &DAG) const {
5296 auto *Node = cast<AtomicSDNode>(Op.getNode());
5297 SDValue ChainIn = Node->getOperand(0);
5298 SDValue Addr = Node->getOperand(1);
5299 SDValue CmpVal = Node->getOperand(2);
5300 SDValue SwapVal = Node->getOperand(3);
5301 MachineMemOperand *MMO = Node->getMemOperand();
5302 SDLoc DL(Node);
5303
5304 if (Node->getMemoryVT() == MVT::i128) {
5305 // Use same code to handle both legal and non-legal i128 types.
5307 LowerOperationWrapper(Node, Results, DAG);
5308 return DAG.getMergeValues(Results, DL);
5309 }
5310
5311 // We have native support for 32-bit and 64-bit compare and swap, but we
5312 // still need to expand extracting the "success" result from the CC.
5313 EVT NarrowVT = Node->getMemoryVT();
5314 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5315 if (NarrowVT == WideVT) {
5316 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5317 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5318 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5319 DL, Tys, Ops, NarrowVT, MMO);
5320 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5322
5323 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5324 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5325 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5326 return SDValue();
5327 }
5328
5329 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5330 // via a fullword ATOMIC_CMP_SWAPW operation.
5331 int64_t BitSize = NarrowVT.getSizeInBits();
5332
5333 SDValue AlignedAddr, BitShift, NegBitShift;
5334 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5335
5336 // Construct the ATOMIC_CMP_SWAPW node.
5337 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5338 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5339 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5340 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5341 VTList, Ops, NarrowVT, MMO);
5342 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5344
5345 // emitAtomicCmpSwapW() will zero extend the result (original value).
5346 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5347 DAG.getValueType(NarrowVT));
5348 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5349 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5350 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5351 return SDValue();
5352}
5353
5355SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5356 // Because of how we convert atomic_load and atomic_store to normal loads and
5357 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5358 // since DAGCombine hasn't been updated to account for atomic, but non
5359 // volatile loads. (See D57601)
5360 if (auto *SI = dyn_cast<StoreInst>(&I))
5361 if (SI->isAtomic())
5363 if (auto *LI = dyn_cast<LoadInst>(&I))
5364 if (LI->isAtomic())
5366 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5367 if (AI->isAtomic())
5369 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5370 if (AI->isAtomic())
5373}
5374
5375SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5376 SelectionDAG &DAG) const {
5377 MachineFunction &MF = DAG.getMachineFunction();
5378 auto *Regs = Subtarget.getSpecialRegisters();
5380 report_fatal_error("Variable-sized stack allocations are not supported "
5381 "in GHC calling convention");
5382 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5383 Regs->getStackPointerRegister(), Op.getValueType());
5384}
5385
5386SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5387 SelectionDAG &DAG) const {
5388 MachineFunction &MF = DAG.getMachineFunction();
5389 auto *Regs = Subtarget.getSpecialRegisters();
5390 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5391
5393 report_fatal_error("Variable-sized stack allocations are not supported "
5394 "in GHC calling convention");
5395
5396 SDValue Chain = Op.getOperand(0);
5397 SDValue NewSP = Op.getOperand(1);
5398 SDValue Backchain;
5399 SDLoc DL(Op);
5400
5401 if (StoreBackchain) {
5402 SDValue OldSP = DAG.getCopyFromReg(
5403 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5404 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5405 MachinePointerInfo());
5406 }
5407
5408 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5409
5410 if (StoreBackchain)
5411 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5412 MachinePointerInfo());
5413
5414 return Chain;
5415}
5416
5417SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5418 SelectionDAG &DAG) const {
5419 bool IsData = Op.getConstantOperandVal(4);
5420 if (!IsData)
5421 // Just preserve the chain.
5422 return Op.getOperand(0);
5423
5424 SDLoc DL(Op);
5425 bool IsWrite = Op.getConstantOperandVal(2);
5426 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5427 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5428 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5429 Op.getOperand(1)};
5430 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5431 Node->getVTList(), Ops,
5432 Node->getMemoryVT(), Node->getMemOperand());
5433}
5434
5435SDValue
5436SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5437 SelectionDAG &DAG) const {
5438 unsigned Opcode, CCValid;
5439 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5440 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5441 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5442 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5443 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5444 return SDValue();
5445 }
5446
5447 return SDValue();
5448}
5449
// Lower a chainless intrinsic. Intrinsics recognised by isIntrinsicWithCC
// are emitted through emitIntrinsicWithCC with their CC result converted by
// getCCResult. The remaining handled intrinsics are mapped directly to DAG
// nodes by intrinsic ID. An empty SDValue is returned for any other ID.
5450 SDValue
5451 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5452 SelectionDAG &DAG) const {
5453 unsigned Opcode, CCValid;
5454 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5455 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
// A single result means the intrinsic only yields the CC value.
5456 if (Op->getNumValues() == 1)
5457 return getCCResult(DAG, SDValue(Node, 0));
5458 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5459 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5460 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5461 }
5462
// Operand 0 holds the intrinsic ID; the intrinsic arguments start at
// operand 1.
5463 unsigned Id = Op.getConstantOperandVal(0);
5464 switch (Id) {
5465 case Intrinsic::thread_pointer:
5466 return lowerThreadPointer(SDLoc(Op), DAG);
5467
5468 case Intrinsic::s390_vpdi:
5469 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5470 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5471
5472 case Intrinsic::s390_vperm:
5473 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5474 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5475
// Unpack intrinsics: one target node per high/low and plain/"L" variant.
5476 case Intrinsic::s390_vuphb:
5477 case Intrinsic::s390_vuphh:
5478 case Intrinsic::s390_vuphf:
5479 case Intrinsic::s390_vuphg:
5480 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5481 Op.getOperand(1));
5482
5483 case Intrinsic::s390_vuplhb:
5484 case Intrinsic::s390_vuplhh:
5485 case Intrinsic::s390_vuplhf:
5486 case Intrinsic::s390_vuplhg:
5487 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5488 Op.getOperand(1));
5489
5490 case Intrinsic::s390_vuplb:
5491 case Intrinsic::s390_vuplhw:
5492 case Intrinsic::s390_vuplf:
5493 case Intrinsic::s390_vuplg:
5494 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5495 Op.getOperand(1));
5496
5497 case Intrinsic::s390_vupllb:
5498 case Intrinsic::s390_vupllh:
5499 case Intrinsic::s390_vupllf:
5500 case Intrinsic::s390_vupllg:
5501 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5502 Op.getOperand(1));
5503
5504 case Intrinsic::s390_vsumb:
5505 case Intrinsic::s390_vsumh:
5506 case Intrinsic::s390_vsumgh:
5507 case Intrinsic::s390_vsumgf:
5508 case Intrinsic::s390_vsumqf:
5509 case Intrinsic::s390_vsumqg:
5510 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5511 Op.getOperand(1), Op.getOperand(2));
5512
// Add intrinsics: vaq becomes a generic ADD; the carry-related forms map
// to the VACC/VAC/VACCC target nodes.
5513 case Intrinsic::s390_vaq:
5514 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5515 Op.getOperand(1), Op.getOperand(2));
5516 case Intrinsic::s390_vaccb:
5517 case Intrinsic::s390_vacch:
5518 case Intrinsic::s390_vaccf:
5519 case Intrinsic::s390_vaccg:
5520 case Intrinsic::s390_vaccq:
5521 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5522 Op.getOperand(1), Op.getOperand(2));
5523 case Intrinsic::s390_vacq:
5524 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5525 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5526 case Intrinsic::s390_vacccq:
5527 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5528 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5529
// Subtract intrinsics: vsq becomes a generic SUB; the borrow-related forms
// map to the VSCBI/VSBI/VSBCBI target nodes.
5530 case Intrinsic::s390_vsq:
5531 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5532 Op.getOperand(1), Op.getOperand(2));
5533 case Intrinsic::s390_vscbib:
5534 case Intrinsic::s390_vscbih:
5535 case Intrinsic::s390_vscbif:
5536 case Intrinsic::s390_vscbig:
5537 case Intrinsic::s390_vscbiq:
5538 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5539 Op.getOperand(1), Op.getOperand(2));
5540 case Intrinsic::s390_vsbiq:
5541 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5542 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5543 case Intrinsic::s390_vsbcbiq:
5544 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5545 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5546
// Multiply-high intrinsics map to the generic MULHS/MULHU nodes.
5547 case Intrinsic::s390_vmhb:
5548 case Intrinsic::s390_vmhh:
5549 case Intrinsic::s390_vmhf:
5550 case Intrinsic::s390_vmhg:
5551 case Intrinsic::s390_vmhq:
5552 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5553 Op.getOperand(1), Op.getOperand(2));
5554 case Intrinsic::s390_vmlhb:
5555 case Intrinsic::s390_vmlhh:
5556 case Intrinsic::s390_vmlhf:
5557 case Intrinsic::s390_vmlhg:
5558 case Intrinsic::s390_vmlhq:
5559 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5560 Op.getOperand(1), Op.getOperand(2));
5561
5562 case Intrinsic::s390_vmahb:
5563 case Intrinsic::s390_vmahh:
5564 case Intrinsic::s390_vmahf:
5565 case Intrinsic::s390_vmahg:
5566 case Intrinsic::s390_vmahq:
5567 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5568 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5569 case Intrinsic::s390_vmalhb:
5570 case Intrinsic::s390_vmalhh:
5571 case Intrinsic::s390_vmalhf:
5572 case Intrinsic::s390_vmalhg:
5573 case Intrinsic::s390_vmalhq:
5574 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5575 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5576
5577 case Intrinsic::s390_vmeb:
5578 case Intrinsic::s390_vmeh:
5579 case Intrinsic::s390_vmef:
5580 case Intrinsic::s390_vmeg:
5581 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5582 Op.getOperand(1), Op.getOperand(2));
5583 case Intrinsic::s390_vmleb:
5584 case Intrinsic::s390_vmleh:
5585 case Intrinsic::s390_vmlef:
5586 case Intrinsic::s390_vmleg:
5587 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5588 Op.getOperand(1), Op.getOperand(2));
5589 case Intrinsic::s390_vmob:
5590 case Intrinsic::s390_vmoh:
5591 case Intrinsic::s390_vmof:
5592 case Intrinsic::s390_vmog:
5593 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5594 Op.getOperand(1), Op.getOperand(2));
5595 case Intrinsic::s390_vmlob:
5596 case Intrinsic::s390_vmloh:
5597 case Intrinsic::s390_vmlof:
5598 case Intrinsic::s390_vmlog:
5599 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5600 Op.getOperand(1), Op.getOperand(2));
5601
// Multiply-and-add forms: emit the corresponding multiply node on operands
// 1 and 2, then a generic ADD with operand 3.
5602 case Intrinsic::s390_vmaeb:
5603 case Intrinsic::s390_vmaeh:
5604 case Intrinsic::s390_vmaef:
5605 case Intrinsic::s390_vmaeg:
5606 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5607 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5608 Op.getOperand(1), Op.getOperand(2)),
5609 Op.getOperand(3));
5610 case Intrinsic::s390_vmaleb:
5611 case Intrinsic::s390_vmaleh:
5612 case Intrinsic::s390_vmalef:
5613 case Intrinsic::s390_vmaleg:
5614 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5615 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5616 Op.getOperand(1), Op.getOperand(2)),
5617 Op.getOperand(3));
5618 case Intrinsic::s390_vmaob:
5619 case Intrinsic::s390_vmaoh:
5620 case Intrinsic::s390_vmaof:
5621 case Intrinsic::s390_vmaog:
5622 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5623 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5624 Op.getOperand(1), Op.getOperand(2)),
5625 Op.getOperand(3));
5626 case Intrinsic::s390_vmalob:
5627 case Intrinsic::s390_vmaloh:
5628 case Intrinsic::s390_vmalof:
5629 case Intrinsic::s390_vmalog:
5630 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5631 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5632 Op.getOperand(1), Op.getOperand(2)),
5633 Op.getOperand(3));
5634 }
5635
// Any other intrinsic ID is not lowered here.
5636 return SDValue();
5637 }
5638
5639 namespace {
5640 // Says that SystemZISD operation Opcode can be used to perform the equivalent
5641 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
5642 // Operand is the constant third operand, otherwise it is the number of
5643 // bytes in each element of the result.
5644 struct Permute {
5645 unsigned Opcode; // SystemZISD opcode that implements the permute.
5646 unsigned Operand; // Third operand or element size in bytes (see above).
5647 unsigned char Bytes[SystemZ::VectorBytes]; // Source byte for each result byte.
5648 };
5649 }
5650
// Table of the fixed byte permutes that have a dedicated SystemZISD
// operation. matchPermute scans it in order and returns the first entry that
// fits. Within each Bytes array the value divided by SystemZ::VectorBytes
// selects the pattern operand and the remainder selects the byte within that
// operand.
5651 static const Permute PermuteForms[] = {
5652 // VMRHG
5653 { SystemZISD::MERGE_HIGH, 8,
5654 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5655 // VMRHF
5656 { SystemZISD::MERGE_HIGH, 4,
5657 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5658 // VMRHH
5659 { SystemZISD::MERGE_HIGH, 2,
5660 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5661 // VMRHB
5662 { SystemZISD::MERGE_HIGH, 1,
5663 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5664 // VMRLG
5665 { SystemZISD::MERGE_LOW, 8,
5666 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5667 // VMRLF
5668 { SystemZISD::MERGE_LOW, 4,
5669 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5670 // VMRLH
5671 { SystemZISD::MERGE_LOW, 2,
5672 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5673 // VMRLB
5674 { SystemZISD::MERGE_LOW, 1,
5675 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5676 // VPKG
5677 { SystemZISD::PACK, 4,
5678 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5679 // VPKF
5680 { SystemZISD::PACK, 2,
5681 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5682 // VPKH
5683 { SystemZISD::PACK, 1,
5684 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5685 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5686 { SystemZISD::PERMUTE_DWORDS, 4,
5687 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5688 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5689 { SystemZISD::PERMUTE_DWORDS, 1,
5690 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5691 };
5692
// Called once a vector shuffle has been matched against a two-operand
// pattern. OpNos[0] and OpNos[1] name the shuffle operand that must feed
// pattern operand 0 and 1 respectively, or are -1 when the pattern operand is
// unconstrained. An unconstrained operand reuses the other operand's choice.
// On success OpNo0/OpNo1 receive the resolved shuffle operand numbers and
// true is returned. If both entries are -1, false is returned and the
// outputs are left untouched.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
  const bool HasFirst = OpNos[0] >= 0;
  const bool HasSecond = OpNos[1] >= 0;

  // Nothing constrains either operand, so there is nothing to choose from.
  if (!HasFirst && !HasSecond)
    return false;

  OpNo0 = HasFirst ? OpNos[0] : OpNos[1];
  OpNo1 = HasSecond ? OpNos[1] : OpNos[0];
  return true;
}
5713
5714// Bytes is a VPERM-like permute vector, except that -1 is used for
5715// undefined bytes. Return true if the VPERM can be implemented using P.
5716// When returning true set OpNo0 to the VPERM operand that should be
5717// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5718//
5719// For example, if swapping the VPERM operands allows P to match, OpNo0
5720// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5721// operand, but rewriting it to use two duplicated operands allows it to
5722// match P, then OpNo0 and OpNo1 will be the same.
5723static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5724 unsigned &OpNo0, unsigned &OpNo1) {
5725 int OpNos[] = { -1, -1 };
5726 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5727 int Elt = Bytes[I];
5728 if (Elt >= 0) {
5729 // Make sure that the two permute vectors use the same suboperand
5730 // byte number. Only the operand numbers (the high bits) are
5731 // allowed to differ.
5732 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5733 return false;
5734 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5735 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5736 // Make sure that the operand mappings are consistent with previous
5737 // elements.
5738 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5739 return false;
5740 OpNos[ModelOpNo] = RealOpNo;
5741 }
5742 }
5743 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5744}
5745
5746// As above, but search for a matching permute.
5747static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5748 unsigned &OpNo0, unsigned &OpNo1) {
5749 for (auto &P : PermuteForms)
5750 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5751 return &P;
5752 return nullptr;
5753}
5754
5755// Bytes is a VPERM-like permute vector, except that -1 is used for
5756// undefined bytes. This permute is an operand of an outer permute.
5757// See whether redistributing the -1 bytes gives a shuffle that can be
5758// implemented using P. If so, set Transform to a VPERM-like permute vector
5759// that, when applied to the result of P, gives the original permute in Bytes.
5761 const Permute &P,
5762 SmallVectorImpl<int> &Transform) {
5763 unsigned To = 0;
5764 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5765 int Elt = Bytes[From];
5766 if (Elt < 0)
5767 // Byte number From of the result is undefined.
5768 Transform[From] = -1;
5769 else {
5770 while (P.Bytes[To] != Elt) {
5771 To += 1;
5772 if (To == SystemZ::VectorBytes)
5773 return false;
5774 }
5775 Transform[From] = To;
5776 }
5777 }
5778 return true;
5779}
5780
5781// As above, but search for a matching permute.
5782static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5783 SmallVectorImpl<int> &Transform) {
5784 for (auto &P : PermuteForms)
5785 if (matchDoublePermute(Bytes, P, Transform))
5786 return &P;
5787 return nullptr;
5788}
5789
5790// Convert the mask of the given shuffle op into a byte-level mask,
5791// as if it had type vNi8.
5792static bool getVPermMask(SDValue ShuffleOp,
5793 SmallVectorImpl<int> &Bytes) {
5794 EVT VT = ShuffleOp.getValueType();
5795 unsigned NumElements = VT.getVectorNumElements();
5796 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5797
5798 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5799 Bytes.resize(NumElements * BytesPerElement, -1);
5800 for (unsigned I = 0; I < NumElements; ++I) {
5801 int Index = VSN->getMaskElt(I);
5802 if (Index >= 0)
5803 for (unsigned J = 0; J < BytesPerElement; ++J)
5804 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5805 }
5806 return true;
5807 }
5808 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5809 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5810 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5811 Bytes.resize(NumElements * BytesPerElement, -1);
5812 for (unsigned I = 0; I < NumElements; ++I)
5813 for (unsigned J = 0; J < BytesPerElement; ++J)
5814 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5815 return true;
5816 }
5817 return false;
5818}
5819
5820// Bytes is a VPERM-like permute vector, except that -1 is used for
5821// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5822// the result come from a contiguous sequence of bytes from one input.
5823// Set Base to the selector for the first byte if so.
5824static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5825 unsigned BytesPerElement, int &Base) {
5826 Base = -1;
5827 for (unsigned I = 0; I < BytesPerElement; ++I) {
5828 if (Bytes[Start + I] >= 0) {
5829 unsigned Elem = Bytes[Start + I];
5830 if (Base < 0) {
5831 Base = Elem - I;
5832 // Make sure the bytes would come from one input operand.
5833 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5834 return false;
5835 } else if (unsigned(Base) != Elem - I)
5836 return false;
5837 }
5838 }
5839 return true;
5840}
5841
5842// Bytes is a VPERM-like permute vector, except that -1 is used for
5843// undefined bytes. Return true if it can be performed using VSLDB.
5844// When returning true, set StartIndex to the shift amount and OpNo0
5845// and OpNo1 to the VPERM operands that should be used as the first
5846// and second shift operand respectively.
5848 unsigned &StartIndex, unsigned &OpNo0,
5849 unsigned &OpNo1) {
5850 int OpNos[] = { -1, -1 };
5851 int Shift = -1;
5852 for (unsigned I = 0; I < 16; ++I) {
5853 int Index = Bytes[I];
5854 if (Index >= 0) {
5855 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5856 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5857 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5858 if (Shift < 0)
5859 Shift = ExpectedShift;
5860 else if (Shift != ExpectedShift)
5861 return false;
5862 // Make sure that the operand mappings are consistent with previous
5863 // elements.
5864 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5865 return false;
5866 OpNos[ModelOpNo] = RealOpNo;
5867 }
5868 }
5869 StartIndex = Shift;
5870 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5871}
5872
5873// Create a node that performs P on operands Op0 and Op1, casting the
5874// operands to the appropriate type. The type of the result is determined by P.
5876 const Permute &P, SDValue Op0, SDValue Op1) {
5877 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5878 // elements of a PACK are twice as wide as the outputs.
5879 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5880 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5881 P.Operand);
5882 // Cast both operands to the appropriate type.
5883 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5884 SystemZ::VectorBytes / InBytes);
5885 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5886 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5887 SDValue Op;
5888 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5889 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5890 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5891 } else if (P.Opcode == SystemZISD::PACK) {
5892 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5893 SystemZ::VectorBytes / P.Operand);
5894 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5895 } else {
5896 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5897 }
5898 return Op;
5899}
5900
5901static bool isZeroVector(SDValue N) {
5902 if (N->getOpcode() == ISD::BITCAST)
5903 N = N->getOperand(0);
5904 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5905 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5906 return Op->getZExtValue() == 0;
5907 return ISD::isBuildVectorAllZeros(N.getNode());
5908}
5909
5910// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5911static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5912 for (unsigned I = 0; I < Num ; I++)
5913 if (isZeroVector(Ops[I]))
5914 return I;
5915 return UINT32_MAX;
5916}
5917
5918// Bytes is a VPERM-like permute vector, except that -1 is used for
5919// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5920// VSLDB or VPERM.
5922 SDValue *Ops,
5923 const SmallVectorImpl<int> &Bytes) {
5924 for (unsigned I = 0; I < 2; ++I)
5925 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5926
5927 // First see whether VSLDB can be used.
5928 unsigned StartIndex, OpNo0, OpNo1;
5929 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5930 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5931 Ops[OpNo1],
5932 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5933
5934 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5935 // eliminate a zero vector by reusing any zero index in the permute vector.
5936 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5937 if (ZeroVecIdx != UINT32_MAX) {
5938 bool MaskFirst = true;
5939 int ZeroIdx = -1;
5940 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5941 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5942 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5943 if (OpNo == ZeroVecIdx && I == 0) {
5944 // If the first byte is zero, use mask as first operand.
5945 ZeroIdx = 0;
5946 break;
5947 }
5948 if (OpNo != ZeroVecIdx && Byte == 0) {
5949 // If mask contains a zero, use it by placing that vector first.
5950 ZeroIdx = I + SystemZ::VectorBytes;
5951 MaskFirst = false;
5952 break;
5953 }
5954 }
5955 if (ZeroIdx != -1) {
5956 SDValue IndexNodes[SystemZ::VectorBytes];
5957 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5958 if (Bytes[I] >= 0) {
5959 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5960 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5961 if (OpNo == ZeroVecIdx)
5962 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5963 else {
5964 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5965 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5966 }
5967 } else
5968 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5969 }
5970 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5971 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5972 if (MaskFirst)
5973 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5974 Mask);
5975 else
5976 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5977 Mask);
5978 }
5979 }
5980
5981 SDValue IndexNodes[SystemZ::VectorBytes];
5982 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5983 if (Bytes[I] >= 0)
5984 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5985 else
5986 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5987 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5988 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5989 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5990}
5991
5992namespace {
5993// Describes a general N-operand vector shuffle.
5994struct GeneralShuffle {
5995 GeneralShuffle(EVT vt)
5996 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5997 void addUndef();
5998 bool add(SDValue, unsigned);
5999 SDValue getNode(SelectionDAG &, const SDLoc &);
6000 void tryPrepareForUnpack();
6001 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
6002 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
6003
6004 // The operands of the shuffle.
6006
6007 // Index I is -1 if byte I of the result is undefined. Otherwise the
6008 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
6009 // Bytes[I] / SystemZ::VectorBytes.
6011
6012 // The type of the shuffle result.
6013 EVT VT;
6014
6015 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
6016 unsigned UnpackFromEltSize;
6017 // True if the final unpack uses the low half.
6018 bool UnpackLow;
6019};
6020} // namespace
6021
6022// Add an extra undefined element to the shuffle.
6023void GeneralShuffle::addUndef() {
6024 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
6025 for (unsigned I = 0; I < BytesPerElement; ++I)
6026 Bytes.push_back(-1);
6027}
6028
6029// Add an extra element to the shuffle, taking it from element Elem of Op.
6030// A null Op indicates a vector input whose value will be calculated later;
6031// there is at most one such input per shuffle and it always has the same
6032// type as the result. Aborts and returns false if the source vector elements
6033// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
6034// LLVM they become implicitly extended, but this is rare and not optimized.
6035bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
6036 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
6037
6038 // The source vector can have wider elements than the result,
6039 // either through an explicit TRUNCATE or because of type legalization.
6040 // We want the least significant part.
6041 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
6042 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
6043
6044 // Return false if the source elements are smaller than their destination
6045 // elements.
6046 if (FromBytesPerElement < BytesPerElement)
6047 return false;
6048
6049 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
6050 (FromBytesPerElement - BytesPerElement));
6051
6052 // Look through things like shuffles and bitcasts.
6053 while (Op.getNode()) {
6054 if (Op.getOpcode() == ISD::BITCAST)
6055 Op = Op.getOperand(0);
6056 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
6057 // See whether the bytes we need come from a contiguous part of one
6058 // operand.
6060 if (!getVPermMask(Op, OpBytes))
6061 break;
6062 int NewByte;
6063 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
6064 break;
6065 if (NewByte < 0) {
6066 addUndef();
6067 return true;
6068 }
6069 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
6070 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
6071 } else if (Op.isUndef()) {
6072 addUndef();
6073 return true;
6074 } else
6075 break;
6076 }
6077
6078 // Make sure that the source of the extraction is in Ops.
6079 unsigned OpNo = 0;
6080 for (; OpNo < Ops.size(); ++OpNo)
6081 if (Ops[OpNo] == Op)
6082 break;
6083 if (OpNo == Ops.size())
6084 Ops.push_back(Op);
6085
6086 // Add the element to Bytes.
6087 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
6088 for (unsigned I = 0; I < BytesPerElement; ++I)
6089 Bytes.push_back(Base + I);
6090
6091 return true;
6092}
6093
6094// Return SDNodes for the completed shuffle.
6095SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6096 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6097
6098 if (Ops.size() == 0)
6099 return DAG.getUNDEF(VT);
6100
6101 // Use a single unpack if possible as the last operation.
6102 tryPrepareForUnpack();
6103
6104 // Make sure that there are at least two shuffle operands.
6105 if (Ops.size() == 1)
6106 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
6107
6108 // Create a tree of shuffles, deferring root node until after the loop.
6109 // Try to redistribute the undefined elements of non-root nodes so that
6110 // the non-root shuffles match something like a pack or merge, then adjust
6111 // the parent node's permute vector to compensate for the new order.
6112 // Among other things, this copes with vectors like <2 x i16> that were
6113 // padded with undefined elements during type legalization.
6114 //
6115 // In the best case this redistribution will lead to the whole tree
6116 // using packs and merges. It should rarely be a loss in other cases.
6117 unsigned Stride = 1;
6118 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6119 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6120 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6121
6122 // Create a mask for just these two operands.
6124 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6125 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6126 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6127 if (OpNo == I)
6128 NewBytes[J] = Byte;
6129 else if (OpNo == I + Stride)
6130 NewBytes[J] = SystemZ::VectorBytes + Byte;
6131 else
6132 NewBytes[J] = -1;
6133 }
6134 // See if it would be better to reorganize NewMask to avoid using VPERM.
6136 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6137 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6138 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6139 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6140 if (NewBytes[J] >= 0) {
6141 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6142 "Invalid double permute");
6143 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6144 } else
6145 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6146 }
6147 } else {
6148 // Just use NewBytes on the operands.
6149 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6150 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6151 if (NewBytes[J] >= 0)
6152 Bytes[J] = I * SystemZ::VectorBytes + J;
6153 }
6154 }
6155 }
6156
6157 // Now we just have 2 inputs. Put the second operand in Ops[1].
6158 if (Stride > 1) {
6159 Ops[1] = Ops[Stride];
6160 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6161 if (Bytes[I] >= int(SystemZ::VectorBytes))
6162 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6163 }
6164
6165 // Look for an instruction that can do the permute without resorting
6166 // to VPERM.
6167 unsigned OpNo0, OpNo1;
6168 SDValue Op;
6169 if (unpackWasPrepared() && Ops[1].isUndef())
6170 Op = Ops[0];
6171 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6172 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6173 else
6174 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6175
6176 Op = insertUnpackIfPrepared(DAG, DL, Op);
6177
6178 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6179}
6180
6181#ifndef NDEBUG
6182static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6183 dbgs() << Msg.c_str() << " { ";
6184 for (unsigned I = 0; I < Bytes.size(); I++)
6185 dbgs() << Bytes[I] << " ";
6186 dbgs() << "}\n";
6187}
6188#endif
6189
6190// If the Bytes vector matches an unpack operation, prepare to do the unpack
6191// after all else by removing the zero vector and the effect of the unpack on
6192// Bytes.
6193void GeneralShuffle::tryPrepareForUnpack() {
6194 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6195 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6196 return;
6197
6198 // Only do this if removing the zero vector reduces the depth, otherwise
6199 // the critical path will increase with the final unpack.
6200 if (Ops.size() > 2 &&
6201 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6202 return;
6203
6204 // Find an unpack that would allow removing the zero vector from Ops.
6205 UnpackFromEltSize = 1;
6206 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6207 bool MatchUnpack = true;
6209 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6210 unsigned ToEltSize = UnpackFromEltSize * 2;
6211 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6212 if (!IsZextByte)
6213 SrcBytes.push_back(Bytes[Elt]);
6214 if (Bytes[Elt] != -1) {
6215 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6216 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6217 MatchUnpack = false;
6218 break;
6219 }
6220 }
6221 }
6222 if (MatchUnpack) {
6223 if (Ops.size() == 2) {
6224 // Don't use unpack if a single source operand needs rearrangement.
6225 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6226 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6227 if (SrcBytes[i] == -1)
6228 continue;
6229 if (SrcBytes[i] % 16 != int(i))
6230 CanUseUnpackHigh = false;
6231 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6232 CanUseUnpackLow = false;
6233 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6234 UnpackFromEltSize = UINT_MAX;
6235 return;
6236 }
6237 }
6238 if (!CanUseUnpackHigh)
6239 UnpackLow = true;
6240 }
6241 break;
6242 }
6243 }
6244 if (UnpackFromEltSize > 4)
6245 return;
6246
6247 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6248 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6249 << ".\n";
6250 dumpBytes(Bytes, "Original Bytes vector:"););
6251
6252 // Apply the unpack in reverse to the Bytes array.
6253 unsigned B = 0;
6254 if (UnpackLow) {
6255 while (B < SystemZ::VectorBytes / 2)
6256 Bytes[B++] = -1;
6257 }
6258 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6259 Elt += UnpackFromEltSize;
6260 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6261 Bytes[B] = Bytes[Elt];
6262 }
6263 if (!UnpackLow) {
6264 while (B < SystemZ::VectorBytes)
6265 Bytes[B++] = -1;
6266 }
6267
6268 // Remove the zero vector from Ops
6269 Ops.erase(&Ops[ZeroVecOpNo]);
6270 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6271 if (Bytes[I] >= 0) {
6272 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6273 if (OpNo > ZeroVecOpNo)
6274 Bytes[I] -= SystemZ::VectorBytes;
6275 }
6276
6277 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6278 dbgs() << "\n";);
6279}
6280
6281SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6282 const SDLoc &DL,
6283 SDValue Op) {
6284 if (!unpackWasPrepared())
6285 return Op;
6286 unsigned InBits = UnpackFromEltSize * 8;
6287 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6288 SystemZ::VectorBits / InBits);
6289 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6290 unsigned OutBits = InBits * 2;
6291 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6292 SystemZ::VectorBits / OutBits);
6293 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6294 : SystemZISD::UNPACKL_HIGH,
6295 DL, OutVT, PackedOp);
6296}
6297
6298// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6300 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6301 if (!Op.getOperand(I).isUndef())
6302 return false;
6303 return true;
6304}
6305
6306// Return a vector of type VT that contains Value in the first element.
6307// The other elements don't matter.
6309 SDValue Value) {
6310 // If we have a constant, replicate it to all elements and let the
6311 // BUILD_VECTOR lowering take care of it.
6312 if (Value.getOpcode() == ISD::Constant ||
6313 Value.getOpcode() == ISD::ConstantFP) {
6315 return DAG.getBuildVector(VT, DL, Ops);
6316 }
6317 if (Value.isUndef())
6318 return DAG.getUNDEF(VT);
6319 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6320}
6321
6322// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6323// element 1. Used for cases in which replication is cheap.
6325 SDValue Op0, SDValue Op1) {
6326 if (Op0.isUndef()) {
6327 if (Op1.isUndef())
6328 return DAG.getUNDEF(VT);
6329 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6330 }
6331 if (Op1.isUndef())
6332 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6333 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6334 buildScalarToVector(DAG, DL, VT, Op0),
6335 buildScalarToVector(DAG, DL, VT, Op1));
6336}
6337
6338// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6339// vector for them.
6341 SDValue Op1) {
6342 if (Op0.isUndef() && Op1.isUndef())
6343 return DAG.getUNDEF(MVT::v2i64);
6344 // If one of the two inputs is undefined then replicate the other one,
6345 // in order to avoid using another register unnecessarily.
6346 if (Op0.isUndef())
6347 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6348 else if (Op1.isUndef())
6349 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6350 else {
6351 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6352 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6353 }
6354 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6355}
6356
6357// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6358// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6359// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6360// would benefit from this representation and return it if so.
6362 BuildVectorSDNode *BVN) {
6363 EVT VT = BVN->getValueType(0);
6364 unsigned NumElements = VT.getVectorNumElements();
6365
6366 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6367 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6368 // need a BUILD_VECTOR, add an additional placeholder operand for that
6369 // BUILD_VECTOR and store its operands in ResidueOps.
6370 GeneralShuffle GS(VT);
6372 bool FoundOne = false;
6373 for (unsigned I = 0; I < NumElements; ++I) {
6374 SDValue Op = BVN->getOperand(I);
6375 if (Op.getOpcode() == ISD::TRUNCATE)
6376 Op = Op.getOperand(0);
6377 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6378 Op.getOperand(1).getOpcode() == ISD::Constant) {
6379 unsigned Elem = Op.getConstantOperandVal(1);
6380 if (!GS.add(Op.getOperand(0), Elem))
6381 return SDValue();
6382 FoundOne = true;
6383 } else if (Op.isUndef()) {
6384 GS.addUndef();
6385 } else {
6386 if (!GS.add(SDValue(), ResidueOps.size()))
6387 return SDValue();
6388 ResidueOps.push_back(BVN->getOperand(I));
6389 }
6390 }
6391
6392 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6393 if (!FoundOne)
6394 return SDValue();
6395
6396 // Create the BUILD_VECTOR for the remaining elements, if any.
6397 if (!ResidueOps.empty()) {
6398 while (ResidueOps.size() < NumElements)
6399 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6400 for (auto &Op : GS.Ops) {
6401 if (!Op.getNode()) {
6402 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6403 break;
6404 }
6405 }
6406 }
6407 return GS.getNode(DAG, SDLoc(BVN));
6408}
6409
6410bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6411 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6412 return true;
6413 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6414 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6415 return true;
6416 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6417 return true;
6418 return false;
6419}
6420
6422 unsigned MergedBits, EVT VT, SDValue Op0,
6423 SDValue Op1) {
6424 MVT IntVecVT = MVT::getVectorVT(MVT::getIntegerVT(MergedBits),
6425 SystemZ::VectorBits / MergedBits);
6426 assert(VT.getSizeInBits() == 128 && IntVecVT.getSizeInBits() == 128 &&
6427 "Handling full vectors only.");
6428 Op0 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0);
6429 Op1 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op1);
6430 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, DL, IntVecVT, Op0, Op1);
6431 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6432}
6433
6435 EVT VT, SmallVectorImpl<SDValue> &Elems,
6436 unsigned Pos) {
6437 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 0], Elems[Pos + 1]);
6438 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 2], Elems[Pos + 3]);
6439 // Avoid unnecessary undefs by reusing the other operand.
6440 if (Op01.isUndef()) {
6441 if (Op23.isUndef())
6442 return Op01;
6443 Op01 = Op23;
6444 } else if (Op23.isUndef())
6445 Op23 = Op01;
6446 // Merging identical replications is a no-op.
6447 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6448 return Op01;
6449 unsigned MergedBits = VT.getSimpleVT().getScalarSizeInBits() * 2;
6450 return mergeHighParts(DAG, DL, MergedBits, VT, Op01, Op23);
6451}
6452
6453// Combine GPR scalar values Elems into a vector of type VT.
6454SDValue
6455SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6456 SmallVectorImpl<SDValue> &Elems) const {
6457 // See whether there is a single replicated value.
6459 unsigned int NumElements = Elems.size();
6460 unsigned int Count = 0;
6461 for (auto Elem : Elems) {
6462 if (!Elem.isUndef()) {
6463 if (!Single.getNode())
6464 Single = Elem;
6465 else if (Elem != Single) {
6466 Single = SDValue();
6467 break;
6468 }
6469 Count += 1;
6470 }
6471 }
6472 // There are three cases here:
6473 //
6474 // - if the only defined element is a loaded one, the best sequence
6475 // is a replicating load.
6476 //
6477 // - otherwise, if the only defined element is an i64 value, we will
6478 // end up with the same VLVGP sequence regardless of whether we short-cut
6479 // for replication or fall through to the later code.
6480 //
6481 // - otherwise, if the only defined element is an i32 or smaller value,
6482 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6483 // This is only a win if the single defined element is used more than once.
6484 // In other cases we're better off using a single VLVGx.
6485 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6486 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6487
6488 // If all elements are loads, use VLREP/VLEs (below).
6489 bool AllLoads = true;
6490 for (auto Elem : Elems)
6491 if (!isVectorElementLoad(Elem)) {
6492 AllLoads = false;
6493 break;
6494 }
6495
6496 // The best way of building a v2i64 from two i64s is to use VLVGP.
6497 if (VT == MVT::v2i64 && !AllLoads)
6498 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6499
6500 // Use a 64-bit merge high to combine two doubles.
6501 if (VT == MVT::v2f64 && !AllLoads)
6502 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6503
6504 // Build v4f32 values directly from the FPRs:
6505 //
6506 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6507 // V V VMRHF
6508 // <ABxx> <CDxx>
6509 // V VMRHG
6510 // <ABCD>
6511 if (VT == MVT::v4f32 && !AllLoads)
6512 return buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);
6513
6514 // Same for v8f16.
6515 if (VT == MVT::v8f16 && !AllLoads) {
6516 SDValue Op0123 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);
6517 SDValue Op4567 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 4);
6518 // Avoid unnecessary undefs by reusing the other operand.
6519 if (Op0123.isUndef())
6520 Op0123 = Op4567;
6521 else if (Op4567.isUndef())
6522 Op4567 = Op0123;
6523 // Merging identical replications is a no-op.
6524 if (Op0123.getOpcode() == SystemZISD::REPLICATE && Op0123 == Op4567)
6525 return Op0123;
6526 return mergeHighParts(DAG, DL, 64, VT, Op0123, Op4567);
6527 }
6528
6529 // Collect the constant terms.
6532
6533 unsigned NumConstants = 0;
6534 for (unsigned I = 0; I < NumElements; ++I) {
6535 SDValue Elem = Elems[I];
6536 if (Elem.getOpcode() == ISD::Constant ||
6537 Elem.getOpcode() == ISD::ConstantFP) {
6538 NumConstants += 1;
6539 Constants[I] = Elem;
6540 Done[I] = true;
6541 }
6542 }
6543 // If there was at least one constant, fill in the other elements of
6544 // Constants with undefs to get a full vector constant and use that
6545 // as the starting point.
6547 SDValue ReplicatedVal;
6548 if (NumConstants > 0) {
6549 for (unsigned I = 0; I < NumElements; ++I)
6550 if (!Constants[I].getNode())
6551 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6552 Result = DAG.getBuildVector(VT, DL, Constants);
6553 } else {
6554 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6555 // avoid a false dependency on any previous contents of the vector
6556 // register.
6557
6558 // Use a VLREP if at least one element is a load. Make sure to replicate
6559 // the load with the most elements having its value.
6560 std::map<const SDNode*, unsigned> UseCounts;
6561 SDNode *LoadMaxUses = nullptr;
6562 for (unsigned I = 0; I < NumElements; ++I)
6563 if (isVectorElementLoad(Elems[I])) {
6564 SDNode *Ld = Elems[I].getNode();
6565 unsigned Count = ++UseCounts[Ld];
6566 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6567 LoadMaxUses = Ld;
6568 }
6569 if (LoadMaxUses != nullptr) {
6570 ReplicatedVal = SDValue(LoadMaxUses, 0);
6571 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6572 } else {
6573 // Try to use VLVGP.
6574 unsigned I1 = NumElements / 2 - 1;
6575 unsigned I2 = NumElements - 1;
6576 bool Def1 = !Elems[I1].isUndef();
6577 bool Def2 = !Elems[I2].isUndef();
6578 if (Def1 || Def2) {
6579 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6580 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6581 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6582 joinDwords(DAG, DL, Elem1, Elem2));
6583 Done[I1] = true;
6584 Done[I2] = true;
6585 } else
6586 Result = DAG.getUNDEF(VT);
6587 }
6588 }
6589
6590 // Use VLVGx to insert the other elements.
6591 for (unsigned I = 0; I < NumElements; ++I)
6592 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6593 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6594 DAG.getConstant(I, DL, MVT::i32));
6595 return Result;
6596}
6597
6598SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6599 SelectionDAG &DAG) const {
6600 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6601 SDLoc DL(Op);
6602 EVT VT = Op.getValueType();
6603
6604 if (BVN->isConstant()) {
6605 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6606 return Op;
6607
6608 // Fall back to loading it from memory.
6609 return SDValue();
6610 }
6611
6612 // See if we should use shuffles to construct the vector from other vectors.
6613 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6614 return Res;
6615
6616 // Detect SCALAR_TO_VECTOR conversions.
6618 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6619
6620 // Otherwise use buildVector to build the vector up from GPRs.
6621 unsigned NumElements = Op.getNumOperands();
6623 for (unsigned I = 0; I < NumElements; ++I)
6624 Ops[I] = Op.getOperand(I);
6625 return buildVector(DAG, DL, VT, Ops);
6626}
6627
6628SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6629 SelectionDAG &DAG) const {
6630 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6631 SDLoc DL(Op);
6632 EVT VT = Op.getValueType();
6633 unsigned NumElements = VT.getVectorNumElements();
6634
6635 if (VSN->isSplat()) {
6636 SDValue Op0 = Op.getOperand(0);
6637 unsigned Index = VSN->getSplatIndex();
6638 assert(Index < VT.getVectorNumElements() &&
6639 "Splat index should be defined and in first operand");
6640 // See whether the value we're splatting is directly available as a scalar.
6641 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6643 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6644 // Otherwise keep it as a vector-to-vector operation.
6645 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6646 DAG.getTargetConstant(Index, DL, MVT::i32));
6647 }
6648
6649 GeneralShuffle GS(VT);
6650 for (unsigned I = 0; I < NumElements; ++I) {
6651 int Elt = VSN->getMaskElt(I);
6652 if (Elt < 0)
6653 GS.addUndef();
6654 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6655 unsigned(Elt) % NumElements))
6656 return SDValue();
6657 }
6658 return GS.getNode(DAG, SDLoc(VSN));
6659}
6660
6661SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6662 SelectionDAG &DAG) const {
6663 SDLoc DL(Op);
6664 // Just insert the scalar into element 0 of an undefined vector.
6665 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6666 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6667 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6668}
6669
6670// Shift the lower 2 bytes of Op to the left in order to insert into the
6671// upper 2 bytes of the FP register.
6673 assert(Op.getSimpleValueType() == MVT::i64 &&
6674 "Expexted to convert i64 to f16.");
6675 SDLoc DL(Op);
6676 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6677 DAG.getConstant(48, DL, MVT::i64));
6678 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6679 SDValue F16Val =
6680 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6681 return F16Val;
6682}
6683
6684// Extract Op into GPR and shift the 2 f16 bytes to the right.
6686 assert(Op.getSimpleValueType() == MVT::f16 &&
6687 "Expected to convert f16 to i64.");
6688 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6689 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6690 SDValue(U32, 0), Op);
6691 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6692 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6693 DAG.getConstant(48, DL, MVT::i32));
6694 return Shft;
6695}
6696
6697SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6698 SelectionDAG &DAG) const {
6699 // Handle insertions of floating-point values.
6700 SDLoc DL(Op);
6701 SDValue Op0 = Op.getOperand(0);
6702 SDValue Op1 = Op.getOperand(1);
6703 SDValue Op2 = Op.getOperand(2);
6704 EVT VT = Op.getValueType();
6705
6706 // Insertions into constant indices of a v2f64 can be done using VPDI.
6707 // However, if the inserted value is a bitcast or a constant then it's
6708 // better to use GPRs, as below.
6709 if (VT == MVT::v2f64 &&
6710 Op1.getOpcode() != ISD::BITCAST &&
6711 Op1.getOpcode() != ISD::ConstantFP &&
6712 Op2.getOpcode() == ISD::Constant) {
6713 uint64_t Index = Op2->getAsZExtVal();
6714 unsigned Mask = VT.getVectorNumElements() - 1;
6715 if (Index <= Mask)
6716 return Op;
6717 }
6718
6719 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6720 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6721 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6722 SDValue IntOp1 =
6723 VT == MVT::v8f16
6724 ? DAG.getZExtOrTrunc(convertFromF16(Op1, DL, DAG), DL, MVT::i32)
6725 : DAG.getNode(ISD::BITCAST, DL, IntVT, Op1);
6726 SDValue Res =
6727 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6728 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), IntOp1, Op2);
6729 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6730}
6731
6732SDValue
6733SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6734 SelectionDAG &DAG) const {
6735 // Handle extractions of floating-point values.
6736 SDLoc DL(Op);
6737 SDValue Op0 = Op.getOperand(0);
6738 SDValue Op1 = Op.getOperand(1);
6739 EVT VT = Op.getValueType();
6740 EVT VecVT = Op0.getValueType();
6741
6742 // Extractions of constant indices can be done directly.
6743 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6744 uint64_t Index = CIndexN->getZExtValue();
6745 unsigned Mask = VecVT.getVectorNumElements() - 1;
6746 if (Index <= Mask)
6747 return Op;
6748 }
6749
6750 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6751 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6752 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6753 MVT ExtrVT = IntVT == MVT::i16 ? MVT::i32 : IntVT;
6754 SDValue Extr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrVT,
6755 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6756 if (VT == MVT::f16)
6757 return convertToF16(DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Extr), DAG);
6758 return DAG.getNode(ISD::BITCAST, DL, VT, Extr);
6759}
6760
6761SDValue SystemZTargetLowering::
6762lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6763 SDValue PackedOp = Op.getOperand(0);
6764 EVT OutVT = Op.getValueType();
6765 EVT InVT = PackedOp.getValueType();
6766 unsigned ToBits = OutVT.getScalarSizeInBits();
6767 unsigned FromBits = InVT.getScalarSizeInBits();
6768 unsigned StartOffset = 0;
6769
6770 // If the input is a VECTOR_SHUFFLE, there are a number of important
6771 // cases where we can directly implement the sign-extension of the
6772 // original input lanes of the shuffle.
6773 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6774 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6775 ArrayRef<int> ShuffleMask = SVN->getMask();
6776 int OutNumElts = OutVT.getVectorNumElements();
6777
6778 // Recognize the special case where the sign-extension can be done
6779 // by the VSEG instruction. Handled via the default expander.
6780 if (ToBits == 64 && OutNumElts == 2) {
6781 int NumElem = ToBits / FromBits;
6782 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6783 return SDValue();
6784 }
6785
6786 // Recognize the special case where we can fold the shuffle by
6787 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6788 int StartOffsetCandidate = -1;
6789 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6790 if (ShuffleMask[Elt] == -1)
6791 continue;
6792 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6793 if (StartOffsetCandidate == -1)
6794 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6795 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6796 continue;
6797 }
6798 StartOffsetCandidate = -1;
6799 break;
6800 }
6801 if (StartOffsetCandidate != -1) {
6802 StartOffset = StartOffsetCandidate;
6803 PackedOp = PackedOp.getOperand(0);
6804 }
6805 }
6806
6807 do {
6808 FromBits *= 2;
6809 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6810 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6811 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6812 if (StartOffset >= OutNumElts) {
6813 Opcode = SystemZISD::UNPACK_LOW;
6814 StartOffset -= OutNumElts;
6815 }
6816 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6817 } while (FromBits != ToBits);
6818 return PackedOp;
6819}
6820
6821// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6822SDValue SystemZTargetLowering::
6823lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6824 SDValue PackedOp = Op.getOperand(0);
6825 SDLoc DL(Op);
6826 EVT OutVT = Op.getValueType();
6827 EVT InVT = PackedOp.getValueType();
6828 unsigned InNumElts = InVT.getVectorNumElements();
6829 unsigned OutNumElts = OutVT.getVectorNumElements();
6830 unsigned NumInPerOut = InNumElts / OutNumElts;
6831
6832 SDValue ZeroVec =
6833 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6834
6835 SmallVector<int, 16> Mask(InNumElts);
6836 unsigned ZeroVecElt = InNumElts;
6837 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6838 unsigned MaskElt = PackedElt * NumInPerOut;
6839 unsigned End = MaskElt + NumInPerOut - 1;
6840 for (; MaskElt < End; MaskElt++)
6841 Mask[MaskElt] = ZeroVecElt++;
6842 Mask[MaskElt] = PackedElt;
6843 }
6844 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6845 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6846}
6847
6848SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6849 unsigned ByScalar) const {
6850 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6851 SDValue Op0 = Op.getOperand(0);
6852 SDValue Op1 = Op.getOperand(1);
6853 SDLoc DL(Op);
6854 EVT VT = Op.getValueType();
6855 unsigned ElemBitSize = VT.getScalarSizeInBits();
6856
6857 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6858 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6859 APInt SplatBits, SplatUndef;
6860 unsigned SplatBitSize;
6861 bool HasAnyUndefs;
6862 // Check for constant splats. Use ElemBitSize as the minimum element
6863 // width and reject splats that need wider elements.
6864 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6865 ElemBitSize, true) &&
6866 SplatBitSize == ElemBitSize) {
6867 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6868 DL, MVT::i32);
6869 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6870 }
6871 // Check for variable splats.
6872 BitVector UndefElements;
6873 SDValue Splat = BVN->getSplatValue(&UndefElements);
6874 if (Splat) {
6875 // Since i32 is the smallest legal type, we either need a no-op
6876 // or a truncation.
6877 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6878 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6879 }
6880 }
6881
6882 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6883 // and the shift amount is directly available in a GPR.
6884 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6885 if (VSN->isSplat()) {
6886 SDValue VSNOp0 = VSN->getOperand(0);
6887 unsigned Index = VSN->getSplatIndex();
6888 assert(Index < VT.getVectorNumElements() &&
6889 "Splat index should be defined and in first operand");
6890 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6891 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6892 // Since i32 is the smallest legal type, we either need a no-op
6893 // or a truncation.
6894 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6895 VSNOp0.getOperand(Index));
6896 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6897 }
6898 }
6899 }
6900
6901 // Otherwise just treat the current form as legal.
6902 return Op;
6903}
6904
6905SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6906 SDLoc DL(Op);
6907
6908 // i128 FSHL with a constant amount that is a multiple of 8 can be
6909 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6910 // facility, FSHL with a constant amount less than 8 can be implemented
6911 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6912 // combination of the two.
6913 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6914 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6915 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6916 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6917 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6918 if (ShiftAmt > 120) {
6919 // For N in 121..128, fshl N == fshr (128 - N), and for 1 <= N < 8
6920 // SHR_DOUBLE_BIT emits fewer instructions.
6921 SDValue Val =
6922 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6923 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6924 return DAG.getBitcast(MVT::i128, Val);
6925 }
6926 SmallVector<int, 16> Mask(16);
6927 for (unsigned Elt = 0; Elt < 16; Elt++)
6928 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6929 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6930 if ((ShiftAmt & 7) == 0)
6931 return DAG.getBitcast(MVT::i128, Shuf1);
6932 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6933 SDValue Val =
6934 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6935 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6936 return DAG.getBitcast(MVT::i128, Val);
6937 }
6938 }
6939
6940 return SDValue();
6941}
6942
6943SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6944 SDLoc DL(Op);
6945
6946 // i128 FSHR with a constant amount that is a multiple of 8 can be
6947 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6948 // facility, FSHR with a constant amount less than 8 can be implemented
6949 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6950 // combination of the two.
6951 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6952 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6953 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6954 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6955 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6956 if (ShiftAmt > 120) {
6957 // For N in 121..128, fshr N == fshl (128 - N), and for 1 <= N < 8
6958 // SHL_DOUBLE_BIT emits fewer instructions.
6959 SDValue Val =
6960 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6961 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6962 return DAG.getBitcast(MVT::i128, Val);
6963 }
6964 SmallVector<int, 16> Mask(16);
6965 for (unsigned Elt = 0; Elt < 16; Elt++)
6966 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6967 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6968 if ((ShiftAmt & 7) == 0)
6969 return DAG.getBitcast(MVT::i128, Shuf1);
6970 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6971 SDValue Val =
6972 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6973 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6974 return DAG.getBitcast(MVT::i128, Val);
6975 }
6976 }
6977
6978 return SDValue();
6979}
6980
6982 SDLoc DL(Op);
6983 SDValue Src = Op.getOperand(0);
6984 MVT DstVT = Op.getSimpleValueType();
6985
6987 unsigned SrcAS = N->getSrcAddressSpace();
6988
6989 assert(SrcAS != N->getDestAddressSpace() &&
6990 "addrspacecast must be between different address spaces");
6991
6992 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6993 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6994 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6995 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6996 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6997 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6998 } else if (DstVT == MVT::i32) {
6999 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
7000 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
7001 DAG.getConstant(0x7fffffff, DL, MVT::i32));
7002 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
7003 } else {
7004 report_fatal_error("Bad address space in addrspacecast");
7005 }
7006 return Op;
7007}
7008
7009SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
7010 SelectionDAG &DAG) const {
7011 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
7012 if (In.getSimpleValueType() != MVT::f16)
7013 return Op; // Legal
7014 return SDValue(); // Let legalizer emit the libcall.
7015}
7016
7018 MVT VT, SDValue Arg, SDLoc DL,
7019 SDValue Chain, bool IsStrict) const {
7020 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
7021 MakeLibCallOptions CallOptions;
7022 SDValue Result;
7023 std::tie(Result, Chain) =
7024 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
7025 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
7026}
7027
7028SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
7029 SelectionDAG &DAG) const {
7030 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
7031 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
7032 bool IsStrict = Op->isStrictFPOpcode();
7033 SDLoc DL(Op);
7034 MVT VT = Op.getSimpleValueType();
7035 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
7036 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
7037 EVT InVT = InOp.getValueType();
7038
7039 // FP to unsigned is not directly supported on z10. Promoting an i32
7040 // result to (signed) i64 doesn't generate an inexact condition (fp
7041 // exception) for values that are outside the i32 range but in the i64
7042 // range, so use the default expansion.
7043 if (!Subtarget.hasFPExtension() && !IsSigned)
7044 // Expand i32/i64. F16 values will be recognized to fit and extended.
7045 return SDValue();
7046
7047 // Conversion from f16 is done via f32.
7048 if (InOp.getSimpleValueType() == MVT::f16) {
7050 LowerOperationWrapper(Op.getNode(), Results, DAG);
7051 return DAG.getMergeValues(Results, DL);
7052 }
7053
7054 if (VT == MVT::i128) {
7055 RTLIB::Libcall LC =
7056 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
7057 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
7058 }
7059
7060 return Op; // Legal
7061}
7062
7063SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
7064 SelectionDAG &DAG) const {
7065 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
7066 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
7067 bool IsStrict = Op->isStrictFPOpcode();
7068 SDLoc DL(Op);
7069 MVT VT = Op.getSimpleValueType();
7070 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
7071 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
7072 EVT InVT = InOp.getValueType();
7073
7074 // Conversion to f16 is done via f32.
7075 if (VT == MVT::f16) {
7077 LowerOperationWrapper(Op.getNode(), Results, DAG);
7078 return DAG.getMergeValues(Results, DL);
7079 }
7080
7081 // Unsigned to fp is not directly supported on z10.
7082 if (!Subtarget.hasFPExtension() && !IsSigned)
7083 return SDValue(); // Expand i64.
7084
7085 if (InVT == MVT::i128) {
7086 RTLIB::Libcall LC =
7087 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
7088 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
7089 }
7090
7091 return Op; // Legal
7092}
7093
7094// Lower an f16 LOAD in case of no vector support.
7095SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
7096 SelectionDAG &DAG) const {
7097 EVT RegVT = Op.getValueType();
7098 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
7099 (void)RegVT;
7100
7101 // Load as integer.
7102 SDLoc DL(Op);
7103 SDValue NewLd;
7104 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
7105 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
7106 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
7107 AtomicLd->getChain(), AtomicLd->getBasePtr(),
7108 AtomicLd->getMemOperand());
7109 } else {
7110 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
7111 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
7112 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
7113 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
7114 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
7115 }
7116 SDValue F16Val = convertToF16(NewLd, DAG);
7117 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
7118}
7119
7120// Lower an f16 STORE in case of no vector support.
7121SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
7122 SelectionDAG &DAG) const {
7123 SDLoc DL(Op);
7124 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
7125
7126 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
7127 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
7128 Shft, AtomicSt->getBasePtr(),
7129 AtomicSt->getMemOperand());
7130
7131 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
7132 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
7133 St->getMemOperand());
7134}
7135
7136SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7137 SelectionDAG &DAG) const {
7138 SDLoc DL(Op);
7139 MVT ResultVT = Op.getSimpleValueType();
7140 SDValue Arg = Op.getOperand(0);
7141 unsigned Check = Op.getConstantOperandVal(1);
7142
7143 unsigned TDCMask = 0;
7144 if (Check & fcSNan)
7146 if (Check & fcQNan)
7148 if (Check & fcPosInf)
7150 if (Check & fcNegInf)
7152 if (Check & fcPosNormal)
7154 if (Check & fcNegNormal)
7156 if (Check & fcPosSubnormal)
7158 if (Check & fcNegSubnormal)
7160 if (Check & fcPosZero)
7161 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7162 if (Check & fcNegZero)
7163 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7164 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7165
7166 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7167 return getCCResult(DAG, Intr);
7168}
7169
7170SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7171 SelectionDAG &DAG) const {
7172 SDLoc DL(Op);
7173 SDValue Chain = Op.getOperand(0);
7174
7175 // STCKF only supports a memory operand, so we have to use a temporary.
7176 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7177 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7178 MachinePointerInfo MPI =
7180
7181 // Use STCKF to store the TOD clock into the temporary.
7182 SDValue StoreOps[] = {Chain, StackPtr};
7183 Chain = DAG.getMemIntrinsicNode(
7184 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7185 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7186
7187 // And read it back from there.
7188 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7189}
7190
7192 SelectionDAG &DAG) const {
7193 switch (Op.getOpcode()) {
7194 case ISD::FRAMEADDR:
7195 return lowerFRAMEADDR(Op, DAG);
7196 case ISD::RETURNADDR:
7197 return lowerRETURNADDR(Op, DAG);
7198 case ISD::BR_CC:
7199 return lowerBR_CC(Op, DAG);
7200 case ISD::SELECT_CC:
7201 return lowerSELECT_CC(Op, DAG);
7202 case ISD::SETCC:
7203 return lowerSETCC(Op, DAG);
7204 case ISD::STRICT_FSETCC:
7205 return lowerSTRICT_FSETCC(Op, DAG, false);
7207 return lowerSTRICT_FSETCC(Op, DAG, true);
7208 case ISD::GlobalAddress:
7209 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
7211 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7212 case ISD::BlockAddress:
7213 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7214 case ISD::JumpTable:
7215 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7216 case ISD::ConstantPool:
7217 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7218 case ISD::BITCAST:
7219 return lowerBITCAST(Op, DAG);
7220 case ISD::VASTART:
7221 return lowerVASTART(Op, DAG);
7222 case ISD::VACOPY:
7223 return lowerVACOPY(Op, DAG);
7225 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7227 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7228 case ISD::MULHS:
7229 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7230 case ISD::MULHU:
7231 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7232 case ISD::SMUL_LOHI:
7233 return lowerSMUL_LOHI(Op, DAG);
7234 case ISD::UMUL_LOHI:
7235 return lowerUMUL_LOHI(Op, DAG);
7236 case ISD::SDIVREM:
7237 return lowerSDIVREM(Op, DAG);
7238 case ISD::UDIVREM:
7239 return lowerUDIVREM(Op, DAG);
7240 case ISD::SADDO:
7241 case ISD::SSUBO:
7242 case ISD::UADDO:
7243 case ISD::USUBO:
7244 return lowerXALUO(Op, DAG);
7245 case ISD::UADDO_CARRY:
7246 case ISD::USUBO_CARRY:
7247 return lowerUADDSUBO_CARRY(Op, DAG);
7248 case ISD::OR:
7249 return lowerOR(Op, DAG);
7250 case ISD::CTPOP:
7251 return lowerCTPOP(Op, DAG);
7252 case ISD::VECREDUCE_ADD:
7253 return lowerVECREDUCE_ADD(Op, DAG);
7254 case ISD::ATOMIC_FENCE:
7255 return lowerATOMIC_FENCE(Op, DAG);
7256 case ISD::ATOMIC_SWAP:
7257 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7258 case ISD::ATOMIC_STORE:
7259 return lowerATOMIC_STORE(Op, DAG);
7260 case ISD::ATOMIC_LOAD:
7261 return lowerATOMIC_LOAD(Op, DAG);
7263 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7265 return lowerATOMIC_LOAD_SUB(Op, DAG);
7267 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7269 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7271 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7273 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7275 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7277 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7279 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7281 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7283 return lowerATOMIC_CMP_SWAP(Op, DAG);
7284 case ISD::STACKSAVE:
7285 return lowerSTACKSAVE(Op, DAG);
7286 case ISD::STACKRESTORE:
7287 return lowerSTACKRESTORE(Op, DAG);
7288 case ISD::PREFETCH:
7289 return lowerPREFETCH(Op, DAG);
7291 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7293 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7294 case ISD::BUILD_VECTOR:
7295 return lowerBUILD_VECTOR(Op, DAG);
7297 return lowerVECTOR_SHUFFLE(Op, DAG);
7299 return lowerSCALAR_TO_VECTOR(Op, DAG);
7301 return lowerINSERT_VECTOR_ELT(Op, DAG);
7303 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7305 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7307 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7308 case ISD::SHL:
7309 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7310 case ISD::SRL:
7311 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7312 case ISD::SRA:
7313 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7314 case ISD::ADDRSPACECAST:
7315 return lowerAddrSpaceCast(Op, DAG);
7316 case ISD::ROTL:
7317 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7318 case ISD::FSHL:
7319 return lowerFSHL(Op, DAG);
7320 case ISD::FSHR:
7321 return lowerFSHR(Op, DAG);
7322 case ISD::FP_EXTEND:
7324 return lowerFP_EXTEND(Op, DAG);
7325 case ISD::FP_TO_UINT:
7326 case ISD::FP_TO_SINT:
7329 return lower_FP_TO_INT(Op, DAG);
7330 case ISD::UINT_TO_FP:
7331 case ISD::SINT_TO_FP:
7334 return lower_INT_TO_FP(Op, DAG);
7335 case ISD::LOAD:
7336 return lowerLoadF16(Op, DAG);
7337 case ISD::STORE:
7338 return lowerStoreF16(Op, DAG);
7339 case ISD::IS_FPCLASS:
7340 return lowerIS_FPCLASS(Op, DAG);
7341 case ISD::GET_ROUNDING:
7342 return lowerGET_ROUNDING(Op, DAG);
7344 return lowerREADCYCLECOUNTER(Op, DAG);
7347 // These operations are legal on our platform, but we cannot actually
7348 // set the operation action to Legal as common code would treat this
7349 // as equivalent to Expand. Instead, we keep the operation action to
7350 // Custom and just leave them unchanged here.
7351 return Op;
7352
7353 default:
7354 llvm_unreachable("Unexpected node to lower");
7355 }
7356}
7357
7359 const SDLoc &SL) {
7360 // If i128 is legal, just use a normal bitcast.
7361 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7362 return DAG.getBitcast(MVT::f128, Src);
7363
7364 // Otherwise, f128 must live in FP128, so do a partwise move.
7366 &SystemZ::FP128BitRegClass);
7367
7368 SDValue Hi, Lo;
7369 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7370
7371 Hi = DAG.getBitcast(MVT::f64, Hi);
7372 Lo = DAG.getBitcast(MVT::f64, Lo);
7373
7374 SDNode *Pair = DAG.getMachineNode(
7375 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7376 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7377 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7378 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7379 return SDValue(Pair, 0);
7380}
7381
7383 const SDLoc &SL) {
7384 // If i128 is legal, just use a normal bitcast.
7385 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7386 return DAG.getBitcast(MVT::i128, Src);
7387
7388 // Otherwise, f128 must live in FP128, so do a partwise move.
7390 &SystemZ::FP128BitRegClass);
7391
7392 SDValue LoFP =
7393 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7394 SDValue HiFP =
7395 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7396 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7397 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7398
7399 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7400}
7401
7402// Lower operations with invalid operand or result types.
7403void
7406 SelectionDAG &DAG) const {
7407 switch (N->getOpcode()) {
7408 case ISD::ATOMIC_LOAD: {
7409 SDLoc DL(N);
7410 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7411 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7412 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7413 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7414 DL, Tys, Ops, MVT::i128, MMO);
7415
7416 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7417 if (N->getValueType(0) == MVT::f128)
7418 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7419 Results.push_back(Lowered);
7420 Results.push_back(Res.getValue(1));
7421 break;
7422 }
7423 case ISD::ATOMIC_STORE: {
7424 SDLoc DL(N);
7425 SDVTList Tys = DAG.getVTList(MVT::Other);
7426 SDValue Val = N->getOperand(1);
7427 if (Val.getValueType() == MVT::f128)
7428 Val = expandBitCastF128ToI128(DAG, Val, DL);
7429 Val = lowerI128ToGR128(DAG, Val);
7430
7431 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7432 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7433 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7434 DL, Tys, Ops, MVT::i128, MMO);
7435 // We have to enforce sequential consistency by performing a
7436 // serialization operation after the store.
7437 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
7439 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7440 MVT::Other, Res), 0);
7441 Results.push_back(Res);
7442 break;
7443 }
7445 SDLoc DL(N);
7446 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7447 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7448 lowerI128ToGR128(DAG, N->getOperand(2)),
7449 lowerI128ToGR128(DAG, N->getOperand(3)) };
7450 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7451 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7452 DL, Tys, Ops, MVT::i128, MMO);
7453 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
7455 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7456 Results.push_back(lowerGR128ToI128(DAG, Res));
7457 Results.push_back(Success);
7458 Results.push_back(Res.getValue(2));
7459 break;
7460 }
7461 case ISD::BITCAST: {
7462 if (useSoftFloat())
7463 return;
7464 SDLoc DL(N);
7465 SDValue Src = N->getOperand(0);
7466 EVT SrcVT = Src.getValueType();
7467 EVT ResVT = N->getValueType(0);
7468 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7469 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7470 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7471 if (Subtarget.hasVector()) {
7472 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7473 Results.push_back(SDValue(
7474 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7475 } else {
7476 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7477 Results.push_back(convertToF16(In64, DAG));
7478 }
7479 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7480 SDValue ExtractedVal =
7481 Subtarget.hasVector()
7482 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7483 0)
7484 : convertFromF16(Src, DL, DAG);
7485 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7486 }
7487 break;
7488 }
7489 case ISD::UINT_TO_FP:
7490 case ISD::SINT_TO_FP:
7493 if (useSoftFloat())
7494 return;
7495 bool IsStrict = N->isStrictFPOpcode();
7496 SDLoc DL(N);
7497 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7498 EVT ResVT = N->getValueType(0);
7499 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7500 if (ResVT == MVT::f16) {
7501 if (!IsStrict) {
7502 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7503 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7504 } else {
7505 SDValue OpF32 =
7506 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7507 {Chain, InOp});
7508 SDValue F16Res;
7509 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7510 OpF32, OpF32.getValue(1), DL, MVT::f16);
7511 Results.push_back(F16Res);
7512 Results.push_back(Chain);
7513 }
7514 }
7515 break;
7516 }
7517 case ISD::FP_TO_UINT:
7518 case ISD::FP_TO_SINT:
7521 if (useSoftFloat())
7522 return;
7523 bool IsStrict = N->isStrictFPOpcode();
7524 SDLoc DL(N);
7525 EVT ResVT = N->getValueType(0);
7526 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7527 EVT InVT = InOp->getValueType(0);
7528 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7529 if (InVT == MVT::f16) {
7530 if (!IsStrict) {
7531 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7532 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7533 } else {
7534 SDValue InF32;
7535 std::tie(InF32, Chain) =
7536 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7537 SDValue OpF32 =
7538 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7539 {Chain, InF32});
7540 Results.push_back(OpF32);
7541 Results.push_back(OpF32.getValue(1));
7542 }
7543 }
7544 break;
7545 }
7546 default:
7547 llvm_unreachable("Unexpected node to lower");
7548 }
7549}
7550
7551void
7557
7558// Return true if VT is a vector whose elements are a whole number of bytes
7559// in width. Also check for presence of vector support.
7560bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7561 if (!Subtarget.hasVector())
7562 return false;
7563
7564 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7565}
7566
7567// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7568// producing a result of type ResVT. Op is a possibly bitcast version
7569// of the input vector and Index is the index (based on type VecVT) that
7570// should be extracted. Return the new extraction if a simplification
7571// was possible or if Force is true.
7572SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7573 EVT VecVT, SDValue Op,
7574 unsigned Index,
7575 DAGCombinerInfo &DCI,
7576 bool Force) const {
7577 SelectionDAG &DAG = DCI.DAG;
7578
7579 // The number of bytes being extracted.
7580 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7581
7582 for (;;) {
7583 unsigned Opcode = Op.getOpcode();
7584 if (Opcode == ISD::BITCAST)
7585 // Look through bitcasts.
7586 Op = Op.getOperand(0);
7587 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7588 canTreatAsByteVector(Op.getValueType())) {
7589 // Get a VPERM-like permute mask and see whether the bytes covered
7590 // by the extracted element are a contiguous sequence from one
7591 // source operand.
7593 if (!getVPermMask(Op, Bytes))
7594 break;
7595 int First;
7596 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7597 BytesPerElement, First))
7598 break;
7599 if (First < 0)
7600 return DAG.getUNDEF(ResVT);
7601 // Make sure the contiguous sequence starts at a multiple of the
7602 // original element size.
7603 unsigned Byte = unsigned(First) % Bytes.size();
7604 if (Byte % BytesPerElement != 0)
7605 break;
7606 // We can get the extracted value directly from an input.
7607 Index = Byte / BytesPerElement;
7608 Op = Op.getOperand(unsigned(First) / Bytes.size());
7609 Force = true;
7610 } else if (Opcode == ISD::BUILD_VECTOR &&
7611 canTreatAsByteVector(Op.getValueType())) {
7612 // We can only optimize this case if the BUILD_VECTOR elements are
7613 // at least as wide as the extracted value.
7614 EVT OpVT = Op.getValueType();
7615 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7616 if (OpBytesPerElement < BytesPerElement)
7617 break;
7618 // Make sure that the least-significant bit of the extracted value
7619 // is the least significant bit of an input.
7620 unsigned End = (Index + 1) * BytesPerElement;
7621 if (End % OpBytesPerElement != 0)
7622 break;
7623 // We're extracting the low part of one operand of the BUILD_VECTOR.
7624 Op = Op.getOperand(End / OpBytesPerElement - 1);
7625 if (!Op.getValueType().isInteger()) {
7626 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7627 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7628 DCI.AddToWorklist(Op.getNode());
7629 }
7630 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7631 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7632 if (VT != ResVT) {
7633 DCI.AddToWorklist(Op.getNode());
7634 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7635 }
7636 return Op;
7637 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7639 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7640 canTreatAsByteVector(Op.getValueType()) &&
7641 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7642 // Make sure that only the unextended bits are significant.
7643 EVT ExtVT = Op.getValueType();
7644 EVT OpVT = Op.getOperand(0).getValueType();
7645 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7646 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7647 unsigned Byte = Index * BytesPerElement;
7648 unsigned SubByte = Byte % ExtBytesPerElement;
7649 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7650 if (SubByte < MinSubByte ||
7651 SubByte + BytesPerElement > ExtBytesPerElement)
7652 break;
7653 // Get the byte offset of the unextended element
7654 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7655 // ...then add the byte offset relative to that element.
7656 Byte += SubByte - MinSubByte;
7657 if (Byte % BytesPerElement != 0)
7658 break;
7659 Op = Op.getOperand(0);
7660 Index = Byte / BytesPerElement;
7661 Force = true;
7662 } else
7663 break;
7664 }
7665 if (Force) {
7666 if (Op.getValueType() != VecVT) {
7667 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7668 DCI.AddToWorklist(Op.getNode());
7669 }
7670 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7671 DAG.getConstant(Index, DL, MVT::i32));
7672 }
7673 return SDValue();
7674}
7675
7676// Optimize vector operations in scalar value Op on the basis that Op
7677// is truncated to TruncVT.
7678SDValue SystemZTargetLowering::combineTruncateExtract(
7679 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7680 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7681 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7682 // of type TruncVT.
7683 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7684 TruncVT.getSizeInBits() % 8 == 0) {
7685 SDValue Vec = Op.getOperand(0);
7686 EVT VecVT = Vec.getValueType();
7687 if (canTreatAsByteVector(VecVT)) {
7688 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7689 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7690 unsigned TruncBytes = TruncVT.getStoreSize();
7691 if (BytesPerElement % TruncBytes == 0) {
7692 // Calculate the value of Y' in the above description. We are
7693 // splitting the original elements into Scale equal-sized pieces
7694 // and for truncation purposes want the last (least-significant)
7695 // of these pieces for IndexN. This is easiest to do by calculating
7696 // the start index of the following element and then subtracting 1.
7697 unsigned Scale = BytesPerElement / TruncBytes;
7698 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7699
7700 // Defer the creation of the bitcast from X to combineExtract,
7701 // which might be able to optimize the extraction.
7702 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7703 MVT::getIntegerVT(TruncBytes * 8),
7704 VecVT.getStoreSize() / TruncBytes);
7705 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7706 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7707 }
7708 }
7709 }
7710 }
7711 return SDValue();
7712}
7713
7714SDValue SystemZTargetLowering::combineZERO_EXTEND(
7715 SDNode *N, DAGCombinerInfo &DCI) const {
7716 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7717 SelectionDAG &DAG = DCI.DAG;
7718 SDValue N0 = N->getOperand(0);
7719 EVT VT = N->getValueType(0);
7720 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7721 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7722 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7723 if (TrueOp && FalseOp) {
7724 SDLoc DL(N0);
7725 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7726 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7727 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7728 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7729 // If N0 has multiple uses, change other uses as well.
7730 if (!N0.hasOneUse()) {
7731 SDValue TruncSelect =
7732 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7733 DCI.CombineTo(N0.getNode(), TruncSelect);
7734 }
7735 return NewSelect;
7736 }
7737 }
7738 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7739 // of the result is smaller than the size of X and all the truncated bits
7740 // of X are already zero.
7741 if (N0.getOpcode() == ISD::XOR &&
7742 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7743 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7744 N0.getOperand(1).getOpcode() == ISD::Constant) {
7745 SDValue X = N0.getOperand(0).getOperand(0);
7746 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7747 KnownBits Known = DAG.computeKnownBits(X);
7748 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7749 N0.getValueSizeInBits(),
7750 VT.getSizeInBits());
7751 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7752 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7753 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7754 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7755 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7756 }
7757 }
7758 }
7759 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7760 // and VECTOR ADD COMPUTE CARRY for i128:
7761 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7762 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7763 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7764 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7765 // For vector types, these patterns are recognized in the .td file.
7766 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7767 N0.getOperand(0).getValueType() == VT) {
7768 SDValue Op0 = N0.getOperand(0);
7769 SDValue Op1 = N0.getOperand(1);
7770 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7771 switch (CC) {
7772 case ISD::SETULE:
7773 std::swap(Op0, Op1);
7774 [[fallthrough]];
7775 case ISD::SETUGE:
7776 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7777 case ISD::SETUGT:
7778 std::swap(Op0, Op1);
7779 [[fallthrough]];
7780 case ISD::SETULT:
7781 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7782 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7783 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7784 Op0->getOperand(1));
7785 break;
7786 default:
7787 break;
7788 }
7789 }
7790
7791 return SDValue();
7792}
7793
7794SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7795 SDNode *N, DAGCombinerInfo &DCI) const {
7796 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7797 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7798 // into (select_cc LHS, RHS, -1, 0, COND)
7799 SelectionDAG &DAG = DCI.DAG;
7800 SDValue N0 = N->getOperand(0);
7801 EVT VT = N->getValueType(0);
7802 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7803 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7804 N0 = N0.getOperand(0);
7805 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7806 SDLoc DL(N0);
7807 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7808 DAG.getAllOnesConstant(DL, VT),
7809 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7810 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7811 }
7812 return SDValue();
7813}
7814
7815SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7816 SDNode *N, DAGCombinerInfo &DCI) const {
7817 // Convert (sext (ashr (shl X, C1), C2)) to
7818 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7819 // cheap as narrower ones.
7820 SelectionDAG &DAG = DCI.DAG;
7821 SDValue N0 = N->getOperand(0);
7822 EVT VT = N->getValueType(0);
7823 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7824 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7825 SDValue Inner = N0.getOperand(0);
7826 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7827 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7828 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7829 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7830 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7831 EVT ShiftVT = N0.getOperand(1).getValueType();
7832 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7833 Inner.getOperand(0));
7834 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7835 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7836 ShiftVT));
7837 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7838 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7839 }
7840 }
7841 }
7842
7843 return SDValue();
7844}
7845
7846SDValue SystemZTargetLowering::combineMERGE(
7847 SDNode *N, DAGCombinerInfo &DCI) const {
7848 SelectionDAG &DAG = DCI.DAG;
7849 unsigned Opcode = N->getOpcode();
7850 SDValue Op0 = N->getOperand(0);
7851 SDValue Op1 = N->getOperand(1);
7852 if (Op0.getOpcode() == ISD::BITCAST)
7853 Op0 = Op0.getOperand(0);
7855 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7856 // for v4f32.
7857 if (Op1 == N->getOperand(0))
7858 return Op1;
7859 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7860 EVT VT = Op1.getValueType();
7861 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7862 if (ElemBytes <= 4) {
7863 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7864 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7865 EVT InVT = VT.changeVectorElementTypeToInteger();
7866 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7867 SystemZ::VectorBytes / ElemBytes / 2);
7868 if (VT != InVT) {
7869 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7870 DCI.AddToWorklist(Op1.getNode());
7871 }
7872 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7873 DCI.AddToWorklist(Op.getNode());
7874 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7875 }
7876 }
7877 return SDValue();
7878}
7879
7880static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7881 SDNode *&HiPart) {
7882 LoPart = HiPart = nullptr;
7883
7884 // Scan through all users.
7885 for (SDUse &Use : LD->uses()) {
7886 // Skip the uses of the chain.
7887 if (Use.getResNo() != 0)
7888 continue;
7889
7890 // Verify every user is a TRUNCATE to i64 of the low or high half.
7891 SDNode *User = Use.getUser();
7892 bool IsLoPart = true;
7893 if (User->getOpcode() == ISD::SRL &&
7894 User->getOperand(1).getOpcode() == ISD::Constant &&
7895 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7896 User = *User->user_begin();
7897 IsLoPart = false;
7898 }
7899 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7900 return false;
7901
7902 if (IsLoPart) {
7903 if (LoPart)
7904 return false;
7905 LoPart = User;
7906 } else {
7907 if (HiPart)
7908 return false;
7909 HiPart = User;
7910 }
7911 }
7912 return true;
7913}
7914
7915static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7916 SDNode *&HiPart) {
7917 LoPart = HiPart = nullptr;
7918
7919 // Scan through all users.
7920 for (SDUse &Use : LD->uses()) {
7921 // Skip the uses of the chain.
7922 if (Use.getResNo() != 0)
7923 continue;
7924
7925 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7926 SDNode *User = Use.getUser();
7927 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7928 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7929 return false;
7930
7931 switch (User->getConstantOperandVal(1)) {
7932 case SystemZ::subreg_l64:
7933 if (LoPart)
7934 return false;
7935 LoPart = User;
7936 break;
7937 case SystemZ::subreg_h64:
7938 if (HiPart)
7939 return false;
7940 HiPart = User;
7941 break;
7942 default:
7943 return false;
7944 }
7945 }
7946 return true;
7947}
7948
7949SDValue SystemZTargetLowering::combineLOAD(
7950 SDNode *N, DAGCombinerInfo &DCI) const {
7951 SelectionDAG &DAG = DCI.DAG;
7952 EVT LdVT = N->getValueType(0);
7953 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7954 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7955 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7956 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7957 if (PtrVT != LoadNodeVT) {
7958 SDLoc DL(LN);
7959 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7960 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7961 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7962 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7963 LN->getMemOperand());
7964 }
7965 }
7966 }
7967 SDLoc DL(N);
7968
7969 // Replace a 128-bit load that is used solely to move its value into GPRs
7970 // by separate loads of both halves.
7971 LoadSDNode *LD = cast<LoadSDNode>(N);
7972 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7973 SDNode *LoPart, *HiPart;
7974 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7975 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7976 // Rewrite each extraction as an independent load.
7977 SmallVector<SDValue, 2> ArgChains;
7978 if (HiPart) {
7979 SDValue EltLoad = DAG.getLoad(
7980 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7981 LD->getPointerInfo(), LD->getBaseAlign(),
7982 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7983
7984 DCI.CombineTo(HiPart, EltLoad, true);
7985 ArgChains.push_back(EltLoad.getValue(1));
7986 }
7987 if (LoPart) {
7988 SDValue EltLoad = DAG.getLoad(
7989 LoPart->getValueType(0), DL, LD->getChain(),
7990 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7991 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
7992 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7993
7994 DCI.CombineTo(LoPart, EltLoad, true);
7995 ArgChains.push_back(EltLoad.getValue(1));
7996 }
7997
7998 // Collect all chains via TokenFactor.
7999 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
8000 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
8001 DCI.AddToWorklist(Chain.getNode());
8002 return SDValue(N, 0);
8003 }
8004 }
8005
8006 if (LdVT.isVector() || LdVT.isInteger())
8007 return SDValue();
8008 // Transform a scalar load that is REPLICATEd as well as having other
8009 // use(s) to the form where the other use(s) use the first element of the
8010 // REPLICATE instead of the load. Otherwise instruction selection will not
8011 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
8012 // point loads.
8013
8014 SDValue Replicate;
8015 SmallVector<SDNode*, 8> OtherUses;
8016 for (SDUse &Use : N->uses()) {
8017 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
8018 if (Replicate)
8019 return SDValue(); // Should never happen
8020 Replicate = SDValue(Use.getUser(), 0);
8021 } else if (Use.getResNo() == 0)
8022 OtherUses.push_back(Use.getUser());
8023 }
8024 if (!Replicate || OtherUses.empty())
8025 return SDValue();
8026
8027 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
8028 Replicate, DAG.getConstant(0, DL, MVT::i32));
8029 // Update uses of the loaded Value while preserving old chains.
8030 for (SDNode *U : OtherUses) {
8032 for (SDValue Op : U->ops())
8033 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
8034 DAG.UpdateNodeOperands(U, Ops);
8035 }
8036 return SDValue(N, 0);
8037}
8038
8039bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
8040 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
8041 return true;
8042 if (Subtarget.hasVectorEnhancements2())
8043 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
8044 return true;
8045 return false;
8046}
8047
8049 if (!VT.isVector() || !VT.isSimple() ||
8050 VT.getSizeInBits() != 128 ||
8051 VT.getScalarSizeInBits() % 8 != 0)
8052 return false;
8053
8054 unsigned NumElts = VT.getVectorNumElements();
8055 for (unsigned i = 0; i < NumElts; ++i) {
8056 if (M[i] < 0) continue; // ignore UNDEF indices
8057 if ((unsigned) M[i] != NumElts - 1 - i)
8058 return false;
8059 }
8060
8061 return true;
8062}
8063
8064static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
8065 for (auto *U : StoredVal->users()) {
8066 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
8067 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
8068 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
8069 continue;
8070 } else if (isa<BuildVectorSDNode>(U)) {
8071 SDValue BuildVector = SDValue(U, 0);
8072 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
8073 isOnlyUsedByStores(BuildVector, DAG))
8074 continue;
8075 }
8076 return false;
8077 }
8078 return true;
8079}
8080
8081static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
8082 SDValue &HiPart) {
8083 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
8084 return false;
8085
8086 SDValue Op0 = Val.getOperand(0);
8087 SDValue Op1 = Val.getOperand(1);
8088
8089 if (Op0.getOpcode() == ISD::SHL)
8090 std::swap(Op0, Op1);
8091 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
8092 Op1.getOperand(1).getOpcode() != ISD::Constant ||
8093 Op1.getConstantOperandVal(1) != 64)
8094 return false;
8095 Op1 = Op1.getOperand(0);
8096
8097 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
8098 Op0.getOperand(0).getValueType() != MVT::i64)
8099 return false;
8100 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
8101 Op1.getOperand(0).getValueType() != MVT::i64)
8102 return false;
8103
8104 LoPart = Op0.getOperand(0);
8105 HiPart = Op1.getOperand(0);
8106 return true;
8107}
8108
8109static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8110 SDValue &HiPart) {
8111 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8112 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8113 return false;
8114
8115 if (Val->getNumOperands() != 5 ||
8116 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8117 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8118 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8119 return false;
8120
8121 LoPart = Val->getOperand(1);
8122 HiPart = Val->getOperand(3);
8123 return true;
8124}
8125
8126SDValue SystemZTargetLowering::combineSTORE(
8127 SDNode *N, DAGCombinerInfo &DCI) const {
8128 SelectionDAG &DAG = DCI.DAG;
8129 auto *SN = cast<StoreSDNode>(N);
8130 auto &Op1 = N->getOperand(1);
8131 EVT MemVT = SN->getMemoryVT();
8132
8133 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8134 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8135 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8136 if (PtrVT != StoreNodeVT) {
8137 SDLoc DL(SN);
8138 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8139 SYSTEMZAS::PTR32, 0);
8140 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8141 SN->getPointerInfo(), SN->getBaseAlign(),
8142 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8143 }
8144 }
8145
8146 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8147 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8148 // If X has wider elements then convert it to:
8149 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8150 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8151 if (SDValue Value =
8152 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8153 DCI.AddToWorklist(Value.getNode());
8154
8155 // Rewrite the store with the new form of stored value.
8156 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8157 SN->getBasePtr(), SN->getMemoryVT(),
8158 SN->getMemOperand());
8159 }
8160 }
8161
8162 // combine STORE (LOAD_STACK_GUARD) into MOV_STACKGUARD_DAG
8163 if (Op1->isMachineOpcode() &&
8164 (Op1->getMachineOpcode() == SystemZ::LOAD_STACK_GUARD)) {
8165 // Obtain the frame index the store was targeting.
8166 int FI = cast<FrameIndexSDNode>(SN->getOperand(2))->getIndex();
8167 // Prepare operands of the MOV_STACKGUARD ISD Node - Chain and FrameIndex.
8168 SDValue Ops[] = {SN->getChain(), DAG.getTargetFrameIndex(FI, MVT::i64)};
8169 return DAG.getNode(SystemZISD::MOV_STACKGUARD, SDLoc(SN), MVT::Other, Ops);
8170 }
8171
8172 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8173 if (!SN->isTruncatingStore() &&
8174 Op1.getOpcode() == ISD::BSWAP &&
8175 Op1.getNode()->hasOneUse() &&
8176 canLoadStoreByteSwapped(Op1.getValueType())) {
8177
8178 SDValue BSwapOp = Op1.getOperand(0);
8179
8180 if (BSwapOp.getValueType() == MVT::i16)
8181 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8182
8183 SDValue Ops[] = {
8184 N->getOperand(0), BSwapOp, N->getOperand(2)
8185 };
8186
8187 return
8188 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8189 Ops, MemVT, SN->getMemOperand());
8190 }
8191 // Combine STORE (element-swap) into VSTER
8192 if (!SN->isTruncatingStore() &&
8193 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8194 Op1.getNode()->hasOneUse() &&
8195 Subtarget.hasVectorEnhancements2()) {
8196 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8197 ArrayRef<int> ShuffleMask = SVN->getMask();
8198 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8199 SDValue Ops[] = {
8200 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8201 };
8202
8203 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8204 DAG.getVTList(MVT::Other),
8205 Ops, MemVT, SN->getMemOperand());
8206 }
8207 }
8208
8209 // Combine STORE (READCYCLECOUNTER) into STCKF.
8210 if (!SN->isTruncatingStore() &&
8212 Op1.hasOneUse() &&
8213 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8214 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8215 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8216 DAG.getVTList(MVT::Other),
8217 Ops, MemVT, SN->getMemOperand());
8218 }
8219
8220 // Transform a store of a 128-bit value moved from parts into two stores.
8221 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8222 SDValue LoPart, HiPart;
8223 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8224 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8225 SDLoc DL(SN);
8226 SDValue Chain0 = DAG.getStore(
8227 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8228 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8229 SDValue Chain1 = DAG.getStore(
8230 SN->getChain(), DL, LoPart,
8231 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8232 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8233 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8234
8235 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8236 }
8237 }
8238
8239 // Replicate a reg or immediate with VREP instead of scalar multiply or
8240 // immediate load. It seems best to do this during the first DAGCombine as
8241 // it is straight-forward to handle the zero-extend node in the initial
8242 // DAG, and also not worry about the keeping the new MemVT legal (e.g. when
8243 // extracting an i16 element from a v16i8 vector).
8244 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8245 isOnlyUsedByStores(Op1, DAG)) {
8246 SDValue Word = SDValue();
8247 EVT WordVT;
8248
8249 // Find a replicated immediate and return it if found in Word and its
8250 // type in WordVT.
8251 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8252 // Some constants are better handled with a scalar store.
8253 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8254 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8255 return;
8256
8257 APInt Val = C->getAPIntValue();
8258 // Truncate Val in case of a truncating store.
8259 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8260 assert(SN->isTruncatingStore() &&
8261 "Non-truncating store and immediate value does not fit?");
8262 Val = Val.trunc(TotBytes * 8);
8263 }
8264
8265 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8266 if (VCI.isVectorConstantLegal(Subtarget) &&
8267 VCI.Opcode == SystemZISD::REPLICATE) {
8268 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8269 WordVT = VCI.VecVT.getScalarType();
8270 }
8271 };
8272
8273 // Find a replicated register and return it if found in Word and its type
8274 // in WordVT.
8275 auto FindReplicatedReg = [&](SDValue MulOp) {
8276 EVT MulVT = MulOp.getValueType();
8277 if (MulOp->getOpcode() == ISD::MUL &&
8278 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8279 // Find a zero extended value and its type.
8280 SDValue LHS = MulOp->getOperand(0);
8281 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8282 WordVT = LHS->getOperand(0).getValueType();
8283 else if (LHS->getOpcode() == ISD::AssertZext)
8284 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8285 else
8286 return;
8287 // Find a replicating constant, e.g. 0x00010001.
8288 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8289 SystemZVectorConstantInfo VCI(
8290 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8291 if (VCI.isVectorConstantLegal(Subtarget) &&
8292 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8293 WordVT == VCI.VecVT.getScalarType())
8294 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8295 }
8296 }
8297 };
8298
8299 if (isa<BuildVectorSDNode>(Op1) &&
8300 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8301 SDValue SplatVal = Op1->getOperand(0);
8302 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8303 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8304 else
8305 FindReplicatedReg(SplatVal);
8306 } else {
8307 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8308 FindReplicatedImm(C, MemVT.getStoreSize());
8309 else
8310 FindReplicatedReg(Op1);
8311 }
8312
8313 if (Word != SDValue()) {
8314 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8315 "Bad type handling");
8316 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8317 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8318 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8319 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8320 SN->getBasePtr(), SN->getMemOperand());
8321 }
8322 }
8323
8324 return SDValue();
8325}
8326
8327SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8328 SDNode *N, DAGCombinerInfo &DCI) const {
8329 SelectionDAG &DAG = DCI.DAG;
8330 // Combine element-swap (LOAD) into VLER
8331 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8332 N->getOperand(0).hasOneUse() &&
8333 Subtarget.hasVectorEnhancements2()) {
8334 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8335 ArrayRef<int> ShuffleMask = SVN->getMask();
8336 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8337 SDValue Load = N->getOperand(0);
8338 LoadSDNode *LD = cast<LoadSDNode>(Load);
8339
8340 // Create the element-swapping load.
8341 SDValue Ops[] = {
8342 LD->getChain(), // Chain
8343 LD->getBasePtr() // Ptr
8344 };
8345 SDValue ESLoad =
8346 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8347 DAG.getVTList(LD->getValueType(0), MVT::Other),
8348 Ops, LD->getMemoryVT(), LD->getMemOperand());
8349
8350 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8351 // by the load dead.
8352 DCI.CombineTo(N, ESLoad);
8353
8354 // Next, combine the load away, we give it a bogus result value but a real
8355 // chain result. The result value is dead because the shuffle is dead.
8356 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8357
8358 // Return N so it doesn't get rechecked!
8359 return SDValue(N, 0);
8360 }
8361 }
8362
8363 return SDValue();
8364}
8365
8366SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8367 SDNode *N, DAGCombinerInfo &DCI) const {
8368 SelectionDAG &DAG = DCI.DAG;
8369
8370 if (!Subtarget.hasVector())
8371 return SDValue();
8372
8373 // Look through bitcasts that retain the number of vector elements.
8374 SDValue Op = N->getOperand(0);
8375 if (Op.getOpcode() == ISD::BITCAST &&
8376 Op.getValueType().isVector() &&
8377 Op.getOperand(0).getValueType().isVector() &&
8378 Op.getValueType().getVectorNumElements() ==
8379 Op.getOperand(0).getValueType().getVectorNumElements())
8380 Op = Op.getOperand(0);
8381
8382 // Pull BSWAP out of a vector extraction.
8383 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8384 EVT VecVT = Op.getValueType();
8385 EVT EltVT = VecVT.getVectorElementType();
8386 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8387 Op.getOperand(0), N->getOperand(1));
8388 DCI.AddToWorklist(Op.getNode());
8389 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8390 if (EltVT != N->getValueType(0)) {
8391 DCI.AddToWorklist(Op.getNode());
8392 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8393 }
8394 return Op;
8395 }
8396
8397 // Try to simplify a vector extraction.
8398 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8399 SDValue Op0 = N->getOperand(0);
8400 EVT VecVT = Op0.getValueType();
8401 if (canTreatAsByteVector(VecVT))
8402 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8403 IndexN->getZExtValue(), DCI, false);
8404 }
8405 return SDValue();
8406}
8407
8408SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8409 SDNode *N, DAGCombinerInfo &DCI) const {
8410 SelectionDAG &DAG = DCI.DAG;
8411 // (join_dwords X, X) == (replicate X)
8412 if (N->getOperand(0) == N->getOperand(1))
8413 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8414 N->getOperand(0));
8415 return SDValue();
8416}
8417
8419 SDValue Chain1 = N1->getOperand(0);
8420 SDValue Chain2 = N2->getOperand(0);
8421
8422 // Trivial case: both nodes take the same chain.
8423 if (Chain1 == Chain2)
8424 return Chain1;
8425
8426 // FIXME - we could handle more complex cases via TokenFactor,
8427 // assuming we can verify that this would not create a cycle.
8428 return SDValue();
8429}
8430
8431SDValue SystemZTargetLowering::combineFP_ROUND(
8432 SDNode *N, DAGCombinerInfo &DCI) const {
8433
8434 if (!Subtarget.hasVector())
8435 return SDValue();
8436
8437 // (fpround (extract_vector_elt X 0))
8438 // (fpround (extract_vector_elt X 1)) ->
8439 // (extract_vector_elt (VROUND X) 0)
8440 // (extract_vector_elt (VROUND X) 2)
8441 //
8442 // This is a special case since the target doesn't really support v2f32s.
8443 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8444 SelectionDAG &DAG = DCI.DAG;
8445 SDValue Op0 = N->getOperand(OpNo);
8446 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8448 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8449 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8450 Op0.getConstantOperandVal(1) == 0) {
8451 SDValue Vec = Op0.getOperand(0);
8452 for (auto *U : Vec->users()) {
8453 if (U != Op0.getNode() && U->hasOneUse() &&
8454 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8455 U->getOperand(0) == Vec &&
8456 U->getOperand(1).getOpcode() == ISD::Constant &&
8457 U->getConstantOperandVal(1) == 1) {
8458 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8459 if (OtherRound.getOpcode() == N->getOpcode() &&
8460 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8461 OtherRound.getValueType() == MVT::f32) {
8462 SDValue VRound, Chain;
8463 if (N->isStrictFPOpcode()) {
8464 Chain = MergeInputChains(N, OtherRound.getNode());
8465 if (!Chain)
8466 continue;
8467 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8468 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8469 Chain = VRound.getValue(1);
8470 } else
8471 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8472 MVT::v4f32, Vec);
8473 DCI.AddToWorklist(VRound.getNode());
8474 SDValue Extract1 =
8475 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8476 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8477 DCI.AddToWorklist(Extract1.getNode());
8478 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8479 if (Chain)
8480 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8481 SDValue Extract0 =
8482 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8483 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8484 if (Chain)
8485 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8486 N->getVTList(), Extract0, Chain);
8487 return Extract0;
8488 }
8489 }
8490 }
8491 }
8492 return SDValue();
8493}
8494
8495SDValue SystemZTargetLowering::combineFP_EXTEND(
8496 SDNode *N, DAGCombinerInfo &DCI) const {
8497
8498 if (!Subtarget.hasVector())
8499 return SDValue();
8500
8501 // (fpextend (extract_vector_elt X 0))
8502 // (fpextend (extract_vector_elt X 2)) ->
8503 // (extract_vector_elt (VEXTEND X) 0)
8504 // (extract_vector_elt (VEXTEND X) 1)
8505 //
8506 // This is a special case since the target doesn't really support v2f32s.
8507 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8508 SelectionDAG &DAG = DCI.DAG;
8509 SDValue Op0 = N->getOperand(OpNo);
8510 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8512 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8513 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8514 Op0.getConstantOperandVal(1) == 0) {
8515 SDValue Vec = Op0.getOperand(0);
8516 for (auto *U : Vec->users()) {
8517 if (U != Op0.getNode() && U->hasOneUse() &&
8518 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8519 U->getOperand(0) == Vec &&
8520 U->getOperand(1).getOpcode() == ISD::Constant &&
8521 U->getConstantOperandVal(1) == 2) {
8522 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8523 if (OtherExtend.getOpcode() == N->getOpcode() &&
8524 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8525 OtherExtend.getValueType() == MVT::f64) {
8526 SDValue VExtend, Chain;
8527 if (N->isStrictFPOpcode()) {
8528 Chain = MergeInputChains(N, OtherExtend.getNode());
8529 if (!Chain)
8530 continue;
8531 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8532 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8533 Chain = VExtend.getValue(1);
8534 } else
8535 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8536 MVT::v2f64, Vec);
8537 DCI.AddToWorklist(VExtend.getNode());
8538 SDValue Extract1 =
8539 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8540 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8541 DCI.AddToWorklist(Extract1.getNode());
8542 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8543 if (Chain)
8544 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8545 SDValue Extract0 =
8546 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8547 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8548 if (Chain)
8549 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8550 N->getVTList(), Extract0, Chain);
8551 return Extract0;
8552 }
8553 }
8554 }
8555 }
8556 return SDValue();
8557}
8558
8559SDValue SystemZTargetLowering::combineINT_TO_FP(
8560 SDNode *N, DAGCombinerInfo &DCI) const {
8561 if (DCI.Level != BeforeLegalizeTypes)
8562 return SDValue();
8563 SelectionDAG &DAG = DCI.DAG;
8564 LLVMContext &Ctx = *DAG.getContext();
8565 unsigned Opcode = N->getOpcode();
8566 EVT OutVT = N->getValueType(0);
8567 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8568 SDValue Op = N->getOperand(0);
8569 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8570 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8571
8572 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8573 // v2f64 = uint_to_fp v2i16
8574 // =>
8575 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8576 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8577 OutScalarBits <= 64) {
8578 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8579 EVT ExtVT = EVT::getVectorVT(
8580 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8581 unsigned ExtOpcode =
8583 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8584 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8585 }
8586 return SDValue();
8587}
8588
8589SDValue SystemZTargetLowering::combineFCOPYSIGN(
8590 SDNode *N, DAGCombinerInfo &DCI) const {
8591 SelectionDAG &DAG = DCI.DAG;
8592 EVT VT = N->getValueType(0);
8593 SDValue ValOp = N->getOperand(0);
8594 SDValue SignOp = N->getOperand(1);
8595
8596 // Remove the rounding which is not needed.
8597 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8598 SDValue WideOp = SignOp.getOperand(0);
8599 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8600 }
8601
8602 return SDValue();
8603}
8604
// Combine a BSWAP node. Three rewrites are attempted, in this order:
//  1. BSWAP of a single-use non-extending load, when
//     canLoadStoreByteSwapped() accepts the type, becomes one
//     SystemZISD::LRV memory intrinsic node.
//  2. BSWAP of a single-use INSERT_VECTOR_ELT (optionally seen through a
//     bitcast that keeps the element count) is pushed into both the vector
//     and the inserted element.
//  3. BSWAP of a single-use vector shuffle is pushed into both shuffle
//     inputs, keeping the shuffle mask.
// Returns the replacement value, SDValue(N, 0) when the load case replaced
// nodes through DCI, or an empty SDValue if nothing applies.
8605SDValue SystemZTargetLowering::combineBSWAP(
8606 SDNode *N, DAGCombinerInfo &DCI) const {
8607 SelectionDAG &DAG = DCI.DAG;
8608 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8609 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8610 N->getOperand(0).hasOneUse() &&
8611 canLoadStoreByteSwapped(N->getValueType(0))) {
8612 SDValue Load = N->getOperand(0);
8613 LoadSDNode *LD = cast<LoadSDNode>(Load);
8614
8615 // Create the byte-swapping load.
8616 SDValue Ops[] = {
8617 LD->getChain(), // Chain
8618 LD->getBasePtr() // Ptr
8619 };
// An i16 result is produced as i32 by the LRV node and truncated back below;
// the memory type still comes from the original load.
8620 EVT LoadVT = N->getValueType(0);
8621 if (LoadVT == MVT::i16)
8622 LoadVT = MVT::i32;
8623 SDValue BSLoad =
8624 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8625 DAG.getVTList(LoadVT, MVT::Other),
8626 Ops, LD->getMemoryVT(), LD->getMemOperand());
8627
8628 // If this is an i16 load, insert the truncate.
8629 SDValue ResVal = BSLoad;
8630 if (N->getValueType(0) == MVT::i16)
8631 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8632
8633 // First, combine the bswap away. This makes the value produced by the
8634 // load dead.
8635 DCI.CombineTo(N, ResVal);
8636
8637 // Next, combine the load away, we give it a bogus result value but a real
8638 // chain result. The result value is dead because the bswap is dead.
8639 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8640
8641 // Return N so it doesn't get rechecked!
8642 return SDValue(N, 0);
8643 }
8644
8645 // Look through bitcasts that retain the number of vector elements.
8646 SDValue Op = N->getOperand(0);
8647 if (Op.getOpcode() == ISD::BITCAST &&
8648 Op.getValueType().isVector() &&
8649 Op.getOperand(0).getValueType().isVector() &&
8650 Op.getValueType().getVectorNumElements() ==
8651 Op.getOperand(0).getValueType().getVectorNumElements())
8652 Op = Op.getOperand(0);
8653
8654 // Push BSWAP into a vector insertion if at least one side then simplifies.
8655 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8656 SDValue Vec = Op.getOperand(0);
8657 SDValue Elt = Op.getOperand(1);
8658 SDValue Idx = Op.getOperand(2);
8659
// NOTE(review): listing lines 8660 and 8662 are absent from this text, so the
// condition below has no opening `if (` and is missing one disjunct each for
// Vec and Elt. Presumably the dropped lines test whether Vec / Elt is a
// constant (a BSWAP of a constant folds away) - confirm against upstream
// before relying on this block.
8661 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8663 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8664 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8665 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8666 EVT VecVT = N->getValueType(0);
8667 EVT EltVT = N->getValueType(0).getVectorElementType();
// Vec and Elt may still have the pre-bitcast types if a bitcast was looked
// through above; cast them to the BSWAP's own types first.
8668 if (VecVT != Vec.getValueType()) {
8669 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8670 DCI.AddToWorklist(Vec.getNode());
8671 }
8672 if (EltVT != Elt.getValueType()) {
8673 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8674 DCI.AddToWorklist(Elt.getNode());
8675 }
8676 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8677 DCI.AddToWorklist(Vec.getNode());
8678 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8679 DCI.AddToWorklist(Elt.getNode());
8680 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8681 Vec, Elt, Idx);
8682 }
8683 }
8684
8685 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8686 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8687 if (SV && Op.hasOneUse()) {
8688 SDValue Op0 = Op.getOperand(0);
8689 SDValue Op1 = Op.getOperand(1);
8690
// NOTE(review): listing lines 8691 and 8693 are absent from this text, so the
// condition below has no opening `if (` and is missing one disjunct each for
// Op0 and Op1. Presumably they mirror the dropped tests in the
// INSERT_VECTOR_ELT case above - confirm against upstream.
8692 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8694 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8695 EVT VecVT = N->getValueType(0);
8696 if (VecVT != Op0.getValueType()) {
8697 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8698 DCI.AddToWorklist(Op0.getNode());
8699 }
8700 if (VecVT != Op1.getValueType()) {
8701 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8702 DCI.AddToWorklist(Op1.getNode());
8703 }
8704 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8705 DCI.AddToWorklist(Op0.getNode());
8706 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8707 DCI.AddToWorklist(Op1.getNode());
8708 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8709 }
8710 }
8711
8712 return SDValue();
8713}
8714
// Combine a SETCC node that tests, for equality or inequality against 0 or
// all-ones, the result of a vector compare whose operands are 128 bits wide.
// The outer compare is re-expressed as an i128 compare of the sign-extended
// (or truncated) vector-compare result bitcast to i128.
// Returns the new SETCC, or an empty SDValue if the pattern does not match.
8715SDValue SystemZTargetLowering::combineSETCC(
8716 SDNode *N, DAGCombinerInfo &DCI) const {
8717 SelectionDAG &DAG = DCI.DAG;
8718 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8719 const SDValue LHS = N->getOperand(0);
8720 const SDValue RHS = N->getOperand(1);
8721 bool CmpNull = isNullConstant(RHS);
8722 bool CmpAllOnes = isAllOnesConstant(RHS);
8723 EVT VT = N->getValueType(0);
8724 SDLoc DL(N);
8725
8726 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8727 // change the outer compare to a i128 compare. This will normally
8728 // allow the reduction to be recognized in adjustICmp128, and even if
8729 // not, the i128 compare will still generate better code.
8730 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
// NOTE(review): listing line 8731, which must declare `Src`, is absent from
// this text. Going by the pattern comment above, Src is presumably LHS with
// any bitcast looked through - confirm against upstream.
8732 if (Src.getOpcode() == ISD::SETCC &&
8733 Src.getValueType().isFixedLengthVector() &&
8734 Src.getValueType().getScalarType() == MVT::i1) {
8735 EVT CmpVT = Src.getOperand(0).getValueType();
8736 if (CmpVT.getSizeInBits() == 128) {
8737 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
// These locals intentionally shadow the outer LHS/RHS: they are the operands
// of the replacement i128 compare.
8738 SDValue LHS =
8739 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8740 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8741 : DAG.getAllOnesConstant(DL, MVT::i128);
8742 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8743 N->getFlags());
8744 }
8745 }
8746 }
8747
8748 return SDValue();
8749}
8750
8751static std::pair<SDValue, int> findCCUse(const SDValue &Val,
8752 unsigned Depth = 0) {
8753 // Limit depth of potentially exponential walk.
8754 if (Depth > 5)
8755 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8756
8757 switch (Val.getOpcode()) {
8758 default:
8759 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8760 case SystemZISD::IPM:
8761 if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
8762 Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
8763 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
8764 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
8765 case SystemZISD::SELECT_CCMASK: {
8766 SDValue Op4CCReg = Val.getOperand(4);
8767 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8768 Op4CCReg.getOpcode() == SystemZISD::TM) {
8769 auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0), Depth + 1);
8770 if (OpCC != SDValue())
8771 return std::make_pair(OpCC, OpCCValid);
8772 }
8773 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8774 if (!CCValid)
8775 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8776 int CCValidVal = CCValid->getZExtValue();
8777 return std::make_pair(Op4CCReg, CCValidVal);
8778 }
8779 case ISD::ADD:
8780 case ISD::AND:
8781 case ISD::OR:
8782 case ISD::XOR:
8783 case ISD::SHL:
8784 case ISD::SRA:
8785 case ISD::SRL:
8786 auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0), Depth + 1);
8787 if (Op0CC != SDValue())
8788 return std::make_pair(Op0CC, Op0CCValid);
8789 return findCCUse(Val.getOperand(1), Depth + 1);
8790 }
8791}
8792
// Forward declaration: combineCCMask and simplifyAssumingCCVal (defined
// next) call each other, so one of them has to be declared ahead of its
// definition.
8793static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8794 SelectionDAG &DAG);
8795
// simplifyAssumingCCVal: for each of the four condition-code values 0..3,
// compute the value Val would have if CC held that CC value. The result is a
// vector of four SDValues indexed by CC value, or an empty vector when Val
// cannot be simplified this way.
// NOTE(review): the first line of this signature (listing line 8796: return
// type, function name and the Val / CC parameters) is absent from this text.
// From the return statements the return type is presumably
// SmallVector<SDValue, 4> - confirm against upstream.
8797 SelectionDAG &DAG) {
8798 SDLoc DL(Val);
8799 auto Opcode = Val.getOpcode();
8800 switch (Opcode) {
8801 default:
8802 return {};
// A constant has the same value whatever the condition code is.
8803 case ISD::Constant:
8804 return {Val, Val, Val, Val};
8805 case SystemZISD::IPM: {
8806 SDValue IPMOp0 = Val.getOperand(0);
8807 if (IPMOp0 != CC)
8808 return {};
// IPM of the CC being assumed: entry i is the constant i << IPM_CC.
// (The loop variable shadows the CC parameter.)
8809 SmallVector<SDValue, 4> ShiftedCCVals;
8810 for (auto CC : {0, 1, 2, 3})
8811 ShiftedCCVals.emplace_back(
8812 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8813 return ShiftedCCVals;
8814 }
8815 case SystemZISD::SELECT_CCMASK: {
8816 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8817 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8818 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8819 if (!CCValid || !CCMask)
8820 return {};
8821
8822 int CCValidVal = CCValid->getZExtValue();
8823 int CCMaskVal = CCMask->getZExtValue();
8824 // Pruning search tree early - Moving CC test and combineCCMask ahead of
8825 // recursive call to simplifyAssumingCCVal.
8826 SDValue Op4CCReg = Val.getOperand(4);
// If the select tests a different CC, try to rewrite its test in terms of
// another CC source; give up unless it then tests exactly CC.
8827 if (Op4CCReg != CC)
8828 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8829 if (Op4CCReg != CC)
8830 return {};
8831 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8832 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8833 if (TrueSDVals.empty() || FalseSDVals.empty())
8834 return {};
// Mask bit (3 - CCVal) selects the true arm for condition code CCVal.
8835 SmallVector<SDValue, 4> MergedSDVals;
8836 for (auto &CCVal : {0, 1, 2, 3})
8837 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8838 ? TrueSDVals[CCVal]
8839 : FalseSDVals[CCVal]);
8840 return MergedSDVals;
8841 }
8842 case ISD::ADD:
8843 case ISD::AND:
8844 case ISD::OR:
8845 case ISD::XOR:
8846 case ISD::SRA:
8847 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8848 // would clobber CC).
8849 if (!Val.hasOneUse())
8850 return {};
8851 [[fallthrough]];
8852 case ISD::SHL:
8853 case ISD::SRL:
// Binary operation: simplify both operands, then rebuild the operation once
// per CC value from the per-CC operand values.
8854 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8855 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8856 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8857 if (Op0SDVals.empty() || Op1SDVals.empty())
8858 return {};
8859 SmallVector<SDValue, 4> BinaryOpSDVals;
8860 for (auto CCVal : {0, 1, 2, 3})
8861 BinaryOpSDVals.emplace_back(DAG.getNode(
8862 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8863 return BinaryOpSDVals;
8864 }
8865}
8866
8867static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8868 SelectionDAG &DAG) {
8869 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8870 // set by the CCReg instruction using the CCValid / CCMask masks,
8871 // If the CCReg instruction is itself a ICMP / TM testing the condition
8872 // code set by some other instruction, see whether we can directly
8873 // use that condition code.
8874 auto *CCNode = CCReg.getNode();
8875 if (!CCNode)
8876 return false;
8877
8878 if (CCNode->getOpcode() == SystemZISD::TM) {
8879 if (CCValid != SystemZ::CCMASK_TM)
8880 return false;
8881 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8882 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8883 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8884 if (!Op0Node || !Op1Node)
8885 return -1;
8886 auto Op0APVal = Op0Node->getAPIntValue();
8887 auto Op1APVal = Op1Node->getAPIntValue();
8888 auto Result = Op0APVal & Op1APVal;
8889 bool AllOnes = Result == Op1APVal;
8890 bool AllZeros = Result == 0;
8891 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits() - 1] != 0;
8892 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8893 };
8894 SDValue Op0 = CCNode->getOperand(0);
8895 SDValue Op1 = CCNode->getOperand(1);
8896 auto [Op0CC, Op0CCValid] = findCCUse(Op0);
8897 if (Op0CC == SDValue())
8898 return false;
8899 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
8900 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
8901 if (Op0SDVals.empty() || Op1SDVals.empty())
8902 return false;
8903 int NewCCMask = 0;
8904 for (auto CC : {0, 1, 2, 3}) {
8905 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8906 if (CCVal < 0)
8907 return false;
8908 NewCCMask <<= 1;
8909 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8910 }
8911 NewCCMask &= Op0CCValid;
8912 CCReg = Op0CC;
8913 CCMask = NewCCMask;
8914 CCValid = Op0CCValid;
8915 return true;
8916 }
8917 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8918 CCValid != SystemZ::CCMASK_ICMP)
8919 return false;
8920
8921 SDValue CmpOp0 = CCNode->getOperand(0);
8922 SDValue CmpOp1 = CCNode->getOperand(1);
8923 SDValue CmpOp2 = CCNode->getOperand(2);
8924 auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
8925 if (Op0CC != SDValue()) {
8926 const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
8927 const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
8928 if (Op0SDVals.empty() || Op1SDVals.empty())
8929 return false;
8930
8931 auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
8932 auto CmpTypeVal = CmpType->getZExtValue();
8933 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8934 const SDValue &Op1Val) {
8935 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8936 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8937 if (!Op0Node || !Op1Node)
8938 return -1;
8939 auto Op0APVal = Op0Node->getAPIntValue();
8940 auto Op1APVal = Op1Node->getAPIntValue();
8941 if (CmpTypeVal == SystemZICMP::SignedOnly)
8942 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
8943 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
8944 };
8945 int NewCCMask = 0;
8946 for (auto CC : {0, 1, 2, 3}) {
8947 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8948 if (CCVal < 0)
8949 return false;
8950 NewCCMask <<= 1;
8951 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8952 }
8953 NewCCMask &= Op0CCValid;
8954 CCMask = NewCCMask;
8955 CCReg = Op0CC;
8956 CCValid = Op0CCValid;
8957 return true;
8958 }
8959
8960 return false;
8961}
8962
8963// Merging versus split in multiple branches cost.
// NOTE(review): listing lines 8964-8965 (return type, function name and the
// first parameter) are absent from this text; presumably this is
// SystemZTargetLowering::getJumpConditionMergingParams - confirm against
// upstream. The visible body returns a three-integer brace list: {3, 0, -1}
// when both Lhs and Rhs match the inline-asm flag-output pattern below, and
// {-1, -1, -1} otherwise.
8966 const Value *Lhs,
8967 const Value *Rhs) const {
// True if V is an instruction that either compares against a constant, or
// has as operand 0 an AND with a constant, where the non-constant side is an
// inline-asm call whose constraint string contains "{@cc}".
8968 const auto isFlagOutOpCC = [](const Value *V) {
8969 using namespace llvm::PatternMatch;
8970 const Value *RHSVal;
8971 const APInt *RHSC;
8972 if (const auto *I = dyn_cast<Instruction>(V)) {
8973 // PatternMatch.h provides concise tree-based pattern match of llvm IR.
8974 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8975 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8976 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8977 if (CB->isInlineAsm()) {
8978 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8979 return IA && IA->getConstraintString().contains("{@cc}");
8980 }
8981 }
8982 }
8983 }
8984 return false;
8985 };
8986 // Pattern (ICmp %asm) or (ICmp (And %asm)).
8987 // Cost of longest dependency chain (ICmp, And) is 2. CostThreshold or
8988 // BaseCost can be set >=2. If cost of instruction <= CostThreshold
8989 // conditionals will be merged or else conditionals will be split.
8990 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8991 return {3, 0, -1};
8992 // Default.
8993 return {-1, -1, -1};
8994}
8995
8996SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8997 DAGCombinerInfo &DCI) const {
8998 SelectionDAG &DAG = DCI.DAG;
8999
9000 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
9001 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
9002 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9003 if (!CCValid || !CCMask)
9004 return SDValue();
9005
9006 int CCValidVal = CCValid->getZExtValue();
9007 int CCMaskVal = CCMask->getZExtValue();
9008 SDValue Chain = N->getOperand(0);
9009 SDValue CCReg = N->getOperand(4);
9010 // If combineCMask was able to merge or simplify ccvalid or ccmask, re-emit
9011 // the modified BR_CCMASK with the new values.
9012 // In order to avoid conditional branches with full or empty cc masks, do not
9013 // do this if ccmask is 0 or equal to ccvalid.
9014 if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG) && CCMaskVal != 0 &&
9015 CCMaskVal != CCValidVal)
9016 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
9017 Chain,
9018 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9019 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
9020 N->getOperand(3), CCReg);
9021 return SDValue();
9022}
9023
9024SDValue SystemZTargetLowering::combineSELECT_CCMASK(
9025 SDNode *N, DAGCombinerInfo &DCI) const {
9026 SelectionDAG &DAG = DCI.DAG;
9027
9028 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
9029 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
9030 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
9031 if (!CCValid || !CCMask)
9032 return SDValue();
9033
9034 int CCValidVal = CCValid->getZExtValue();
9035 int CCMaskVal = CCMask->getZExtValue();
9036 SDValue CCReg = N->getOperand(4);
9037
9038 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
9039
9040 // Populate SDVals vector for each condition code ccval for given Val, which
9041 // can again be another nested select_ccmask with the same CC.
9042 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
9043 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
9045 if (Val.getOperand(4) != CCReg)
9046 return SmallVector<SDValue, 4>{};
9047 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
9048 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
9049 if (!CCMask)
9050 return SmallVector<SDValue, 4>{};
9051
9052 int CCMaskVal = CCMask->getZExtValue();
9053 for (auto &CC : {0, 1, 2, 3})
9054 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
9055 : FalseVal);
9056 return Res;
9057 }
9058 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
9059 };
9060 // Attempting to optimize TrueVal/FalseVal in outermost select_ccmask either
9061 // with CCReg found by combineCCMask or original CCReg.
9062 SDValue TrueVal = N->getOperand(0);
9063 SDValue FalseVal = N->getOperand(1);
9064 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
9065 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
9066 // TrueSDVals/FalseSDVals might be empty in case of non-constant
9067 // TrueVal/FalseVal for select_ccmask, which can not be optimized further.
9068 if (TrueSDVals.empty())
9069 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
9070 if (FalseSDVals.empty())
9071 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
9072 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
9073 SmallSet<SDValue, 4> MergedSDValsSet;
9074 // Ignoring CC values outside CCValiid.
9075 for (auto CC : {0, 1, 2, 3}) {
9076 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
9077 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
9078 ? TrueSDVals[CC]
9079 : FalseSDVals[CC]);
9080 }
9081 if (MergedSDValsSet.size() == 1)
9082 return *MergedSDValsSet.begin();
9083 if (MergedSDValsSet.size() == 2) {
9084 auto BeginIt = MergedSDValsSet.begin();
9085 SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
9086 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
9087 std::swap(NewTrueVal, NewFalseVal);
9088 int NewCCMask = 0;
9089 for (auto CC : {0, 1, 2, 3}) {
9090 NewCCMask <<= 1;
9091 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
9092 ? (TrueSDVals[CC] == NewTrueVal)
9093 : (FalseSDVals[CC] == NewTrueVal);
9094 }
9095 CCMaskVal = NewCCMask;
9096 CCMaskVal &= CCValidVal;
9097 TrueVal = NewTrueVal;
9098 FalseVal = NewFalseVal;
9099 IsCombinedCCReg = true;
9100 }
9101 }
9102 // If the condition is trivially false or trivially true after
9103 // combineCCMask, just collapse this SELECT_CCMASK to the indicated value
9104 // (possibly modified by constructCCSDValsFromSELECT).
9105 if (CCMaskVal == 0)
9106 return FalseVal;
9107 if (CCMaskVal == CCValidVal)
9108 return TrueVal;
9109
9110 if (IsCombinedCCReg)
9111 return DAG.getNode(
9112 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
9113 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9114 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
9115
9116 return SDValue();
9117}
9118
9119SDValue SystemZTargetLowering::combineGET_CCMASK(
9120 SDNode *N, DAGCombinerInfo &DCI) const {
9121
9122 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
9123 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
9124 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9125 if (!CCValid || !CCMask)
9126 return SDValue();
9127 int CCValidVal = CCValid->getZExtValue();
9128 int CCMaskVal = CCMask->getZExtValue();
9129
9130 SDValue Select = N->getOperand(0);
9131 if (Select->getOpcode() == ISD::TRUNCATE)
9132 Select = Select->getOperand(0);
9133 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
9134 return SDValue();
9135
9136 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
9137 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
9138 if (!SelectCCValid || !SelectCCMask)
9139 return SDValue();
9140 int SelectCCValidVal = SelectCCValid->getZExtValue();
9141 int SelectCCMaskVal = SelectCCMask->getZExtValue();
9142
9143 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
9144 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
9145 if (!TrueVal || !FalseVal)
9146 return SDValue();
9147 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
9148 ;
9149 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
9150 SelectCCMaskVal ^= SelectCCValidVal;
9151 else
9152 return SDValue();
9153
9154 if (SelectCCValidVal & ~CCValidVal)
9155 return SDValue();
9156 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
9157 return SDValue();
9158
9159 return Select->getOperand(4);
9160}
9161
9162SDValue SystemZTargetLowering::combineIntDIVREM(
9163 SDNode *N, DAGCombinerInfo &DCI) const {
9164 SelectionDAG &DAG = DCI.DAG;
9165 EVT VT = N->getValueType(0);
9166 // In the case where the divisor is a vector of constants a cheaper
9167 // sequence of instructions can replace the divide. BuildSDIV is called to
9168 // do this during DAG combining, but it only succeeds when it can build a
9169 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9170 // since it is not Legal but Custom it can only happen before
9171 // legalization. Therefore we must scalarize this early before Combine
9172 // 1. For widened vectors, this is already the result of type legalization.
9173 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9174 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9175 return DAG.UnrollVectorOp(N);
9176 return SDValue();
9177}
9178
9179
9180// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9181// This is closely modeled after the common-code combineShiftToMULH.
9182SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9183 SDNode *N, DAGCombinerInfo &DCI) const {
9184 SelectionDAG &DAG = DCI.DAG;
9185 SDLoc DL(N);
9186
9187 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9188 "SRL or SRA node is required here!");
9189
9190 if (!Subtarget.hasVector())
9191 return SDValue();
9192
9193 // Check the shift amount. Proceed with the transformation if the shift
9194 // amount is constant.
9195 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9196 if (!ShiftAmtSrc)
9197 return SDValue();
9198
9199 // The operation feeding into the shift must be an add.
9200 SDValue ShiftOperand = N->getOperand(0);
9201 if (ShiftOperand.getOpcode() != ISD::ADD)
9202 return SDValue();
9203
9204 // One operand of the add must be a multiply.
9205 SDValue MulOp = ShiftOperand.getOperand(0);
9206 SDValue AddOp = ShiftOperand.getOperand(1);
9207 if (MulOp.getOpcode() != ISD::MUL) {
9208 if (AddOp.getOpcode() != ISD::MUL)
9209 return SDValue();
9210 std::swap(MulOp, AddOp);
9211 }
9212
9213 // All operands must be equivalent extend nodes.
9214 SDValue LeftOp = MulOp.getOperand(0);
9215 SDValue RightOp = MulOp.getOperand(1);
9216
9217 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9218 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9219
9220 if (!IsSignExt && !IsZeroExt)
9221 return SDValue();
9222
9223 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9224 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9225
9226 SDValue MulhRightOp;
9227 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9228 unsigned ActiveBits = IsSignExt
9229 ? Constant->getAPIntValue().getSignificantBits()
9230 : Constant->getAPIntValue().getActiveBits();
9231 if (ActiveBits > NarrowVTSize)
9232 return SDValue();
9233 MulhRightOp = DAG.getConstant(
9234 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9235 NarrowVT);
9236 } else {
9237 if (LeftOp.getOpcode() != RightOp.getOpcode())
9238 return SDValue();
9239 // Check that the two extend nodes are the same type.
9240 if (NarrowVT != RightOp.getOperand(0).getValueType())
9241 return SDValue();
9242 MulhRightOp = RightOp.getOperand(0);
9243 }
9244
9245 SDValue MulhAddOp;
9246 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9247 unsigned ActiveBits = IsSignExt
9248 ? Constant->getAPIntValue().getSignificantBits()
9249 : Constant->getAPIntValue().getActiveBits();
9250 if (ActiveBits > NarrowVTSize)
9251 return SDValue();
9252 MulhAddOp = DAG.getConstant(
9253 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9254 NarrowVT);
9255 } else {
9256 if (LeftOp.getOpcode() != AddOp.getOpcode())
9257 return SDValue();
9258 // Check that the two extend nodes are the same type.
9259 if (NarrowVT != AddOp.getOperand(0).getValueType())
9260 return SDValue();
9261 MulhAddOp = AddOp.getOperand(0);
9262 }
9263
9264 EVT WideVT = LeftOp.getValueType();
9265 // Proceed with the transformation if the wide types match.
9266 assert((WideVT == RightOp.getValueType()) &&
9267 "Cannot have a multiply node with two different operand types.");
9268 assert((WideVT == AddOp.getValueType()) &&
9269 "Cannot have an add node with two different operand types.");
9270
9271 // Proceed with the transformation if the wide type is twice as large
9272 // as the narrow type.
9273 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9274 return SDValue();
9275
9276 // Check the shift amount with the narrow type size.
9277 // Proceed with the transformation if the shift amount is the width
9278 // of the narrow type.
9279 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9280 if (ShiftAmt != NarrowVTSize)
9281 return SDValue();
9282
9283 // Proceed if we support the multiply-and-add-high operation.
9284 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9285 NarrowVT == MVT::v4i32 ||
9286 (Subtarget.hasVectorEnhancements3() &&
9287 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9288 return SDValue();
9289
9290 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9291 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9292 DL, NarrowVT, LeftOp.getOperand(0),
9293 MulhRightOp, MulhAddOp);
9294 bool IsSigned = N->getOpcode() == ISD::SRA;
9295 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9296}
9297
9298 // Op is an operand of a multiplication. Check whether this can be folded
9299 // into an even/odd widening operation; if so, return the opcode to be used
9300 // and update Op to the appropriate sub-operand. Note that the caller must
9301 // verify that *both* operands of the multiplication support the operation.
9303 const SystemZSubtarget &Subtarget,
9304 SDValue &Op) {
  // Returns 0 if no even/odd pattern is recognized. Op is rewritten while the
  // pattern is peeled off, so it may already refer to a sub-operand even when
  // 0 is returned; callers should only use it after a non-zero result.
9305 EVT VT = Op.getValueType();
9306
9307 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9308 // to selecting the even or odd vector elements.
9309 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9310 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9311 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9312 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9313 unsigned NumElts = VT.getVectorNumElements();
9314 Op = Op.getOperand(0);
  // The extended source must have twice as many elements as the result.
9315 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9316 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9318 ArrayRef<int> ShuffleMask = SVN->getMask();
9319 bool CanUseEven = true, CanUseOdd = true;
  // Only the first NumElts mask entries are inspected. An entry of -1 is
  // compatible with both the even and the odd selection.
9320 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9321 if (ShuffleMask[Elt] == -1)
9322 continue;
9323 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9324 CanUseEven = false;
9325 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9326 CanUseOdd = false;
9327 }
  // Step to the first shuffle input. Even is preferred if both selections
  // are possible (which requires all inspected entries to be -1).
9328 Op = Op.getOperand(0);
9329 if (CanUseEven)
9330 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9331 if (CanUseOdd)
9332 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9333 }
9334 }
9335
9336 // For z17, we can also support the v2i64->i128 case, which looks like
9337 // (sign/zero_extend (extract_vector_elt X 0/1))
9338 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9339 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9340 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9341 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9342 Op = Op.getOperand(0);
9343 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9344 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9345 Op.getOperand(1).getOpcode() == ISD::Constant) {
  // Element 0 maps to the even opcodes, element 1 to the odd opcodes.
9346 unsigned Elem = Op.getConstantOperandVal(1);
9347 Op = Op.getOperand(0);
9348 if (Elem == 0)
9349 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9350 if (Elem == 1)
9351 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9352 }
9353 }
9354
  // No even/odd widening pattern was found.
9355 return 0;
9356}
9357
9358SDValue SystemZTargetLowering::combineMUL(
9359 SDNode *N, DAGCombinerInfo &DCI) const {
9360 SelectionDAG &DAG = DCI.DAG;
9361
9362 // Detect even/odd widening multiplication.
9363 SDValue Op0 = N->getOperand(0);
9364 SDValue Op1 = N->getOperand(1);
9365 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9366 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9367 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9368 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9369
9370 return SDValue();
9371}
9372
9373SDValue SystemZTargetLowering::combineINTRINSIC(
9374 SDNode *N, DAGCombinerInfo &DCI) const {
9375 SelectionDAG &DAG = DCI.DAG;
9376
9377 unsigned Id = N->getConstantOperandVal(1);
9378 switch (Id) {
9379 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9380 // or larger is simply a vector load.
9381 case Intrinsic::s390_vll:
9382 case Intrinsic::s390_vlrl:
9383 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9384 if (C->getZExtValue() >= 15)
9385 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9386 N->getOperand(3), MachinePointerInfo());
9387 break;
9388 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9389 case Intrinsic::s390_vstl:
9390 case Intrinsic::s390_vstrl:
9391 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9392 if (C->getZExtValue() >= 15)
9393 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9394 N->getOperand(4), MachinePointerInfo());
9395 break;
9396 }
9397
9398 return SDValue();
9399}
9400
9401SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9402 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9403 return N->getOperand(0);
9404 return N;
9405}
9406
9408 DAGCombinerInfo &DCI) const {
  // Dispatch on the opcode of N to the matching combine* routine. Opcodes
  // without a case below are left alone: an empty SDValue is returned.
9409 switch(N->getOpcode()) {
9410 default: break;
9411 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9412 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9413 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9414 case SystemZISD::MERGE_HIGH:
9415 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9416 case ISD::LOAD: return combineLOAD(N, DCI);
9417 case ISD::STORE: return combineSTORE(N, DCI);
9418 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9419 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9420 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9422 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9424 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9425 case ISD::SINT_TO_FP:
9426 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9427 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9428 case ISD::BSWAP: return combineBSWAP(N, DCI);
9429 case ISD::SETCC: return combineSETCC(N, DCI);
9430 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9431 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9432 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
  // Both right shifts share the multiply-and-add-high combine.
9433 case ISD::SRL:
9434 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9435 case ISD::MUL: return combineMUL(N, DCI);
  // All integer division and remainder flavors share one combine.
9436 case ISD::SDIV:
9437 case ISD::UDIV:
9438 case ISD::SREM:
9439 case ISD::UREM: return combineIntDIVREM(N, DCI);
9441 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9442 }
9443
9444 return SDValue();
9445}
9446
9447// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9448// are for Op.
9449static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9450 unsigned OpNo) {
9451 EVT VT = Op.getValueType();
9452 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9453 APInt SrcDemE;
9454 unsigned Opcode = Op.getOpcode();
9455 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9456 unsigned Id = Op.getConstantOperandVal(0);
9457 switch (Id) {
9458 case Intrinsic::s390_vpksh: // PACKS
9459 case Intrinsic::s390_vpksf:
9460 case Intrinsic::s390_vpksg:
9461 case Intrinsic::s390_vpkshs: // PACKS_CC
9462 case Intrinsic::s390_vpksfs:
9463 case Intrinsic::s390_vpksgs:
9464 case Intrinsic::s390_vpklsh: // PACKLS
9465 case Intrinsic::s390_vpklsf:
9466 case Intrinsic::s390_vpklsg:
9467 case Intrinsic::s390_vpklshs: // PACKLS_CC
9468 case Intrinsic::s390_vpklsfs:
9469 case Intrinsic::s390_vpklsgs:
9470 // VECTOR PACK truncates the elements of two source vectors into one.
9471 SrcDemE = DemandedElts;
9472 if (OpNo == 2)
9473 SrcDemE.lshrInPlace(NumElts / 2);
9474 SrcDemE = SrcDemE.trunc(NumElts / 2);
9475 break;
9476 // VECTOR UNPACK extends half the elements of the source vector.
9477 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9478 case Intrinsic::s390_vuphh:
9479 case Intrinsic::s390_vuphf:
9480 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9481 case Intrinsic::s390_vuplhh:
9482 case Intrinsic::s390_vuplhf:
9483 SrcDemE = APInt(NumElts * 2, 0);
9484 SrcDemE.insertBits(DemandedElts, 0);
9485 break;
9486 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9487 case Intrinsic::s390_vuplhw:
9488 case Intrinsic::s390_vuplf:
9489 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9490 case Intrinsic::s390_vupllh:
9491 case Intrinsic::s390_vupllf:
9492 SrcDemE = APInt(NumElts * 2, 0);
9493 SrcDemE.insertBits(DemandedElts, NumElts);
9494 break;
9495 case Intrinsic::s390_vpdi: {
9496 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9497 SrcDemE = APInt(NumElts, 0);
9498 if (!DemandedElts[OpNo - 1])
9499 break;
9500 unsigned Mask = Op.getConstantOperandVal(3);
9501 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9502 // Demand input element 0 or 1, given by the mask bit value.
9503 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9504 break;
9505 }
9506 case Intrinsic::s390_vsldb: {
9507 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9508 assert(VT == MVT::v16i8 && "Unexpected type.");
9509 unsigned FirstIdx = Op.getConstantOperandVal(3);
9510 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9511 unsigned NumSrc0Els = 16 - FirstIdx;
9512 SrcDemE = APInt(NumElts, 0);
9513 if (OpNo == 1) {
9514 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9515 SrcDemE.insertBits(DemEls, FirstIdx);
9516 } else {
9517 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9518 SrcDemE.insertBits(DemEls, 0);
9519 }
9520 break;
9521 }
9522 case Intrinsic::s390_vperm:
9523 SrcDemE = APInt::getAllOnes(NumElts);
9524 break;
9525 default:
9526 llvm_unreachable("Unhandled intrinsic.");
9527 break;
9528 }
9529 } else {
9530 switch (Opcode) {
9531 case SystemZISD::JOIN_DWORDS:
9532 // Scalar operand.
9533 SrcDemE = APInt(1, 1);
9534 break;
9535 case SystemZISD::SELECT_CCMASK:
9536 SrcDemE = DemandedElts;
9537 break;
9538 default:
9539 llvm_unreachable("Unhandled opcode.");
9540 break;
9541 }
9542 }
9543 return SrcDemE;
9544}
9545
9546static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9547 const APInt &DemandedElts,
9548 const SelectionDAG &DAG, unsigned Depth,
9549 unsigned OpNo) {
9550 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9551 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9552 KnownBits LHSKnown =
9553 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9554 KnownBits RHSKnown =
9555 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9556 Known = LHSKnown.intersectWith(RHSKnown);
9557}
9558
9559void
9561 KnownBits &Known,
9562 const APInt &DemandedElts,
9563 const SelectionDAG &DAG,
9564 unsigned Depth) const {
  // Start from "nothing known"; Known is only refined for the result values,
  // intrinsics and target opcodes handled below.
9565 Known.resetAll();
9566
9567 // Intrinsic CC result is returned in the two low bits.
9568 unsigned Tmp0, Tmp1; // not used
9569 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9570 Known.Zero.setBitsFrom(2);
9571 return;
9572 }
  // Beyond the CC case only result number 0 with a typed value is analyzed.
9573 EVT VT = Op.getValueType();
9574 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9575 return;
9576 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9577 "KnownBits does not match VT in bitwidth");
9578 assert ((!VT.isVector() ||
9579 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9580 "DemandedElts does not match VT number of elements");
9581 unsigned BitWidth = Known.getBitWidth();
9582 unsigned Opcode = Op.getOpcode();
9583 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9584 bool IsLogical = false;
9585 unsigned Id = Op.getConstantOperandVal(0);
9586 switch (Id) {
  // Two-source operations: keep what is known for both sources (operands 1
  // and 2; operand 0 is the intrinsic ID).
9587 case Intrinsic::s390_vpksh: // PACKS
9588 case Intrinsic::s390_vpksf:
9589 case Intrinsic::s390_vpksg:
9590 case Intrinsic::s390_vpkshs: // PACKS_CC
9591 case Intrinsic::s390_vpksfs:
9592 case Intrinsic::s390_vpksgs:
9593 case Intrinsic::s390_vpklsh: // PACKLS
9594 case Intrinsic::s390_vpklsf:
9595 case Intrinsic::s390_vpklsg:
9596 case Intrinsic::s390_vpklshs: // PACKLS_CC
9597 case Intrinsic::s390_vpklsfs:
9598 case Intrinsic::s390_vpklsgs:
9599 case Intrinsic::s390_vpdi:
9600 case Intrinsic::s390_vsldb:
9601 case Intrinsic::s390_vperm:
9602 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9603 break;
  // The logical unpack forms zero-extend the source elements, the other
  // unpack forms sign-extend them.
9604 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9605 case Intrinsic::s390_vuplhh:
9606 case Intrinsic::s390_vuplhf:
9607 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9608 case Intrinsic::s390_vupllh:
9609 case Intrinsic::s390_vupllf:
9610 IsLogical = true;
9611 [[fallthrough]];
9612 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9613 case Intrinsic::s390_vuphh:
9614 case Intrinsic::s390_vuphf:
9615 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9616 case Intrinsic::s390_vuplhw:
9617 case Intrinsic::s390_vuplf: {
9618 SDValue SrcOp = Op.getOperand(1);
  // The unpack cases of getDemandedSrcElements do not consult the operand
  // number, so the value passed here does not influence the result.
9619 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9620 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9621 if (IsLogical) {
9622 Known = Known.zext(BitWidth);
9623 } else
9624 Known = Known.sext(BitWidth);
9625 break;
9626 }
9627 default:
9628 break;
9629 }
9630 } else {
9631 switch (Opcode) {
  // Both source operands (0 and 1) contribute to the result.
9632 case SystemZISD::JOIN_DWORDS:
9633 case SystemZISD::SELECT_CCMASK:
9634 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9635 break;
9636 case SystemZISD::REPLICATE: {
9637 SDValue SrcOp = Op.getOperand(0);
9638 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
9640 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9641 break;
9642 }
9643 default:
9644 break;
9645 }
9646 }
9647
9648 // Known has the width of the source operand(s). Adjust if needed to match
9649 // the passed bitwidth.
9650 if (Known.getBitWidth() != BitWidth)
9651 Known = Known.anyextOrTrunc(BitWidth);
9652}
9653
9654static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9655 const SelectionDAG &DAG, unsigned Depth,
9656 unsigned OpNo) {
9657 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9658 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9659 if (LHS == 1) return 1; // Early out.
9660 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9661 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9662 if (RHS == 1) return 1; // Early out.
9663 unsigned Common = std::min(LHS, RHS);
9664 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9665 EVT VT = Op.getValueType();
9666 unsigned VTBits = VT.getScalarSizeInBits();
9667 if (SrcBitWidth > VTBits) { // PACK
9668 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9669 if (Common > SrcExtraBits)
9670 return (Common - SrcExtraBits);
9671 return 1;
9672 }
9673 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9674 return Common;
9675}
9676
9677unsigned
9679 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9680 unsigned Depth) const {
  // Returns the number of known sign bits of Op for the intrinsics and target
  // opcodes handled below. Everything else, including any result other than
  // result 0, gets the conservative answer 1.
9681 if (Op.getResNo() != 0)
9682 return 1;
9683 unsigned Opcode = Op.getOpcode();
9684 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9685 unsigned Id = Op.getConstantOperandVal(0);
9686 switch (Id) {
  // Two-source operations: use the sign bits common to operands 1 and 2
  // (operand 0 is the intrinsic ID).
9687 case Intrinsic::s390_vpksh: // PACKS
9688 case Intrinsic::s390_vpksf:
9689 case Intrinsic::s390_vpksg:
9690 case Intrinsic::s390_vpkshs: // PACKS_CC
9691 case Intrinsic::s390_vpksfs:
9692 case Intrinsic::s390_vpksgs:
9693 case Intrinsic::s390_vpklsh: // PACKLS
9694 case Intrinsic::s390_vpklsf:
9695 case Intrinsic::s390_vpklsg:
9696 case Intrinsic::s390_vpklshs: // PACKLS_CC
9697 case Intrinsic::s390_vpklsfs:
9698 case Intrinsic::s390_vpklsgs:
9699 case Intrinsic::s390_vpdi:
9700 case Intrinsic::s390_vsldb:
9701 case Intrinsic::s390_vperm:
9702 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
  // Only the non-logical unpack forms are handled; the logical ones fall
  // into the default case.
9703 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9704 case Intrinsic::s390_vuphh:
9705 case Intrinsic::s390_vuphf:
9706 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9707 case Intrinsic::s390_vuplhw:
9708 case Intrinsic::s390_vuplf: {
9709 SDValue PackedOp = Op.getOperand(1);
9710 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9711 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9712 EVT VT = Op.getValueType();
9713 unsigned VTBits = VT.getScalarSizeInBits();
  // Every bit added by widening the element is one more sign bit.
9714 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9715 return Tmp;
9716 }
9717 default:
9718 break;
9719 }
9720 } else {
9721 switch (Opcode) {
  // The result is one of operands 0 and 1, so use their common sign bits.
9722 case SystemZISD::SELECT_CCMASK:
9723 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9724 default:
9725 break;
9726 }
9727 }
9728
9729 return 1;
9730}
9731
9733 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9734 UndefPoisonKind Kind, unsigned Depth) const {
  // Only the PC-relative address nodes are reported as never undef/poison;
  // the answer for every other opcode is false. DemandedElts, DAG, Kind and
  // Depth are not consulted.
9735 switch (Op->getOpcode()) {
9736 case SystemZISD::PCREL_WRAPPER:
9737 case SystemZISD::PCREL_OFFSET:
9738 return true;
9739 }
9740 return false;
9741}
9742
9743unsigned
  // Returns the stack probe size for MF: the value of the function's
  // "stack-probe-size" attribute (4096 if absent), rounded down to a
  // multiple of the stack alignment, but never less than the alignment.
9745 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9746 unsigned StackAlign = TFI->getStackAlignment();
9747 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9748 "Unexpected stack alignment");
9749 // The default stack probe size is 4096 if the function has no
9750 // stack-probe-size attribute.
9751 unsigned StackProbeSize =
9752 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9753 // Round down to the stack alignment.
9754 StackProbeSize &= ~(StackAlign - 1);
  // Rounding down may yield 0; fall back to the alignment in that case.
9755 return StackProbeSize ? StackProbeSize : StackAlign;
9756}
9757
9758//===----------------------------------------------------------------------===//
9759// Custom insertion
9760//===----------------------------------------------------------------------===//
9761
9762 // Force base value Base into a register before MI. Return the register.
9764 const SystemZInstrInfo *TII) {
  // In both cases a fresh ADDR64 virtual register is created and the
  // instruction defining it is inserted immediately before MI.
9765 MachineBasicBlock *MBB = MI.getParent();
9766 MachineFunction &MF = *MBB->getParent();
9767 MachineRegisterInfo &MRI = MF.getRegInfo();
9768
9769 if (Base.isReg()) {
9770 // Copy Base into a new virtual register to help register coalescing in
9771 // cases with multiple uses.
9772 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9773 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9774 .add(Base);
9775 return Reg;
9776 }
9777
  // Base is not a register: materialize it with an LA that takes Base plus
  // an immediate 0 and register operand 0 (presumably displacement and
  // "no index" - confirm against the LA operand layout).
9778 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9779 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9780 .add(Base)
9781 .addImm(0)
9782 .addReg(0);
9783 return Reg;
9784}
9785
9786 // The CC operand of MI might be missing a kill marker because there
9787 // were multiple uses of CC, and ISel didn't know which to mark.
9788 // Figure out whether MI should have had a kill marker.
  // Returns false if CC is read again before being redefined, or if the end
  // of the block is reached and CC is live into a successor; true otherwise.
9790 // Scan forward through BB for a use/def of CC.
9792 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9793 const MachineInstr &MI = *miI;
  // A read is checked before a definition, so an instruction that both
  // reads and defines CC counts as a use.
9794 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9795 return false;
9796 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9797 break; // Should have kill-flag - update below.
9798 }
9799
9800 // If we hit the end of the block, check whether CC is live into a
9801 // successor.
9802 if (miI == MBB->end()) {
9803 for (const MachineBasicBlock *Succ : MBB->successors())
9804 if (Succ->isLiveIn(SystemZ::CC))
9805 return false;
9806 }
9807
9808 return true;
9809}
9810
9811 // Return true if it is OK for this Select pseudo-opcode to be cascaded
9812 // together with other Select pseudo-opcodes into a single basic-block with
9813 // a conditional jump around it.
9815 switch (MI.getOpcode()) {
  // The integer, floating-point and vector-register Select pseudos qualify.
9816 case SystemZ::Select32:
9817 case SystemZ::Select64:
9818 case SystemZ::Select128:
9819 case SystemZ::SelectF32:
9820 case SystemZ::SelectF64:
9821 case SystemZ::SelectF128:
9822 case SystemZ::SelectVR32:
9823 case SystemZ::SelectVR64:
9824 case SystemZ::SelectVR128:
9825 return true;
9826
  // Any other opcode is not such a Select pseudo.
9827 default:
9828 return false;
9829 }
9830}
9831
9832 // Helper function, which inserts PHI functions into SinkMBB:
9833 // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9834 // where %FalseValue(i) and %TrueValue(i) are taken from Selects.
9836 MachineBasicBlock *TrueMBB,
9837 MachineBasicBlock *FalseMBB,
9838 MachineBasicBlock *SinkMBB) {
9839 MachineFunction *MF = TrueMBB->getParent();
9841
  // The first Select provides the reference condition; operands 3 and 4 of a
  // Select are read as its CCValid and CCMask immediates.
9842 MachineInstr *FirstMI = Selects.front();
9843 unsigned CCValid = FirstMI->getOperand(3).getImm();
9844 unsigned CCMask = FirstMI->getOperand(4).getImm();
9845
  // All PHIs are inserted in front of the instruction that is first in
  // SinkMBB on entry, so they end up in the order of Selects.
9846 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9847
9848 // As we are creating the PHIs, we have to be careful if there is more than
9849 // one. Later Selects may reference the results of earlier Selects, but later
9850 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9851 // That also means that PHI construction must work forward from earlier to
9852 // later, and that the code must maintain a mapping from earlier PHI's
9853 // destination registers, and the registers that went into the PHI.
9855
9856 for (auto *MI : Selects) {
9857 Register DestReg = MI->getOperand(0).getReg();
9858 Register TrueReg = MI->getOperand(1).getReg();
9859 Register FalseReg = MI->getOperand(2).getReg();
9860
9861 // If this Select we are generating is the opposite condition from
9862 // the jump we generated, then we have to swap the operands for the
9863 // PHI that is going to be generated.
9864 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9865 std::swap(TrueReg, FalseReg);
9866
  // If an input is the result of an earlier PHI created here, use the
  // value that PHI receives on the corresponding edge instead.
9867 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9868 TrueReg = It->second.first;
9869
9870 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9871 FalseReg = It->second.second;
9872
9873 DebugLoc DL = MI->getDebugLoc();
9874 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9875 .addReg(TrueReg).addMBB(TrueMBB)
9876 .addReg(FalseReg).addMBB(FalseMBB);
9877
9878 // Add this PHI to the rewrite table.
9879 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9880 }
9881
  // The function now contains PHIs, so drop the NoPHIs property.
9882 MF->getProperties().resetNoPHIs();
9883}
9884
9886SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9887 MachineBasicBlock *BB) const {
  // Fold the call frame size carried by MI into the frame info and delete MI.
  // No control flow is created; BB is returned unchanged.
9888 MachineFunction &MF = *BB->getParent();
9889 MachineFrameInfo &MFI = MF.getFrameInfo();
9890 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9891 assert(TFL->hasReservedCallFrame(MF) &&
9892 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
  // TFL is only used by the assert above; avoid an unused-variable warning
  // when asserts are compiled out.
9893 (void)TFL;
9894 // Get the MaxCallFrameSize value and erase MI since it serves no further
9895 // purpose as the call frame is statically reserved in the prolog. Set
9896 // AdjustsStack as MI is *not* mapped as a frame instruction.
9897 uint32_t NumBytes = MI.getOperand(0).getImm();
  // Only ever grow the recorded maximum.
9898 if (NumBytes > MFI.getMaxCallFrameSize())
9899 MFI.setMaxCallFrameSize(NumBytes);
9900 MFI.setAdjustsStack(true);
9901
9902 MI.eraseFromParent();
9903 return BB;
9904}
9905
9906 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
9908SystemZTargetLowering::emitSelect(MachineInstr &MI,
9909 MachineBasicBlock *MBB) const {
  // MI and any following Select pseudos that test the same (or the inverted)
  // condition are expanded together into a conditional branch around an
  // empty block plus PHIs in the join block. The join block is returned.
9910 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9911 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9912
9913 unsigned CCValid = MI.getOperand(3).getImm();
9914 unsigned CCMask = MI.getOperand(4).getImm();
9915
9916 // If we have a sequence of Select* pseudo instructions using the
9917 // same condition code value, we want to expand all of them into
9918 // a single pair of basic blocks using the same condition.
9919 SmallVector<MachineInstr*, 8> Selects;
9920 SmallVector<MachineInstr*, 8> DbgValues;
9921 Selects.push_back(&MI);
  // Count is the number of non-debug, non-Select instructions scanned past.
9922 unsigned Count = 0;
9923 for (MachineInstr &NextMI : llvm::make_range(
9924 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9925 if (isSelectPseudo(NextMI)) {
9926 assert(NextMI.getOperand(3).getImm() == CCValid &&
9927 "Bad CCValid operands since CC was not redefined.");
  // A Select with the same or the exactly inverted mask joins the group;
  // any other mask ends the scan.
9928 if (NextMI.getOperand(4).getImm() == CCMask ||
9929 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9930 Selects.push_back(&NextMI);
9931 continue;
9932 }
9933 break;
9934 }
  // Stop at anything that redefines CC or needs custom insertion itself.
9935 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9936 NextMI.usesCustomInsertionHook())
9937 break;
  // Does NextMI read the result of one of the collected Selects?
9938 bool User = false;
9939 for (auto *SelMI : Selects)
9940 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9941 User = true;
9942 break;
9943 }
  // Debug instructions do not end the scan; those using a Select result are
  // remembered so that they can be moved behind the PHIs. A real user, or
  // more than 20 other instructions, ends the scan.
9944 if (NextMI.isDebugInstr()) {
9945 if (User) {
9946 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9947 DbgValues.push_back(&NextMI);
9948 }
9949 } else if (User || ++Count > 20)
9950 break;
9951 }
9952
  // Determine whether CC dies at the last Select before the block is split.
9953 MachineInstr *LastMI = Selects.back();
9954 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9955 checkCCKill(*LastMI, MBB));
9956 MachineBasicBlock *StartMBB = MBB;
9957 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9958 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9959
9960 // Unless CC was killed in the last Select instruction, mark it as
9961 // live-in to both FalseMBB and JoinMBB.
9962 if (!CCKilled) {
9963 FalseMBB->addLiveIn(SystemZ::CC);
9964 JoinMBB->addLiveIn(SystemZ::CC);
9965 }
9966
9967 // StartMBB:
9968 // BRC CCMask, JoinMBB
9969 // # fallthrough to FalseMBB
9970 MBB = StartMBB;
9971 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9972 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9973 MBB->addSuccessor(JoinMBB);
9974 MBB->addSuccessor(FalseMBB);
9975
9976 // FalseMBB:
9977 // # fallthrough to JoinMBB
9978 MBB = FalseMBB;
9979 MBB->addSuccessor(JoinMBB);
9980
9981 // JoinMBB:
9982 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9983 // ...
9984 MBB = JoinMBB;
9985 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
  // The PHIs now define the results, so the Select pseudos can go.
9986 for (auto *SelMI : Selects)
9987 SelMI->eraseFromParent();
9988
  // Move the collected debug values from StartMBB into the join block.
9990 for (auto *DbgMI : DbgValues)
9991 MBB->splice(InsertPos, StartMBB, DbgMI);
9992
9993 return JoinMBB;
9994}
9995
9996 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9997 // StoreOpcode is the store to use and Invert says whether the store should
9998 // happen when the condition is false rather than true. If a STORE ON
9999 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
10000MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
10002 unsigned StoreOpcode,
10003 unsigned STOCOpcode,
10004 bool Invert) const {
  // Returns the block in which insertion continues: the unchanged block when
  // a single store-on-condition instruction is emitted, otherwise the join
  // block that follows the branch-around sequence.
10005 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10006
  // Operands of the pseudo: value, base, displacement, index, CCValid, CCMask.
10007 Register SrcReg = MI.getOperand(0).getReg();
10008 MachineOperand Base = MI.getOperand(1);
10009 int64_t Disp = MI.getOperand(2).getImm();
10010 Register IndexReg = MI.getOperand(3).getReg();
10011 unsigned CCValid = MI.getOperand(4).getImm();
10012 unsigned CCMask = MI.getOperand(5).getImm();
10013 DebugLoc DL = MI.getDebugLoc();
10014
  // Let TII choose the store opcode to use for this displacement.
10015 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
10016
10017 // ISel pattern matching also adds a load memory operand of the same
10018 // address, so take special care to find the storing memory operand.
10019 MachineMemOperand *MMO = nullptr;
10020 for (auto *I : MI.memoperands())
10021 if (I->isStore()) {
10022 MMO = I;
10023 break;
10024 }
10025
10026 // Use STOCOpcode if possible. We could use different store patterns in
10027 // order to avoid matching the index register, but the performance trade-offs
10028 // might be more complicated in that case.
10029 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
  // The instruction stores when its condition holds, so invert the mask if
  // the store is wanted for the false case.
10030 if (Invert)
10031 CCMask ^= CCValid;
10032
10033 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
10034 .addReg(SrcReg)
10035 .add(Base)
10036 .addImm(Disp)
10037 .addImm(CCValid)
10038 .addImm(CCMask)
10039 .addMemOperand(MMO);
10040
10041 MI.eraseFromParent();
10042 return MBB;
10043 }
10044
10045 // Get the condition needed to branch around the store.
10046 if (!Invert)
10047 CCMask ^= CCValid;
10048
10049 MachineBasicBlock *StartMBB = MBB;
10050 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
10051 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
10052
10053 // Unless CC was killed in the CondStore instruction, mark it as
10054 // live-in to both FalseMBB and JoinMBB.
10055 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
10056 !checkCCKill(MI, JoinMBB)) {
10057 FalseMBB->addLiveIn(SystemZ::CC);
10058 JoinMBB->addLiveIn(SystemZ::CC);
10059 }
10060
10061 // StartMBB:
10062 // BRC CCMask, JoinMBB
10063 // # fallthrough to FalseMBB
10064 MBB = StartMBB;
10065 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10066 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
10067 MBB->addSuccessor(JoinMBB);
10068 MBB->addSuccessor(FalseMBB);
10069
10070 // FalseMBB:
10071 // store %SrcReg, %Disp(%Index,%Base)
10072 // # fallthrough to JoinMBB
10073 MBB = FalseMBB;
10074 BuildMI(MBB, DL, TII->get(StoreOpcode))
10075 .addReg(SrcReg)
10076 .add(Base)
10077 .addImm(Disp)
10078 .addReg(IndexReg)
10079 .addMemOperand(MMO);
10080 MBB->addSuccessor(JoinMBB);
10081
10082 MI.eraseFromParent();
10083 return JoinMBB;
10084}
10085
10086// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
// The pseudo is expanded into a compare of the high parts in StartMBB (VECLG
// when Unsigned is true, VECG otherwise) followed by a VCHLGS in a new block
// HiEqMBB. MI is erased and JoinMBB, the block split off after MI, is returned.
// NOTE(review): listing lines 10087 and 10089 (presumably the return type and
// the MBB parameter) are absent from this view of the file.
10088SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
10090 bool Unsigned) const {
10091 MachineFunction &MF = *MBB->getParent();
10092 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10093 MachineRegisterInfo &MRI = MF.getRegInfo();
10094
10095 // Synthetic instruction to compare 128-bit values.
10096 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
10097 Register Op0 = MI.getOperand(0).getReg();
10098 Register Op1 = MI.getOperand(1).getReg();
10099
      // JoinMBB receives everything after MI; HiEqMBB is a new block placed
      // right after StartMBB.
10100 MachineBasicBlock *StartMBB = MBB;
10101 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
10102 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
10103
10104 // StartMBB:
10105 //
10106 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
10107 // Swap the inputs to get:
10108 // CC 1 if high(Op0) > high(Op1)
10109 // CC 2 if high(Op0) < high(Op1)
10110 // CC 0 if high(Op0) == high(Op1)
10111 //
10112 // If CC != 0, we're done, so jump over the next instruction.
10113 //
10114 // VEC[L]G Op1, Op0
10115 // JNE JoinMBB
10116 // # fallthrough to HiEqMBB
10117 MBB = StartMBB;
10118 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
10119 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
10120 .addReg(Op1).addReg(Op0);
      // NOTE(review): the operand line of this BRC (listing line 10122) is
      // absent from this view; the comment above describes it as a JNE to
      // JoinMBB.
10121 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
10123 MBB->addSuccessor(JoinMBB);
10124 MBB->addSuccessor(HiEqMBB);
10125
10126 // HiEqMBB:
10127 //
10128 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10129 // Since we already know the high parts are equal, the CC
10130 // result will only depend on the low parts:
10131 // CC 1 if low(Op0) > low(Op1)
10132 // CC 3 if low(Op0) <= low(Op1)
10133 //
10134 // VCHLGS Tmp, Op0, Op1
10135 // # fallthrough to JoinMBB
10136 MBB = HiEqMBB;
      // Temp only receives the vector result of VCHLGS; nothing emitted by
      // this function reads it.
10137 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
10138 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
10139 .addReg(Op0).addReg(Op1);
10140 MBB->addSuccessor(JoinMBB);
10141
10142 // Mark CC as live-in to JoinMBB.
10143 JoinMBB->addLiveIn(SystemZ::CC);
10144
10145 MI.eraseFromParent();
10146 return JoinMBB;
10147}
10148
10149// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10150// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10151// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10152// whether the field should be inverted after performing BinOpcode (e.g. for
10153// NAND).
//
// The expansion loads the containing 32-bit word in StartMBB and then loops in
// LoopMBB: rotate the word by BitShift, apply the operation, rotate back by
// NegBitShift and try to store the result with a compare-and-swap. MI is
// erased and DoneMBB, the block holding the code after MI, is returned.
10154MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10155 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10156 bool Invert) const {
10157 MachineFunction &MF = *MBB->getParent();
10158 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10159 MachineRegisterInfo &MRI = MF.getRegInfo();
10160
10161 // Extract the operands. Base can be a register or a frame index.
10162 // Src2 can be a register or immediate.
10163 Register Dest = MI.getOperand(0).getReg();
10164 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10165 int64_t Disp = MI.getOperand(2).getImm();
10166 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10167 Register BitShift = MI.getOperand(4).getReg();
10168 Register NegBitShift = MI.getOperand(5).getReg();
10169 unsigned BitSize = MI.getOperand(6).getImm();
10170 DebugLoc DL = MI.getDebugLoc();
10171
10172 // Get the right opcodes for the displacement.
10173 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10174 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10175 assert(LOpcode && CSOpcode && "Displacement out of range");
10176
10177 // Create virtual registers for temporary results.
10178 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10179 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10180 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10181 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10182 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10183
10184 // Insert a basic block for the main loop.
10185 MachineBasicBlock *StartMBB = MBB;
10186 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10187 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10188
10189 // StartMBB:
10190 // ...
10191 // %OrigVal = L Disp(%Base)
10192 // # fall through to LoopMBB
10193 MBB = StartMBB;
10194 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10195 MBB->addSuccessor(LoopMBB);
10196
10197 // LoopMBB:
10198 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10199 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10200 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10201 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10202 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10203 // JNE LoopMBB
10204 // # fall through to DoneMBB
10205 MBB = LoopMBB;
      // On the back edge the PHI takes %Dest, i.e. the register defined by the
      // CS at the bottom of this block.
10206 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10207 .addReg(OrigVal).addMBB(StartMBB)
10208 .addReg(Dest).addMBB(LoopMBB);
10209 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10210 .addReg(OldVal).addReg(BitShift).addImm(0);
10211 if (Invert) {
10212 // Perform the operation normally and then invert every bit of the field.
10213 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10214 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10215 // XILF with the upper BitSize bits set.
      // For example, BitSize == 8 yields the mask 0xFF000000.
10216 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10217 .addReg(Tmp).addImm(-1U << (32 - BitSize));
10218 } else if (BinOpcode)
10219 // A simple binary operation.
10220 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10221 .addReg(RotatedOldVal)
10222 .add(Src2);
10223 else
      // BinOpcode == 0, i.e. the ATOMIC_SWAPW case; Src2 is read with getReg()
      // here, so it has to be a register operand on this path.
10224 // Use RISBG to rotate Src2 into position and use it to replace the
10225 // field in RotatedOldVal.
10226 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10227 .addReg(RotatedOldVal).addReg(Src2.getReg())
10228 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10229 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10230 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10231 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10232 .addReg(OldVal)
10233 .addReg(NewVal)
10234 .add(Base)
10235 .addImm(Disp);
      // NOTE(review): the operand line of this BRC (listing line 10237) is
      // absent from this view; the block comment above describes it as a JNE
      // back to LoopMBB.
10236 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10238 MBB->addSuccessor(LoopMBB);
10239 MBB->addSuccessor(DoneMBB);
10240
10241 MI.eraseFromParent();
10242 return DoneMBB;
10243}
10244
10245// Implement EmitInstrWithCustomInserter for subword pseudo
10246// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10247// instruction that should be used to compare the current field with the
10248// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10249// for when the current field should be kept.
//
// The expansion loads the containing 32-bit word in StartMBB and then loops
// over LoopMBB (compare), UseAltMBB (insert Src2 into the field) and UpdateMBB
// (rotate back and compare-and-swap). MI is erased and DoneMBB, the block
// holding the code after MI, is returned.
10250MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10251 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10252 unsigned KeepOldMask) const {
10253 MachineFunction &MF = *MBB->getParent();
10254 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10255 MachineRegisterInfo &MRI = MF.getRegInfo();
10256
10257 // Extract the operands. Base can be a register or a frame index.
10258 Register Dest = MI.getOperand(0).getReg();
10259 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10260 int64_t Disp = MI.getOperand(2).getImm();
10261 Register Src2 = MI.getOperand(3).getReg();
10262 Register BitShift = MI.getOperand(4).getReg();
10263 Register NegBitShift = MI.getOperand(5).getReg();
10264 unsigned BitSize = MI.getOperand(6).getImm();
10265 DebugLoc DL = MI.getDebugLoc();
10266
10267 // Get the right opcodes for the displacement.
10268 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10269 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10270 assert(LOpcode && CSOpcode && "Displacement out of range");
10271
10272 // Create virtual registers for temporary results.
10273 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10274 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10275 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10276 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10277 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10278 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10279
10280 // Insert 3 basic blocks for the loop.
10281 MachineBasicBlock *StartMBB = MBB;
10282 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10283 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10284 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10285 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10286
10287 // StartMBB:
10288 // ...
10289 // %OrigVal = L Disp(%Base)
10290 // # fall through to LoopMBB
10291 MBB = StartMBB;
10292 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10293 MBB->addSuccessor(LoopMBB);
10294
10295 // LoopMBB:
10296 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10297 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10298 // CompareOpcode %RotatedOldVal, %Src2
10299 // BRC KeepOldMask, UpdateMBB
10300 MBB = LoopMBB;
10301 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10302 .addReg(OrigVal).addMBB(StartMBB)
10303 .addReg(Dest).addMBB(UpdateMBB);
10304 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10305 .addReg(OldVal).addReg(BitShift).addImm(0);
10306 BuildMI(MBB, DL, TII->get(CompareOpcode))
10307 .addReg(RotatedOldVal).addReg(Src2);
      // Branching straight to UpdateMBB skips UseAltMBB, so the PHI there
      // selects %RotatedOldVal unchanged.
10308 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10309 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10310 MBB->addSuccessor(UpdateMBB);
10311 MBB->addSuccessor(UseAltMBB);
10312
10313 // UseAltMBB:
10314 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10315 // # fall through to UpdateMBB
10316 MBB = UseAltMBB;
10317 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10318 .addReg(RotatedOldVal).addReg(Src2)
10319 .addImm(32).addImm(31 + BitSize).addImm(0);
10320 MBB->addSuccessor(UpdateMBB);
10321
10322 // UpdateMBB:
10323 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10324 // [ %RotatedAltVal, UseAltMBB ]
10325 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10326 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10327 // JNE LoopMBB
10328 // # fall through to DoneMBB
10329 MBB = UpdateMBB;
10330 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10331 .addReg(RotatedOldVal).addMBB(LoopMBB)
10332 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10333 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10334 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10335 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10336 .addReg(OldVal)
10337 .addReg(NewVal)
10338 .add(Base)
10339 .addImm(Disp);
      // NOTE(review): the operand line of this BRC (listing line 10341) is
      // absent from this view; the block comment above describes it as a JNE
      // back to LoopMBB.
10340 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10342 MBB->addSuccessor(LoopMBB);
10343 MBB->addSuccessor(DoneMBB);
10344
10345 MI.eraseFromParent();
10346 return DoneMBB;
10347}
10348
10349// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10350// instruction MI.
//
// The expansion loads the containing 32-bit word in StartMBB, compares the
// zero-extended field with CmpVal in LoopMBB and attempts the store with a
// compare-and-swap in SetMBB. MI is erased and DoneMBB, the block holding the
// code after MI, is returned.
// NOTE(review): listing line 10351 (presumably the return type) is absent
// from this view of the file.
10352SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10353 MachineBasicBlock *MBB) const {
10354 MachineFunction &MF = *MBB->getParent();
10355 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10356 MachineRegisterInfo &MRI = MF.getRegInfo();
10357
10358 // Extract the operands. Base can be a register or a frame index.
10359 Register Dest = MI.getOperand(0).getReg();
10360 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10361 int64_t Disp = MI.getOperand(2).getImm();
10362 Register CmpVal = MI.getOperand(3).getReg();
10363 Register OrigSwapVal = MI.getOperand(4).getReg();
10364 Register BitShift = MI.getOperand(5).getReg();
10365 Register NegBitShift = MI.getOperand(6).getReg();
10366 int64_t BitSize = MI.getOperand(7).getImm();
10367 DebugLoc DL = MI.getDebugLoc();
10368
10369 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10370
10371 // Get the right opcodes for the displacement and zero-extension.
      // LLCR is used when BitSize is 8; any other BitSize selects LLHR.
10372 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10373 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10374 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10375 assert(LOpcode && CSOpcode && "Displacement out of range");
10376
10377 // Create virtual registers for temporary results.
10378 Register OrigOldVal = MRI.createVirtualRegister(RC);
10379 Register OldVal = MRI.createVirtualRegister(RC);
10380 Register SwapVal = MRI.createVirtualRegister(RC);
10381 Register StoreVal = MRI.createVirtualRegister(RC);
10382 Register OldValRot = MRI.createVirtualRegister(RC);
10383 Register RetryOldVal = MRI.createVirtualRegister(RC);
10384 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10385
10386 // Insert 2 basic blocks for the loop.
10387 MachineBasicBlock *StartMBB = MBB;
10388 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10389 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10390 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10391
10392 // StartMBB:
10393 // ...
10394 // %OrigOldVal = L Disp(%Base)
10395 // # fall through to LoopMBB
10396 MBB = StartMBB;
10397 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10398 .add(Base)
10399 .addImm(Disp)
10400 .addReg(0);
10401 MBB->addSuccessor(LoopMBB);
10402
10403 // LoopMBB:
10404 // %OldVal = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
10405 // %SwapVal = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
10406 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10407 // ^^ The low BitSize bits contain the field
10408 // of interest.
10409 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10410 // ^^ Replace the upper 32-BitSize bits of the
10411 // swap value with those that we loaded and rotated.
10412 // %Dest = LL[CH] %OldValRot
10413 // CR %Dest, %CmpVal
10414 // JNE DoneMBB
10415 // # Fall through to SetMBB
10416 MBB = LoopMBB;
10417 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10418 .addReg(OrigOldVal).addMBB(StartMBB)
10419 .addReg(RetryOldVal).addMBB(SetMBB);
10420 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10421 .addReg(OrigSwapVal).addMBB(StartMBB)
10422 .addReg(RetrySwapVal).addMBB(SetMBB);
10423 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10424 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10425 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10426 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10427 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10428 .addReg(OldValRot);
10429 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10430 .addReg(Dest).addReg(CmpVal);
      // NOTE(review): the operand lines of this BRC (listing lines 10432-10433)
      // are absent from this view; the block comment above describes it as a
      // JNE to DoneMBB.
10431 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10434 MBB->addSuccessor(DoneMBB);
10435 MBB->addSuccessor(SetMBB);
10436
10437 // SetMBB:
10438 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10439 // ^^ Rotate the new field to its proper position.
10440 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10441 // JNE LoopMBB
10442 // # fall through to DoneMBB
10443 MBB = SetMBB;
10444 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10445 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10446 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10447 .addReg(OldVal)
10448 .addReg(StoreVal)
10449 .add(Base)
10450 .addImm(Disp);
      // NOTE(review): the operand line of this BRC (listing line 10452) is
      // absent from this view; the block comment above describes it as a JNE
      // back to LoopMBB.
10451 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10453 MBB->addSuccessor(LoopMBB);
10454 MBB->addSuccessor(DoneMBB);
10455
10456 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10457 // to the block after the loop. At this point, CC may have been defined
10458 // either by the CR in LoopMBB or by the CS in SetMBB.
10459 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10460 DoneMBB->addLiveIn(SystemZ::CC);
10461
10462 MI.eraseFromParent();
10463 return DoneMBB;
10464}
10465
10466// Emit a move from two GR64s to a GR128.
// Operand 1 of MI becomes the subreg_h64 part and operand 2 the subreg_l64
// part of a REG_SEQUENCE that defines operand 0. The REG_SEQUENCE is inserted
// before MI, MI is erased, and MBB is returned unchanged (no blocks are
// created).
// NOTE(review): listing line 10467 (presumably the return type) is absent
// from this view of the file.
10468SystemZTargetLowering::emitPair128(MachineInstr &MI,
10469 MachineBasicBlock *MBB) const {
10470 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10471 const DebugLoc &DL = MI.getDebugLoc();
10472
10473 Register Dest = MI.getOperand(0).getReg();
10474 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10475 .add(MI.getOperand(1))
10476 .addImm(SystemZ::subreg_h64)
10477 .add(MI.getOperand(2))
10478 .addImm(SystemZ::subreg_l64);
10479 MI.eraseFromParent();
10480 return MBB;
10481}
10482
10483// Emit an extension from a GR64 to a GR128. ClearEven is true
10484// if the high register of the GR128 value must be cleared or false if
10485// it's "don't care".
// Operand 0 of MI is the GR128 destination and operand 1 the GR64 source.
// All new instructions are inserted before MI in MBB; MI is then erased and
// MBB is returned unchanged (no blocks are created).
// NOTE(review): listing line 10487 (presumably the MBB parameter) is absent
// from this view of the file.
10486MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10488 bool ClearEven) const {
10489 MachineFunction &MF = *MBB->getParent();
10490 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10491 MachineRegisterInfo &MRI = MF.getRegInfo();
10492 DebugLoc DL = MI.getDebugLoc();
10493
10494 Register Dest = MI.getOperand(0).getReg();
10495 Register Src = MI.getOperand(1).getReg();
10496 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10497
      // Start from an undefined 128-bit value.
10498 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10499 if (ClearEven) {
      // Materialize a 64-bit zero with LLILL and insert it as subreg_h64.
10500 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10501 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10502
10503 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10504 .addImm(0);
10505 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10506 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10507 In128 = NewIn128;
10508 }
      // Src always ends up as subreg_l64 of Dest.
10509 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10510 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10511
10512 MI.eraseFromParent();
10513 return MBB;
10514}
10515
// Implement EmitInstrWithCustomInserter for a memory-to-memory pseudo MI by
// expanding it into one or more Opcode instructions (the code below has
// special handling for SystemZ::MVC and SystemZ::CLC). Operands 0 and 1 of MI
// are the destination base and displacement. When IsMemset is false, operands
// 2 and 3 are the source base and displacement and operand 4 is the length;
// when IsMemset is true, operand 2 is the length and operand 3 is the byte to
// store (immediate or register). The length operand is either an immediate or
// a register and holds the length minus an adjustment (1, or 2 for memset).
// MI is erased and the final value of MBB is returned (EndMBB when one was
// created).
// NOTE(review): several lines of this function are absent from this view of
// the file (the listing numbering skips, e.g. 10516, 10518, 10537, 10557,
// 10615 and the operand line of every BRC), so the return type, the MBB
// parameter and the branch conditions are not visible here.
10517SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10519 unsigned Opcode, bool IsMemset) const {
10520 MachineFunction &MF = *MBB->getParent();
10521 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10522 MachineRegisterInfo &MRI = MF.getRegInfo();
10523 DebugLoc DL = MI.getDebugLoc();
10524
10525 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10526 uint64_t DestDisp = MI.getOperand(1).getImm();
10527 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10528 uint64_t SrcDisp;
10529
10530 // Fold the displacement Disp if it is out of range.
      // NOTE(review): the local Opcode below shadows the Opcode parameter of
      // the enclosing function. Listing line 10537 is absent from this view;
      // presumably it rebinds Base to the new register - confirm.
10531 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10532 if (!isUInt<12>(Disp)) {
10533 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10534 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10535 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10536 .add(Base).addImm(Disp).addReg(0);
10538 Disp = 0;
10539 }
10540 };
10541
10542 if (!IsMemset) {
10543 SrcBase = earlyUseOperand(MI.getOperand(2));
10544 SrcDisp = MI.getOperand(3).getImm();
10545 } else {
      // For memset the source is the destination itself and the destination
      // displacement is advanced by one byte.
10546 SrcBase = DestBase;
10547 SrcDisp = DestDisp++;
10548 foldDisplIfNeeded(DestBase, DestDisp);
10549 }
10550
10551 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10552 bool IsImmForm = LengthMO.isImm();
10553 bool IsRegForm = !IsImmForm;
10554
10555 // Build and insert one Opcode of Length, with special treatment for memset.
      // For memset, a single byte is first stored at SBase/SDisp (MVI for an
      // immediate byte, STC for a register) and Length is reduced by one.
      // NOTE(review): the final BuildMI inserts into *MBB (captured by
      // reference) rather than *InsMBB; every call visible in this function
      // passes the current MBB as InsMBB, so the two coincide.
10556 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10558 MachineOperand DBase, uint64_t DDisp,
10559 MachineOperand SBase, uint64_t SDisp,
10560 unsigned Length) -> void {
10561 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10562 if (IsMemset) {
10563 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10564 if (ByteMO.isImm())
10565 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10566 .add(SBase).addImm(SDisp).add(ByteMO);
10567 else
10568 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10569 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10570 if (--Length == 0)
10571 return;
10572 }
10573 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10574 .add(DBase).addImm(DDisp).addImm(Length)
10575 .add(SBase).addImm(SDisp)
10576 .setMemRefs(MI.memoperands());
10577 };
10578
10579 bool NeedsLoop = false;
10580 uint64_t ImmLength = 0;
10581 Register LenAdjReg = SystemZ::NoRegister;
10582 if (IsImmForm) {
10583 ImmLength = LengthMO.getImm();
10584 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
      // A zero-length operation needs no code at all.
10585 if (ImmLength == 0) {
10586 MI.eraseFromParent();
10587 return MBB;
10588 }
10589 if (Opcode == SystemZ::CLC) {
10590 if (ImmLength > 3 * 256)
10591 // A two-CLC sequence is a clear win over a loop, not least because
10592 // it needs only one branch. A three-CLC sequence needs the same
10593 // number of branches as a loop (i.e. 2), but is shorter. That
10594 // brings us to lengths greater than 768 bytes. It seems relatively
10595 // likely that a difference will be found within the first 768 bytes,
10596 // so we just optimize for the smallest number of branch
10597 // instructions, in order to avoid polluting the prediction buffer
10598 // too much.
10599 NeedsLoop = true;
10600 } else if (ImmLength > 6 * 256)
10601 // The heuristic we use is to prefer loops for anything that would
10602 // require 7 or more MVCs. With these kinds of sizes there isn't much
10603 // to choose between straight-line code and looping code, since the
10604 // time will be dominated by the MVCs themselves.
10605 NeedsLoop = true;
10606 } else {
      // A register length always uses the loop form.
10607 NeedsLoop = true;
10608 LenAdjReg = LengthMO.getReg();
10609 }
10610
10611 // When generating more than one CLC, all but the last will need to
10612 // branch to the end when a difference is found.
      // NOTE(review): listing line 10615, which supplies the non-null value of
      // EndMBB, is absent from this view.
10613 MachineBasicBlock *EndMBB =
10614 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10616 : nullptr);
10617
10618 if (NeedsLoop) {
      // StartCountReg holds the number of full 256-byte iterations.
10619 Register StartCountReg =
10620 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10621 if (IsImmForm) {
10622 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10623 ImmLength &= 255;
10624 } else {
10625 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10626 .addReg(LenAdjReg)
10627 .addReg(0)
10628 .addImm(8);
10629 }
10630
      // A base operand that is the null register is replaced by a register
      // loaded with zero.
10631 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10632 auto loadZeroAddress = [&]() -> MachineOperand {
10633 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10634 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10635 return MachineOperand::CreateReg(Reg, false);
10636 };
10637 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10638 DestBase = loadZeroAddress();
10639 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10640 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10641
10642 MachineBasicBlock *StartMBB = nullptr;
10643 MachineBasicBlock *LoopMBB = nullptr;
10644 MachineBasicBlock *NextMBB = nullptr;
10645 MachineBasicBlock *DoneMBB = nullptr;
10646 MachineBasicBlock *AllDoneMBB = nullptr;
10647
      // When source and destination share one base, the same registers are
      // used for both sides throughout the loop.
10648 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10649 Register StartDestReg =
10650 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10651
10652 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10653 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10654 Register ThisDestReg =
10655 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10656 Register NextSrcReg = MRI.createVirtualRegister(RC);
10657 Register NextDestReg =
10658 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10659 RC = &SystemZ::GR64BitRegClass;
10660 Register ThisCountReg = MRI.createVirtualRegister(RC);
10661 Register NextCountReg = MRI.createVirtualRegister(RC);
10662
10663 if (IsRegForm) {
10664 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10665 StartMBB = SystemZ::emitBlockAfter(MBB);
10666 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10667 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10668 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10669
10670 // MBB:
10671 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10672 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10673 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10674 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10676 .addMBB(AllDoneMBB);
10677 MBB->addSuccessor(AllDoneMBB);
10678 if (!IsMemset)
10679 MBB->addSuccessor(StartMBB);
10680 else {
10681 // MemsetOneCheckMBB:
10682 // # Jump to MemsetOneMBB for a memset of length 1, or
10683 // # fall thru to StartMBB.
10684 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10685 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10686 MBB->addSuccessor(MemsetOneCheckMBB);
10687 MBB = MemsetOneCheckMBB;
10688 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10689 .addReg(LenAdjReg).addImm(-1);
10690 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10692 .addMBB(MemsetOneMBB);
10693 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10694 MBB->addSuccessor(StartMBB, {90, 100});
10695
10696 // MemsetOneMBB:
10697 // # Jump back to AllDoneMBB after a single MVI or STC.
10698 MBB = MemsetOneMBB;
10699 insertMemMemOp(MBB, MBB->end(),
10700 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10701 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10702 1);
10703 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10704 MBB->addSuccessor(AllDoneMBB);
10705 }
10706
10707 // StartMBB:
10708 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10709 MBB = StartMBB;
10710 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10711 .addReg(StartCountReg).addImm(0);
10712 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10714 .addMBB(DoneMBB);
10715 MBB->addSuccessor(DoneMBB);
10716 MBB->addSuccessor(LoopMBB);
10717 }
10718 else {
10719 StartMBB = MBB;
10720 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10721 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10722 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10723
10724 // StartMBB:
10725 // # fall through to LoopMBB
10726 MBB->addSuccessor(LoopMBB);
10727
      // The straight-line code for the remaining bytes continues from the
      // addresses the loop leaves in NextDestReg/NextSrcReg.
10728 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10729 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10730 if (EndMBB && !ImmLength)
10731 // If the loop handled the whole CLC range, DoneMBB will be empty with
10732 // CC live-through into EndMBB, so add it as live-in.
10733 DoneMBB->addLiveIn(SystemZ::CC);
10734 }
10735
10736 // LoopMBB:
10737 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10738 // [ %NextDestReg, NextMBB ]
10739 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10740 // [ %NextSrcReg, NextMBB ]
10741 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10742 // [ %NextCountReg, NextMBB ]
10743 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10744 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10745 // ( JLH EndMBB )
10746 //
10747 // The prefetch is used only for MVC. The JLH is used only for CLC.
10748 MBB = LoopMBB;
10749 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10750 .addReg(StartDestReg).addMBB(StartMBB)
10751 .addReg(NextDestReg).addMBB(NextMBB);
10752 if (!HaveSingleBase)
10753 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10754 .addReg(StartSrcReg).addMBB(StartMBB)
10755 .addReg(NextSrcReg).addMBB(NextMBB);
10756 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10757 .addReg(StartCountReg).addMBB(StartMBB)
10758 .addReg(NextCountReg).addMBB(NextMBB);
10759 if (Opcode == SystemZ::MVC)
10760 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10762 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10763 insertMemMemOp(MBB, MBB->end(),
10764 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10765 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10766 if (EndMBB) {
10767 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10769 .addMBB(EndMBB);
10770 MBB->addSuccessor(EndMBB);
10771 MBB->addSuccessor(NextMBB);
10772 }
10773
10774 // NextMBB:
10775 // %NextDestReg = LA 256(%ThisDestReg)
10776 // %NextSrcReg = LA 256(%ThisSrcReg)
10777 // %NextCountReg = AGHI %ThisCountReg, -1
10778 // CGHI %NextCountReg, 0
10779 // JLH LoopMBB
10780 // # fall through to DoneMBB
10781 //
10782 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10783 MBB = NextMBB;
10784 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10785 .addReg(ThisDestReg).addImm(256).addReg(0);
10786 if (!HaveSingleBase)
10787 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10788 .addReg(ThisSrcReg).addImm(256).addReg(0);
10789 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10790 .addReg(ThisCountReg).addImm(-1);
10791 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10792 .addReg(NextCountReg).addImm(0);
10793 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10795 .addMBB(LoopMBB);
10796 MBB->addSuccessor(LoopMBB);
10797 MBB->addSuccessor(DoneMBB);
10798
10799 MBB = DoneMBB;
10800 if (IsRegForm) {
10801 // DoneMBB:
10802 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10803 // # Use EXecute Relative Long for the remainder of the bytes. The target
10804 // instruction of the EXRL will have a length field of 1 since 0 is an
10805 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10806 // 0xff) + 1.
10807 // # Fall through to AllDoneMBB.
10808 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10809 Register RemDestReg = HaveSingleBase ? RemSrcReg
10810 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10811 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10812 .addReg(StartDestReg).addMBB(StartMBB)
10813 .addReg(NextDestReg).addMBB(NextMBB);
10814 if (!HaveSingleBase)
10815 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10816 .addReg(StartSrcReg).addMBB(StartMBB)
10817 .addReg(NextSrcReg).addMBB(NextMBB);
10818 if (IsMemset)
10819 insertMemMemOp(MBB, MBB->end(),
10820 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10821 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10822 MachineInstrBuilder EXRL_MIB =
10823 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10824 .addImm(Opcode)
10825 .addReg(LenAdjReg)
10826 .addReg(RemDestReg).addImm(DestDisp)
10827 .addReg(RemSrcReg).addImm(SrcDisp);
10828 MBB->addSuccessor(AllDoneMBB);
10829 MBB = AllDoneMBB;
      // For every Opcode other than MVC the EXRL is marked as implicitly
      // defining CC.
10830 if (Opcode != SystemZ::MVC) {
10831 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10832 if (EndMBB)
10833 MBB->addLiveIn(SystemZ::CC);
10834 }
10835 }
      // PHI instructions were created above, so clear the function's NoPHIs
      // property.
10836 MF.getProperties().resetNoPHIs();
10837 }
10838
10839 // Handle any remaining bytes with straight-line code.
10840 while (ImmLength > 0) {
10841 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10842 // The previous iteration might have created out-of-range displacements.
10843 // Apply them using LA/LAY if so.
10844 foldDisplIfNeeded(DestBase, DestDisp);
10845 foldDisplIfNeeded(SrcBase, SrcDisp);
10846 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10847 DestDisp += ThisLength;
10848 SrcDisp += ThisLength;
10849 ImmLength -= ThisLength;
10850 // If there's another CLC to go, branch to the end if a difference
10851 // was found.
10852 if (EndMBB && ImmLength > 0) {
10853 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10854 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10856 .addMBB(EndMBB);
10857 MBB->addSuccessor(EndMBB);
10858 MBB->addSuccessor(NextMBB);
10859 MBB = NextMBB;
10860 }
10861 }
      // When an end block exists, the last block falls through to it and CC is
      // live on entry.
10862 if (EndMBB) {
10863 MBB->addSuccessor(EndMBB);
10864 MBB = EndMBB;
10865 MBB->addLiveIn(SystemZ::CC);
10866 }
10867
10868 MI.eraseFromParent();
10869 return MBB;
10870}
10871
10872// Decompose string pseudo-instruction MI into a loop that continually performs
10873// Opcode until CC != 3.
10874MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10875 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10876 MachineFunction &MF = *MBB->getParent();
10877 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10878 MachineRegisterInfo &MRI = MF.getRegInfo();
10879 DebugLoc DL = MI.getDebugLoc();
10880
10881 uint64_t End1Reg = MI.getOperand(0).getReg();
10882 uint64_t Start1Reg = MI.getOperand(1).getReg();
10883 uint64_t Start2Reg = MI.getOperand(2).getReg();
10884 uint64_t CharReg = MI.getOperand(3).getReg();
10885
10886 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10887 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10888 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10889 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10890
10891 MachineBasicBlock *StartMBB = MBB;
10892 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10893 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10894
10895 // StartMBB:
10896 // # fall through to LoopMBB
10897 MBB->addSuccessor(LoopMBB);
10898
10899 // LoopMBB:
10900 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10901 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10902 // R0L = %CharReg
10903 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10904 // JO LoopMBB
10905 // # fall through to DoneMBB
10906 //
10907 // The load of R0L can be hoisted by post-RA LICM.
10908 MBB = LoopMBB;
10909
10910 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10911 .addReg(Start1Reg).addMBB(StartMBB)
10912 .addReg(End1Reg).addMBB(LoopMBB);
10913 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10914 .addReg(Start2Reg).addMBB(StartMBB)
10915 .addReg(End2Reg).addMBB(LoopMBB);
10916 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10917 BuildMI(MBB, DL, TII->get(Opcode))
10918 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10919 .addReg(This1Reg).addReg(This2Reg);
10920 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10922 MBB->addSuccessor(LoopMBB);
10923 MBB->addSuccessor(DoneMBB);
10924
10925 DoneMBB->addLiveIn(SystemZ::CC);
10926
10927 MI.eraseFromParent();
10928 return DoneMBB;
10929}
10930
10931// Update TBEGIN instruction with final opcode and register clobbers.
10932MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10933 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10934 bool NoFloat) const {
10935 MachineFunction &MF = *MBB->getParent();
10936 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10937 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10938
10939 // Update opcode.
10940 MI.setDesc(TII->get(Opcode));
10941
10942 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10943 // Make sure to add the corresponding GRSM bits if they are missing.
10944 uint64_t Control = MI.getOperand(2).getImm();
10945 static const unsigned GPRControlBit[16] = {
10946 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10947 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10948 };
10949 Control |= GPRControlBit[15];
10950 if (TFI->hasFP(MF))
10951 Control |= GPRControlBit[11];
10952 MI.getOperand(2).setImm(Control);
10953
10954 // Add GPR clobbers.
10955 for (int I = 0; I < 16; I++) {
10956 if ((Control & GPRControlBit[I]) == 0) {
10957 unsigned Reg = SystemZMC::GR64Regs[I];
10958 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10959 }
10960 }
10961
10962 // Add FPR/VR clobbers.
10963 if (!NoFloat && (Control & 4) != 0) {
10964 if (Subtarget.hasVector()) {
10965 for (unsigned Reg : SystemZMC::VR128Regs) {
10966 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10967 }
10968 } else {
10969 for (unsigned Reg : SystemZMC::FP64Regs) {
10970 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10971 }
10972 }
10973 }
10974
10975 return MBB;
10976}
10977
10978MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10979 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10980 MachineFunction &MF = *MBB->getParent();
10981 MachineRegisterInfo *MRI = &MF.getRegInfo();
10982 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10983 DebugLoc DL = MI.getDebugLoc();
10984
10985 Register SrcReg = MI.getOperand(0).getReg();
10986
10987 // Create new virtual register of the same class as source.
10988 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10989 Register DstReg = MRI->createVirtualRegister(RC);
10990
10991 // Replace pseudo with a normal load-and-test that models the def as
10992 // well.
10993 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10994 .addReg(SrcReg)
10995 .setMIFlags(MI.getFlags());
10996 MI.eraseFromParent();
10997
10998 return MBB;
10999}
11000
// Expand a PROBED_ALLOCA pseudo into an explicit probing loop.
//
// Operand 0 of MI is the destination register and operand 2 the requested
// size.  The expansion builds four new blocks after the start block:
//   LoopTestMBB - compares the remaining size (a PHI) with ProbeSize
//   LoopBodyMBB - subtracts ProbeSize from the remaining size and from R15D,
//                 then issues a CG carrying the VolLdMMO memory operand
//   TailTestMBB - compares the remaining size with 0
//   TailMBB     - subtracts the remainder from R15D and issues a final CG
// DoneMBB (the split-off rest of the original block) starts with a COPY of
// R15D into the destination register.  MI is erased and DoneMBB is returned.
//
// NOTE(review): this extract is missing several source lines (doxygen lines
// 11002, 11019, 11038 and 11066): the end of the signature, the tail of the
// VolLdMMO initializer and the leading operands of both BRC instructions.
// Restore them from the upstream file before relying on this text.
11001MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
// NOTE(review): remainder of the parameter list / opening brace missing here.
11003 MachineFunction &MF = *MBB->getParent();
11004 MachineRegisterInfo *MRI = &MF.getRegInfo();
11005 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
11006 DebugLoc DL = MI.getDebugLoc();
11007 const unsigned ProbeSize = getStackProbeSize(MF);
11008 Register DstReg = MI.getOperand(0).getReg();
11009 Register SizeReg = MI.getOperand(2).getReg();
11010
11011 MachineBasicBlock *StartMBB = MBB;
11012 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
11013 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
11014 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
11015 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
11016 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
11017
// Memory operand attached to the probing CG instructions below.
// NOTE(review): the continuation of this call (flags/size/alignment) is
// missing from the extract.
11018 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
11020
// PHIReg: size still to be allocated on entry to LoopTestMBB.
// IncReg: that size after one ProbeSize step (defined in LoopBodyMBB).
11021 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11022 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11023
11024 // LoopTestMBB
11025 // BRC TailTestMBB
11026 // # fallthrough to LoopBodyMBB
11027 StartMBB->addSuccessor(LoopTestMBB);
11028 MBB = LoopTestMBB;
11029 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
11030 .addReg(SizeReg)
11031 .addMBB(StartMBB)
11032 .addReg(IncReg)
11033 .addMBB(LoopBodyMBB);
11034 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
11035 .addReg(PHIReg)
11036 .addImm(ProbeSize);
// NOTE(review): the condition-code operands of this BRC are missing.
11037 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11039 .addMBB(TailTestMBB);
11040 MBB->addSuccessor(LoopBodyMBB);
11041 MBB->addSuccessor(TailTestMBB);
11042
11043 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
11044 // J LoopTestMBB
11045 MBB = LoopBodyMBB;
11046 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
11047 .addReg(PHIReg)
11048 .addImm(ProbeSize);
11049 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
11050 .addReg(SystemZ::R15D)
11051 .addImm(ProbeSize);
// The compare addresses R15D + (ProbeSize - 8) with no index register.
11052 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11053 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
11054 .setMemRefs(VolLdMMO);
11055 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
11056 MBB->addSuccessor(LoopTestMBB);
11057
11058 // TailTestMBB
11059 // BRC DoneMBB
11060 // # fallthrough to TailMBB
11061 MBB = TailTestMBB;
11062 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
11063 .addReg(PHIReg)
11064 .addImm(0);
// NOTE(review): the condition-code operands of this BRC are missing.
11065 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11067 .addMBB(DoneMBB);
11068 MBB->addSuccessor(TailMBB);
11069 MBB->addSuccessor(DoneMBB);
11070
11071 // TailMBB
11072 // # fallthrough to DoneMBB
11073 MBB = TailMBB;
11074 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
11075 .addReg(SystemZ::R15D)
11076 .addReg(PHIReg);
// The compare addresses R15D - 8 with PHIReg as the index register.
11077 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11078 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
11079 .setMemRefs(VolLdMMO);
11080 MBB->addSuccessor(DoneMBB);
11081
11082 // DoneMBB
// Hand the final R15D value to the pseudo's destination register.
11083 MBB = DoneMBB;
11084 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
11085 .addReg(SystemZ::R15D);
11086
11087 MI.eraseFromParent();
11088 return DoneMBB;
11089}
11090
11091SDValue SystemZTargetLowering::
11092getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
11093 MachineFunction &MF = DAG.getMachineFunction();
11094 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
11095 SDLoc DL(SP);
11096 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
11097 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
11098}
11099
11100// Replace a _STACKGUARD_DAG pseudo with a _STACKGUARD pseudo, adding
11101// a dead early-clobber def reg that will be used as a scratch register
11102// when the pseudo is expanded.
11103MachineBasicBlock *SystemZTargetLowering::emitStackGuardPseudo(
11104 MachineInstr &MI, MachineBasicBlock *MBB, unsigned PseudoOp) const {
11105 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
11106 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
11107 DebugLoc DL = MI.getDebugLoc();
11108 Register AddrReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11109 BuildMI(*MBB, MI, DL, TII->get(PseudoOp), AddrReg)
11110 .addFrameIndex(MI.getOperand(0).getIndex())
11111 .addImm(MI.getOperand(1).getImm());
11112 MI.eraseFromParent();
11113 return MBB;
11114}
11115
// Custom-inserter dispatch: selects the expansion helper for MI based on its
// opcode and returns whatever block that helper returns.  Opcodes not listed
// here reach llvm_unreachable.
//
// NOTE(review): the function header (doxygen lines 11116-11117) is missing
// from this extract; presumably this is the body of
// SystemZTargetLowering::EmitInstrWithCustomInserter(MI, MBB) -- confirm
// against the upstream file.
11118 switch (MI.getOpcode()) {
// Call-frame setup/teardown pseudos.
11119 case SystemZ::ADJCALLSTACKDOWN:
11120 case SystemZ::ADJCALLSTACKUP:
11121 return emitAdjCallStack(MI, MBB);
11122
// Select pseudos of every register class share one expansion.
11123 case SystemZ::Select32:
11124 case SystemZ::Select64:
11125 case SystemZ::Select128:
11126 case SystemZ::SelectF32:
11127 case SystemZ::SelectF64:
11128 case SystemZ::SelectF128:
11129 case SystemZ::SelectVR32:
11130 case SystemZ::SelectVR64:
11131 case SystemZ::SelectVR128:
11132 return emitSelect(MI, MBB);
11133
// Conditional stores.  Arguments after MBB: the plain store opcode, a second
// store opcode (0 where none is given), and whether this is the "Inv"
// variant of the pseudo.
11134 case SystemZ::CondStore8Mux:
11135 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
11136 case SystemZ::CondStore8MuxInv:
11137 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
11138 case SystemZ::CondStore16Mux:
11139 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
11140 case SystemZ::CondStore16MuxInv:
11141 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
11142 case SystemZ::CondStore32Mux:
11143 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
11144 case SystemZ::CondStore32MuxInv:
11145 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
11146 case SystemZ::CondStore8:
11147 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
11148 case SystemZ::CondStore8Inv:
11149 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
11150 case SystemZ::CondStore16:
11151 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
11152 case SystemZ::CondStore16Inv:
11153 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
11154 case SystemZ::CondStore32:
11155 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
11156 case SystemZ::CondStore32Inv:
11157 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
11158 case SystemZ::CondStore64:
11159 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
11160 case SystemZ::CondStore64Inv:
11161 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
11162 case SystemZ::CondStoreF32:
11163 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
11164 case SystemZ::CondStoreF32Inv:
11165 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
11166 case SystemZ::CondStoreF64:
11167 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
11168 case SystemZ::CondStoreF64Inv:
11169 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
11170
// 128-bit compare pseudos; the flag distinguishes the U (true) from the
// S (false) variant.
11171 case SystemZ::SCmp128Hi:
11172 return emitICmp128Hi(MI, MBB, false);
11173 case SystemZ::UCmp128Hi:
11174 return emitICmp128Hi(MI, MBB, true);
11175
// 128-bit pair construction and extension pseudos.
11176 case SystemZ::PAIR128:
11177 return emitPair128(MI, MBB);
11178 case SystemZ::AEXT128:
11179 return emitExt128(MI, MBB, false);
11180 case SystemZ::ZEXT128:
11181 return emitExt128(MI, MBB, true);
11182
// Atomic "W" pseudos.  The third argument is the binary opcode handed to the
// helper (0 for the swap); the *i forms pass an extra `true`.
11183 case SystemZ::ATOMIC_SWAPW:
11184 return emitAtomicLoadBinary(MI, MBB, 0);
11185
11186 case SystemZ::ATOMIC_LOADW_AR:
11187 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11188 case SystemZ::ATOMIC_LOADW_AFI:
11189 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11190
11191 case SystemZ::ATOMIC_LOADW_SR:
11192 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11193
11194 case SystemZ::ATOMIC_LOADW_NR:
11195 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11196 case SystemZ::ATOMIC_LOADW_NILH:
11197 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11198
11199 case SystemZ::ATOMIC_LOADW_OR:
11200 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11201 case SystemZ::ATOMIC_LOADW_OILH:
11202 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11203
11204 case SystemZ::ATOMIC_LOADW_XR:
11205 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11206 case SystemZ::ATOMIC_LOADW_XILF:
11207 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11208
11209 case SystemZ::ATOMIC_LOADW_NRi:
11210 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11211 case SystemZ::ATOMIC_LOADW_NILHi:
11212 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11213
// Atomic min/max: compare opcode (CR or CLR) plus the CC mask passed to the
// helper.
11214 case SystemZ::ATOMIC_LOADW_MIN:
11215 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11216 case SystemZ::ATOMIC_LOADW_MAX:
11217 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11218 case SystemZ::ATOMIC_LOADW_UMIN:
11219 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11220 case SystemZ::ATOMIC_LOADW_UMAX:
11221 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11222
11223 case SystemZ::ATOMIC_CMP_SWAPW:
11224 return emitAtomicCmpSwapW(MI, MBB);
// Memory-to-memory pseudos, each mapped to the underlying SS opcode.
11225 case SystemZ::MVCImm:
11226 case SystemZ::MVCReg:
11227 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11228 case SystemZ::NCImm:
11229 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11230 case SystemZ::OCImm:
11231 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11232 case SystemZ::XCImm:
11233 case SystemZ::XCReg:
11234 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11235 case SystemZ::CLCImm:
11236 case SystemZ::CLCReg:
11237 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11238 case SystemZ::MemsetImmImm:
11239 case SystemZ::MemsetImmReg:
11240 case SystemZ::MemsetRegImm:
11241 case SystemZ::MemsetRegReg:
11242 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
// String-instruction loops.
11243 case SystemZ::CLSTLoop:
11244 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11245 case SystemZ::MVSTLoop:
11246 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11247 case SystemZ::SRSTLoop:
11248 return emitStringWrapper(MI, MBB, SystemZ::SRST);
// Transaction begin; the last argument is the helper's NoFloat flag.
11249 case SystemZ::TBEGIN:
11250 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11251 case SystemZ::TBEGIN_nofloat:
11252 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11253 case SystemZ::TBEGINC:
11254 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
// Floating-point load-and-test compare pseudos.
11255 case SystemZ::LTEBRCompare_Pseudo:
11256 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11257 case SystemZ::LTDBRCompare_Pseudo:
11258 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11259 case SystemZ::LTXBRCompare_Pseudo:
11260 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11261
11262 case SystemZ::PROBED_ALLOCA:
11263 return emitProbedAlloca(MI, MBB);
11264 case SystemZ::EH_SjLj_SetJmp:
11265 return emitEHSjLjSetJmp(MI, MBB);
11266 case SystemZ::EH_SjLj_LongJmp:
11267 return emitEHSjLjLongJmp(MI, MBB);
11268
11269 case TargetOpcode::STACKMAP:
11270 case TargetOpcode::PATCHPOINT:
11271 return emitPatchPoint(MI, MBB);
11272
// Stack-guard DAG pseudos are rewritten to their non-DAG counterparts.
11273 case SystemZ::MOV_STACKGUARD_DAG:
11274 return emitStackGuardPseudo(MI, MBB, SystemZ::MOV_STACKGUARD);
11275
11276 case SystemZ::CMP_STACKGUARD_DAG:
11277 return emitStackGuardPseudo(MI, MBB, SystemZ::CMP_STACKGUARD);
11278
11279 default:
11280 llvm_unreachable("Unexpected instr type to insert");
11281 }
11282}
11283
11284// This is only used by the isel schedulers, and is needed only to prevent
11285// compiler from crashing when list-ilp is used.
11286const TargetRegisterClass *
11287SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11288 if (VT == MVT::Untyped)
11289 return &SystemZ::ADDR128BitRegClass;
11291}
11292
11293SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11294 SelectionDAG &DAG) const {
11295 SDLoc dl(Op);
11296 /*
11297 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11298 settings:
11299 00 Round to nearest
11300 01 Round to 0
11301 10 Round to +inf
11302 11 Round to -inf
11303
11304 FLT_ROUNDS, on the other hand, expects the following:
11305 -1 Undefined
11306 0 Round to 0
11307 1 Round to nearest
11308 2 Round to +inf
11309 3 Round to -inf
11310 */
11311
11312 // Save FPC to register.
11313 SDValue Chain = Op.getOperand(0);
11314 SDValue EFPC(
11315 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11316 Chain = EFPC.getValue(1);
11317
11318 // Transform as necessary
11319 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11320 DAG.getConstant(3, dl, MVT::i32));
11321 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11322 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11323 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11324 DAG.getConstant(1, dl, MVT::i32)));
11325
11326 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11327 DAG.getConstant(1, dl, MVT::i32));
11328 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11329
11330 return DAG.getMergeValues({RetVal, Chain}, dl);
11331}
11332
11333SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11334 SelectionDAG &DAG) const {
11335 EVT VT = Op.getValueType();
11336 Op = Op.getOperand(0);
11337 EVT OpVT = Op.getValueType();
11338
11339 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11340
11341 SDLoc DL(Op);
11342
11343 // load a 0 vector for the third operand of VSUM.
11344 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11345
11346 // execute VSUM.
11347 switch (OpVT.getScalarSizeInBits()) {
11348 case 8:
11349 case 16:
11350 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11351 [[fallthrough]];
11352 case 32:
11353 case 64:
11354 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11355 DAG.getBitcast(Op.getValueType(), Zero));
11356 break;
11357 case 128:
11358 break; // VSUM over v1i128 should not happen and would be a noop
11359 default:
11360 llvm_unreachable("Unexpected scalar size.");
11361 }
11362 // Cast to original vector type, retrieve last element.
11363 return DAG.getNode(
11364 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11365 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11366}
11367
11369 FunctionType *FT = F->getFunctionType();
11370 const AttributeList &Attrs = F->getAttributes();
11371 if (Attrs.hasRetAttrs())
11372 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11373 OS << *F->getReturnType() << " @" << F->getName() << "(";
11374 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11375 if (I)
11376 OS << ", ";
11377 OS << *FT->getParamType(I);
11378 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11379 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11380 if (ArgAttrs.hasAttribute(A))
11381 OS << " " << Attribute::getNameFromAttrKind(A);
11382 }
11383 OS << ")\n";
11384}
11385
11386bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11387 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11388 if (Itr == IsInternalCache.end())
11389 Itr = IsInternalCache
11390 .insert(std::pair<const Function *, bool>(
11391 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11392 .first;
11393 return Itr->second;
11394}
11395
// Check the outgoing values of a call against verifyNarrowIntegerArgs().
// When Callee is a GlobalAddressSDNode for a Function that isInternal()
// accepts, the check is skipped.  On failure a diagnostic naming the callee
// (or "-" if it is not a known Function) is written to errs() and
// llvm_unreachable is hit.
//
// NOTE(review): two source lines (doxygen lines 11401 and 11417) are missing
// from this extract: the condition guarding the early return and the
// statement that follows the "Caller: " output.
11396void SystemZTargetLowering::
11397verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11398 const Function *F, SDValue Callee) const {
11399 // Temporarily only do the check when explicitly requested, until it can be
11400 // enabled by default.
// NOTE(review): the `if (...)` controlling this return is missing here.
11402 return;
11403
// Resolve the callee to a Function where possible so internal functions can
// be exempted and the diagnostic can show the callee's signature.
11404 bool IsInternal = false;
11405 const Function *CalleeFn = nullptr;
11406 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11407 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11408 IsInternal = isInternal(CalleeFn);
11409 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11410 errs() << "ERROR: Missing extension attribute of passed "
11411 << "value in call to function:\n" << "Callee: ";
11412 if (CalleeFn != nullptr)
11413 printFunctionArgExts(CalleeFn, errs());
11414 else
11415 errs() << "-\n";
11416 errs() << "Caller: ";
// NOTE(review): presumably the caller F is printed here -- line missing.
11418 llvm_unreachable("");
11419 }
11420}
11421
// Check the returned values of F against verifyNarrowIntegerArgs().
// Functions accepted by isInternal() are exempt.  On failure a diagnostic is
// written to errs() and llvm_unreachable is hit.
//
// NOTE(review): two source lines (doxygen lines 11427 and 11433) are missing
// from this extract: the condition guarding the early return and the
// statement between the diagnostic text and llvm_unreachable.
11422void SystemZTargetLowering::
11423verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11424 const Function *F) const {
11425 // Temporarily only do the check when explicitly requested, until it can be
11426 // enabled by default.
// NOTE(review): the `if (...)` controlling this return is missing here.
11428 return;
11429
11430 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11431 errs() << "ERROR: Missing extension attribute of returned "
11432 << "value from function:\n";
// NOTE(review): presumably F is printed here -- line missing.
11434 llvm_unreachable("");
11435 }
11436}
11437
11438// Verify that narrow integer arguments are extended as required by the ABI.
11439// Return false if an error is found.
//
// The check passes trivially on non-ELF targets and when verification is not
// enabled (see the TargetMachine option VerifyArgABICompliance below).
// Otherwise every integer-typed entry of Outs must be either at least 64 bits
// wide or an i32 carrying one of the SExt / ZExt / NoExt flags.
//
// NOTE(review): two source lines (doxygen lines 11445-11446) are missing
// from this extract; they open the `if` whose `else` branch appears below.
11440bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11441 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11442 if (!Subtarget.isTargetELF())
11443 return true;
11444
// NOTE(review): start of the enclosing if-statement missing here.
11447 return true;
11448 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11449 return true;
11450
11451 for (unsigned i = 0; i < Outs.size(); ++i) {
11452 MVT VT = Outs[i].VT;
11453 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11454 if (VT.isInteger()) {
// Integer values are expected to arrive here as i32 or as 64 bits and wider.
11455 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11456 "Unexpected integer argument VT.");
// An i32 with no extension flag at all is the error being looked for.
11457 if (VT == MVT::i32 &&
11458 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11459 return false;
11460 }
11461 }
11462
11463 return true;
11464}
11465
// Stack-protector declaration hook: reads the module's stack-protector guard
// mode and returns without doing anything when it is "tls" or unset.
//
// NOTE(review): the first line of the signature (doxygen line 11466) and the
// statement handling the non-TLS case (doxygen line 11475) are missing from
// this extract; the function name is therefore not visible here -- presumably
// SystemZTargetLowering::insertSSPDeclarations, confirm against upstream.
11467 Module &M, const LibcallLoweringInfo &Libcalls) const {
11468 StringRef GuardMode = M.getStackProtectorGuard();
11469
11470 // In the TLS case, no symbol needs to be inserted.
11471 if (GuardMode == "tls" || GuardMode.empty())
11472 return;
11473
11474 // Otherwise (in the global case), insert the appropriate global variable.
// NOTE(review): the statement performing that insertion is missing here.
11476}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallSet class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static SmallVector< SDValue, 4 > simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static SDValue mergeHighParts(SelectionDAG &DAG, const SDLoc &DL, unsigned MergedBits, EVT VT, SDValue Op0, SDValue Op1)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static void adjustForStackGuardCompare(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static std::pair< SDValue, int > findCCUse(const SDValue &Val, unsigned Depth=0)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool analyzeArgSplit(const SmallVectorImpl< ArgTy > &Args, SmallVector< CCValAssign, 16 > &ArgLocs, unsigned I, MVT &PartVT, unsigned &NumParts)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static SDValue buildFPVecFromScalars4(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SmallVectorImpl< SDValue > &Elems, unsigned Pos)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
void setBit(unsigned BitPosition)
Set the bit whose position is given by "BitPosition" to 1.
Definition APInt.h:1353
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition APInt.h:323
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
MachineConstantPoolValue * getMachineCPVal() const
const Constant * getConstVal() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
iterator end()
Definition DenseMap.h:143
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition Function.cpp:938
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:659
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
bool hasInternalLinkage() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node that starts a new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
const_iterator begin() const
Definition SmallSet.h:216
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:714
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
iterator end() const
Definition StringRef.h:116
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
A SystemZ-specific class detailing special use registers particular for calling conventions.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const override
Insert SSP declaration if global stack protector is used.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers. Particular to z/OS when in 64-bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
A raw_ostream that writes to a file descriptor.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
@ STRICT_FMINIMUM
Definition ISDOpcodes.h:471
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:792
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 = Undefined, 0 = Round to 0, 1 = Round to nearest (ties to even), 2 = Round to ...
Definition ISDOpcodes.h:974
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:470
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:903
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:464
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:791
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
auto m_Value()
Match an arbitrary value and ignore it.
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceiling of the log base 2 of the specified value, or 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
@ Length
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Define
Register definition.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
int countr_zero(T Val)
Count the number of 0 bits from the least significant bit toward the most significant bit, stopping at the first 1.
Definition bit.h:204
int countl_zero(T Val)
Count the number of 0 bits from the most significant bit toward the least significant bit, stopping at the first 1.
Definition bit.h:263
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
UndefPoisonKind
Enumeration to track whether we are interested in Undef, Poison, or both.
Definition UndefPoison.h:20
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
#define EQ(a, b)
Definition regexec.c:65
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:271
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isVectorOf(EVT EltVT) const
Return true if this is a vector with matching element type.
Definition ValueTypes.h:181
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:190
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:176
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:325
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:184
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
This structure is used to pass arguments to makeLibCall function.