1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsS390.h"
27#include <cctype>
28#include <optional>
29
30using namespace llvm;
31
32#define DEBUG_TYPE "systemz-lower"
33
34namespace {
35// Represents information about a comparison.
36struct Comparison {
37 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
38 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
39 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
40
41 // The operands to the comparison.
42 SDValue Op0, Op1;
43
44 // Chain if this is a strict floating-point comparison.
45 SDValue Chain;
46
47 // The opcode that should be used to compare Op0 and Op1.
48 unsigned Opcode;
49
50 // A SystemZICMP value. Only used for integer comparisons.
51 unsigned ICmpType;
52
53 // The mask of CC values that Opcode can produce.
54 unsigned CCValid;
55
56 // The mask of CC values for which the original condition is true.
57 unsigned CCMask;
58};
59} // end anonymous namespace
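// For example (illustrative), a simple integer equality comparison would
// typically be described with Opcode == SystemZISD::ICMP,
// CCValid == SystemZ::CCMASK_ICMP and CCMask == SystemZ::CCMASK_CMP_EQ.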
60
61// Classify VT as either 32 or 64 bit.
62static bool is32Bit(EVT VT) {
63 switch (VT.getSimpleVT().SimpleTy) {
64 case MVT::i32:
65 return true;
66 case MVT::i64:
67 return false;
68 default:
69 llvm_unreachable("Unsupported type");
70 }
71}
72
73// Return a version of MachineOperand that can be safely used before the
74// final use.
75static MachineOperand earlyUseOperand(MachineOperand Op) {
76 if (Op.isReg())
77 Op.setIsKill(false);
78 return Op;
79}
80
81SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
82 const SystemZSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
85
86 auto *Regs = STI.getSpecialRegisters();
87
88 // Set up the register classes.
89 if (Subtarget.hasHighWord())
90 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
91 else
92 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
93 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
94 if (!useSoftFloat()) {
95 if (Subtarget.hasVector()) {
96 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
97 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
98 } else {
99 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
100 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
101 }
102 if (Subtarget.hasVectorEnhancements1())
103 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
104 else
105 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
106
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
109 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
110 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
114 }
115 }
116
117 // Compute derived properties from the register classes
119
120 // Set up special registers.
121 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
122
123 // TODO: It may be better to default to latency-oriented scheduling; however,
124 // LLVM's current latency-oriented scheduler can't handle physreg definitions
125 // such as SystemZ has with CC, so set this to the register-pressure
126 // scheduler, because it can.
128
131
133
134 // Instructions are strings of 2-byte aligned 2-byte values.
136 // For performance reasons we prefer 16-byte alignment.
138
139 // Handle operations that are handled in a similar way for all types.
140 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
141 I <= MVT::LAST_FP_VALUETYPE;
142 ++I) {
143 MVT VT = MVT::SimpleValueType(I);
144 if (isTypeLegal(VT)) {
145 // Lower SET_CC into an IPM-based sequence.
149
150 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
152
153 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
156 }
157 }
158
159 // Expand jump table branches as address arithmetic followed by an
160 // indirect jump.
162
163 // Expand BRCOND into a BR_CC (see above).
165
166 // Handle integer types.
167 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
168 I <= MVT::LAST_INTEGER_VALUETYPE;
169 ++I) {
170 MVT VT = MVT::SimpleValueType(I);
171 if (isTypeLegal(VT)) {
173
174 // Expand individual DIV and REMs into DIVREMs.
181
182 // Support addition/subtraction with overflow.
185
186 // Support addition/subtraction with carry.
189
190 // Support carry in as value rather than glue.
193
194 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
195 // stores, putting a serialization instruction after the stores.
198
199 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
200 // available, or if the operand is constant.
202
203 // Use POPCNT on z196 and above.
204 if (Subtarget.hasPopulationCount())
206 else
208
209 // No special instructions for these.
212
213 // Use *MUL_LOHI where possible instead of MULH*.
218
219 // Only z196 and above have native support for conversions to unsigned.
220 // On z10, promoting to i64 doesn't generate an inexact condition for
221 // values that are outside the i32 range but in the i64 range, so use
222 // the default expansion.
223 if (!Subtarget.hasFPExtension())
225
226 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
227 // default to Expand, so need to be modified to Legal where appropriate.
229 if (Subtarget.hasFPExtension())
231
232 // And similarly for STRICT_[SU]INT_TO_FP.
234 if (Subtarget.hasFPExtension())
236 }
237 }
238
239 // Type legalization will convert 8- and 16-bit atomic operations into
240 // forms that operate on i32s (but still keeping the original memory VT).
241 // Lower them into full i32 operations.
253
254 // Even though i128 is not a legal type, we still need to custom lower
255 // the atomic operations in order to exploit SystemZ instructions.
258
259 // We can use the CC result of compare-and-swap to implement
260 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
264
266
267 // Traps are legal, as we will convert them to "j .+2".
268 setOperationAction(ISD::TRAP, MVT::Other, Legal);
269
270 // z10 has instructions for signed but not unsigned FP conversion.
271 // Handle unsigned 32-bit types as signed 64-bit types.
272 if (!Subtarget.hasFPExtension()) {
277 }
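  // (Illustrative: without the FP extension, "uitofp i32 %x to double" is
  // handled by zero-extending %x to i64 and using the signed i64->f64
  // conversion, which is exact for all 32-bit unsigned values.)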
278
279 // We have native support for a 64-bit CTLZ, via FLOGR.
283
284 // On z15 we have native support for a 64-bit CTPOP.
285 if (Subtarget.hasMiscellaneousExtensions3()) {
288 }
289
290 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
292
293 // Expand 128 bit shifts without using a libcall.
297 setLibcallName(RTLIB::SRL_I128, nullptr);
298 setLibcallName(RTLIB::SHL_I128, nullptr);
299 setLibcallName(RTLIB::SRA_I128, nullptr);
300
301 // Handle bitcast from fp128 to i128.
303
304 // We have native instructions for i8, i16 and i32 extensions, but not i1.
306 for (MVT VT : MVT::integer_valuetypes()) {
310 }
311
312 // Handle the various types of symbolic address.
318
319 // We need to handle dynamic allocations specially because of the
320 // 160-byte area at the bottom of the stack.
323
326
327 // Handle prefetches with PFD or PFDRL.
329
330 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
331 // Assume by default that all vector operations need to be expanded.
332 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
333 if (getOperationAction(Opcode, VT) == Legal)
335
336 // Likewise all truncating stores and extending loads.
337 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
338 setTruncStoreAction(VT, InnerVT, Expand);
341 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
342 }
343
344 if (isTypeLegal(VT)) {
345 // These operations are legal for anything that can be stored in a
346 // vector register, even if there is no native support for the format
347 // as such. In particular, we can do these for v4f32 even though there
348 // are no specific instructions for that format.
354
355 // Likewise, except that we need to replace the nodes with something
356 // more specific.
359 }
360 }
361
362 // Handle integer vector types.
363 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
364 if (isTypeLegal(VT)) {
365 // These operations have direct equivalents.
370 if (VT != MVT::v2i64)
376 if (Subtarget.hasVectorEnhancements1())
378 else
382
383 // Convert a GPR scalar to a vector by inserting it into element 0.
385
386 // Use a series of unpacks for extensions.
389
390 // Detect shifts/rotates by a scalar amount and convert them into
391 // V*_BY_SCALAR.
396
397 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
398 // and inverting the result as necessary.
400 }
401 }
402
403 if (Subtarget.hasVector()) {
404 // There should be no need to check for float types other than v2f64
405 // since <2 x f32> isn't a legal type.
414
423 }
424
425 if (Subtarget.hasVectorEnhancements2()) {
434
443 }
444
445 // Handle floating-point types.
446 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
447 I <= MVT::LAST_FP_VALUETYPE;
448 ++I) {
449 MVT VT = MVT::SimpleValueType(I);
450 if (isTypeLegal(VT)) {
451 // We can use FI for FRINT.
453
454 // We can use the extended form of FI for other rounding operations.
455 if (Subtarget.hasFPExtension()) {
461 }
462
463 // No special instructions for these.
469
470 // Special treatment.
472
473 // Handle constrained floating-point operations.
483 if (Subtarget.hasFPExtension()) {
489 }
490 }
491 }
492
493 // Handle floating-point vector types.
494 if (Subtarget.hasVector()) {
495 // Scalar-to-vector conversion is just a subreg.
498
499 // Some insertions and extractions can be done directly but others
500 // need to go via integers.
505
506 // These operations have direct equivalents.
507 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
508 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
509 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
510 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
511 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
512 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
513 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
514 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
515 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
518 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
521
522 // Handle constrained floating-point operations.
535
540 if (Subtarget.hasVectorEnhancements1()) {
543 }
544 }
545
546 // The vector enhancements facility 1 has instructions for these.
547 if (Subtarget.hasVectorEnhancements1()) {
548 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
549 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
550 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
551 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
552 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
553 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
554 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
555 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
556 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
559 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
562
567
572
577
582
587
588 // Handle constrained floating-point operations.
601 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
602 MVT::v4f32, MVT::v2f64 }) {
607 }
608 }
609
610 // We only have fused f128 multiply-addition on vector registers.
611 if (!Subtarget.hasVectorEnhancements1()) {
614 }
615
616 // We don't have a copysign instruction on vector registers.
617 if (Subtarget.hasVectorEnhancements1())
619
620 // Needed so that we don't try to implement f128 constant loads using
621 // a load-and-extend of a f80 constant (in cases where the constant
622 // would fit in an f80).
623 for (MVT VT : MVT::fp_valuetypes())
624 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
625
626 // We don't have extending load instruction on vector registers.
627 if (Subtarget.hasVectorEnhancements1()) {
628 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
629 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
630 }
631
632 // Floating-point truncation and stores need to be done separately.
633 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
634 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
635 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
636
637 // We have 64-bit FPR<->GPR moves, but need special handling for
638 // 32-bit forms.
639 if (!Subtarget.hasVector()) {
642 }
643
644 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
645 // structure, but VAEND is a no-op.
649
651
652 // Codes for which we want to perform some z-specific combinations.
656 ISD::LOAD,
667 ISD::SDIV,
668 ISD::UDIV,
669 ISD::SREM,
670 ISD::UREM,
673
674 // Handle intrinsics.
677
678 // We want to use MVC in preference to even a single load/store pair.
679 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
681
682 // The main memset sequence is a byte store followed by an MVC.
683 // Two STC or MV..I stores win over that, but the kind of fused stores
684 // generated by target-independent code don't when the byte value is
685 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
686 // than "STC;MVC". Handle the choice in target-specific code instead.
687 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
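  // (Illustrative: for a longer memset of a variable byte, the usual target
  // sequence is a single STC of the first byte followed by an overlapping
  // "MVC 1(N-1,%rX),0(%rX)", which propagates that byte through the block.)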
689
690 // Default to having -disable-strictnode-mutation on
691 IsStrictFPEnabled = true;
692
693 if (Subtarget.isTargetzOS()) {
694 struct RTLibCallMapping {
695 RTLIB::Libcall Code;
696 const char *Name;
697 };
698 static RTLibCallMapping RTLibCallCommon[] = {
699#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
700#include "ZOSLibcallNames.def"
701 };
702 for (auto &E : RTLibCallCommon)
703 setLibcallName(E.Code, E.Name);
704 }
705}
706
707bool SystemZTargetLowering::useSoftFloat() const {
708 return Subtarget.hasSoftFloat();
709}
710
711EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
712 LLVMContext &, EVT VT) const {
713 if (!VT.isVector())
714 return MVT::i32;
715 return VT.changeVectorElementTypeToInteger();
716}
717
718bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
719 const MachineFunction &MF, EVT VT) const {
720 VT = VT.getScalarType();
721
722 if (!VT.isSimple())
723 return false;
724
725 switch (VT.getSimpleVT().SimpleTy) {
726 case MVT::f32:
727 case MVT::f64:
728 return true;
729 case MVT::f128:
730 return Subtarget.hasVectorEnhancements1();
731 default:
732 break;
733 }
734
735 return false;
736}
737
738// Return true if the constant can be generated with a vector instruction,
739// such as VGM, VGMB or VREPI.
740bool SystemZVectorConstantInfo::isVectorConstantLegal(
741 const SystemZSubtarget &Subtarget) {
742 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
743 if (!Subtarget.hasVector() ||
744 (isFP128 && !Subtarget.hasVectorEnhancements1()))
745 return false;
746
747 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
748 // preferred way of creating all-zero and all-one vectors so give it
749 // priority over other methods below.
750 unsigned Mask = 0;
751 unsigned I = 0;
752 for (; I < SystemZ::VectorBytes; ++I) {
753 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
754 if (Byte == 0xff)
755 Mask |= 1ULL << I;
756 else if (Byte != 0)
757 break;
758 }
759 if (I == SystemZ::VectorBytes) {
761 OpVals.push_back(Mask);
763 return true;
764 }
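  // (Illustrative: a constant whose bytes are all 0x00 or 0xff, such as the
  // 128-bit value 0xffffffff00000000ffffffff00000000, reaches this point with
  // one mask bit per 0xff byte and can be emitted as a single VGBM.)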
765
766 if (SplatBitSize > 64)
767 return false;
768
769 auto tryValue = [&](uint64_t Value) -> bool {
770 // Try VECTOR REPLICATE IMMEDIATE
771 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
772 if (isInt<16>(SignedValue)) {
773 OpVals.push_back(((unsigned) SignedValue));
776 SystemZ::VectorBits / SplatBitSize);
777 return true;
778 }
779 // Try VECTOR GENERATE MASK
780 unsigned Start, End;
781 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
782 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
783 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
784 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
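    // (Illustrative: with SplatBitSize == 32, a 64-bit Start of 40 becomes
    // 40 - (64 - 32) == 8, i.e. bit 8 of the 32-bit element.)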
785 OpVals.push_back(Start - (64 - SplatBitSize));
786 OpVals.push_back(End - (64 - SplatBitSize));
789 SystemZ::VectorBits / SplatBitSize);
790 return true;
791 }
792 return false;
793 };
794
795 // First try assuming that any undefined bits above the highest set bit
796 // and below the lowest set bit are 1s. This increases the likelihood of
797 // being able to use a sign-extended element value in VECTOR REPLICATE
798 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
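  // (Illustrative: a 32-bit splat whose defined bits are 0x0000fff0 with the
  // upper 16 bits undefined can be treated as 0xfffffff0, i.e. -16, which
  // fits the signed 16-bit immediate of VECTOR REPLICATE IMMEDIATE.)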
799 uint64_t SplatBitsZ = SplatBits.getZExtValue();
800 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
801 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
802 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
803 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
804 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
805 if (tryValue(SplatBitsZ | Upper | Lower))
806 return true;
807
808 // Now try assuming that any undefined bits between the first and
809 // last defined set bits are set. This increases the chances of
810 // using a non-wraparound mask.
811 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
812 return tryValue(SplatBitsZ | Middle);
813}
814
815SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
816 if (IntImm.isSingleWord()) {
817 IntBits = APInt(128, IntImm.getZExtValue());
818 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
819 } else
820 IntBits = IntImm;
821 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
822
823 // Find the smallest splat.
824 SplatBits = IntImm;
825 unsigned Width = SplatBits.getBitWidth();
826 while (Width > 8) {
827 unsigned HalfSize = Width / 2;
828 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
829 APInt LowValue = SplatBits.trunc(HalfSize);
830
831 // If the two halves do not match, stop here.
832 if (HighValue != LowValue || 8 > HalfSize)
833 break;
834
835 SplatBits = HighValue;
836 Width = HalfSize;
837 }
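  // (Illustrative: an IntImm of 0x01010101 halves 32 -> 16 -> 8 bits, ending
  // with SplatBits == 0x01 and SplatBitSize == 8.)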
838 SplatUndef = 0;
839 SplatBitSize = Width;
840}
841
842SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
843 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
844 bool HasAnyUndefs;
845
846 // Get IntBits by finding the 128 bit splat.
847 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
848 true);
849
850 // Get SplatBits by finding the 8 bit or greater splat.
851 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
852 true);
853}
854
855bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
856 bool ForCodeSize) const {
857 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
858 if (Imm.isZero() || Imm.isNegZero())
859 return true;
860
862}
863
864/// Returns true if stack probing through inline assembly is requested.
865bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
866 // If the function specifically requests inline stack probes, emit them.
867 if (MF.getFunction().hasFnAttribute("probe-stack"))
868 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
869 "inline-asm";
870 return false;
871}
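// For example, a function carrying the IR attribute
// "probe-stack"="inline-asm" (e.g. attributes #0 = { "probe-stack"="inline-asm" })
// is the case that returns true here.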
872
875 // Don't expand subword operations as they require special treatment.
876 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
878
879 // Don't expand if there is a target instruction available.
880 if (Subtarget.hasInterlockedAccess1() &&
881 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
888
890}
891
892bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
893 // We can use CGFI or CLGFI.
894 return isInt<32>(Imm) || isUInt<32>(Imm);
895}
896
897bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
898 // We can use ALGFI or SLGFI.
899 return isUInt<32>(Imm) || isUInt<32>(-Imm);
900}
901
902bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
903 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
904 // Unaligned accesses should never be slower than the expanded version.
905 // We check specifically for aligned accesses in the few cases where
906 // they are required.
907 if (Fast)
908 *Fast = 1;
909 return true;
910}
911
912// Information about the addressing mode for a memory access.
914 // True if a long displacement is supported.
916
917 // True if use of index register is supported.
919
920 AddressingMode(bool LongDispl, bool IdxReg) :
921 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
922};
923
924// Return the desired addressing mode for a Load which has only one use (in
925// the same block) which is a Store.
926static AddressingMode getLoadStoreAddrMode(bool HasVector,
927 Type *Ty) {
928 // With vector support a Load->Store combination may be combined to either
929 // an MVC or vector operations and it seems to work best to allow the
930 // vector addressing mode.
931 if (HasVector)
932 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
933
934 // Otherwise only the MVC case is special.
935 bool MVC = Ty->isIntegerTy(8);
936 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
937}
938
939// Return the addressing mode which seems most desirable given an LLVM
940// Instruction pointer.
941static AddressingMode
942supportedAddressingMode(Instruction *I, bool HasVector) {
943 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
944 switch (II->getIntrinsicID()) {
945 default: break;
946 case Intrinsic::memset:
947 case Intrinsic::memmove:
948 case Intrinsic::memcpy:
949 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
950 }
951 }
952
953 if (isa<LoadInst>(I) && I->hasOneUse()) {
954 auto *SingleUser = cast<Instruction>(*I->user_begin());
955 if (SingleUser->getParent() == I->getParent()) {
956 if (isa<ICmpInst>(SingleUser)) {
957 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
958 if (C->getBitWidth() <= 64 &&
959 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
960 // Comparison of memory with 16 bit signed / unsigned immediate
961 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
962 } else if (isa<StoreInst>(SingleUser))
963 // Load->Store
964 return getLoadStoreAddrMode(HasVector, I->getType());
965 }
966 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
967 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
968 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
969 // Load->Store
970 return getLoadStoreAddrMode(HasVector, LoadI->getType());
971 }
972
973 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
974
975 // * Use LDE instead of LE/LEY for z13 to avoid partial register
976 // dependencies (LDE only supports small offsets).
977 // * Utilize the vector registers to hold floating point
978 // values (vector load / store instructions only support small
979 // offsets).
980
981 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
982 I->getOperand(0)->getType());
983 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
984 bool IsVectorAccess = MemAccessTy->isVectorTy();
985
986 // A store of an extracted vector element will be combined into a VSTE type
987 // instruction.
988 if (!IsVectorAccess && isa<StoreInst>(I)) {
989 Value *DataOp = I->getOperand(0);
990 if (isa<ExtractElementInst>(DataOp))
991 IsVectorAccess = true;
992 }
993
994 // A load which gets inserted into a vector element will be combined into a
995 // VLE type instruction.
996 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
997 User *LoadUser = *I->user_begin();
998 if (isa<InsertElementInst>(LoadUser))
999 IsVectorAccess = true;
1000 }
1001
1002 if (IsFPAccess || IsVectorAccess)
1003 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1004 }
1005
1006 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1007}
1008
1010 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1011 // Punt on globals for now, although they can be used in limited
1012 // RELATIVE LONG cases.
1013 if (AM.BaseGV)
1014 return false;
1015
1016 // Require a 20-bit signed offset.
1017 if (!isInt<20>(AM.BaseOffs))
1018 return false;
1019
1020 bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
1021 AddressingMode SupportedAM(!RequireD12, true);
1022 if (I != nullptr)
1023 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1024
1025 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1026 return false;
1027
1028 if (!SupportedAM.IndexReg)
1029 // No indexing allowed.
1030 return AM.Scale == 0;
1031 else
1032 // Indexing is OK but no scale factor can be applied.
1033 return AM.Scale == 0 || AM.Scale == 1;
1034}
1035
1037 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1038 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1039 const int MVCFastLen = 16;
1040
1041 if (Limit != ~unsigned(0)) {
1042 // Don't expand Op into scalar loads/stores in these cases:
1043 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1044 return false; // Small memcpy: Use MVC
1045 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1046 return false; // Small memset (first byte with STC/MVI): Use MVC
1047 if (Op.isZeroMemset())
1048 return false; // Memset zero: Use XC
1049 }
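  // (Illustrative: with MVCFastLen == 16, a 16-byte memcpy is left alone here
  // so that it can later be emitted as a single MVC rather than being expanded
  // into scalar load/store pairs.)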
1050
1051 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1052 SrcAS, FuncAttributes);
1053}
1054
1056 const AttributeList &FuncAttributes) const {
1057 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1058}
1059
1060bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1061 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1062 return false;
1063 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1064 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1065 return FromBits > ToBits;
1066}
1067
1069 if (!FromVT.isInteger() || !ToVT.isInteger())
1070 return false;
1071 unsigned FromBits = FromVT.getFixedSizeInBits();
1072 unsigned ToBits = ToVT.getFixedSizeInBits();
1073 return FromBits > ToBits;
1074}
1075
1076//===----------------------------------------------------------------------===//
1077// Inline asm support
1078//===----------------------------------------------------------------------===//
1079
1080TargetLowering::ConstraintType
1081SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1082 if (Constraint.size() == 1) {
1083 switch (Constraint[0]) {
1084 case 'a': // Address register
1085 case 'd': // Data register (equivalent to 'r')
1086 case 'f': // Floating-point register
1087 case 'h': // High-part register
1088 case 'r': // General-purpose register
1089 case 'v': // Vector register
1090 return C_RegisterClass;
1091
1092 case 'Q': // Memory with base and unsigned 12-bit displacement
1093 case 'R': // Likewise, plus an index
1094 case 'S': // Memory with base and signed 20-bit displacement
1095 case 'T': // Likewise, plus an index
1096 case 'm': // Equivalent to 'T'.
1097 return C_Memory;
1098
1099 case 'I': // Unsigned 8-bit constant
1100 case 'J': // Unsigned 12-bit constant
1101 case 'K': // Signed 16-bit constant
1102 case 'L': // Signed 20-bit displacement (on all targets we support)
1103 case 'M': // 0x7fffffff
1104 return C_Immediate;
1105
1106 default:
1107 break;
1108 }
1109 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1110 switch (Constraint[1]) {
1111 case 'Q': // Address with base and unsigned 12-bit displacement
1112 case 'R': // Likewise, plus an index
1113 case 'S': // Address with base and signed 20-bit displacement
1114 case 'T': // Likewise, plus an index
1115 return C_Address;
1116
1117 default:
1118 break;
1119 }
1120 }
1121 return TargetLowering::getConstraintType(Constraint);
1122}
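// For example (illustrative), in
//   asm("agr %0, %1" : "+d"(x) : "d"(y));
// with 64-bit operands, the 'd' constraints classify as C_RegisterClass,
// whereas a "ZQ" operand would classify as C_Address.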
1123
1126 const char *constraint) const {
1128 Value *CallOperandVal = info.CallOperandVal;
1129 // If we don't have a value, we can't do a match,
1130 // but allow it at the lowest weight.
1131 if (!CallOperandVal)
1132 return CW_Default;
1133 Type *type = CallOperandVal->getType();
1134 // Look at the constraint type.
1135 switch (*constraint) {
1136 default:
1138 break;
1139
1140 case 'a': // Address register
1141 case 'd': // Data register (equivalent to 'r')
1142 case 'h': // High-part register
1143 case 'r': // General-purpose register
1144 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1145 break;
1146
1147 case 'f': // Floating-point register
1148 if (!useSoftFloat())
1149 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1150 break;
1151
1152 case 'v': // Vector register
1153 if (Subtarget.hasVector())
1154 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1155 : CW_Default;
1156 break;
1157
1158 case 'I': // Unsigned 8-bit constant
1159 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1160 if (isUInt<8>(C->getZExtValue()))
1161 weight = CW_Constant;
1162 break;
1163
1164 case 'J': // Unsigned 12-bit constant
1165 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1166 if (isUInt<12>(C->getZExtValue()))
1167 weight = CW_Constant;
1168 break;
1169
1170 case 'K': // Signed 16-bit constant
1171 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1172 if (isInt<16>(C->getSExtValue()))
1173 weight = CW_Constant;
1174 break;
1175
1176 case 'L': // Signed 20-bit displacement (on all targets we support)
1177 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1178 if (isInt<20>(C->getSExtValue()))
1179 weight = CW_Constant;
1180 break;
1181
1182 case 'M': // 0x7fffffff
1183 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1184 if (C->getZExtValue() == 0x7fffffff)
1185 weight = CW_Constant;
1186 break;
1187 }
1188 return weight;
1189}
1190
1191// Parse a "{tNNN}" register constraint for which the register type "t"
1192// has already been verified. RC is the register class associated with "t" and
1193// Map maps 0-based register numbers to LLVM register numbers.
1194static std::pair<unsigned, const TargetRegisterClass *>
1195parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1196 const unsigned *Map, unsigned Size) {
1197 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1198 if (isdigit(Constraint[2])) {
1199 unsigned Index;
1200 bool Failed =
1201 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1202 if (!Failed && Index < Size && Map[Index])
1203 return std::make_pair(Map[Index], RC);
1204 }
1205 return std::make_pair(0U, nullptr);
1206}
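// For example (illustrative), the constraint "{r5}" parsed against the
// 64-bit GPR map resolves to SystemZ::R5D, while an index outside Size
// falls through to the (0, nullptr) failure result.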
1207
1208std::pair<unsigned, const TargetRegisterClass *>
1210 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1211 if (Constraint.size() == 1) {
1212 // GCC Constraint Letters
1213 switch (Constraint[0]) {
1214 default: break;
1215 case 'd': // Data register (equivalent to 'r')
1216 case 'r': // General-purpose register
1217 if (VT.getSizeInBits() == 64)
1218 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1219 else if (VT.getSizeInBits() == 128)
1220 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1221 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1222
1223 case 'a': // Address register
1224 if (VT == MVT::i64)
1225 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1226 else if (VT == MVT::i128)
1227 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1228 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1229
1230 case 'h': // High-part register (an LLVM extension)
1231 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1232
1233 case 'f': // Floating-point register
1234 if (!useSoftFloat()) {
1235 if (VT.getSizeInBits() == 64)
1236 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1237 else if (VT.getSizeInBits() == 128)
1238 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1239 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1240 }
1241 break;
1242
1243 case 'v': // Vector register
1244 if (Subtarget.hasVector()) {
1245 if (VT.getSizeInBits() == 32)
1246 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1247 if (VT.getSizeInBits() == 64)
1248 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1249 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1250 }
1251 break;
1252 }
1253 }
1254 if (Constraint.size() > 0 && Constraint[0] == '{') {
1255
1256 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1257 // to check the size on.
1258 auto getVTSizeInBits = [&VT]() {
1259 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1260 };
1261
1262 // We need to override the default register parsing for GPRs and FPRs
1263 // because the interpretation depends on VT. The internal names of
1264 // the registers are also different from the external names
1265 // (F0D and F0S instead of F0, etc.).
1266 if (Constraint[1] == 'r') {
1267 if (getVTSizeInBits() == 32)
1268 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1270 if (getVTSizeInBits() == 128)
1271 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1273 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1275 }
1276 if (Constraint[1] == 'f') {
1277 if (useSoftFloat())
1278 return std::make_pair(
1279 0u, static_cast<const TargetRegisterClass *>(nullptr));
1280 if (getVTSizeInBits() == 32)
1281 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1283 if (getVTSizeInBits() == 128)
1284 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1286 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1288 }
1289 if (Constraint[1] == 'v') {
1290 if (!Subtarget.hasVector())
1291 return std::make_pair(
1292 0u, static_cast<const TargetRegisterClass *>(nullptr));
1293 if (getVTSizeInBits() == 32)
1294 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1296 if (getVTSizeInBits() == 64)
1297 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1299 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1301 }
1302 }
1303 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1304}
1305
1306// FIXME? Maybe this could be a TableGen attribute on some registers and
1307// this table could be generated automatically from RegInfo.
1310 const MachineFunction &MF) const {
1311 Register Reg =
1313 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1314 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1315 .Default(0);
1316
1317 if (Reg)
1318 return Reg;
1319 report_fatal_error("Invalid register name global variable");
1320}
1321
1323 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1324 SelectionDAG &DAG) const {
1325 // Only support length 1 constraints for now.
1326 if (Constraint.size() == 1) {
1327 switch (Constraint[0]) {
1328 case 'I': // Unsigned 8-bit constant
1329 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1330 if (isUInt<8>(C->getZExtValue()))
1331 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1332 Op.getValueType()));
1333 return;
1334
1335 case 'J': // Unsigned 12-bit constant
1336 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1337 if (isUInt<12>(C->getZExtValue()))
1338 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1339 Op.getValueType()));
1340 return;
1341
1342 case 'K': // Signed 16-bit constant
1343 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1344 if (isInt<16>(C->getSExtValue()))
1345 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1346 Op.getValueType()));
1347 return;
1348
1349 case 'L': // Signed 20-bit displacement (on all targets we support)
1350 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1351 if (isInt<20>(C->getSExtValue()))
1352 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1353 Op.getValueType()));
1354 return;
1355
1356 case 'M': // 0x7fffffff
1357 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1358 if (C->getZExtValue() == 0x7fffffff)
1359 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1360 Op.getValueType()));
1361 return;
1362 }
1363 }
1364 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1365}
1366
1367//===----------------------------------------------------------------------===//
1368// Calling conventions
1369//===----------------------------------------------------------------------===//
1370
1371#include "SystemZGenCallingConv.inc"
1372
1374 CallingConv::ID) const {
1375 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1376 SystemZ::R14D, 0 };
1377 return ScratchRegs;
1378}
1379
1381 Type *ToType) const {
1382 return isTruncateFree(FromType, ToType);
1383}
1384
1386 return CI->isTailCall();
1387}
1388
1389// We do not yet support 128-bit single-element vector types. If the user
1390// attempts to use such types as function argument or return type, prefer
1391// to error out instead of emitting code violating the ABI.
1392static void VerifyVectorType(MVT VT, EVT ArgVT) {
1393 if (ArgVT.isVector() && !VT.isVector())
1394 report_fatal_error("Unsupported vector argument or return type");
1395}
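// For example, an argument declared as <1 x i128> would typically reach this
// check with a vector ArgVT but a non-vector VT and trigger the fatal error.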
1396
1398 for (unsigned i = 0; i < Ins.size(); ++i)
1399 VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1400}
1401
1403 for (unsigned i = 0; i < Outs.size(); ++i)
1404 VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1405}
1406
1407// Value is a value that has been passed to us in the location described by VA
1408// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1409// any loads onto Chain.
1411 CCValAssign &VA, SDValue Chain,
1412 SDValue Value) {
1413 // If the argument has been promoted from a smaller type, insert an
1414 // assertion to capture this.
1415 if (VA.getLocInfo() == CCValAssign::SExt)
1417 DAG.getValueType(VA.getValVT()));
1418 else if (VA.getLocInfo() == CCValAssign::ZExt)
1420 DAG.getValueType(VA.getValVT()));
1421
1422 if (VA.isExtInLoc())
1423 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1424 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1425 // If this is a short vector argument loaded from the stack,
1426 // extend from i64 to full vector size and then bitcast.
1427 assert(VA.getLocVT() == MVT::i64);
1428 assert(VA.getValVT().isVector());
1429 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1430 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1431 } else
1432 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1433 return Value;
1434}
1435
1436// Value is a value of type VA.getValVT() that we need to copy into
1437// the location described by VA. Return a copy of Value converted to
1438// VA.getLocVT(). The caller is responsible for handling indirect values.
1440 CCValAssign &VA, SDValue Value) {
1441 switch (VA.getLocInfo()) {
1442 case CCValAssign::SExt:
1443 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1444 case CCValAssign::ZExt:
1445 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1446 case CCValAssign::AExt:
1447 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1448 case CCValAssign::BCvt: {
1449 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1450 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1451 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1452 // For an f32 vararg we need to first promote it to an f64 and then
1453 // bitcast it to an i64.
1454 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1455 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1456 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1457 ? MVT::v2i64
1458 : VA.getLocVT();
1459 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1460 // For ELF, this is a short vector argument to be stored to the stack,
1461 // bitcast to v2i64 and then extract first element.
1462 if (BitCastToType == MVT::v2i64)
1463 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1464 DAG.getConstant(0, DL, MVT::i32));
1465 return Value;
1466 }
1467 case CCValAssign::Full:
1468 return Value;
1469 default:
1470 llvm_unreachable("Unhandled getLocInfo()");
1471 }
1472}
1473
1475 SDLoc DL(In);
1476 SDValue Lo, Hi;
1477 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1478 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1479 MVT::Untyped, Hi, Lo);
1480 return SDValue(Pair, 0);
1481}
1482
1484 SDLoc DL(In);
1485 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1486 DL, MVT::i64, In);
1487 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1488 DL, MVT::i64, In);
1489 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1490}
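// (Illustrative: GR128 values live in even/odd 64-bit register pairs, so the
// two helpers above move between an i128 SDValue and the untyped pair by
// splitting into and rebuilding from the subreg_h64/subreg_l64 halves.)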
1491
1493 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1494 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1495 EVT ValueVT = Val.getValueType();
1496 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1497 // Inline assembly operand.
1498 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1499 return true;
1500 }
1501
1502 return false;
1503}
1504
1506 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1507 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1508 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1509 // Inline assembly operand.
1510 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1511 return DAG.getBitcast(ValueVT, Res);
1512 }
1513
1514 return SDValue();
1515}
1516
1518 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1519 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1520 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1522 MachineFrameInfo &MFI = MF.getFrameInfo();
1524 SystemZMachineFunctionInfo *FuncInfo =
1526 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1527 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1528
1529 // Detect unsupported vector argument types.
1530 if (Subtarget.hasVector())
1531 VerifyVectorTypes(Ins);
1532
1533 // Assign locations to all of the incoming arguments.
1535 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1536 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1537 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1538
1539 unsigned NumFixedGPRs = 0;
1540 unsigned NumFixedFPRs = 0;
1541 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1542 SDValue ArgValue;
1543 CCValAssign &VA = ArgLocs[I];
1544 EVT LocVT = VA.getLocVT();
1545 if (VA.isRegLoc()) {
1546 // Arguments passed in registers
1547 const TargetRegisterClass *RC;
1548 switch (LocVT.getSimpleVT().SimpleTy) {
1549 default:
1550 // Integers smaller than i64 should be promoted to i64.
1551 llvm_unreachable("Unexpected argument type");
1552 case MVT::i32:
1553 NumFixedGPRs += 1;
1554 RC = &SystemZ::GR32BitRegClass;
1555 break;
1556 case MVT::i64:
1557 NumFixedGPRs += 1;
1558 RC = &SystemZ::GR64BitRegClass;
1559 break;
1560 case MVT::f32:
1561 NumFixedFPRs += 1;
1562 RC = &SystemZ::FP32BitRegClass;
1563 break;
1564 case MVT::f64:
1565 NumFixedFPRs += 1;
1566 RC = &SystemZ::FP64BitRegClass;
1567 break;
1568 case MVT::f128:
1569 NumFixedFPRs += 2;
1570 RC = &SystemZ::FP128BitRegClass;
1571 break;
1572 case MVT::v16i8:
1573 case MVT::v8i16:
1574 case MVT::v4i32:
1575 case MVT::v2i64:
1576 case MVT::v4f32:
1577 case MVT::v2f64:
1578 RC = &SystemZ::VR128BitRegClass;
1579 break;
1580 }
1581
1582 Register VReg = MRI.createVirtualRegister(RC);
1583 MRI.addLiveIn(VA.getLocReg(), VReg);
1584 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1585 } else {
1586 assert(VA.isMemLoc() && "Argument not register or memory");
1587
1588 // Create the frame index object for this incoming parameter.
1589 // FIXME: Pre-include call frame size in the offset, should not
1590 // need to manually add it here.
1591 int64_t ArgSPOffset = VA.getLocMemOffset();
1592 if (Subtarget.isTargetXPLINK64()) {
1593 auto &XPRegs =
1595 ArgSPOffset += XPRegs.getCallFrameSize();
1596 }
1597 int FI =
1598 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1599
1600 // Create the SelectionDAG nodes corresponding to a load
1601 // from this parameter. Unpromoted ints and floats are
1602 // passed as right-justified 8-byte values.
1603 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1604 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1605 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1606 DAG.getIntPtrConstant(4, DL));
1607 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1609 }
1610
1611 // Convert the value of the argument register into the value that's
1612 // being passed.
1613 if (VA.getLocInfo() == CCValAssign::Indirect) {
1614 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1616 // If the original argument was split (e.g. i128), we need
1617 // to load all parts of it here (using the same address).
1618 unsigned ArgIndex = Ins[I].OrigArgIndex;
1619 assert (Ins[I].PartOffset == 0);
1620 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1621 CCValAssign &PartVA = ArgLocs[I + 1];
1622 unsigned PartOffset = Ins[I + 1].PartOffset;
1623 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1624 DAG.getIntPtrConstant(PartOffset, DL));
1625 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1627 ++I;
1628 }
1629 } else
1630 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1631 }
1632
1633 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1634 // Save the number of non-varargs registers for later use by va_start, etc.
1635 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1636 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1637
1638 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1639 Subtarget.getSpecialRegisters());
1640
1641 // Likewise the address (in the form of a frame index) of where the
1642 // first stack vararg would be. The 1-byte size here is arbitrary.
1643 // FIXME: Pre-include call frame size in the offset, should not
1644 // need to manually add it here.
1645 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1646 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1647 FuncInfo->setVarArgsFrameIndex(FI);
1648 }
1649
1650 if (IsVarArg && Subtarget.isTargetELF()) {
1651 // Save the number of non-varargs registers for later use by va_start, etc.
1652 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1653 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1654
1655 // Likewise the address (in the form of a frame index) of where the
1656 // first stack vararg would be. The 1-byte size here is arbitrary.
1657 int64_t VarArgsOffset = CCInfo.getStackSize();
1658 FuncInfo->setVarArgsFrameIndex(
1659 MFI.CreateFixedObject(1, VarArgsOffset, true));
1660
1661 // ...and a similar frame index for the caller-allocated save area
1662 // that will be used to store the incoming registers.
1663 int64_t RegSaveOffset =
1664 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1665 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1666 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1667
1668 // Store the FPR varargs in the reserved frame slots. (We store the
1669 // GPRs as part of the prologue.)
1670 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1672 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1673 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1674 int FI =
1676 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1678 &SystemZ::FP64BitRegClass);
1679 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1680 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1682 }
1683 // Join the stores, which are independent of one another.
1684 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1685 ArrayRef(&MemOps[NumFixedFPRs],
1686 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1687 }
1688 }
1689
1690 if (Subtarget.isTargetXPLINK64()) {
1691 // Create a virtual register for handling the incoming "ADA" special register (R5).
1692 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1693 Register ADAvReg = MRI.createVirtualRegister(RC);
1694 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1695 Subtarget.getSpecialRegisters());
1696 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1697 FuncInfo->setADAVirtualRegister(ADAvReg);
1698 }
1699 return Chain;
1700}
1701
1702static bool canUseSiblingCall(const CCState &ArgCCInfo,
1705 // Punt if there are any indirect or stack arguments, or if the call
1706 // needs the callee-saved argument register R6, or if the call uses
1707 // the callee-saved register arguments SwiftSelf and SwiftError.
1708 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1709 CCValAssign &VA = ArgLocs[I];
1711 return false;
1712 if (!VA.isRegLoc())
1713 return false;
1714 Register Reg = VA.getLocReg();
1715 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1716 return false;
1717 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1718 return false;
1719 }
1720 return true;
1721}
1722
1724 unsigned Offset, bool LoadAdr = false) {
1727 unsigned ADAvReg = MFI->getADAVirtualRegister();
1729
1730 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1731 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1732
1733 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1734 if (!LoadAdr)
1735 Result = DAG.getLoad(
1736 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1738
1739 return Result;
1740}
1741
1742// ADA access using a GlobalValue.
1743// Note: for functions, the address of the descriptor is returned.
1745 EVT PtrVT) {
1746 unsigned ADAtype;
1747 bool LoadAddr = false;
1748 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1749 bool IsFunction =
1750 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1751 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1752
1753 if (IsFunction) {
1754 if (IsInternal) {
1756 LoadAddr = true;
1757 } else
1759 } else {
1761 }
1762 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1763
1764 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1765}
1766
1767static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1768 SDLoc &DL, SDValue &Chain) {
1769 unsigned ADADelta = 0; // ADA offset in desc.
1770 unsigned EPADelta = 8; // EPA offset in desc.
1773
1774 // XPLink calling convention.
1775 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1776 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1777 G->getGlobal()->hasPrivateLinkage());
1778 if (IsInternal) {
1781 unsigned ADAvReg = MFI->getADAVirtualRegister();
1782 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1783 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1784 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1785 return true;
1786 } else {
1788 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1789 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1790 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1791 }
1792 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1794 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1795 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1796 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1797 } else {
1798 // Function pointer case
1799 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1800 DAG.getConstant(ADADelta, DL, PtrVT));
1801 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1803 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1804 DAG.getConstant(EPADelta, DL, PtrVT));
1805 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1807 }
1808 return false;
1809}
1810
1811SDValue
1813 SmallVectorImpl<SDValue> &InVals) const {
1814 SelectionDAG &DAG = CLI.DAG;
1815 SDLoc &DL = CLI.DL;
1817 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1819 SDValue Chain = CLI.Chain;
1820 SDValue Callee = CLI.Callee;
1821 bool &IsTailCall = CLI.IsTailCall;
1822 CallingConv::ID CallConv = CLI.CallConv;
1823 bool IsVarArg = CLI.IsVarArg;
1825 EVT PtrVT = getPointerTy(MF.getDataLayout());
1826 LLVMContext &Ctx = *DAG.getContext();
1828
1829 // FIXME: z/OS support to be added later.
1830 if (Subtarget.isTargetXPLINK64())
1831 IsTailCall = false;
1832
1833 // Detect unsupported vector argument and return types.
1834 if (Subtarget.hasVector()) {
1835 VerifyVectorTypes(Outs);
1836 VerifyVectorTypes(Ins);
1837 }
1838
1839 // Analyze the operands of the call, assigning locations to each operand.
1841 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1842 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1843
1844 // We don't support GuaranteedTailCallOpt, only automatically-detected
1845 // sibling calls.
1846 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1847 IsTailCall = false;
1848
1849 // Get a count of how many bytes are to be pushed on the stack.
1850 unsigned NumBytes = ArgCCInfo.getStackSize();
1851
1852 // Mark the start of the call.
1853 if (!IsTailCall)
1854 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1855
1856 // Copy argument values to their designated locations.
1858 SmallVector<SDValue, 8> MemOpChains;
1859 SDValue StackPtr;
1860 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1861 CCValAssign &VA = ArgLocs[I];
1862 SDValue ArgValue = OutVals[I];
1863
1864 if (VA.getLocInfo() == CCValAssign::Indirect) {
1865 // Store the argument in a stack slot and pass its address.
1866 unsigned ArgIndex = Outs[I].OrigArgIndex;
1867 EVT SlotVT;
1868 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1869 // Allocate the full stack space for a promoted (and split) argument.
1870 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1871 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1872 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1873 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1874 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1875 } else {
1876 SlotVT = Outs[I].ArgVT;
1877 }
1878 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1879 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1880 MemOpChains.push_back(
1881 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1883 // If the original argument was split (e.g. i128), we need
1884 // to store all parts of it here (and pass just one address).
1885 assert (Outs[I].PartOffset == 0);
1886 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1887 SDValue PartValue = OutVals[I + 1];
1888 unsigned PartOffset = Outs[I + 1].PartOffset;
1889 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1890 DAG.getIntPtrConstant(PartOffset, DL));
1891 MemOpChains.push_back(
1892 DAG.getStore(Chain, DL, PartValue, Address,
1894 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1895 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1896 ++I;
1897 }
1898 ArgValue = SpillSlot;
1899 } else
1900 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1901
1902 if (VA.isRegLoc()) {
1903 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1904 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1905 // and low values.
1906 if (VA.getLocVT() == MVT::i128)
1907 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1908 // Queue up the argument copies and emit them at the end.
1909 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1910 } else {
1911 assert(VA.isMemLoc() && "Argument not register or memory");
1912
1913 // Work out the address of the stack slot. Unpromoted ints and
1914 // floats are passed as right-justified 8-byte values.
1915 if (!StackPtr.getNode())
1916 StackPtr = DAG.getCopyFromReg(Chain, DL,
1917 Regs->getStackPointerRegister(), PtrVT);
1918 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1919 VA.getLocMemOffset();
1920 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1921 Offset += 4;
1922 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1924
1925 // Emit the store.
1926 MemOpChains.push_back(
1927 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1928
1929 // Although long doubles or vectors are passed through the stack when
1930 // they are vararg (non-fixed arguments), if a long double or vector
1931 // occupies the third and fourth slot of the argument list, GPR3 should
1932 // still shadow the third slot of the argument list.
1933 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1934 SDValue ShadowArgValue =
1935 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1936 DAG.getIntPtrConstant(1, DL));
1937 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1938 }
1939 }
1940 }
1941
1942 // Join the stores, which are independent of one another.
1943 if (!MemOpChains.empty())
1944 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1945
1946 // Accept direct calls by converting symbolic call addresses to the
1947 // associated Target* opcodes. Force %r1 to be used for indirect
1948 // tail calls.
1949 SDValue Glue;
1950
1951 if (Subtarget.isTargetXPLINK64()) {
1952 SDValue ADA;
1953 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
1954 if (!IsBRASL) {
1955 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
1956 ->getAddressOfCalleeRegister();
1957 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
1958 Glue = Chain.getValue(1);
1959 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
1960 }
1961 RegsToPass.push_back(std::make_pair(
1962 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
1963 } else {
1964 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1965 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1966 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1967 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1968 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1969 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1970 } else if (IsTailCall) {
1971 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1972 Glue = Chain.getValue(1);
1973 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1974 }
1975 }
1976
1977 // Build a sequence of copy-to-reg nodes, chained and glued together.
1978 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1979 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1980 RegsToPass[I].second, Glue);
1981 Glue = Chain.getValue(1);
1982 }
1983
1984 // The first call operand is the chain and the second is the target address.
1985 SmallVector<SDValue, 8> Ops;
1986 Ops.push_back(Chain);
1987 Ops.push_back(Callee);
1988
1989 // Add argument registers to the end of the list so that they are
1990 // known live into the call.
1991 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1992 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1993 RegsToPass[I].second.getValueType()));
1994
1995 // Add a register mask operand representing the call-preserved registers.
1996 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1997 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1998 assert(Mask && "Missing call preserved mask for calling convention");
1999 Ops.push_back(DAG.getRegisterMask(Mask));
2000
2001 // Glue the call to the argument copies, if any.
2002 if (Glue.getNode())
2003 Ops.push_back(Glue);
2004
2005 // Emit the call.
2006 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2007 if (IsTailCall) {
2008 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2009 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2010 return Ret;
2011 }
2012 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2013 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2014 Glue = Chain.getValue(1);
2015
2016 // Mark the end of the call, which is glued to the call itself.
2017 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2018 Glue = Chain.getValue(1);
2019
2020 // Assign locations to each value returned by this call.
2021 SmallVector<CCValAssign, 16> RetLocs;
2022 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2023 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2024
2025 // Copy all of the result registers out of their specified physreg.
2026 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2027 CCValAssign &VA = RetLocs[I];
2028
2029 // Copy the value out, gluing the copy to the end of the call sequence.
2030 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2031 VA.getLocVT(), Glue);
2032 Chain = RetValue.getValue(1);
2033 Glue = RetValue.getValue(2);
2034
2035 // Convert the value of the return register into the value that's
2036 // being returned.
2037 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2038 }
2039
2040 return Chain;
2041}
2042
2043// Generate a call taking the given operands as arguments and returning a
2044// result of type RetVT.
2045 SDValue SystemZTargetLowering::makeExternalCall(
2046 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2047 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2048 bool DoesNotReturn, bool IsReturnValueUsed) const {
2049 TargetLowering::ArgListTy Args;
2050 Args.reserve(Ops.size());
2051
2052 TargetLowering::ArgListEntry Entry;
2053 for (SDValue Op : Ops) {
2054 Entry.Node = Op;
2055 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2056 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2057 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2058 Args.push_back(Entry);
2059 }
2060
2061 SDValue Callee =
2062 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2063
2064 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2065 TargetLowering::CallLoweringInfo CLI(DAG);
2066 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2067 CLI.setDebugLoc(DL)
2068 .setChain(Chain)
2069 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2070 .setNoReturn(DoesNotReturn)
2071 .setDiscardResult(!IsReturnValueUsed)
2072 .setSExtResult(SignExtend)
2073 .setZExtResult(!SignExtend);
2074 return LowerCallTo(CLI);
2075}
2076
2077 bool SystemZTargetLowering::
2078 CanLowerReturn(CallingConv::ID CallConv,
2079 MachineFunction &MF, bool isVarArg,
2080 const SmallVectorImpl<ISD::OutputArg> &Outs,
2081 LLVMContext &Context) const {
2082 // Detect unsupported vector return types.
2083 if (Subtarget.hasVector())
2084 VerifyVectorTypes(Outs);
2085
2086 // Special case that we cannot easily detect in RetCC_SystemZ since
2087 // i128 is not a legal type.
2088 for (auto &Out : Outs)
2089 if (Out.ArgVT == MVT::i128)
2090 return false;
2091
2092 SmallVector<CCValAssign, 16> RetLocs;
2093 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2094 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2095}
2096
2097SDValue
2098 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2099 bool IsVarArg,
2100 const SmallVectorImpl<ISD::OutputArg> &Outs,
2101 const SmallVectorImpl<SDValue> &OutVals,
2102 const SDLoc &DL, SelectionDAG &DAG) const {
2103 MachineFunction &MF = DAG.getMachineFunction();
2104
2105 // Detect unsupported vector return types.
2106 if (Subtarget.hasVector())
2107 VerifyVectorTypes(Outs);
2108
2109 // Assign locations to each returned value.
2110 SmallVector<CCValAssign, 16> RetLocs;
2111 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2112 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2113
2114 // Quick exit for void returns
2115 if (RetLocs.empty())
2116 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2117
2118 if (CallConv == CallingConv::GHC)
2119 report_fatal_error("GHC functions return void only");
2120
2121 // Copy the result values into the output registers.
2122 SDValue Glue;
2123 SmallVector<SDValue, 4> RetOps;
2124 RetOps.push_back(Chain);
2125 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2126 CCValAssign &VA = RetLocs[I];
2127 SDValue RetValue = OutVals[I];
2128
2129 // Make the return register live on exit.
2130 assert(VA.isRegLoc() && "Can only return in registers!");
2131
2132 // Promote the value as required.
2133 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2134
2135 // Chain and glue the copies together.
2136 Register Reg = VA.getLocReg();
2137 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2138 Glue = Chain.getValue(1);
2139 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2140 }
2141
2142 // Update chain and glue.
2143 RetOps[0] = Chain;
2144 if (Glue.getNode())
2145 RetOps.push_back(Glue);
2146
2147 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2148}
2149
2150// Return true if Op is an intrinsic node with chain that returns the CC value
2151// as its only (other) argument. Provide the associated SystemZISD opcode and
2152// the mask of valid CC values if so.
2153 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2154 unsigned &CCValid) {
2155 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2156 switch (Id) {
2157 case Intrinsic::s390_tbegin:
2158 Opcode = SystemZISD::TBEGIN;
2159 CCValid = SystemZ::CCMASK_TBEGIN;
2160 return true;
2161
2162 case Intrinsic::s390_tbegin_nofloat:
2163 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2164 CCValid = SystemZ::CCMASK_TBEGIN;
2165 return true;
2166
2167 case Intrinsic::s390_tend:
2168 Opcode = SystemZISD::TEND;
2169 CCValid = SystemZ::CCMASK_TEND;
2170 return true;
2171
2172 default:
2173 return false;
2174 }
2175}
2176
2177// Return true if Op is an intrinsic node without chain that returns the
2178// CC value as its final argument. Provide the associated SystemZISD
2179// opcode and the mask of valid CC values if so.
2180static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2181 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2182 switch (Id) {
2183 case Intrinsic::s390_vpkshs:
2184 case Intrinsic::s390_vpksfs:
2185 case Intrinsic::s390_vpksgs:
2186 Opcode = SystemZISD::PACKS_CC;
2187 CCValid = SystemZ::CCMASK_VCMP;
2188 return true;
2189
2190 case Intrinsic::s390_vpklshs:
2191 case Intrinsic::s390_vpklsfs:
2192 case Intrinsic::s390_vpklsgs:
2193 Opcode = SystemZISD::PACKLS_CC;
2194 CCValid = SystemZ::CCMASK_VCMP;
2195 return true;
2196
2197 case Intrinsic::s390_vceqbs:
2198 case Intrinsic::s390_vceqhs:
2199 case Intrinsic::s390_vceqfs:
2200 case Intrinsic::s390_vceqgs:
2201 Opcode = SystemZISD::VICMPES;
2202 CCValid = SystemZ::CCMASK_VCMP;
2203 return true;
2204
2205 case Intrinsic::s390_vchbs:
2206 case Intrinsic::s390_vchhs:
2207 case Intrinsic::s390_vchfs:
2208 case Intrinsic::s390_vchgs:
2209 Opcode = SystemZISD::VICMPHS;
2210 CCValid = SystemZ::CCMASK_VCMP;
2211 return true;
2212
2213 case Intrinsic::s390_vchlbs:
2214 case Intrinsic::s390_vchlhs:
2215 case Intrinsic::s390_vchlfs:
2216 case Intrinsic::s390_vchlgs:
2217 Opcode = SystemZISD::VICMPHLS;
2218 CCValid = SystemZ::CCMASK_VCMP;
2219 return true;
2220
2221 case Intrinsic::s390_vtm:
2222 Opcode = SystemZISD::VTM;
2223 CCValid = SystemZ::CCMASK_VCMP;
2224 return true;
2225
2226 case Intrinsic::s390_vfaebs:
2227 case Intrinsic::s390_vfaehs:
2228 case Intrinsic::s390_vfaefs:
2229 Opcode = SystemZISD::VFAE_CC;
2230 CCValid = SystemZ::CCMASK_ANY;
2231 return true;
2232
2233 case Intrinsic::s390_vfaezbs:
2234 case Intrinsic::s390_vfaezhs:
2235 case Intrinsic::s390_vfaezfs:
2236 Opcode = SystemZISD::VFAEZ_CC;
2237 CCValid = SystemZ::CCMASK_ANY;
2238 return true;
2239
2240 case Intrinsic::s390_vfeebs:
2241 case Intrinsic::s390_vfeehs:
2242 case Intrinsic::s390_vfeefs:
2243 Opcode = SystemZISD::VFEE_CC;
2244 CCValid = SystemZ::CCMASK_ANY;
2245 return true;
2246
2247 case Intrinsic::s390_vfeezbs:
2248 case Intrinsic::s390_vfeezhs:
2249 case Intrinsic::s390_vfeezfs:
2250 Opcode = SystemZISD::VFEEZ_CC;
2251 CCValid = SystemZ::CCMASK_ANY;
2252 return true;
2253
2254 case Intrinsic::s390_vfenebs:
2255 case Intrinsic::s390_vfenehs:
2256 case Intrinsic::s390_vfenefs:
2257 Opcode = SystemZISD::VFENE_CC;
2258 CCValid = SystemZ::CCMASK_ANY;
2259 return true;
2260
2261 case Intrinsic::s390_vfenezbs:
2262 case Intrinsic::s390_vfenezhs:
2263 case Intrinsic::s390_vfenezfs:
2264 Opcode = SystemZISD::VFENEZ_CC;
2265 CCValid = SystemZ::CCMASK_ANY;
2266 return true;
2267
2268 case Intrinsic::s390_vistrbs:
2269 case Intrinsic::s390_vistrhs:
2270 case Intrinsic::s390_vistrfs:
2271 Opcode = SystemZISD::VISTR_CC;
2272 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2273 return true;
2274
2275 case Intrinsic::s390_vstrcbs:
2276 case Intrinsic::s390_vstrchs:
2277 case Intrinsic::s390_vstrcfs:
2278 Opcode = SystemZISD::VSTRC_CC;
2279 CCValid = SystemZ::CCMASK_ANY;
2280 return true;
2281
2282 case Intrinsic::s390_vstrczbs:
2283 case Intrinsic::s390_vstrczhs:
2284 case Intrinsic::s390_vstrczfs:
2285 Opcode = SystemZISD::VSTRCZ_CC;
2286 CCValid = SystemZ::CCMASK_ANY;
2287 return true;
2288
2289 case Intrinsic::s390_vstrsb:
2290 case Intrinsic::s390_vstrsh:
2291 case Intrinsic::s390_vstrsf:
2292 Opcode = SystemZISD::VSTRS_CC;
2293 CCValid = SystemZ::CCMASK_ANY;
2294 return true;
2295
2296 case Intrinsic::s390_vstrszb:
2297 case Intrinsic::s390_vstrszh:
2298 case Intrinsic::s390_vstrszf:
2299 Opcode = SystemZISD::VSTRSZ_CC;
2300 CCValid = SystemZ::CCMASK_ANY;
2301 return true;
2302
2303 case Intrinsic::s390_vfcedbs:
2304 case Intrinsic::s390_vfcesbs:
2305 Opcode = SystemZISD::VFCMPES;
2306 CCValid = SystemZ::CCMASK_VCMP;
2307 return true;
2308
2309 case Intrinsic::s390_vfchdbs:
2310 case Intrinsic::s390_vfchsbs:
2311 Opcode = SystemZISD::VFCMPHS;
2312 CCValid = SystemZ::CCMASK_VCMP;
2313 return true;
2314
2315 case Intrinsic::s390_vfchedbs:
2316 case Intrinsic::s390_vfchesbs:
2317 Opcode = SystemZISD::VFCMPHES;
2318 CCValid = SystemZ::CCMASK_VCMP;
2319 return true;
2320
2321 case Intrinsic::s390_vftcidb:
2322 case Intrinsic::s390_vftcisb:
2323 Opcode = SystemZISD::VFTCI;
2324 CCValid = SystemZ::CCMASK_VCMP;
2325 return true;
2326
2327 case Intrinsic::s390_tdc:
2328 Opcode = SystemZISD::TDC;
2329 CCValid = SystemZ::CCMASK_TDC;
2330 return true;
2331
2332 default:
2333 return false;
2334 }
2335}
2336
2337// Emit an intrinsic with chain and an explicit CC register result.
2338 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2339 unsigned Opcode) {
2340 // Copy all operands except the intrinsic ID.
2341 unsigned NumOps = Op.getNumOperands();
2342 SmallVector<SDValue, 6> Ops;
2343 Ops.reserve(NumOps - 1);
2344 Ops.push_back(Op.getOperand(0));
2345 for (unsigned I = 2; I < NumOps; ++I)
2346 Ops.push_back(Op.getOperand(I));
2347
2348 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2349 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2350 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2351 SDValue OldChain = SDValue(Op.getNode(), 1);
2352 SDValue NewChain = SDValue(Intr.getNode(), 1);
2353 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2354 return Intr.getNode();
2355}
2356
2357// Emit an intrinsic with an explicit CC register result.
2358 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2359 unsigned Opcode) {
2360 // Copy all operands except the intrinsic ID.
2361 unsigned NumOps = Op.getNumOperands();
2362 SmallVector<SDValue, 6> Ops;
2363 Ops.reserve(NumOps - 1);
2364 for (unsigned I = 1; I < NumOps; ++I)
2365 Ops.push_back(Op.getOperand(I));
2366
2367 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2368 return Intr.getNode();
2369}
2370
2371// CC is a comparison that will be implemented using an integer or
2372// floating-point comparison. Return the condition code mask for
2373// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2374// unsigned comparisons and clear for signed ones. In the floating-point
2375// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2376 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2377 #define CONV(X) \
2378 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2379 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2380 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2381
2382 switch (CC) {
2383 default:
2384 llvm_unreachable("Invalid integer condition!");
2385
2386 CONV(EQ);
2387 CONV(NE);
2388 CONV(GT);
2389 CONV(GE);
2390 CONV(LT);
2391 CONV(LE);
2392
2393 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2394 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2395 }
2396#undef CONV
2397}
2398
2399// If C can be converted to a comparison against zero, adjust the operands
2400// as necessary.
2401static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2402 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2403 return;
2404
2405 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2406 if (!ConstOp1)
2407 return;
2408
2409 int64_t Value = ConstOp1->getSExtValue();
2410 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2411 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2412 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2413 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2414 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2415 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2416 }
2417}
2418
2419// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2420// adjust the operands as necessary.
2421static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2422 Comparison &C) {
2423 // For us to make any changes, it must be a comparison between a single-use
2424 // load and a constant.
2425 if (!C.Op0.hasOneUse() ||
2426 C.Op0.getOpcode() != ISD::LOAD ||
2427 C.Op1.getOpcode() != ISD::Constant)
2428 return;
2429
2430 // We must have an 8- or 16-bit load.
2431 auto *Load = cast<LoadSDNode>(C.Op0);
2432 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2433 if ((NumBits != 8 && NumBits != 16) ||
2434 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2435 return;
2436
2437 // The load must be an extending one and the constant must be within the
2438 // range of the unextended value.
2439 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2440 uint64_t Value = ConstOp1->getZExtValue();
2441 uint64_t Mask = (1 << NumBits) - 1;
2442 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2443 // Make sure that ConstOp1 is in range of C.Op0.
2444 int64_t SignedValue = ConstOp1->getSExtValue();
2445 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2446 return;
2447 if (C.ICmpType != SystemZICMP::SignedOnly) {
2448 // Unsigned comparison between two sign-extended values is equivalent
2449 // to unsigned comparison between two zero-extended values.
2450 Value &= Mask;
2451 } else if (NumBits == 8) {
2452 // Try to treat the comparison as unsigned, so that we can use CLI.
2453 // Adjust CCMask and Value as necessary.
2454 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2455 // Test whether the high bit of the byte is set.
2456 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2457 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2458 // Test whether the high bit of the byte is clear.
2459 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2460 else
2461 // No instruction exists for this combination.
2462 return;
2463 C.ICmpType = SystemZICMP::UnsignedOnly;
2464 }
2465 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2466 if (Value > Mask)
2467 return;
2468 // If the constant is in range, we can use any comparison.
2469 C.ICmpType = SystemZICMP::Any;
2470 } else
2471 return;
2472
2473 // Make sure that the first operand is an i32 of the right extension type.
2474 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2475 ISD::SEXTLOAD :
2476 ISD::ZEXTLOAD);
2477 if (C.Op0.getValueType() != MVT::i32 ||
2478 Load->getExtensionType() != ExtType) {
2479 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2480 Load->getBasePtr(), Load->getPointerInfo(),
2481 Load->getMemoryVT(), Load->getAlign(),
2482 Load->getMemOperand()->getFlags());
2483 // Update the chain uses.
2484 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2485 }
2486
2487 // Make sure that the second operand is an i32 with the right value.
2488 if (C.Op1.getValueType() != MVT::i32 ||
2489 Value != ConstOp1->getZExtValue())
2490 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2491}
2492
2493// Return true if Op is either an unextended load, or a load suitable
2494// for integer register-memory comparisons of type ICmpType.
2495static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2496 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2497 if (Load) {
2498 // There are no instructions to compare a register with a memory byte.
2499 if (Load->getMemoryVT() == MVT::i8)
2500 return false;
2501 // Otherwise decide on extension type.
2502 switch (Load->getExtensionType()) {
2503 case ISD::NON_EXTLOAD:
2504 return true;
2505 case ISD::SEXTLOAD:
2506 return ICmpType != SystemZICMP::UnsignedOnly;
2507 case ISD::ZEXTLOAD:
2508 return ICmpType != SystemZICMP::SignedOnly;
2509 default:
2510 break;
2511 }
2512 }
2513 return false;
2514}
2515
2516// Return true if it is better to swap the operands of C.
2517static bool shouldSwapCmpOperands(const Comparison &C) {
2518 // Leave f128 comparisons alone, since they have no memory forms.
2519 if (C.Op0.getValueType() == MVT::f128)
2520 return false;
2521
2522 // Always keep a floating-point constant second, since comparisons with
2523 // zero can use LOAD TEST and comparisons with other constants make a
2524 // natural memory operand.
2525 if (isa<ConstantFPSDNode>(C.Op1))
2526 return false;
2527
2528 // Never swap comparisons with zero since there are many ways to optimize
2529 // those later.
2530 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2531 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2532 return false;
2533
2534 // Also keep natural memory operands second if the loaded value is
2535 // only used here. Several comparisons have memory forms.
2536 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2537 return false;
2538
2539 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2540 // In that case we generally prefer the memory to be second.
2541 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2542 // The only exceptions are when the second operand is a constant and
2543 // we can use things like CHHSI.
2544 if (!ConstOp1)
2545 return true;
2546 // The unsigned memory-immediate instructions can handle 16-bit
2547 // unsigned integers.
2548 if (C.ICmpType != SystemZICMP::SignedOnly &&
2549 isUInt<16>(ConstOp1->getZExtValue()))
2550 return false;
2551 // The signed memory-immediate instructions can handle 16-bit
2552 // signed integers.
2553 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2554 isInt<16>(ConstOp1->getSExtValue()))
2555 return false;
2556 return true;
2557 }
2558
2559 // Try to promote the use of CGFR and CLGFR.
2560 unsigned Opcode0 = C.Op0.getOpcode();
2561 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2562 return true;
2563 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2564 return true;
2565 if (C.ICmpType != SystemZICMP::SignedOnly &&
2566 Opcode0 == ISD::AND &&
2567 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2568 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2569 return true;
2570
2571 return false;
2572}
2573
2574// Check whether C tests for equality between X and Y and whether X - Y
2575// or Y - X is also computed. In that case it's better to compare the
2576// result of the subtraction against zero.
2577 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2578 Comparison &C) {
2579 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2580 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2581 for (SDNode *N : C.Op0->uses()) {
2582 if (N->getOpcode() == ISD::SUB &&
2583 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2584 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2585 // Disable the nsw and nuw flags: the backend needs to handle
2586 // overflow as well during comparison elimination.
2587 SDNodeFlags Flags = N->getFlags();
2588 Flags.setNoSignedWrap(false);
2589 Flags.setNoUnsignedWrap(false);
2590 N->setFlags(Flags);
2591 C.Op0 = SDValue(N, 0);
2592 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2593 return;
2594 }
2595 }
2596 }
2597}
2598
2599// Check whether C compares a floating-point value with zero and if that
2600// floating-point value is also negated. In this case we can use the
2601// negation to set CC, so avoiding separate LOAD AND TEST and
2602// LOAD (NEGATIVE/COMPLEMENT) instructions.
2603static void adjustForFNeg(Comparison &C) {
2604 // This optimization is invalid for strict comparisons, since FNEG
2605 // does not raise any exceptions.
2606 if (C.Chain)
2607 return;
2608 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2609 if (C1 && C1->isZero()) {
2610 for (SDNode *N : C.Op0->uses()) {
2611 if (N->getOpcode() == ISD::FNEG) {
2612 C.Op0 = SDValue(N, 0);
2613 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2614 return;
2615 }
2616 }
2617 }
2618}
2619
2620// Check whether C compares (shl X, 32) with 0 and whether X is
2621// also sign-extended. In that case it is better to test the result
2622// of the sign extension using LTGFR.
2623//
2624// This case is important because InstCombine transforms a comparison
2625// with (sext (trunc X)) into a comparison with (shl X, 32).
2626static void adjustForLTGFR(Comparison &C) {
2627 // Check for a comparison between (shl X, 32) and 0.
2628 if (C.Op0.getOpcode() == ISD::SHL &&
2629 C.Op0.getValueType() == MVT::i64 &&
2630 C.Op1.getOpcode() == ISD::Constant &&
2631 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2632 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2633 if (C1 && C1->getZExtValue() == 32) {
2634 SDValue ShlOp0 = C.Op0.getOperand(0);
2635 // See whether X has any SIGN_EXTEND_INREG uses.
2636 for (SDNode *N : ShlOp0->uses()) {
2637 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2638 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2639 C.Op0 = SDValue(N, 0);
2640 return;
2641 }
2642 }
2643 }
2644 }
2645}
2646
2647// If C compares the truncation of an extending load, try to compare
2648// the untruncated value instead. This exposes more opportunities to
2649// reuse CC.
2650static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2651 Comparison &C) {
2652 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2653 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2654 C.Op1.getOpcode() == ISD::Constant &&
2655 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2656 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2657 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2658 C.Op0.getValueSizeInBits().getFixedValue()) {
2659 unsigned Type = L->getExtensionType();
2660 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2661 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2662 C.Op0 = C.Op0.getOperand(0);
2663 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2664 }
2665 }
2666 }
2667}
2668
2669// Return true if shift operation N has an in-range constant shift value.
2670// Store it in ShiftVal if so.
2671static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2672 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2673 if (!Shift)
2674 return false;
2675
2676 uint64_t Amount = Shift->getZExtValue();
2677 if (Amount >= N.getValueSizeInBits())
2678 return false;
2679
2680 ShiftVal = Amount;
2681 return true;
2682}
2683
2684// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2685// instruction and whether the CC value is descriptive enough to handle
2686// a comparison of type Opcode between the AND result and CmpVal.
2687// CCMask says which comparison result is being tested and BitSize is
2688// the number of bits in the operands. If TEST UNDER MASK can be used,
2689// return the corresponding CC mask, otherwise return 0.
2690static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2691 uint64_t Mask, uint64_t CmpVal,
2692 unsigned ICmpType) {
2693 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2694
2695 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2696 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2697 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2698 return 0;
2699
2700 // Work out the masks for the lowest and highest bits.
2702 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
2703
2704 // Signed ordered comparisons are effectively unsigned if the sign
2705 // bit is dropped.
2706 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2707
2708 // Check for equality comparisons with 0, or the equivalent.
2709 if (CmpVal == 0) {
2710 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2711 return SystemZ::CCMASK_TM_ALL_0;
2712 if (CCMask == SystemZ::CCMASK_CMP_NE)
2713 return SystemZ::CCMASK_TM_SOME_1;
2714 }
2715 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2716 if (CCMask == SystemZ::CCMASK_CMP_LT)
2717 return SystemZ::CCMASK_TM_ALL_0;
2718 if (CCMask == SystemZ::CCMASK_CMP_GE)
2719 return SystemZ::CCMASK_TM_SOME_1;
2720 }
2721 if (EffectivelyUnsigned && CmpVal < Low) {
2722 if (CCMask == SystemZ::CCMASK_CMP_LE)
2723 return SystemZ::CCMASK_TM_ALL_0;
2724 if (CCMask == SystemZ::CCMASK_CMP_GT)
2725 return SystemZ::CCMASK_TM_SOME_1;
2726 }
2727
2728 // Check for equality comparisons with the mask, or the equivalent.
2729 if (CmpVal == Mask) {
2730 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2731 return SystemZ::CCMASK_TM_ALL_1;
2732 if (CCMask == SystemZ::CCMASK_CMP_NE)
2733 return SystemZ::CCMASK_TM_SOME_0;
2734 }
2735 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2736 if (CCMask == SystemZ::CCMASK_CMP_GT)
2737 return SystemZ::CCMASK_TM_ALL_1;
2738 if (CCMask == SystemZ::CCMASK_CMP_LE)
2739 return SystemZ::CCMASK_TM_SOME_0;
2740 }
2741 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2742 if (CCMask == SystemZ::CCMASK_CMP_GE)
2743 return SystemZ::CCMASK_TM_ALL_1;
2744 if (CCMask == SystemZ::CCMASK_CMP_LT)
2745 return SystemZ::CCMASK_TM_SOME_0;
2746 }
2747
2748 // Check for ordered comparisons with the top bit.
2749 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2750 if (CCMask == SystemZ::CCMASK_CMP_LE)
2751 return SystemZ::CCMASK_TM_MSB_0;
2752 if (CCMask == SystemZ::CCMASK_CMP_GT)
2753 return SystemZ::CCMASK_TM_MSB_1;
2754 }
2755 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2756 if (CCMask == SystemZ::CCMASK_CMP_LT)
2757 return SystemZ::CCMASK_TM_MSB_0;
2758 if (CCMask == SystemZ::CCMASK_CMP_GE)
2759 return SystemZ::CCMASK_TM_MSB_1;
2760 }
2761
2762 // If there are just two bits, we can do equality checks for Low and High
2763 // as well.
2764 if (Mask == Low + High) {
2765 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2766 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2767 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2768 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2769 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2770 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2771 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2772 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2773 }
2774
2775 // Looks like we've exhausted our options.
2776 return 0;
2777}
2778
2779// See whether C can be implemented as a TEST UNDER MASK instruction.
2780// Update the arguments with the TM version if so.
2781 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2782 Comparison &C) {
2783 // Check that we have a comparison with a constant.
2784 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2785 if (!ConstOp1)
2786 return;
2787 uint64_t CmpVal = ConstOp1->getZExtValue();
2788
2789 // Check whether the nonconstant input is an AND with a constant mask.
2790 Comparison NewC(C);
2791 uint64_t MaskVal;
2792 ConstantSDNode *Mask = nullptr;
2793 if (C.Op0.getOpcode() == ISD::AND) {
2794 NewC.Op0 = C.Op0.getOperand(0);
2795 NewC.Op1 = C.Op0.getOperand(1);
2796 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2797 if (!Mask)
2798 return;
2799 MaskVal = Mask->getZExtValue();
2800 } else {
2801 // There is no instruction to compare with a 64-bit immediate
2802 // so use TMHH instead if possible. We need an unsigned ordered
2803 // comparison with an i64 immediate.
2804 if (NewC.Op0.getValueType() != MVT::i64 ||
2805 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2806 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2807 NewC.ICmpType == SystemZICMP::SignedOnly)
2808 return;
2809 // Convert LE and GT comparisons into LT and GE.
2810 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2811 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2812 if (CmpVal == uint64_t(-1))
2813 return;
2814 CmpVal += 1;
2815 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2816 }
2817 // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2818 // be masked off without changing the result.
2819 MaskVal = -(CmpVal & -CmpVal);
2820 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2821 }
2822 if (!MaskVal)
2823 return;
2824
2825 // Check whether the combination of mask, comparison value and comparison
2826 // type are suitable.
2827 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2828 unsigned NewCCMask, ShiftVal;
2829 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2830 NewC.Op0.getOpcode() == ISD::SHL &&
2831 isSimpleShift(NewC.Op0, ShiftVal) &&
2832 (MaskVal >> ShiftVal != 0) &&
2833 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2834 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2835 MaskVal >> ShiftVal,
2836 CmpVal >> ShiftVal,
2837 SystemZICMP::Any))) {
2838 NewC.Op0 = NewC.Op0.getOperand(0);
2839 MaskVal >>= ShiftVal;
2840 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2841 NewC.Op0.getOpcode() == ISD::SRL &&
2842 isSimpleShift(NewC.Op0, ShiftVal) &&
2843 (MaskVal << ShiftVal != 0) &&
2844 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2845 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2846 MaskVal << ShiftVal,
2847 CmpVal << ShiftVal,
2848 SystemZICMP::Any))) {
2849 NewC.Op0 = NewC.Op0.getOperand(0);
2850 MaskVal <<= ShiftVal;
2851 } else {
2852 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2853 NewC.ICmpType);
2854 if (!NewCCMask)
2855 return;
2856 }
2857
2858 // Go ahead and make the change.
2859 C.Opcode = SystemZISD::TM;
2860 C.Op0 = NewC.Op0;
2861 if (Mask && Mask->getZExtValue() == MaskVal)
2862 C.Op1 = SDValue(Mask, 0);
2863 else
2864 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2865 C.CCValid = SystemZ::CCMASK_TM;
2866 C.CCMask = NewCCMask;
2867}
2868
2869// See whether the comparison argument contains a redundant AND
2870// and remove it if so. This sometimes happens due to the generic
2871// BRCOND expansion.
2872 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2873 Comparison &C) {
2874 if (C.Op0.getOpcode() != ISD::AND)
2875 return;
2876 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2877 if (!Mask)
2878 return;
2879 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2880 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2881 return;
2882
2883 C.Op0 = C.Op0.getOperand(0);
2884}
2885
2886// Return a Comparison that tests the condition-code result of intrinsic
2887// node Call against constant integer CC using comparison code Cond.
2888// Opcode is the opcode of the SystemZISD operation for the intrinsic
2889// and CCValid is the set of possible condition-code results.
2890static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2891 SDValue Call, unsigned CCValid, uint64_t CC,
2892 ISD::CondCode Cond) {
2893 Comparison C(Call, SDValue(), SDValue());
2894 C.Opcode = Opcode;
2895 C.CCValid = CCValid;
2896 if (Cond == ISD::SETEQ)
2897 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2898 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2899 else if (Cond == ISD::SETNE)
2900 // ...and the inverse of that.
2901 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2902 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2903 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2904 // always true for CC>3.
2905 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2906 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2907 // ...and the inverse of that.
2908 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2909 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2910 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2911 // always true for CC>3.
2912 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2913 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2914 // ...and the inverse of that.
2915 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2916 else
2917 llvm_unreachable("Unexpected integer comparison type");
2918 C.CCMask &= CCValid;
2919 return C;
2920}
2921
2922 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2923static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2924 ISD::CondCode Cond, const SDLoc &DL,
2925 SDValue Chain = SDValue(),
2926 bool IsSignaling = false) {
2927 if (CmpOp1.getOpcode() == ISD::Constant) {
2928 assert(!Chain);
2929 uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2930 unsigned Opcode, CCValid;
2931 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2932 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2933 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2934 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2935 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2936 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2937 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2938 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2939 }
2940 Comparison C(CmpOp0, CmpOp1, Chain);
2941 C.CCMask = CCMaskForCondCode(Cond);
2942 if (C.Op0.getValueType().isFloatingPoint()) {
2943 C.CCValid = SystemZ::CCMASK_FCMP;
2944 if (!C.Chain)
2945 C.Opcode = SystemZISD::FCMP;
2946 else if (!IsSignaling)
2947 C.Opcode = SystemZISD::STRICT_FCMP;
2948 else
2949 C.Opcode = SystemZISD::STRICT_FCMPS;
2950 adjustForFNeg(C);
2951 } else {
2952 assert(!C.Chain);
2953 C.CCValid = SystemZ::CCMASK_ICMP;
2954 C.Opcode = SystemZISD::ICMP;
2955 // Choose the type of comparison. Equality and inequality tests can
2956 // use either signed or unsigned comparisons. The choice also doesn't
2957 // matter if both sign bits are known to be clear. In those cases we
2958 // want to give the main isel code the freedom to choose whichever
2959 // form fits best.
2960 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2961 C.CCMask == SystemZ::CCMASK_CMP_NE ||
2962 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2963 C.ICmpType = SystemZICMP::Any;
2964 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2965 C.ICmpType = SystemZICMP::UnsignedOnly;
2966 else
2967 C.ICmpType = SystemZICMP::SignedOnly;
2968 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2969 adjustForRedundantAnd(DAG, DL, C);
2970 adjustZeroCmp(DAG, DL, C);
2971 adjustSubwordCmp(DAG, DL, C);
2972 adjustForSubtraction(DAG, DL, C);
2973 adjustForLTGFR(C);
2974 adjustICmpTruncate(DAG, DL, C);
2975 }
2976
2977 if (shouldSwapCmpOperands(C)) {
2978 std::swap(C.Op0, C.Op1);
2979 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2980 }
2981
2982 adjustForTestUnderMask(DAG, DL, C);
2983 return C;
2984}
2985
2986// Emit the comparison instruction described by C.
2987static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2988 if (!C.Op1.getNode()) {
2989 SDNode *Node;
2990 switch (C.Op0.getOpcode()) {
2991 case ISD::INTRINSIC_W_CHAIN:
2992 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2993 return SDValue(Node, 0);
2994 case ISD::INTRINSIC_WO_CHAIN:
2995 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2996 return SDValue(Node, Node->getNumValues() - 1);
2997 default:
2998 llvm_unreachable("Invalid comparison operands");
2999 }
3000 }
3001 if (C.Opcode == SystemZISD::ICMP)
3002 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3003 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3004 if (C.Opcode == SystemZISD::TM) {
3005 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3006 bool(C.CCValid & SystemZ::CCMASK_TM_MIXED_MSB_0));
3007 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3008 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3009 }
3010 if (C.Chain) {
3011 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3012 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3013 }
3014 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3015}
3016
3017// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3018// 64 bits. Extend is the extension type to use. Store the high part
3019// in Hi and the low part in Lo.
3020static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3021 SDValue Op0, SDValue Op1, SDValue &Hi,
3022 SDValue &Lo) {
3023 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3024 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3025 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3026 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3027 DAG.getConstant(32, DL, MVT::i64));
3028 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3029 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3030}
3031
3032// Lower a binary operation that produces two VT results, one in each
3033// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3034// and Opcode performs the GR128 operation. Store the even register result
3035// in Even and the odd register result in Odd.
3036static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3037 unsigned Opcode, SDValue Op0, SDValue Op1,
3038 SDValue &Even, SDValue &Odd) {
3039 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3040 bool Is32Bit = is32Bit(VT);
3041 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3042 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3043}
3044
3045// Return an i32 value that is 1 if the CC value produced by CCReg is
3046// in the mask CCMask and 0 otherwise. CC is known to have a value
3047// in CCValid, so other values can be ignored.
3048static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3049 unsigned CCValid, unsigned CCMask) {
3050 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3051 DAG.getConstant(0, DL, MVT::i32),
3052 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3053 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3054 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3055}
3056
3057 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3058// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3059// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3060// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3061// floating-point comparisons.
3062 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3063 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3064 switch (CC) {
3065 case ISD::SETOEQ:
3066 case ISD::SETEQ:
3067 switch (Mode) {
3068 case CmpMode::Int: return SystemZISD::VICMPE;
3069 case CmpMode::FP: return SystemZISD::VFCMPE;
3070 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3071 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3072 }
3073 llvm_unreachable("Bad mode");
3074
3075 case ISD::SETOGE:
3076 case ISD::SETGE:
3077 switch (Mode) {
3078 case CmpMode::Int: return 0;
3079 case CmpMode::FP: return SystemZISD::VFCMPHE;
3080 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3081 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3082 }
3083 llvm_unreachable("Bad mode");
3084
3085 case ISD::SETOGT:
3086 case ISD::SETGT:
3087 switch (Mode) {
3088 case CmpMode::Int: return SystemZISD::VICMPH;
3089 case CmpMode::FP: return SystemZISD::VFCMPH;
3090 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3091 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3092 }
3093 llvm_unreachable("Bad mode");
3094
3095 case ISD::SETUGT:
3096 switch (Mode) {
3097 case CmpMode::Int: return SystemZISD::VICMPHL;
3098 case CmpMode::FP: return 0;
3099 case CmpMode::StrictFP: return 0;
3100 case CmpMode::SignalingFP: return 0;
3101 }
3102 llvm_unreachable("Bad mode");
3103
3104 default:
3105 return 0;
3106 }
3107}
3108
3109// Return the SystemZISD vector comparison operation for CC or its inverse,
3110// or 0 if neither can be done directly. Indicate in Invert whether the
3111// result is for the inverse of CC. Mode is as above.
3112 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3113 bool &Invert) {
3114 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3115 Invert = false;
3116 return Opcode;
3117 }
3118
3119 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3120 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3121 Invert = true;
3122 return Opcode;
3123 }
3124
3125 return 0;
3126}
3127
3128// Return a v2f64 that contains the extended form of elements Start and Start+1
3129// of v4f32 value Op. If Chain is nonnull, return the strict form.
3130static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3131 SDValue Op, SDValue Chain) {
3132 int Mask[] = { Start, -1, Start + 1, -1 };
3133 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3134 if (Chain) {
3135 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3136 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3137 }
3138 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3139}
3140
3141// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3142// producing a result of type VT. If Chain is nonnull, return the strict form.
3143SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3144 const SDLoc &DL, EVT VT,
3145 SDValue CmpOp0,
3146 SDValue CmpOp1,
3147 SDValue Chain) const {
3148 // There is no hardware support for v4f32 (unless we have the vector
3149 // enhancements facility 1), so extend the vector into two v2f64s
3150 // and compare those.
3151 if (CmpOp0.getValueType() == MVT::v4f32 &&
3152 !Subtarget.hasVectorEnhancements1()) {
3153 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3154 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3155 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3156 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3157 if (Chain) {
3158 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3159 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3160 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3161 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3162 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3163 H1.getValue(1), L1.getValue(1),
3164 HRes.getValue(1), LRes.getValue(1) };
3165 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3166 SDValue Ops[2] = { Res, NewChain };
3167 return DAG.getMergeValues(Ops, DL);
3168 }
3169 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3170 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3171 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3172 }
3173 if (Chain) {
3174 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3175 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3176 }
3177 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3178}
3179
3180// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3181// an integer mask of type VT. If Chain is nonnull, we have a strict
3182// floating-point comparison. If in addition IsSignaling is true, we have
3183// a strict signaling floating-point comparison.
3184SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3185 const SDLoc &DL, EVT VT,
3186 ISD::CondCode CC,
3187 SDValue CmpOp0,
3188 SDValue CmpOp1,
3189 SDValue Chain,
3190 bool IsSignaling) const {
3191 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3192 assert (!Chain || IsFP);
3193 assert (!IsSignaling || Chain);
3194 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3195 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3196 bool Invert = false;
3197 SDValue Cmp;
3198 switch (CC) {
3199 // Handle tests for order using (or (ogt y x) (oge x y)).
3200 case ISD::SETUO:
3201 Invert = true;
3202 [[fallthrough]];
3203 case ISD::SETO: {
3204 assert(IsFP && "Unexpected integer comparison");
3205 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3206 DL, VT, CmpOp1, CmpOp0, Chain);
3207 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3208 DL, VT, CmpOp0, CmpOp1, Chain);
3209 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3210 if (Chain)
3211 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3212 LT.getValue(1), GE.getValue(1));
3213 break;
3214 }
3215
3216 // Handle <> tests using (or (ogt y x) (ogt x y)).
3217 case ISD::SETUEQ:
3218 Invert = true;
3219 [[fallthrough]];
3220 case ISD::SETONE: {
3221 assert(IsFP && "Unexpected integer comparison");
3222 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3223 DL, VT, CmpOp1, CmpOp0, Chain);
3224 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3225 DL, VT, CmpOp0, CmpOp1, Chain);
3226 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3227 if (Chain)
3228 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3229 LT.getValue(1), GT.getValue(1));
3230 break;
3231 }
3232
3233 // Otherwise a single comparison is enough. It doesn't really
3234 // matter whether we try the inversion or the swap first, since
3235 // there are no cases where both work.
3236 default:
3237 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3238 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3239 else {
3240 CC = ISD::getSetCCSwappedOperands(CC);
3241 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3242 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3243 else
3244 llvm_unreachable("Unhandled comparison");
3245 }
3246 if (Chain)
3247 Chain = Cmp.getValue(1);
3248 break;
3249 }
3250 if (Invert) {
3251 SDValue Mask =
3252 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3253 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3254 }
3255 if (Chain && Chain.getNode() != Cmp.getNode()) {
3256 SDValue Ops[2] = { Cmp, Chain };
3257 Cmp = DAG.getMergeValues(Ops, DL);
3258 }
3259 return Cmp;
3260}
3261
3262SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3263 SelectionDAG &DAG) const {
3264 SDValue CmpOp0 = Op.getOperand(0);
3265 SDValue CmpOp1 = Op.getOperand(1);
3266 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3267 SDLoc DL(Op);
3268 EVT VT = Op.getValueType();
3269 if (VT.isVector())
3270 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3271
3272 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3273 SDValue CCReg = emitCmp(DAG, DL, C);
3274 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3275}
3276
3277SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3278 SelectionDAG &DAG,
3279 bool IsSignaling) const {
3280 SDValue Chain = Op.getOperand(0);
3281 SDValue CmpOp0 = Op.getOperand(1);
3282 SDValue CmpOp1 = Op.getOperand(2);
3283 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3284 SDLoc DL(Op);
3285 EVT VT = Op.getNode()->getValueType(0);
3286 if (VT.isVector()) {
3287 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3288 Chain, IsSignaling);
3289 return Res.getValue(Op.getResNo());
3290 }
3291
3292 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3293 SDValue CCReg = emitCmp(DAG, DL, C);
3294 CCReg->setFlags(Op->getFlags());
3295 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3296 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3297 return DAG.getMergeValues(Ops, DL);
3298}
3299
3300SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3301 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3302 SDValue CmpOp0 = Op.getOperand(2);
3303 SDValue CmpOp1 = Op.getOperand(3);
3304 SDValue Dest = Op.getOperand(4);
3305 SDLoc DL(Op);
3306
3307 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3308 SDValue CCReg = emitCmp(DAG, DL, C);
3309 return DAG.getNode(
3310 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3311 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3312 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3313}
3314
3315// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3316// allowing Pos and Neg to be wider than CmpOp.
3317static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3318 return (Neg.getOpcode() == ISD::SUB &&
3319 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3320 cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3321 Neg.getOperand(1) == Pos &&
3322 (Pos == CmpOp ||
3323 (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3324 Pos.getOperand(0) == CmpOp)));
3325}
3326
3327// Return the absolute or negative absolute of Op; IsNegative decides which.
3328 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3329 bool IsNegative) {
3330 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3331 if (IsNegative)
3332 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3333 DAG.getConstant(0, DL, Op.getValueType()), Op);
3334 return Op;
3335}
3336
3337SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3338 SelectionDAG &DAG) const {
3339 SDValue CmpOp0 = Op.getOperand(0);
3340 SDValue CmpOp1 = Op.getOperand(1);
3341 SDValue TrueOp = Op.getOperand(2);
3342 SDValue FalseOp = Op.getOperand(3);
3343 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3344 SDLoc DL(Op);
3345
3346 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3347
3348 // Check for absolute and negative-absolute selections, including those
3349 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3350 // This check supplements the one in DAGCombiner.
3351 if (C.Opcode == SystemZISD::ICMP &&
3352 C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3353 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3354 C.Op1.getOpcode() == ISD::Constant &&
3355 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3356 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3357 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3358 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3359 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3360 }
3361
3362 SDValue CCReg = emitCmp(DAG, DL, C);
3363 SDValue Ops[] = {TrueOp, FalseOp,
3364 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3365 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3366
3367 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3368}
3369
3370SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3371 SelectionDAG &DAG) const {
3372 SDLoc DL(Node);
3373 const GlobalValue *GV = Node->getGlobal();
3374 int64_t Offset = Node->getOffset();
3375 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3376 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3377
3378 SDValue Result;
3379 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3380 if (isInt<32>(Offset)) {
3381 // Assign anchors at 1<<12 byte boundaries.
3382 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3383 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3384 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3385
3386 // The offset can be folded into the address if it is aligned to a
3387 // halfword.
3388 Offset -= Anchor;
3389 if (Offset != 0 && (Offset & 1) == 0) {
3390 SDValue Full =
3391 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3392 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3393 Offset = 0;
3394 }
3395 } else {
3396 // Conservatively load a constant offset greater than 32 bits into a
3397 // register below.
3398 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3399 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3400 }
3401 } else if (Subtarget.isTargetELF()) {
3402 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3403 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3404 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3405 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3406 } else if (Subtarget.isTargetzOS()) {
3407 Result = getADAEntry(DAG, GV, DL, PtrVT);
3408 } else
3409 llvm_unreachable("Unexpected Subtarget");
3410
3411 // If there was a non-zero offset that we didn't fold, create an explicit
3412 // addition for it.
3413 if (Offset != 0)
3414 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3415 DAG.getConstant(Offset, DL, PtrVT));
3416
3417 return Result;
3418}
3419
3420SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3421 SelectionDAG &DAG,
3422 unsigned Opcode,
3423 SDValue GOTOffset) const {
3424 SDLoc DL(Node);
3425 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3426 SDValue Chain = DAG.getEntryNode();
3427 SDValue Glue;
3428
3429 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3430 CallingConv::GHC)
3431 report_fatal_error("In GHC calling convention TLS is not supported");
3432
3433 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3434 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3435 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3436 Glue = Chain.getValue(1);
3437 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3438 Glue = Chain.getValue(1);
3439
3440 // The first call operand is the chain and the second is the TLS symbol.
3441 SmallVector<SDValue, 8> Ops;
3442 Ops.push_back(Chain);
3443 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3444 Node->getValueType(0),
3445 0, 0));
3446
3447 // Add argument registers to the end of the list so that they are
3448 // known live into the call.
3449 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3450 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3451
3452 // Add a register mask operand representing the call-preserved registers.
3453 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3454 const uint32_t *Mask =
3455 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3456 assert(Mask && "Missing call preserved mask for calling convention");
3457 Ops.push_back(DAG.getRegisterMask(Mask));
3458
3459 // Glue the call to the argument copies.
3460 Ops.push_back(Glue);
3461
3462 // Emit the call.
3463 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3464 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3465 Glue = Chain.getValue(1);
3466
3467 // Copy the return value from %r2.
3468 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3469}
3470
3471SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3472 SelectionDAG &DAG) const {
3473 SDValue Chain = DAG.getEntryNode();
3474 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3475
3476 // The high part of the thread pointer is in access register 0.
3477 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3478 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3479
3480 // The low part of the thread pointer is in access register 1.
3481 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3482 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3483
3484 // Merge them into a single 64-bit address.
3485 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3486 DAG.getConstant(32, DL, PtrVT));
3487 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3488}
3489
3490SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3491 SelectionDAG &DAG) const {
3492 if (DAG.getTarget().useEmulatedTLS())
3493 return LowerToTLSEmulatedModel(Node, DAG);
3494 SDLoc DL(Node);
3495 const GlobalValue *GV = Node->getGlobal();
3496 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3497 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3498
3499 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3500 CallingConv::GHC)
3501 report_fatal_error("In GHC calling convention TLS is not supported");
3502
3503 SDValue TP = lowerThreadPointer(DL, DAG);
3504
3505 // Get the offset of GA from the thread pointer, based on the TLS model.
3506 SDValue Offset;
3507 switch (model) {
3508 case TLSModel::GeneralDynamic: {
3509 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3510 SystemZConstantPoolValue *CPV =
3511 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3512
3513 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3514 Offset = DAG.getLoad(
3515 PtrVT, DL, DAG.getEntryNode(), Offset,
3516 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3517
3518 // Call __tls_get_offset to retrieve the offset.
3519 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3520 break;
3521 }
3522
3523 case TLSModel::LocalDynamic: {
3524 // Load the GOT offset of the module ID.
3525 SystemZConstantPoolValue *CPV =
3526 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3527
3528 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3529 Offset = DAG.getLoad(
3530 PtrVT, DL, DAG.getEntryNode(), Offset,
3531 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3532
3533 // Call __tls_get_offset to retrieve the module base offset.
3534 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3535
3536 // Note: The SystemZLDCleanupPass will remove redundant computations
3537 // of the module base offset. Count total number of local-dynamic
3538 // accesses to trigger execution of that pass.
3539 SystemZMachineFunctionInfo* MFI =
3540 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3541 MFI->incNumLocalDynamicTLSAccesses();
3542
3543 // Add the per-symbol offset.
3544 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3545
3546 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3547 DTPOffset = DAG.getLoad(
3548 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3550
3551 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3552 break;
3553 }
3554
3555 case TLSModel::InitialExec: {
3556 // Load the offset from the GOT.
3557 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3558 SystemZII::MO_INDNTPOFF);
3559 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3560 Offset =
3561 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3562 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3563 break;
3564 }
3565
3566 case TLSModel::LocalExec: {
3567 // Force the offset into the constant pool and load it from there.
3568 SystemZConstantPoolValue *CPV =
3569 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3570
3571 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3572 Offset = DAG.getLoad(
3573 PtrVT, DL, DAG.getEntryNode(), Offset,
3574 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3575 break;
3576 }
3577 }
3578
3579 // Add the base and offset together.
3580 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3581}
3582
3583SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3584 SelectionDAG &DAG) const {
3585 SDLoc DL(Node);
3586 const BlockAddress *BA = Node->getBlockAddress();
3587 int64_t Offset = Node->getOffset();
3588 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3589
3590 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3591 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3592 return Result;
3593}
3594
3595SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3596 SelectionDAG &DAG) const {
3597 SDLoc DL(JT);
3598 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3599 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3600
3601 // Use LARL to load the address of the table.
3602 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3603}
3604
3605SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3606 SelectionDAG &DAG) const {
3607 SDLoc DL(CP);
3608 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3609
3610 SDValue Result;
3611 if (CP->isMachineConstantPoolEntry())
3612 Result =
3613 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3614 else
3615 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3616 CP->getOffset());
3617
3618 // Use LARL to load the address of the constant pool entry.
3619 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3620}
3621
3622SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3623 SelectionDAG &DAG) const {
3624 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3625 MachineFunction &MF = DAG.getMachineFunction();
3626 MachineFrameInfo &MFI = MF.getFrameInfo();
3627 MFI.setFrameAddressIsTaken(true);
3628
3629 SDLoc DL(Op);
3630 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3631 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3632
3633 // By definition, the frame address is the address of the back chain. (In
3634 // the case of packed stack without backchain, return the address where the
3635 // backchain would have been stored. This will either be an unused space or
3636 // contain a saved register).
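 // [Editorial note, not part of the upstream source: for Depth > 0 the loop
 // below follows the saved back chain links, loading one frame address per
 // level; this only works when the function maintains a back chain, hence the
 // fatal error otherwise.]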
3637 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3638 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3639
3640 if (Depth > 0) {
3641 // FIXME The frontend should detect this case.
3642 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3643 report_fatal_error("Unsupported stack frame traversal count");
3644
3645 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3646 while (Depth--) {
3647 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3648 MachinePointerInfo());
3649 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3650 }
3651 }
3652
3653 return BackChain;
3654}
3655
3656SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3657 SelectionDAG &DAG) const {
3658 MachineFunction &MF = DAG.getMachineFunction();
3659 MachineFrameInfo &MFI = MF.getFrameInfo();
3660 MFI.setReturnAddressIsTaken(true);
3661
3662 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3663 return SDValue();
3664
3665 SDLoc DL(Op);
3666 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3667 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3668
3669 if (Depth > 0) {
3670 // FIXME The frontend should detect this case.
3671 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3672 report_fatal_error("Unsupported stack frame traversal count");
3673
3674 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3675 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
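 // [Editorial note, not part of the upstream source: in the standard ELF
 // frame layout the caller's %r14 save slot lies 14 pointer-sized words above
 // the back chain (offset 112), while the packed-stack layout keeps the
 // return address at a small negative offset; hence the two factors below.]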
3676 int Offset = (TFL->usePackedStack(MF) ? -2 : 14) *
3677 getTargetMachine().getPointerSize(0);
3678 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3679 DAG.getConstant(Offset, DL, PtrVT));
3680 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3681 MachinePointerInfo());
3682 }
3683
3684 // Return R14D, which has the return address. Mark it an implicit live-in.
3685 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3686 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3687}
3688
3689SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3690 SelectionDAG &DAG) const {
3691 SDLoc DL(Op);
3692 SDValue In = Op.getOperand(0);
3693 EVT InVT = In.getValueType();
3694 EVT ResVT = Op.getValueType();
3695
3696 // Convert loads directly. This is normally done by DAGCombiner,
3697 // but we need this case for bitcasts that are created during lowering
3698 // and which are then lowered themselves.
3699 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3700 if (ISD::isNormalLoad(LoadN)) {
3701 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3702 LoadN->getBasePtr(), LoadN->getMemOperand());
3703 // Update the chain uses.
3704 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3705 return NewLoad;
3706 }
3707
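 // [Editorial note, not part of the upstream source: on SystemZ a 32-bit
 // float lives in the high half of a 64-bit register, so i32<->f32 bitcasts
 // are routed through i64/f64 using the subreg_h32 subregister, or through a
 // 32-bit shift when high-word support is unavailable.]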
3708 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3709 SDValue In64;
3710 if (Subtarget.hasHighWord()) {
3711 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3712 MVT::i64);
3713 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3714 MVT::i64, SDValue(U64, 0), In);
3715 } else {
3716 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3717 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3718 DAG.getConstant(32, DL, MVT::i64));
3719 }
3720 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3721 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3722 DL, MVT::f32, Out64);
3723 }
3724 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3725 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3726 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3727 MVT::f64, SDValue(U64, 0), In);
3728 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3729 if (Subtarget.hasHighWord())
3730 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3731 MVT::i32, Out64);
3732 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3733 DAG.getConstant(32, DL, MVT::i64));
3734 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3735 }
3736 llvm_unreachable("Unexpected bitcast combination");
3737}
3738
3739SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3740 SelectionDAG &DAG) const {
3741
3742 if (Subtarget.isTargetXPLINK64())
3743 return lowerVASTART_XPLINK(Op, DAG);
3744 else
3745 return lowerVASTART_ELF(Op, DAG);
3746}
3747
3748SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3749 SelectionDAG &DAG) const {
3750 MachineFunction &MF = DAG.getMachineFunction();
3751 SystemZMachineFunctionInfo *FuncInfo =
3752 MF.getInfo<SystemZMachineFunctionInfo>();
3753
3754 SDLoc DL(Op);
3755
3756 // vastart just stores the address of the VarArgsFrameIndex slot into the
3757 // memory location argument.
3758 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3759 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3760 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3761 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3762 MachinePointerInfo(SV));
3763}
3764
3765SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3766 SelectionDAG &DAG) const {
3767 MachineFunction &MF = DAG.getMachineFunction();
3768 SystemZMachineFunctionInfo *FuncInfo =
3769 MF.getInfo<SystemZMachineFunctionInfo>();
3770 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3771
3772 SDValue Chain = Op.getOperand(0);
3773 SDValue Addr = Op.getOperand(1);
3774 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3775 SDLoc DL(Op);
3776
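 // [Editorial note, not part of the upstream source: the four fields mirror
 // the s390x ELF va_list layout, roughly (field names illustrative)
 //   struct __va_list { long __gpr; long __fpr;
 //                      void *__overflow_arg_area; void *__reg_save_area; };
 // stored at 8-byte intervals below.]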
3777 // The initial values of each field.
3778 const unsigned NumFields = 4;
3779 SDValue Fields[NumFields] = {
3780 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3781 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3782 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3783 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3784 };
3785
3786 // Store each field into its respective slot.
3787 SDValue MemOps[NumFields];
3788 unsigned Offset = 0;
3789 for (unsigned I = 0; I < NumFields; ++I) {
3790 SDValue FieldAddr = Addr;
3791 if (Offset != 0)
3792 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3793 DAG.getIntPtrConstant(Offset, DL));
3794 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3795 MachinePointerInfo(SV, Offset));
3796 Offset += 8;
3797 }
3798 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3799}
3800
3801SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3802 SelectionDAG &DAG) const {
3803 SDValue Chain = Op.getOperand(0);
3804 SDValue DstPtr = Op.getOperand(1);
3805 SDValue SrcPtr = Op.getOperand(2);
3806 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3807 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3808 SDLoc DL(Op);
3809
3810 uint32_t Sz =
3811 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
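 // [Editorial note, not part of the upstream source: the ELF va_list is a
 // 32-byte structure, so va_copy reduces to a 32-byte memcpy; on XPLINK the
 // va_list is a single pointer, hence pointer-size bytes.]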
3812 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3813 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3814 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3815 MachinePointerInfo(SrcSV));
3816}
3817
3818SDValue
3819SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3820 SelectionDAG &DAG) const {
3821 if (Subtarget.isTargetXPLINK64())
3822 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3823 else
3824 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3825}
3826
3827SDValue
3828SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3829 SelectionDAG &DAG) const {
3830 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3831 MachineFunction &MF = DAG.getMachineFunction();
3832 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3833 SDValue Chain = Op.getOperand(0);
3834 SDValue Size = Op.getOperand(1);
3835 SDValue Align = Op.getOperand(2);
3836 SDLoc DL(Op);
3837
3838 // If user has set the no alignment function attribute, ignore
3839 // alloca alignments.
3840 uint64_t AlignVal =
3841 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3842
3843 uint64_t StackAlign = TFI->getStackAlignment();
3844 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3845 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3846
3847 SDValue NeededSpace = Size;
3848
3849 // Add extra space for alignment if needed.
3850 EVT PtrVT = getPointerTy(MF.getDataLayout());
3851 if (ExtraAlignSpace)
3852 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3853 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3854
3855 bool IsSigned = false;
3856 bool DoesNotReturn = false;
3857 bool IsReturnValueUsed = false;
3858 EVT VT = Op.getValueType();
3859 SDValue AllocaCall =
3860 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
3861 CallingConv::C, IsSigned, DL, DoesNotReturn,
3862 IsReturnValueUsed)
3863 .first;
3864
3865 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
3866 // to end of call in order to ensure it isn't broken up from the call
3867 // sequence.
3868 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3869 Register SPReg = Regs.getStackPointerRegister();
3870 Chain = AllocaCall.getValue(1);
3871 SDValue Glue = AllocaCall.getValue(2);
3872 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
3873 Chain = NewSPRegNode.getValue(1);
3874
3875 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
3876 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
3877 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
3878
3879 // Dynamically realign if needed.
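 // [Editorial note, not part of the upstream source: the allocation was grown
 // by ExtraAlignSpace above, so adding ExtraAlignSpace here and clearing the
 // low bits with ~(RequiredAlign - 1) rounds the returned pointer up to the
 // requested alignment while staying inside the over-allocated block.]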
3880 if (ExtraAlignSpace) {
3881 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3882 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3883 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
3884 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
3885 }
3886
3887 SDValue Ops[2] = {Result, Chain};
3888 return DAG.getMergeValues(Ops, DL);
3889}
3890
3891SDValue
3892SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
3893 SelectionDAG &DAG) const {
3894 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3895 MachineFunction &MF = DAG.getMachineFunction();
3896 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3897 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
3898
3899 SDValue Chain = Op.getOperand(0);
3900 SDValue Size = Op.getOperand(1);
3901 SDValue Align = Op.getOperand(2);
3902 SDLoc DL(Op);
3903
3904 // If user has set the no alignment function attribute, ignore
3905 // alloca alignments.
3906 uint64_t AlignVal =
3907 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3908
3909 uint64_t StackAlign = TFI->getStackAlignment();
3910 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3911 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3912
3913 Register SPReg = getStackPointerRegisterToSaveRestore();
3914 SDValue NeededSpace = Size;
3915
3916 // Get a reference to the stack pointer.
3917 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3918
3919 // If we need a backchain, save it now.
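 // [Editorial note, not part of the upstream source: the back chain word is
 // reloaded here so it can be stored again at the new stack pointer location
 // once the allocation below has adjusted the stack pointer.]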
3920 SDValue Backchain;
3921 if (StoreBackchain)
3922 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3923 MachinePointerInfo());
3924
3925 // Add extra space for alignment if needed.
3926 if (ExtraAlignSpace)
3927 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3928 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3929
3930 // Get the new stack pointer value.
3931 SDValue NewSP;
3932 if (hasInlineStackProbe(MF)) {
3933 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3934 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3935 Chain = NewSP.