SystemZISelLowering.cpp
1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
17 #include "SystemZTargetMachine.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/IntrinsicsS390.h"
26 #include "llvm/Support/KnownBits.h"
27 #include <cctype>
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "systemz-lower"
32 
33 namespace {
34 // Represents information about a comparison.
35 struct Comparison {
36  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
37  : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
38  Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
39 
40  // The operands to the comparison.
41  SDValue Op0, Op1;
42 
43  // Chain if this is a strict floating-point comparison.
44  SDValue Chain;
45 
46  // The opcode that should be used to compare Op0 and Op1.
47  unsigned Opcode;
48 
49  // A SystemZICMP value. Only used for integer comparisons.
50  unsigned ICmpType;
51 
52  // The mask of CC values that Opcode can produce.
53  unsigned CCValid;
54 
55  // The mask of CC values for which the original condition is true.
56  unsigned CCMask;
57 };
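// Illustrative note (added for clarity, not in the original source): for a
// plain integer equality test this struct typically ends up with
// Opcode = SystemZISD::ICMP, CCValid = SystemZ::CCMASK_ICMP and
// CCMask = SystemZ::CCMASK_CMP_EQ, so the condition holds exactly for the
// CC value meaning "equal".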
58 } // end anonymous namespace
59 
60 // Classify VT as either 32 or 64 bit.
61 static bool is32Bit(EVT VT) {
62  switch (VT.getSimpleVT().SimpleTy) {
63  case MVT::i32:
64  return true;
65  case MVT::i64:
66  return false;
67  default:
68  llvm_unreachable("Unsupported type");
69  }
70 }
71 
72 // Return a version of MachineOperand that can be safely used before the
73 // final use.
74 static MachineOperand earlyUseOperand(MachineOperand Op) {
75   if (Op.isReg())
76  Op.setIsKill(false);
77  return Op;
78 }
79 
80 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
81                                              const SystemZSubtarget &STI)
82  : TargetLowering(TM), Subtarget(STI) {
83  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
84 
85  auto *Regs = STI.getSpecialRegisters();
86 
87  // Set up the register classes.
88  if (Subtarget.hasHighWord())
89  addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
90  else
91  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
92  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
93  if (!useSoftFloat()) {
94  if (Subtarget.hasVector()) {
95  addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
96  addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
97  } else {
98  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
99  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
100  }
101  if (Subtarget.hasVectorEnhancements1())
102  addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
103  else
104  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
105 
106  if (Subtarget.hasVector()) {
107  addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
108  addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
109  addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
110  addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
111  addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
112  addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
113  }
114  }
115 
116  // Compute derived properties from the register classes
117   computeRegisterProperties(Subtarget.getRegisterInfo());
118 
119  // Set up special registers.
120  setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
121 
122  // TODO: It may be better to default to latency-oriented scheduling, however
123  // LLVM's current latency-oriented scheduler can't handle physreg definitions
124  // such as SystemZ has with CC, so set this to the register-pressure
125  // scheduler, because it can.
127 
130 
131   // Instructions are strings of 2-byte aligned 2-byte values.
132   setMinFunctionAlignment(Align(2));
133   // For performance reasons we prefer 16-byte alignment.
134   setPrefFunctionAlignment(Align(16));
135 
136  // Handle operations that are handled in a similar way for all types.
137  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
139  ++I) {
140  MVT VT = MVT::SimpleValueType(I);
141  if (isTypeLegal(VT)) {
142  // Lower SET_CC into an IPM-based sequence.
146 
147  // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
149 
150  // Lower SELECT_CC and BR_CC into separate comparisons and branches.
153  }
154  }
155 
156  // Expand jump table branches as address arithmetic followed by an
157  // indirect jump.
159 
160  // Expand BRCOND into a BR_CC (see above).
162 
163  // Handle integer types.
164  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
166  ++I) {
167  MVT VT = MVT::SimpleValueType(I);
168  if (isTypeLegal(VT)) {
170 
171  // Expand individual DIV and REMs into DIVREMs.
178 
179  // Support addition/subtraction with overflow.
182 
183  // Support addition/subtraction with carry.
186 
187  // Support carry in as value rather than glue.
190 
191  // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
192  // stores, putting a serialization instruction after the stores.
195 
196  // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
197  // available, or if the operand is constant.
199 
200  // Use POPCNT on z196 and above.
201  if (Subtarget.hasPopulationCount())
203  else
205 
206  // No special instructions for these.
209 
210  // Use *MUL_LOHI where possible instead of MULH*.
215 
216  // Only z196 and above have native support for conversions to unsigned.
217  // On z10, promoting to i64 doesn't generate an inexact condition for
218  // values that are outside the i32 range but in the i64 range, so use
219  // the default expansion.
220  if (!Subtarget.hasFPExtension())
222 
223  // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
224  // default to Expand, so need to be modified to Legal where appropriate.
226  if (Subtarget.hasFPExtension())
228 
229  // And similarly for STRICT_[SU]INT_TO_FP.
231  if (Subtarget.hasFPExtension())
233  }
234  }
235 
236  // Type legalization will convert 8- and 16-bit atomic operations into
237  // forms that operate on i32s (but still keeping the original memory VT).
238  // Lower them into full i32 operations.
250 
251  // Even though i128 is not a legal type, we still need to custom lower
252  // the atomic operations in order to exploit SystemZ instructions.
255 
256  // We can use the CC result of compare-and-swap to implement
257  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
261 
263 
264  // Traps are legal, as we will convert them to "j .+2".
266 
267  // z10 has instructions for signed but not unsigned FP conversion.
268  // Handle unsigned 32-bit types as signed 64-bit types.
269  if (!Subtarget.hasFPExtension()) {
274  }
275 
276  // We have native support for a 64-bit CTLZ, via FLOGR.
280 
281  // On z15 we have native support for a 64-bit CTPOP.
282  if (Subtarget.hasMiscellaneousExtensions3()) {
285  }
286 
287   // Give LowerOperation the chance to replace 64-bit ORs with subregs.
288   setOperationAction(ISD::OR, MVT::i64, Custom);
289 
290   // Expand 128 bit shifts without using a libcall.
291   setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
292   setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
293   setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
294  setLibcallName(RTLIB::SRL_I128, nullptr);
295  setLibcallName(RTLIB::SHL_I128, nullptr);
296  setLibcallName(RTLIB::SRA_I128, nullptr);
297 
298  // Handle bitcast from fp128 to i128.
299   setOperationAction(ISD::BITCAST, MVT::i128, Custom);
300 
301  // We have native instructions for i8, i16 and i32 extensions, but not i1.
303  for (MVT VT : MVT::integer_valuetypes()) {
307  }
308 
309  // Handle the various types of symbolic address.
315 
316  // We need to handle dynamic allocations specially because of the
317  // 160-byte area at the bottom of the stack.
320 
323 
324  // Handle prefetches with PFD or PFDRL.
326 
327  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
328  // Assume by default that all vector operations need to be expanded.
329  for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
330  if (getOperationAction(Opcode, VT) == Legal)
331  setOperationAction(Opcode, VT, Expand);
332 
333  // Likewise all truncating stores and extending loads.
334  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
335  setTruncStoreAction(VT, InnerVT, Expand);
336  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
337  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
338  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
339  }
340 
341  if (isTypeLegal(VT)) {
342  // These operations are legal for anything that can be stored in a
343  // vector register, even if there is no native support for the format
344  // as such. In particular, we can do these for v4f32 even though there
345  // are no specific instructions for that format.
351 
352  // Likewise, except that we need to replace the nodes with something
353  // more specific.
356  }
357  }
358 
359  // Handle integer vector types.
361  if (isTypeLegal(VT)) {
362  // These operations have direct equivalents.
367  if (VT != MVT::v2i64)
373  if (Subtarget.hasVectorEnhancements1())
375  else
379 
380  // Convert a GPR scalar to a vector by inserting it into element 0.
382 
383  // Use a series of unpacks for extensions.
386 
387  // Detect shifts by a scalar amount and convert them into
388  // V*_BY_SCALAR.
392 
393  // At present ROTL isn't matched by DAGCombiner. ROTR should be
394  // converted into ROTL.
397 
398  // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
399  // and inverting the result as necessary.
402  if (Subtarget.hasVectorEnhancements1())
404  }
405  }
406 
407  if (Subtarget.hasVector()) {
408  // There should be no need to check for float types other than v2f64
409  // since <2 x f32> isn't a legal type.
418 
427  }
428 
429  if (Subtarget.hasVectorEnhancements2()) {
438 
447  }
448 
449  // Handle floating-point types.
450  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
452  ++I) {
453  MVT VT = MVT::SimpleValueType(I);
454  if (isTypeLegal(VT)) {
455  // We can use FI for FRINT.
457 
458  // We can use the extended form of FI for other rounding operations.
459  if (Subtarget.hasFPExtension()) {
465  }
466 
467  // No special instructions for these.
473 
474  // Handle constrained floating-point operations.
484  if (Subtarget.hasFPExtension()) {
490  }
491  }
492  }
493 
494  // Handle floating-point vector types.
495  if (Subtarget.hasVector()) {
496  // Scalar-to-vector conversion is just a subreg.
499 
500  // Some insertions and extractions can be done directly but others
501  // need to go via integers.
506 
507  // These operations have direct equivalents.
522 
523  // Handle constrained floating-point operations.
536  }
537 
538  // The vector enhancements facility 1 has instructions for these.
539  if (Subtarget.hasVectorEnhancements1()) {
554 
559 
564 
569 
574 
579 
580  // Handle constrained floating-point operations.
593  for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
594  MVT::v4f32, MVT::v2f64 }) {
599  }
600  }
601 
602  // We only have fused f128 multiply-addition on vector registers.
603  if (!Subtarget.hasVectorEnhancements1()) {
606  }
607 
608  // We don't have a copysign instruction on vector registers.
609  if (Subtarget.hasVectorEnhancements1())
611 
612  // Needed so that we don't try to implement f128 constant loads using
613  // a load-and-extend of a f80 constant (in cases where the constant
614  // would fit in an f80).
615  for (MVT VT : MVT::fp_valuetypes())
617 
618  // We don't have extending load instruction on vector registers.
619  if (Subtarget.hasVectorEnhancements1()) {
622  }
623 
624  // Floating-point truncation and stores need to be done separately.
628 
629  // We have 64-bit FPR<->GPR moves, but need special handling for
630  // 32-bit forms.
631  if (!Subtarget.hasVector()) {
634  }
635 
636  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
637  // structure, but VAEND is a no-op.
641 
642  // Codes for which we want to perform some z-specific combinations.
663 
664  // Handle intrinsics.
667 
668  // We want to use MVC in preference to even a single load/store pair.
669  MaxStoresPerMemcpy = 0;
671 
672  // The main memset sequence is a byte store followed by an MVC.
673  // Two STC or MV..I stores win over that, but the kind of fused stores
674  // generated by target-independent code don't when the byte value is
675  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
676  // than "STC;MVC". Handle the choice in target-specific code instead.
677  MaxStoresPerMemset = 0;
679 
680  // Default to having -disable-strictnode-mutation on
681  IsStrictFPEnabled = true;
682 }
683 
684 bool SystemZTargetLowering::useSoftFloat() const {
685   return Subtarget.hasSoftFloat();
686 }
687 
688 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
689                                               LLVMContext &, EVT VT) const {
690   if (!VT.isVector())
691     return MVT::i32;
692   return VT.changeVectorElementTypeToInteger();
693 }
694 
695 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
696     const MachineFunction &MF, EVT VT) const {
697  VT = VT.getScalarType();
698 
699  if (!VT.isSimple())
700  return false;
701 
702  switch (VT.getSimpleVT().SimpleTy) {
703  case MVT::f32:
704  case MVT::f64:
705  return true;
706  case MVT::f128:
707  return Subtarget.hasVectorEnhancements1();
708  default:
709  break;
710  }
711 
712  return false;
713 }
714 
715 // Return true if the constant can be generated with a vector instruction,
716 // such as VGM, VGMB or VREPI.
717 bool SystemZVectorConstantInfo::isVectorConstantLegal(
718     const SystemZSubtarget &Subtarget) {
719  const SystemZInstrInfo *TII =
720  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
721  if (!Subtarget.hasVector() ||
722  (isFP128 && !Subtarget.hasVectorEnhancements1()))
723  return false;
724 
725  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
726  // preferred way of creating all-zero and all-one vectors so give it
727  // priority over other methods below.
728  unsigned Mask = 0;
729  unsigned I = 0;
730  for (; I < SystemZ::VectorBytes; ++I) {
731  uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
732  if (Byte == 0xff)
733  Mask |= 1ULL << I;
734  else if (Byte != 0)
735  break;
736  }
737  if (I == SystemZ::VectorBytes) {
738     Opcode = SystemZISD::BYTE_MASK;
739     OpVals.push_back(Mask);
740     VecVT = MVT::v16i8;
741     return true;
742  }
743 
744  if (SplatBitSize > 64)
745  return false;
746 
747  auto tryValue = [&](uint64_t Value) -> bool {
748  // Try VECTOR REPLICATE IMMEDIATE
749  int64_t SignedValue = SignExtend64(Value, SplatBitSize);
750  if (isInt<16>(SignedValue)) {
751       OpVals.push_back(((unsigned) SignedValue));
752       Opcode = SystemZISD::REPLICATE;
753  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
754  SystemZ::VectorBits / SplatBitSize);
755  return true;
756  }
757  // Try VECTOR GENERATE MASK
758  unsigned Start, End;
759  if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
760  // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
761  // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
762   // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
763  OpVals.push_back(Start - (64 - SplatBitSize));
764       OpVals.push_back(End - (64 - SplatBitSize));
765       Opcode = SystemZISD::ROTATE_MASK;
766  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
767  SystemZ::VectorBits / SplatBitSize);
768  return true;
769  }
770  return false;
771  };
772 
773  // First try assuming that any undefined bits above the highest set bit
774  // and below the lowest set bit are 1s. This increases the likelihood of
775  // being able to use a sign-extended element value in VECTOR REPLICATE
776  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
777  uint64_t SplatBitsZ = SplatBits.getZExtValue();
778  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
779  uint64_t Lower =
780  (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
781  uint64_t Upper =
782  (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
783  if (tryValue(SplatBitsZ | Upper | Lower))
784  return true;
785 
786  // Now try assuming that any undefined bits between the first and
787  // last defined set bits are set. This increases the chances of
788  // using a non-wraparound mask.
789  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
790  return tryValue(SplatBitsZ | Middle);
791 }
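// Worked example (illustrative, not part of the original file): a 32-bit
// splat of 0xffff0000 whose low 16 bits are undefined sign-extends to a
// value outside the signed 16-bit range, but folding the undefined low bits
// in as ones gives 0xffffffff, i.e. -1, which VECTOR REPLICATE IMMEDIATE
// can materialize directly.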
792 
793 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
794   IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
795  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
796  SplatBits = FPImm.bitcastToAPInt();
797  unsigned Width = SplatBits.getBitWidth();
798  IntBits <<= (SystemZ::VectorBits - Width);
799 
800  // Find the smallest splat.
801  while (Width > 8) {
802  unsigned HalfSize = Width / 2;
803  APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
804  APInt LowValue = SplatBits.trunc(HalfSize);
805 
806  // If the two halves do not match, stop here.
807  if (HighValue != LowValue || 8 > HalfSize)
808  break;
809 
810  SplatBits = HighValue;
811  Width = HalfSize;
812  }
813  SplatUndef = 0;
814  SplatBitSize = Width;
815 }
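// Worked example (illustrative, not part of the original file): a bit
// pattern of 0x0001000100010001 halves to 0x00010001 and then to 0x0001, so
// SplatBits ends up as 0x0001 with SplatBitSize == 16; the loop stops as
// soon as the two halves differ or the half-width would drop below 8 bits.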
816 
817 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
818   assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
819  bool HasAnyUndefs;
820 
821  // Get IntBits by finding the 128 bit splat.
822  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
823  true);
824 
825  // Get SplatBits by finding the 8 bit or greater splat.
826  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
827  true);
828 }
829 
830 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
831                                          bool ForCodeSize) const {
832  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
833  if (Imm.isZero() || Imm.isNegZero())
834  return true;
835 
836  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
837 }
838 
839 /// Returns true if stack probing through inline assembly is requested.
840 bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
841   // If the function specifically requests inline stack probes, emit them.
842  if (MF.getFunction().hasFnAttribute("probe-stack"))
843  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
844  "inline-asm";
845  return false;
846 }
847 
848 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
849   // We can use CGFI or CLGFI.
850  return isInt<32>(Imm) || isUInt<32>(Imm);
851 }
852 
853 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
854   // We can use ALGFI or SLGFI.
855  return isUInt<32>(Imm) || isUInt<32>(-Imm);
856 }
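// Note (added for clarity): CGFI/CLGFI take signed/unsigned 32-bit
// comparison immediates respectively, while ALGFI and SLGFI take unsigned
// 32-bit immediates, so adding a negative constant is still legal whenever
// its negation fits in 32 bits (it becomes a subtraction).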
857 
858 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
859     EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
860  // Unaligned accesses should never be slower than the expanded version.
861  // We check specifically for aligned accesses in the few cases where
862  // they are required.
863  if (Fast)
864  *Fast = true;
865  return true;
866 }
867 
868 // Information about the addressing mode for a memory access.
869 struct AddressingMode {
870   // True if a long displacement is supported.
871   bool LongDisplacement;
872 
873  // True if use of index register is supported.
874  bool IndexReg;
875 
876  AddressingMode(bool LongDispl, bool IdxReg) :
877  LongDisplacement(LongDispl), IndexReg(IdxReg) {}
878 };
879 
880 // Return the desired addressing mode for a Load which has only one use (in
881 // the same block) which is a Store.
882 static AddressingMode getLoadStoreAddrMode(bool HasVector,
883  Type *Ty) {
884  // With vector support a Load->Store combination may be combined to either
885  // an MVC or vector operations and it seems to work best to allow the
886  // vector addressing mode.
887  if (HasVector)
888  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
889 
890  // Otherwise only the MVC case is special.
891  bool MVC = Ty->isIntegerTy(8);
892  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
893 }
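// Example (added for clarity): an i8 load whose only user is a store is
// typically turned into an MVC, which offers neither an index register nor
// a 20-bit displacement, so both LongDispl and IdxReg end up false in the
// non-vector case above.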
894 
895 // Return the addressing mode which seems most desirable given an LLVM
896 // Instruction pointer.
897 static AddressingMode
898 supportedAddressingMode(Instruction *I, bool HasVector) {
899   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
900  switch (II->getIntrinsicID()) {
901  default: break;
902  case Intrinsic::memset:
903  case Intrinsic::memmove:
904  case Intrinsic::memcpy:
905  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
906  }
907  }
908 
909  if (isa<LoadInst>(I) && I->hasOneUse()) {
910  auto *SingleUser = cast<Instruction>(*I->user_begin());
911  if (SingleUser->getParent() == I->getParent()) {
912  if (isa<ICmpInst>(SingleUser)) {
913  if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
914  if (C->getBitWidth() <= 64 &&
915  (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
916  // Comparison of memory with 16 bit signed / unsigned immediate
917  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
918  } else if (isa<StoreInst>(SingleUser))
919  // Load->Store
920  return getLoadStoreAddrMode(HasVector, I->getType());
921  }
922  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
923  if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
924  if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
925  // Load->Store
926  return getLoadStoreAddrMode(HasVector, LoadI->getType());
927  }
928 
929  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
930 
931  // * Use LDE instead of LE/LEY for z13 to avoid partial register
932  // dependencies (LDE only supports small offsets).
933  // * Utilize the vector registers to hold floating point
934  // values (vector load / store instructions only support small
935  // offsets).
936 
937  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
938  I->getOperand(0)->getType());
939  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
940  bool IsVectorAccess = MemAccessTy->isVectorTy();
941 
942  // A store of an extracted vector element will be combined into a VSTE type
943  // instruction.
944  if (!IsVectorAccess && isa<StoreInst>(I)) {
945  Value *DataOp = I->getOperand(0);
946  if (isa<ExtractElementInst>(DataOp))
947  IsVectorAccess = true;
948  }
949 
950  // A load which gets inserted into a vector element will be combined into a
951  // VLE type instruction.
952  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
953  User *LoadUser = *I->user_begin();
954  if (isa<InsertElementInst>(LoadUser))
955  IsVectorAccess = true;
956  }
957 
958  if (IsFPAccess || IsVectorAccess)
959  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
960  }
961 
962  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
963 }
964 
965 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
966     const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
967  // Punt on globals for now, although they can be used in limited
968  // RELATIVE LONG cases.
969  if (AM.BaseGV)
970  return false;
971 
972  // Require a 20-bit signed offset.
973  if (!isInt<20>(AM.BaseOffs))
974  return false;
975 
976  AddressingMode SupportedAM(true, true);
977  if (I != nullptr)
978  SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
979 
980  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
981  return false;
982 
983  if (!SupportedAM.IndexReg)
984  // No indexing allowed.
985  return AM.Scale == 0;
986  else
987  // Indexing is OK but no scale factor can be applied.
988  return AM.Scale == 0 || AM.Scale == 1;
989 }
990 
991 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
992   if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
993  return false;
994  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
995  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
996  return FromBits > ToBits;
997 }
998 
999 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
1000   if (!FromVT.isInteger() || !ToVT.isInteger())
1001  return false;
1002  unsigned FromBits = FromVT.getFixedSizeInBits();
1003  unsigned ToBits = ToVT.getFixedSizeInBits();
1004  return FromBits > ToBits;
1005 }
1006 
1007 //===----------------------------------------------------------------------===//
1008 // Inline asm support
1009 //===----------------------------------------------------------------------===//
1010 
1011 TargetLowering::ConstraintType
1012 SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1013   if (Constraint.size() == 1) {
1014  switch (Constraint[0]) {
1015  case 'a': // Address register
1016  case 'd': // Data register (equivalent to 'r')
1017  case 'f': // Floating-point register
1018  case 'h': // High-part register
1019  case 'r': // General-purpose register
1020  case 'v': // Vector register
1021  return C_RegisterClass;
1022 
1023  case 'Q': // Memory with base and unsigned 12-bit displacement
1024  case 'R': // Likewise, plus an index
1025  case 'S': // Memory with base and signed 20-bit displacement
1026  case 'T': // Likewise, plus an index
1027  case 'm': // Equivalent to 'T'.
1028  return C_Memory;
1029 
1030  case 'I': // Unsigned 8-bit constant
1031  case 'J': // Unsigned 12-bit constant
1032  case 'K': // Signed 16-bit constant
1033  case 'L': // Signed 20-bit displacement (on all targets we support)
1034  case 'M': // 0x7fffffff
1035  return C_Immediate;
1036 
1037  default:
1038  break;
1039  }
1040  }
1041  return TargetLowering::getConstraintType(Constraint);
1042 }
1043 
1044 TargetLowering::ConstraintWeight SystemZTargetLowering::
1045 getSingleConstraintMatchWeight(AsmOperandInfo &info,
1046                                const char *constraint) const {
1047  ConstraintWeight weight = CW_Invalid;
1048  Value *CallOperandVal = info.CallOperandVal;
1049  // If we don't have a value, we can't do a match,
1050  // but allow it at the lowest weight.
1051  if (!CallOperandVal)
1052  return CW_Default;
1053  Type *type = CallOperandVal->getType();
1054  // Look at the constraint type.
1055  switch (*constraint) {
1056  default:
1057     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1058     break;
1059 
1060  case 'a': // Address register
1061  case 'd': // Data register (equivalent to 'r')
1062  case 'h': // High-part register
1063  case 'r': // General-purpose register
1064  if (CallOperandVal->getType()->isIntegerTy())
1065  weight = CW_Register;
1066  break;
1067 
1068  case 'f': // Floating-point register
1069  if (type->isFloatingPointTy())
1070  weight = CW_Register;
1071  break;
1072 
1073  case 'v': // Vector register
1074  if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1075  Subtarget.hasVector())
1076  weight = CW_Register;
1077  break;
1078 
1079  case 'I': // Unsigned 8-bit constant
1080  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1081  if (isUInt<8>(C->getZExtValue()))
1082  weight = CW_Constant;
1083  break;
1084 
1085  case 'J': // Unsigned 12-bit constant
1086  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1087  if (isUInt<12>(C->getZExtValue()))
1088  weight = CW_Constant;
1089  break;
1090 
1091  case 'K': // Signed 16-bit constant
1092  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1093  if (isInt<16>(C->getSExtValue()))
1094  weight = CW_Constant;
1095  break;
1096 
1097  case 'L': // Signed 20-bit displacement (on all targets we support)
1098  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1099  if (isInt<20>(C->getSExtValue()))
1100  weight = CW_Constant;
1101  break;
1102 
1103  case 'M': // 0x7fffffff
1104  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1105  if (C->getZExtValue() == 0x7fffffff)
1106  weight = CW_Constant;
1107  break;
1108  }
1109  return weight;
1110 }
1111 
1112 // Parse a "{tNNN}" register constraint for which the register type "t"
1113 // has already been verified. MC is the class associated with "t" and
1114 // Map maps 0-based register numbers to LLVM register numbers.
1115 static std::pair<unsigned, const TargetRegisterClass *>
1116 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1117                     const unsigned *Map, unsigned Size) {
1118  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1119  if (isdigit(Constraint[2])) {
1120  unsigned Index;
1121  bool Failed =
1122  Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1123  if (!Failed && Index < Size && Map[Index])
1124  return std::make_pair(Map[Index], RC);
1125  }
1126  return std::make_pair(0U, nullptr);
1127 }
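// Usage sketch (illustrative): for the constraint "{r5}" with a 64-bit
// operand, Constraint[2] is '5', Index parses to 5, and Map[5]
// (SystemZMC::GR64Regs[5]) yields SystemZ::R5D paired with the GR64 class.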
1128 
1129 std::pair<unsigned, const TargetRegisterClass *>
1130 SystemZTargetLowering::getRegForInlineAsmConstraint(
1131     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1132  if (Constraint.size() == 1) {
1133  // GCC Constraint Letters
1134  switch (Constraint[0]) {
1135  default: break;
1136  case 'd': // Data register (equivalent to 'r')
1137  case 'r': // General-purpose register
1138  if (VT == MVT::i64)
1139  return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1140  else if (VT == MVT::i128)
1141  return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1142  return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1143 
1144  case 'a': // Address register
1145  if (VT == MVT::i64)
1146  return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1147  else if (VT == MVT::i128)
1148  return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1149  return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1150 
1151  case 'h': // High-part register (an LLVM extension)
1152  return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1153 
1154  case 'f': // Floating-point register
1155  if (!useSoftFloat()) {
1156  if (VT == MVT::f64)
1157  return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1158  else if (VT == MVT::f128)
1159  return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1160  return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1161  }
1162  break;
1163  case 'v': // Vector register
1164  if (Subtarget.hasVector()) {
1165  if (VT == MVT::f32)
1166  return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1167  if (VT == MVT::f64)
1168  return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1169  return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1170  }
1171  break;
1172  }
1173  }
1174  if (Constraint.size() > 0 && Constraint[0] == '{') {
1175  // We need to override the default register parsing for GPRs and FPRs
1176  // because the interpretation depends on VT. The internal names of
1177  // the registers are also different from the external names
1178  // (F0D and F0S instead of F0, etc.).
1179  if (Constraint[1] == 'r') {
1180  if (VT == MVT::i32)
1181  return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1182  SystemZMC::GR32Regs, 16);
1183  if (VT == MVT::i128)
1184  return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1185  SystemZMC::GR128Regs, 16);
1186  return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1187  SystemZMC::GR64Regs, 16);
1188  }
1189  if (Constraint[1] == 'f') {
1190  if (useSoftFloat())
1191  return std::make_pair(
1192  0u, static_cast<const TargetRegisterClass *>(nullptr));
1193  if (VT == MVT::f32)
1194  return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1195  SystemZMC::FP32Regs, 16);
1196  if (VT == MVT::f128)
1197  return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1198  SystemZMC::FP128Regs, 16);
1199  return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1200  SystemZMC::FP64Regs, 16);
1201  }
1202  if (Constraint[1] == 'v') {
1203  if (!Subtarget.hasVector())
1204  return std::make_pair(
1205  0u, static_cast<const TargetRegisterClass *>(nullptr));
1206  if (VT == MVT::f32)
1207  return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1208  SystemZMC::VR32Regs, 32);
1209  if (VT == MVT::f64)
1210  return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1211  SystemZMC::VR64Regs, 32);
1212  return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1213  SystemZMC::VR128Regs, 32);
1214  }
1215  }
1216  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1217 }
1218 
1219 // FIXME? Maybe this could be a TableGen attribute on some registers and
1220 // this table could be generated automatically from RegInfo.
1221 Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1222                                                   const MachineFunction &MF) const {
1223 
1224   Register Reg = StringSwitch<Register>(RegName)
1225              .Case("r15", SystemZ::R15D)
1226  .Default(0);
1227  if (Reg)
1228  return Reg;
1229  report_fatal_error("Invalid register name global variable");
1230 }
1231 
1232 void SystemZTargetLowering::
1233 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1234  std::vector<SDValue> &Ops,
1235  SelectionDAG &DAG) const {
1236  // Only support length 1 constraints for now.
1237  if (Constraint.length() == 1) {
1238  switch (Constraint[0]) {
1239  case 'I': // Unsigned 8-bit constant
1240  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1241  if (isUInt<8>(C->getZExtValue()))
1242  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1243  Op.getValueType()));
1244  return;
1245 
1246  case 'J': // Unsigned 12-bit constant
1247  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1248  if (isUInt<12>(C->getZExtValue()))
1249  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1250  Op.getValueType()));
1251  return;
1252 
1253  case 'K': // Signed 16-bit constant
1254  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1255  if (isInt<16>(C->getSExtValue()))
1256  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1257  Op.getValueType()));
1258  return;
1259 
1260  case 'L': // Signed 20-bit displacement (on all targets we support)
1261  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1262  if (isInt<20>(C->getSExtValue()))
1263  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1264  Op.getValueType()));
1265  return;
1266 
1267  case 'M': // 0x7fffffff
1268  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1269  if (C->getZExtValue() == 0x7fffffff)
1270  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1271  Op.getValueType()));
1272  return;
1273  }
1274  }
1275  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1276 }
1277 
1278 //===----------------------------------------------------------------------===//
1279 // Calling conventions
1280 //===----------------------------------------------------------------------===//
1281 
1282 #include "SystemZGenCallingConv.inc"
1283 
1284 const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1285     CallingConv::ID) const {
1286  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1287  SystemZ::R14D, 0 };
1288  return ScratchRegs;
1289 }
1290 
1291 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1292                                                      Type *ToType) const {
1293  return isTruncateFree(FromType, ToType);
1294 }
1295 
1296 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1297   return CI->isTailCall();
1298 }
1299 
1300 // We do not yet support 128-bit single-element vector types. If the user
1301 // attempts to use such types as function argument or return type, prefer
1302 // to error out instead of emitting code violating the ABI.
1303 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1304  if (ArgVT.isVector() && !VT.isVector())
1305  report_fatal_error("Unsupported vector argument or return type");
1306 }
1307 
1308 static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1309   for (unsigned i = 0; i < Ins.size(); ++i)
1310  VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1311 }
1312 
1313 static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1314   for (unsigned i = 0; i < Outs.size(); ++i)
1315  VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1316 }
1317 
1318 // Value is a value that has been passed to us in the location described by VA
1319 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1320 // any loads onto Chain.
1321 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1322                                    CCValAssign &VA, SDValue Chain,
1323  SDValue Value) {
1324  // If the argument has been promoted from a smaller type, insert an
1325  // assertion to capture this.
1326  if (VA.getLocInfo() == CCValAssign::SExt)
1327  Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1328  DAG.getValueType(VA.getValVT()));
1329  else if (VA.getLocInfo() == CCValAssign::ZExt)
1330  Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1331  DAG.getValueType(VA.getValVT()));
1332 
1333  if (VA.isExtInLoc())
1334  Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1335  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1336  // If this is a short vector argument loaded from the stack,
1337  // extend from i64 to full vector size and then bitcast.
1338  assert(VA.getLocVT() == MVT::i64);
1339  assert(VA.getValVT().isVector());
1341  Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1342  } else
1343  assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1344  return Value;
1345 }
1346 
1347 // Value is a value of type VA.getValVT() that we need to copy into
1348 // the location described by VA. Return a copy of Value converted to
1349 // VA.getValVT(). The caller is responsible for handling indirect values.
1350 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1351                                    CCValAssign &VA, SDValue Value) {
1352  switch (VA.getLocInfo()) {
1353  case CCValAssign::SExt:
1354  return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1355  case CCValAssign::ZExt:
1356  return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1357  case CCValAssign::AExt:
1358  return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1359  case CCValAssign::BCvt: {
1360  assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1361  assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
1362  VA.getValVT() == MVT::f128);
1363  MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1364  ? MVT::v2i64
1365  : VA.getLocVT();
1366  Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1367  // For ELF, this is a short vector argument to be stored to the stack,
1368  // bitcast to v2i64 and then extract first element.
1369  if (BitCastToType == MVT::v2i64)
1370  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1371  DAG.getConstant(0, DL, MVT::i32));
1372  return Value;
1373  }
1374  case CCValAssign::Full:
1375  return Value;
1376  default:
1377  llvm_unreachable("Unhandled getLocInfo()");
1378  }
1379 }
1380 
1381 static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1382   SDLoc DL(In);
1383   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1384                            DAG.getIntPtrConstant(0, DL));
1385   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1386                            DAG.getIntPtrConstant(1, DL));
1387  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1388  MVT::Untyped, Hi, Lo);
1389  return SDValue(Pair, 0);
1390 }
1391 
1392 static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1393   SDLoc DL(In);
1394  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1395  DL, MVT::i64, In);
1396  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1397  DL, MVT::i64, In);
1398  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1399 }
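// Note (added for clarity): lowerI128ToGR128 splits an i128 into its low and
// high i64 halves and glues them into an untyped 128-bit register pair via
// the PAIR128 pseudo; lowerGR128ToI128 is the inverse, extracting the
// subreg_h64/subreg_l64 halves and rebuilding the i128 with BUILD_PAIR.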
1400 
1401 bool SystemZTargetLowering::splitValueIntoRegisterParts(
1402     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1403  unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
1404  EVT ValueVT = Val.getValueType();
1405  assert((ValueVT != MVT::i128 ||
1406  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1407  (NumParts == 2 && PartVT == MVT::i64))) &&
1408  "Unknown handling of i128 value.");
1409  if (ValueVT == MVT::i128 && NumParts == 1) {
1410  // Inline assembly operand.
1411  Parts[0] = lowerI128ToGR128(DAG, Val);
1412  return true;
1413  }
1414  return false;
1415 }
1416 
1417 SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1418     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1419  MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
1420  assert((ValueVT != MVT::i128 ||
1421  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1422  (NumParts == 2 && PartVT == MVT::i64))) &&
1423  "Unknown handling of i128 value.");
1424  if (ValueVT == MVT::i128 && NumParts == 1)
1425  // Inline assembly operand.
1426  return lowerGR128ToI128(DAG, Parts[0]);
1427  return SDValue();
1428 }
1429 
1430 SDValue SystemZTargetLowering::LowerFormalArguments(
1431     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1432  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1433  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1434  MachineFunction &MF = DAG.getMachineFunction();
1435  MachineFrameInfo &MFI = MF.getFrameInfo();
1437  SystemZMachineFunctionInfo *FuncInfo =
1439  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1440  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1441 
1442  // Detect unsupported vector argument types.
1443  if (Subtarget.hasVector())
1445 
1446  // Assign locations to all of the incoming arguments.
1448  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1449  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1450 
1451  unsigned NumFixedGPRs = 0;
1452  unsigned NumFixedFPRs = 0;
1453  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1454  SDValue ArgValue;
1455  CCValAssign &VA = ArgLocs[I];
1456  EVT LocVT = VA.getLocVT();
1457  if (VA.isRegLoc()) {
1458  // Arguments passed in registers
1459  const TargetRegisterClass *RC;
1460  switch (LocVT.getSimpleVT().SimpleTy) {
1461  default:
1462  // Integers smaller than i64 should be promoted to i64.
1463  llvm_unreachable("Unexpected argument type");
1464  case MVT::i32:
1465  NumFixedGPRs += 1;
1466  RC = &SystemZ::GR32BitRegClass;
1467  break;
1468  case MVT::i64:
1469  NumFixedGPRs += 1;
1470  RC = &SystemZ::GR64BitRegClass;
1471  break;
1472  case MVT::f32:
1473  NumFixedFPRs += 1;
1474  RC = &SystemZ::FP32BitRegClass;
1475  break;
1476  case MVT::f64:
1477  NumFixedFPRs += 1;
1478  RC = &SystemZ::FP64BitRegClass;
1479  break;
1480  case MVT::f128:
1481  NumFixedFPRs += 2;
1482  RC = &SystemZ::FP128BitRegClass;
1483  break;
1484  case MVT::v16i8:
1485  case MVT::v8i16:
1486  case MVT::v4i32:
1487  case MVT::v2i64:
1488  case MVT::v4f32:
1489  case MVT::v2f64:
1490  RC = &SystemZ::VR128BitRegClass;
1491  break;
1492  }
1493 
1494  Register VReg = MRI.createVirtualRegister(RC);
1495  MRI.addLiveIn(VA.getLocReg(), VReg);
1496  ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1497  } else {
1498  assert(VA.isMemLoc() && "Argument not register or memory");
1499 
1500  // Create the frame index object for this incoming parameter.
1501  // FIXME: Pre-include call frame size in the offset, should not
1502  // need to manually add it here.
1503  int64_t ArgSPOffset = VA.getLocMemOffset();
1504  if (Subtarget.isTargetXPLINK64()) {
1505  auto &XPRegs =
1507  ArgSPOffset += XPRegs.getCallFrameSize();
1508  }
1509  int FI =
1510  MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1511 
1512  // Create the SelectionDAG nodes corresponding to a load
1513  // from this parameter. Unpromoted ints and floats are
1514  // passed as right-justified 8-byte values.
1515  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1516  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1517  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1518  DAG.getIntPtrConstant(4, DL));
1519  ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1521  }
1522 
1523  // Convert the value of the argument register into the value that's
1524  // being passed.
1525  if (VA.getLocInfo() == CCValAssign::Indirect) {
1526  InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1527  MachinePointerInfo()));
1528  // If the original argument was split (e.g. i128), we need
1529  // to load all parts of it here (using the same address).
1530  unsigned ArgIndex = Ins[I].OrigArgIndex;
1531  assert (Ins[I].PartOffset == 0);
1532  while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1533  CCValAssign &PartVA = ArgLocs[I + 1];
1534  unsigned PartOffset = Ins[I + 1].PartOffset;
1535  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1536  DAG.getIntPtrConstant(PartOffset, DL));
1537  InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1538  MachinePointerInfo()));
1539  ++I;
1540  }
1541  } else
1542  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1543  }
1544 
1545  // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
1546  if (IsVarArg && Subtarget.isTargetELF()) {
1547  // Save the number of non-varargs registers for later use by va_start, etc.
1548  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1549  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1550 
1551  // Likewise the address (in the form of a frame index) of where the
1552  // first stack vararg would be. The 1-byte size here is arbitrary.
1553  int64_t StackSize = CCInfo.getNextStackOffset();
1554  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1555 
1556  // ...and a similar frame index for the caller-allocated save area
1557  // that will be used to store the incoming registers.
1558  int64_t RegSaveOffset =
1559  -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1560  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1561  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1562 
1563  // Store the FPR varargs in the reserved frame slots. (We store the
1564  // GPRs as part of the prologue.)
1565  if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1567  for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1568  unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1569  int FI =
1571  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1573  &SystemZ::FP64BitRegClass);
1574  SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1575  MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1577  }
1578  // Join the stores, which are independent of one another.
1579  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1580  makeArrayRef(&MemOps[NumFixedFPRs],
1581  SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1582  }
1583  }
1584 
1585  // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
1586  // register (R5)
1587  return Chain;
1588 }
1589 
1590 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1591                               SmallVectorImpl<CCValAssign> &ArgLocs,
1592                               SmallVectorImpl<ISD::OutputArg> &Outs) {
1593   // Punt if there are any indirect or stack arguments, or if the call
1594  // needs the callee-saved argument register R6, or if the call uses
1595  // the callee-saved register arguments SwiftSelf and SwiftError.
1596  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1597  CCValAssign &VA = ArgLocs[I];
1598  if (VA.getLocInfo() == CCValAssign::Indirect)
1599  return false;
1600  if (!VA.isRegLoc())
1601  return false;
1602  Register Reg = VA.getLocReg();
1603  if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1604  return false;
1605  if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1606  return false;
1607  }
1608  return true;
1609 }
1610 
1611 SDValue
1612 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1613                                  SmallVectorImpl<SDValue> &InVals) const {
1614  SelectionDAG &DAG = CLI.DAG;
1615  SDLoc &DL = CLI.DL;
1617  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1619  SDValue Chain = CLI.Chain;
1620  SDValue Callee = CLI.Callee;
1621  bool &IsTailCall = CLI.IsTailCall;
1622  CallingConv::ID CallConv = CLI.CallConv;
1623  bool IsVarArg = CLI.IsVarArg;
1624  MachineFunction &MF = DAG.getMachineFunction();
1625  EVT PtrVT = getPointerTy(MF.getDataLayout());
1626  LLVMContext &Ctx = *DAG.getContext();
1628 
1629  // FIXME: z/OS support to be added in later.
1630  if (Subtarget.isTargetXPLINK64())
1631  IsTailCall = false;
1632 
1633  // Detect unsupported vector argument and return types.
1634  if (Subtarget.hasVector()) {
1635  VerifyVectorTypes(Outs);
1637  }
1638 
1639  // Analyze the operands of the call, assigning locations to each operand.
1641  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1642  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1643 
1644  // We don't support GuaranteedTailCallOpt, only automatically-detected
1645  // sibling calls.
1646  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1647  IsTailCall = false;
1648 
1649  // Get a count of how many bytes are to be pushed on the stack.
1650  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1651 
1652  if (Subtarget.isTargetXPLINK64())
1653     // Although the XPLINK specifications for AMODE64 state that the minimum
1654     // size of the param area is 32 bytes and no rounding is otherwise
1655     // specified, we round this area up in 64-byte increments to be
1656     // compatible with existing compilers.
1657  NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
1658 
1659  // Mark the start of the call.
1660  if (!IsTailCall)
1661  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1662 
1663  // Copy argument values to their designated locations.
1665  SmallVector<SDValue, 8> MemOpChains;
1666  SDValue StackPtr;
1667  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1668  CCValAssign &VA = ArgLocs[I];
1669  SDValue ArgValue = OutVals[I];
1670 
1671  if (VA.getLocInfo() == CCValAssign::Indirect) {
1672  // Store the argument in a stack slot and pass its address.
1673  unsigned ArgIndex = Outs[I].OrigArgIndex;
1674  EVT SlotVT;
1675  if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1676  // Allocate the full stack space for a promoted (and split) argument.
1677  Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1678  EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1679  MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1680  unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1681  SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1682  } else {
1683  SlotVT = Outs[I].ArgVT;
1684  }
1685  SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1686  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1687  MemOpChains.push_back(
1688  DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1690  // If the original argument was split (e.g. i128), we need
1691  // to store all parts of it here (and pass just one address).
1692  assert (Outs[I].PartOffset == 0);
1693  while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1694  SDValue PartValue = OutVals[I + 1];
1695  unsigned PartOffset = Outs[I + 1].PartOffset;
1696  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1697  DAG.getIntPtrConstant(PartOffset, DL));
1698  MemOpChains.push_back(
1699  DAG.getStore(Chain, DL, PartValue, Address,
1701  assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1702  SlotVT.getStoreSize()) && "Not enough space for argument part!");
1703  ++I;
1704  }
1705  ArgValue = SpillSlot;
1706  } else
1707  ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1708 
1709  if (VA.isRegLoc()) {
1710  // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1711  // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1712  // and low values.
1713  if (VA.getLocVT() == MVT::i128)
1714  ArgValue = lowerI128ToGR128(DAG, ArgValue);
1715  // Queue up the argument copies and emit them at the end.
1716  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1717  } else {
1718  assert(VA.isMemLoc() && "Argument not register or memory");
1719 
1720  // Work out the address of the stack slot. Unpromoted ints and
1721  // floats are passed as right-justified 8-byte values.
1722  if (!StackPtr.getNode())
1723  StackPtr = DAG.getCopyFromReg(Chain, DL,
1724  Regs->getStackPointerRegister(), PtrVT);
1725  unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1726  VA.getLocMemOffset();
1727  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1728  Offset += 4;
1729  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1730  DAG.getIntPtrConstant(Offset, DL));
1731 
1732  // Emit the store.
1733  MemOpChains.push_back(
1734  DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1735 
1736  // Although long doubles or vectors are passed through the stack when
1737  // they are vararg (non-fixed arguments), if a long double or vector
1738  // occupies the third and fourth slot of the argument list GPR3 should
1739  // still shadow the third slot of the argument list.
1740  if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1741  SDValue ShadowArgValue =
1742  DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1743  DAG.getIntPtrConstant(1, DL));
1744  RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1745  }
1746  }
1747  }
1748 
1749  // Join the stores, which are independent of one another.
1750  if (!MemOpChains.empty())
1751  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1752 
1753  // Accept direct calls by converting symbolic call addresses to the
1754  // associated Target* opcodes. Force %r1 to be used for indirect
1755  // tail calls.
1756  SDValue Glue;
1757  // FIXME: Add support for XPLINK using the ADA register.
1758  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1759  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1761  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1762  Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1764  } else if (IsTailCall) {
1765  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1766  Glue = Chain.getValue(1);
1767  Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1768  }
1769 
1770  // Build a sequence of copy-to-reg nodes, chained and glued together.
1771  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1772  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1773  RegsToPass[I].second, Glue);
1774  Glue = Chain.getValue(1);
1775  }
1776 
1777  // The first call operand is the chain and the second is the target address.
1779  Ops.push_back(Chain);
1780  Ops.push_back(Callee);
1781 
1782  // Add argument registers to the end of the list so that they are
1783  // known live into the call.
1784  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1785  Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1786  RegsToPass[I].second.getValueType()));
1787 
1788  // Add a register mask operand representing the call-preserved registers.
1789  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1790  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1791  assert(Mask && "Missing call preserved mask for calling convention");
1792  Ops.push_back(DAG.getRegisterMask(Mask));
1793 
1794  // Glue the call to the argument copies, if any.
1795  if (Glue.getNode())
1796  Ops.push_back(Glue);
1797 
1798  // Emit the call.
1799  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1800  if (IsTailCall)
1801  return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1802  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1803  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1804  Glue = Chain.getValue(1);
1805 
1806  // Mark the end of the call, which is glued to the call itself.
1807  Chain = DAG.getCALLSEQ_END(Chain,
1808  DAG.getConstant(NumBytes, DL, PtrVT, true),
1809  DAG.getConstant(0, DL, PtrVT, true),
1810  Glue, DL);
1811  Glue = Chain.getValue(1);
1812 
1813  // Assign locations to each value returned by this call.
1815  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1816  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1817 
1818  // Copy all of the result registers out of their specified physreg.
1819  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1820  CCValAssign &VA = RetLocs[I];
1821 
1822  // Copy the value out, gluing the copy to the end of the call sequence.
1823  SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1824  VA.getLocVT(), Glue);
1825  Chain = RetValue.getValue(1);
1826  Glue = RetValue.getValue(2);
1827 
1828  // Convert the value of the return register into the value that's
1829  // being returned.
1830  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1831  }
1832 
1833  return Chain;
1834 }
1835 
1836 bool SystemZTargetLowering::
1837 CanLowerReturn(CallingConv::ID CallConv,
1838                MachineFunction &MF, bool isVarArg,
1839  const SmallVectorImpl<ISD::OutputArg> &Outs,
1840  LLVMContext &Context) const {
1841  // Detect unsupported vector return types.
1842  if (Subtarget.hasVector())
1843  VerifyVectorTypes(Outs);
1844 
1845  // Special case that we cannot easily detect in RetCC_SystemZ since
1846  // i128 is not a legal type.
1847  for (auto &Out : Outs)
1848  if (Out.ArgVT == MVT::i128)
1849  return false;
1850 
1852  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1853  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1854 }
1855 
1856 SDValue
1857 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1858                                    bool IsVarArg,
1859  const SmallVectorImpl<ISD::OutputArg> &Outs,
1860  const SmallVectorImpl<SDValue> &OutVals,
1861  const SDLoc &DL, SelectionDAG &DAG) const {
1862  MachineFunction &MF = DAG.getMachineFunction();
1863 
1864  // Detect unsupported vector return types.
1865  if (Subtarget.hasVector())
1866  VerifyVectorTypes(Outs);
1867 
1868  // Assign locations to each returned value.
1870  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1871  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1872 
1873  // Quick exit for void returns
1874  if (RetLocs.empty())
1875  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1876 
1877  if (CallConv == CallingConv::GHC)
1878  report_fatal_error("GHC functions return void only");
1879 
1880  // Copy the result values into the output registers.
1881  SDValue Glue;
1882  SmallVector<SDValue, 4> RetOps;
1883  RetOps.push_back(Chain);
1884  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1885  CCValAssign &VA = RetLocs[I];
1886  SDValue RetValue = OutVals[I];
1887 
1888  // Make the return register live on exit.
1889  assert(VA.isRegLoc() && "Can only return in registers!");
1890 
1891  // Promote the value as required.
1892  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1893 
1894  // Chain and glue the copies together.
1895  Register Reg = VA.getLocReg();
1896  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1897  Glue = Chain.getValue(1);
1898  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1899  }
1900 
1901  // Update chain and glue.
1902  RetOps[0] = Chain;
1903  if (Glue.getNode())
1904  RetOps.push_back(Glue);
1905 
1906  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1907 }
1908 
1909 // Return true if Op is an intrinsic node with chain that returns the CC value
1910 // as its only (other) argument. Provide the associated SystemZISD opcode and
1911 // the mask of valid CC values if so.
1912 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1913  unsigned &CCValid) {
1914  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1915  switch (Id) {
1916  case Intrinsic::s390_tbegin:
1917  Opcode = SystemZISD::TBEGIN;
1918  CCValid = SystemZ::CCMASK_TBEGIN;
1919  return true;
1920 
1921  case Intrinsic::s390_tbegin_nofloat:
1922  Opcode = SystemZISD::TBEGIN_NOFLOAT;
1923  CCValid = SystemZ::CCMASK_TBEGIN;
1924  return true;
1925 
1926  case Intrinsic::s390_tend:
1927  Opcode = SystemZISD::TEND;
1928  CCValid = SystemZ::CCMASK_TEND;
1929  return true;
1930 
1931  default:
1932  return false;
1933  }
1934 }
1935 
1936 // Return true if Op is an intrinsic node without chain that returns the
1937 // CC value as its final argument. Provide the associated SystemZISD
1938 // opcode and the mask of valid CC values if so.
1939 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1940  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1941  switch (Id) {
1942  case Intrinsic::s390_vpkshs:
1943  case Intrinsic::s390_vpksfs:
1944  case Intrinsic::s390_vpksgs:
1945  Opcode = SystemZISD::PACKS_CC;
1946  CCValid = SystemZ::CCMASK_VCMP;
1947  return true;
1948 
1949  case Intrinsic::s390_vpklshs:
1950  case Intrinsic::s390_vpklsfs:
1951  case Intrinsic::s390_vpklsgs:
1952  Opcode = SystemZISD::PACKLS_CC;
1953  CCValid = SystemZ::CCMASK_VCMP;
1954  return true;
1955 
1956  case Intrinsic::s390_vceqbs:
1957  case Intrinsic::s390_vceqhs:
1958  case Intrinsic::s390_vceqfs:
1959  case Intrinsic::s390_vceqgs:
1960  Opcode = SystemZISD::VICMPES;
1961  CCValid = SystemZ::CCMASK_VCMP;
1962  return true;
1963 
1964  case Intrinsic::s390_vchbs:
1965  case Intrinsic::s390_vchhs:
1966  case Intrinsic::s390_vchfs:
1967  case Intrinsic::s390_vchgs:
1968  Opcode = SystemZISD::VICMPHS;
1969  CCValid = SystemZ::CCMASK_VCMP;
1970  return true;
1971 
1972  case Intrinsic::s390_vchlbs:
1973  case Intrinsic::s390_vchlhs:
1974  case Intrinsic::s390_vchlfs:
1975  case Intrinsic::s390_vchlgs:
1976  Opcode = SystemZISD::VICMPHLS;
1977  CCValid = SystemZ::CCMASK_VCMP;
1978  return true;
1979 
1980  case Intrinsic::s390_vtm:
1981  Opcode = SystemZISD::VTM;
1982  CCValid = SystemZ::CCMASK_VCMP;
1983  return true;
1984 
1985  case Intrinsic::s390_vfaebs:
1986  case Intrinsic::s390_vfaehs:
1987  case Intrinsic::s390_vfaefs:
1988  Opcode = SystemZISD::VFAE_CC;
1989  CCValid = SystemZ::CCMASK_ANY;
1990  return true;
1991 
1992  case Intrinsic::s390_vfaezbs:
1993  case Intrinsic::s390_vfaezhs:
1994  case Intrinsic::s390_vfaezfs:
1995  Opcode = SystemZISD::VFAEZ_CC;
1996  CCValid = SystemZ::CCMASK_ANY;
1997  return true;
1998 
1999  case Intrinsic::s390_vfeebs:
2000  case Intrinsic::s390_vfeehs:
2001  case Intrinsic::s390_vfeefs:
2002  Opcode = SystemZISD::VFEE_CC;
2003  CCValid = SystemZ::CCMASK_ANY;
2004  return true;
2005 
2006  case Intrinsic::s390_vfeezbs:
2007  case Intrinsic::s390_vfeezhs:
2008  case Intrinsic::s390_vfeezfs:
2009  Opcode = SystemZISD::VFEEZ_CC;
2010  CCValid = SystemZ::CCMASK_ANY;
2011  return true;
2012 
2013  case Intrinsic::s390_vfenebs:
2014  case Intrinsic::s390_vfenehs:
2015  case Intrinsic::s390_vfenefs:
2016  Opcode = SystemZISD::VFENE_CC;
2017  CCValid = SystemZ::CCMASK_ANY;
2018  return true;
2019 
2020  case Intrinsic::s390_vfenezbs:
2021  case Intrinsic::s390_vfenezhs:
2022  case Intrinsic::s390_vfenezfs:
2023  Opcode = SystemZISD::VFENEZ_CC;
2024  CCValid = SystemZ::CCMASK_ANY;
2025  return true;
2026 
2027  case Intrinsic::s390_vistrbs:
2028  case Intrinsic::s390_vistrhs:
2029  case Intrinsic::s390_vistrfs:
2030  Opcode = SystemZISD::VISTR_CC;
2031  CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2032  return true;
2033 
2034  case Intrinsic::s390_vstrcbs:
2035  case Intrinsic::s390_vstrchs:
2036  case Intrinsic::s390_vstrcfs:
2037  Opcode = SystemZISD::VSTRC_CC;
2038  CCValid = SystemZ::CCMASK_ANY;
2039  return true;
2040 
2041  case Intrinsic::s390_vstrczbs:
2042  case Intrinsic::s390_vstrczhs:
2043  case Intrinsic::s390_vstrczfs:
2044  Opcode = SystemZISD::VSTRCZ_CC;
2045  CCValid = SystemZ::CCMASK_ANY;
2046  return true;
2047 
2048  case Intrinsic::s390_vstrsb:
2049  case Intrinsic::s390_vstrsh:
2050  case Intrinsic::s390_vstrsf:
2051  Opcode = SystemZISD::VSTRS_CC;
2052  CCValid = SystemZ::CCMASK_ANY;
2053  return true;
2054 
2055  case Intrinsic::s390_vstrszb:
2056  case Intrinsic::s390_vstrszh:
2057  case Intrinsic::s390_vstrszf:
2058  Opcode = SystemZISD::VSTRSZ_CC;
2059  CCValid = SystemZ::CCMASK_ANY;
2060  return true;
2061 
2062  case Intrinsic::s390_vfcedbs:
2063  case Intrinsic::s390_vfcesbs:
2064  Opcode = SystemZISD::VFCMPES;
2065  CCValid = SystemZ::CCMASK_VCMP;
2066  return true;
2067 
2068  case Intrinsic::s390_vfchdbs:
2069  case Intrinsic::s390_vfchsbs:
2070  Opcode = SystemZISD::VFCMPHS;
2071  CCValid = SystemZ::CCMASK_VCMP;
2072  return true;
2073 
2074  case Intrinsic::s390_vfchedbs:
2075  case Intrinsic::s390_vfchesbs:
2076  Opcode = SystemZISD::VFCMPHES;
2077  CCValid = SystemZ::CCMASK_VCMP;
2078  return true;
2079 
2080  case Intrinsic::s390_vftcidb:
2081  case Intrinsic::s390_vftcisb:
2082  Opcode = SystemZISD::VFTCI;
2083  CCValid = SystemZ::CCMASK_VCMP;
2084  return true;
2085 
2086  case Intrinsic::s390_tdc:
2087  Opcode = SystemZISD::TDC;
2088  CCValid = SystemZ::CCMASK_TDC;
2089  return true;
2090 
2091  default:
2092  return false;
2093  }
2094 }
2095 
2096 // Emit an intrinsic with chain and an explicit CC register result.
2097 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2098  unsigned Opcode) {
2099  // Copy all operands except the intrinsic ID.
2100  unsigned NumOps = Op.getNumOperands();
2101  SmallVector<SDValue, 6> Ops;
2102  Ops.reserve(NumOps - 1);
2103  Ops.push_back(Op.getOperand(0));
2104  for (unsigned I = 2; I < NumOps; ++I)
2105  Ops.push_back(Op.getOperand(I));
2106 
2107  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2108  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2109  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2110  SDValue OldChain = SDValue(Op.getNode(), 1);
2111  SDValue NewChain = SDValue(Intr.getNode(), 1);
2112  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2113  return Intr.getNode();
2114 }
2115 
2116 // Emit an intrinsic with an explicit CC register result.
2117 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2118  unsigned Opcode) {
2119  // Copy all operands except the intrinsic ID.
2120  unsigned NumOps = Op.getNumOperands();
2121  SmallVector<SDValue, 6> Ops;
2122  Ops.reserve(NumOps - 1);
2123  for (unsigned I = 1; I < NumOps; ++I)
2124  Ops.push_back(Op.getOperand(I));
2125 
2126  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2127  return Intr.getNode();
2128 }
2129 
2130 // CC is a comparison that will be implemented using an integer or
2131 // floating-point comparison. Return the condition code mask for
2132 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
2133 // unsigned comparisons and clear for signed ones. In the floating-point
2134 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2135 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2136 #define CONV(X) \
2137  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2138  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2139  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2140 
2141  switch (CC) {
2142  default:
2143  llvm_unreachable("Invalid integer condition!");
2144 
2145  CONV(EQ);
2146  CONV(NE);
2147  CONV(GT);
2148  CONV(GE);
2149  CONV(LT);
2150  CONV(LE);
2151 
2152  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2153  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2154  }
2155 #undef CONV
2156 }
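// Worked example of the mapping above, derived from the CONV macro: for
// ISD::SETULT the returned mask is
//   SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_LT
// (branch taken on "unordered" or "less than"), while ISD::SETLT and
// ISD::SETOLT both map to just SystemZ::CCMASK_CMP_LT.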
2157 
2158 // If C can be converted to a comparison against zero, adjust the operands
2159 // as necessary.
2160 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2161  if (C.ICmpType == SystemZICMP::UnsignedOnly)
2162  return;
2163 
2164  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2165  if (!ConstOp1)
2166  return;
2167 
2168  int64_t Value = ConstOp1->getSExtValue();
2169  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2170  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2171  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2172  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2173  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2174  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2175  }
2176 }
2177 
2178 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2179 // adjust the operands as necessary.
2180 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2181  Comparison &C) {
2182  // For us to make any changes, it must be a comparison between a single-use
2183  // load and a constant.
2184  if (!C.Op0.hasOneUse() ||
2185  C.Op0.getOpcode() != ISD::LOAD ||
2186  C.Op1.getOpcode() != ISD::Constant)
2187  return;
2188 
2189  // We must have an 8- or 16-bit load.
2190  auto *Load = cast<LoadSDNode>(C.Op0);
2191  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2192  if ((NumBits != 8 && NumBits != 16) ||
2193  NumBits != Load->getMemoryVT().getStoreSizeInBits())
2194  return;
2195 
2196  // The load must be an extending one and the constant must be within the
2197  // range of the unextended value.
2198  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2199  uint64_t Value = ConstOp1->getZExtValue();
2200  uint64_t Mask = (1 << NumBits) - 1;
2201  if (Load->getExtensionType() == ISD::SEXTLOAD) {
2202  // Make sure that ConstOp1 is in range of C.Op0.
2203  int64_t SignedValue = ConstOp1->getSExtValue();
2204  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2205  return;
2206  if (C.ICmpType != SystemZICMP::SignedOnly) {
2207  // Unsigned comparison between two sign-extended values is equivalent
2208  // to unsigned comparison between two zero-extended values.
2209  Value &= Mask;
2210  } else if (NumBits == 8) {
2211  // Try to treat the comparison as unsigned, so that we can use CLI.
2212  // Adjust CCMask and Value as necessary.
2213  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2214  // Test whether the high bit of the byte is set.
2215  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2216  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2217  // Test whether the high bit of the byte is clear.
2218  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2219  else
2220  // No instruction exists for this combination.
2221  return;
2222  C.ICmpType = SystemZICMP::UnsignedOnly;
2223  }
2224  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2225  if (Value > Mask)
2226  return;
2227  // If the constant is in range, we can use any comparison.
2228  C.ICmpType = SystemZICMP::Any;
2229  } else
2230  return;
2231 
2232  // Make sure that the first operand is an i32 of the right extension type.
2233  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2234  ISD::SEXTLOAD :
2235  ISD::ZEXTLOAD);
2236  if (C.Op0.getValueType() != MVT::i32 ||
2237  Load->getExtensionType() != ExtType) {
2238  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2239  Load->getBasePtr(), Load->getPointerInfo(),
2240  Load->getMemoryVT(), Load->getAlignment(),
2241  Load->getMemOperand()->getFlags());
2242  // Update the chain uses.
2243  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2244  }
2245 
2246  // Make sure that the second operand is an i32 with the right value.
2247  if (C.Op1.getValueType() != MVT::i32 ||
2248  Value != ConstOp1->getZExtValue())
2249  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2250 }
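// Example of the rewrite above: a signed test "sext i8 load < 0" (is the
// sign bit of the byte set?) cannot use CLI directly, so it is converted
// into the equivalent unsigned test "zext i8 load > 127", and the load is
// rewritten as a zero-extending i32 load so that CLI can be selected.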
2251 
2252 // Return true if Op is either an unextended load, or a load suitable
2253 // for integer register-memory comparisons of type ICmpType.
2254 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2255  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2256  if (Load) {
2257  // There are no instructions to compare a register with a memory byte.
2258  if (Load->getMemoryVT() == MVT::i8)
2259  return false;
2260  // Otherwise decide on extension type.
2261  switch (Load->getExtensionType()) {
2262  case ISD::NON_EXTLOAD:
2263  return true;
2264  case ISD::SEXTLOAD:
2265  return ICmpType != SystemZICMP::UnsignedOnly;
2266  case ISD::ZEXTLOAD:
2267  return ICmpType != SystemZICMP::SignedOnly;
2268  default:
2269  break;
2270  }
2271  }
2272  return false;
2273 }
2274 
2275 // Return true if it is better to swap the operands of C.
2276 static bool shouldSwapCmpOperands(const Comparison &C) {
2277  // Leave f128 comparisons alone, since they have no memory forms.
2278  if (C.Op0.getValueType() == MVT::f128)
2279  return false;
2280 
2281  // Always keep a floating-point constant second, since comparisons with
2282  // zero can use LOAD TEST and comparisons with other constants make a
2283  // natural memory operand.
2284  if (isa<ConstantFPSDNode>(C.Op1))
2285  return false;
2286 
2287  // Never swap comparisons with zero since there are many ways to optimize
2288  // those later.
2289  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2290  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2291  return false;
2292 
2293  // Also keep natural memory operands second if the loaded value is
2294  // only used here. Several comparisons have memory forms.
2295  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2296  return false;
2297 
2298  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2299  // In that case we generally prefer the memory to be second.
2300  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2301  // The only exceptions are when the second operand is a constant and
2302  // we can use things like CHHSI.
2303  if (!ConstOp1)
2304  return true;
2305  // The unsigned memory-immediate instructions can handle 16-bit
2306  // unsigned integers.
2307  if (C.ICmpType != SystemZICMP::SignedOnly &&
2308  isUInt<16>(ConstOp1->getZExtValue()))
2309  return false;
2310  // The signed memory-immediate instructions can handle 16-bit
2311  // signed integers.
2312  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2313  isInt<16>(ConstOp1->getSExtValue()))
2314  return false;
2315  return true;
2316  }
2317 
2318  // Try to promote the use of CGFR and CLGFR.
2319  unsigned Opcode0 = C.Op0.getOpcode();
2320  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2321  return true;
2322  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2323  return true;
2324  if (C.ICmpType != SystemZICMP::SignedOnly &&
2325  Opcode0 == ISD::AND &&
2326  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2327  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2328  return true;
2329 
2330  return false;
2331 }
2332 
2333 // Check whether C tests for equality between X and Y and whether X - Y
2334 // or Y - X is also computed. In that case it's better to compare the
2335 // result of the subtraction against zero.
2336 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2337  Comparison &C) {
2338  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2339  C.CCMask == SystemZ::CCMASK_CMP_NE) {
2340  for (SDNode *N : C.Op0->uses()) {
2341  if (N->getOpcode() == ISD::SUB &&
2342  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2343  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2344  C.Op0 = SDValue(N, 0);
2345  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2346  return;
2347  }
2348  }
2349  }
2350 }
2351 
2352 // Check whether C compares a floating-point value with zero and if that
2353 // floating-point value is also negated. In this case we can use the
2354 // negation to set CC, so avoiding separate LOAD AND TEST and
2355 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2356 static void adjustForFNeg(Comparison &C) {
2357  // This optimization is invalid for strict comparisons, since FNEG
2358  // does not raise any exceptions.
2359  if (C.Chain)
2360  return;
2361  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2362  if (C1 && C1->isZero()) {
2363  for (SDNode *N : C.Op0->uses()) {
2364  if (N->getOpcode() == ISD::FNEG) {
2365  C.Op0 = SDValue(N, 0);
2366  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2367  return;
2368  }
2369  }
2370  }
2371 }
2372 
2373 // Check whether C compares (shl X, 32) with 0 and whether X is
2374 // also sign-extended. In that case it is better to test the result
2375 // of the sign extension using LTGFR.
2376 //
2377 // This case is important because InstCombine transforms a comparison
2378 // with (sext (trunc X)) into a comparison with (shl X, 32).
2379 static void adjustForLTGFR(Comparison &C) {
2380  // Check for a comparison between (shl X, 32) and 0.
2381  if (C.Op0.getOpcode() == ISD::SHL &&
2382  C.Op0.getValueType() == MVT::i64 &&
2383  C.Op1.getOpcode() == ISD::Constant &&
2384  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2385  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2386  if (C1 && C1->getZExtValue() == 32) {
2387  SDValue ShlOp0 = C.Op0.getOperand(0);
2388  // See whether X has any SIGN_EXTEND_INREG uses.
2389  for (SDNode *N : ShlOp0->uses()) {
2390  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2391  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2392  C.Op0 = SDValue(N, 0);
2393  return;
2394  }
2395  }
2396  }
2397  }
2398 }
2399 
2400 // If C compares the truncation of an extending load, try to compare
2401 // the untruncated value instead. This exposes more opportunities to
2402 // reuse CC.
2403 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2404  Comparison &C) {
2405  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2406  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2407  C.Op1.getOpcode() == ISD::Constant &&
2408  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2409  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2410  if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2411  C.Op0.getValueSizeInBits().getFixedSize()) {
2412  unsigned Type = L->getExtensionType();
2413  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2414  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2415  C.Op0 = C.Op0.getOperand(0);
2416  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2417  }
2418  }
2419  }
2420 }
2421 
2422 // Return true if shift operation N has an in-range constant shift value.
2423 // Store it in ShiftVal if so.
2424 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2425  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2426  if (!Shift)
2427  return false;
2428 
2429  uint64_t Amount = Shift->getZExtValue();
2430  if (Amount >= N.getValueSizeInBits())
2431  return false;
2432 
2433  ShiftVal = Amount;
2434  return true;
2435 }
2436 
2437 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2438 // instruction and whether the CC value is descriptive enough to handle
2439 // a comparison of type Opcode between the AND result and CmpVal.
2440 // CCMask says which comparison result is being tested and BitSize is
2441 // the number of bits in the operands. If TEST UNDER MASK can be used,
2442 // return the corresponding CC mask, otherwise return 0.
2443 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2444  uint64_t Mask, uint64_t CmpVal,
2445  unsigned ICmpType) {
2446  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2447 
2448  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2449  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2450  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2451  return 0;
2452 
2453  // Work out the masks for the lowest and highest bits.
2454  unsigned HighShift = 63 - countLeadingZeros(Mask);
2455  uint64_t High = uint64_t(1) << HighShift;
2456  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2457 
2458  // Signed ordered comparisons are effectively unsigned if the sign
2459  // bit is dropped.
2460  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2461 
2462  // Check for equality comparisons with 0, or the equivalent.
2463  if (CmpVal == 0) {
2464  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2465  return SystemZ::CCMASK_TM_ALL_0;
2466  if (CCMask == SystemZ::CCMASK_CMP_NE)
2467  return SystemZ::CCMASK_TM_SOME_1;
2468  }
2469  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2470  if (CCMask == SystemZ::CCMASK_CMP_LT)
2471  return SystemZ::CCMASK_TM_ALL_0;
2472  if (CCMask == SystemZ::CCMASK_CMP_GE)
2473  return SystemZ::CCMASK_TM_SOME_1;
2474  }
2475  if (EffectivelyUnsigned && CmpVal < Low) {
2476  if (CCMask == SystemZ::CCMASK_CMP_LE)
2477  return SystemZ::CCMASK_TM_ALL_0;
2478  if (CCMask == SystemZ::CCMASK_CMP_GT)
2479  return SystemZ::CCMASK_TM_SOME_1;
2480  }
2481 
2482  // Check for equality comparisons with the mask, or the equivalent.
2483  if (CmpVal == Mask) {
2484  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2485  return SystemZ::CCMASK_TM_ALL_1;
2486  if (CCMask == SystemZ::CCMASK_CMP_NE)
2487  return SystemZ::CCMASK_TM_SOME_0;
2488  }
2489  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2490  if (CCMask == SystemZ::CCMASK_CMP_GT)
2491  return SystemZ::CCMASK_TM_ALL_1;
2492  if (CCMask == SystemZ::CCMASK_CMP_LE)
2493  return SystemZ::CCMASK_TM_SOME_0;
2494  }
2495  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2496  if (CCMask == SystemZ::CCMASK_CMP_GE)
2497  return SystemZ::CCMASK_TM_ALL_1;
2498  if (CCMask == SystemZ::CCMASK_CMP_LT)
2499  return SystemZ::CCMASK_TM_SOME_0;
2500  }
2501 
2502  // Check for ordered comparisons with the top bit.
2503  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2504  if (CCMask == SystemZ::CCMASK_CMP_LE)
2505  return SystemZ::CCMASK_TM_MSB_0;
2506  if (CCMask == SystemZ::CCMASK_CMP_GT)
2507  return SystemZ::CCMASK_TM_MSB_1;
2508  }
2509  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2510  if (CCMask == SystemZ::CCMASK_CMP_LT)
2511  return SystemZ::CCMASK_TM_MSB_0;
2512  if (CCMask == SystemZ::CCMASK_CMP_GE)
2513  return SystemZ::CCMASK_TM_MSB_1;
2514  }
2515 
2516  // If there are just two bits, we can do equality checks for Low and High
2517  // as well.
2518  if (Mask == Low + High) {
2519  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2520  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2521  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2522  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2523  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2524  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2525  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2526  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2527  }
2528 
2529  // Looks like we've exhausted our options.
2530  return 0;
2531 }
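// Illustrative call (values chosen only for this sketch): with BitSize=32,
// Mask=0xff00, CmpVal=0 and CCMask=SystemZ::CCMASK_CMP_EQ, the mask suits
// TMLL, Low=0x100 and High=0x8000, and the first block returns
// SystemZ::CCMASK_TM_ALL_0 - i.e. "(x & 0xff00) == 0" becomes a
// TEST UNDER MASK whose branch condition is "all selected bits are zero".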
2532 
2533 // See whether C can be implemented as a TEST UNDER MASK instruction.
2534 // Update the arguments with the TM version if so.
2535 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2536  Comparison &C) {
2537  // Check that we have a comparison with a constant.
2538  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2539  if (!ConstOp1)
2540  return;
2541  uint64_t CmpVal = ConstOp1->getZExtValue();
2542 
2543  // Check whether the nonconstant input is an AND with a constant mask.
2544  Comparison NewC(C);
2545  uint64_t MaskVal;
2546  ConstantSDNode *Mask = nullptr;
2547  if (C.Op0.getOpcode() == ISD::AND) {
2548  NewC.Op0 = C.Op0.getOperand(0);
2549  NewC.Op1 = C.Op0.getOperand(1);
2550  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2551  if (!Mask)
2552  return;
2553  MaskVal = Mask->getZExtValue();
2554  } else {
2555  // There is no instruction to compare with a 64-bit immediate
2556  // so use TMHH instead if possible. We need an unsigned ordered
2557  // comparison with an i64 immediate.
2558  if (NewC.Op0.getValueType() != MVT::i64 ||
2559  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2560  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2561  NewC.ICmpType == SystemZICMP::SignedOnly)
2562  return;
2563  // Convert LE and GT comparisons into LT and GE.
2564  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2565  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2566  if (CmpVal == uint64_t(-1))
2567  return;
2568  CmpVal += 1;
2569  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2570  }
2571  // If the low N bits of Op1 are zero then the low N bits of Op0 can
2572  // be masked off without changing the result.
2573  MaskVal = -(CmpVal & -CmpVal);
2574  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2575  }
2576  if (!MaskVal)
2577  return;
2578 
2579  // Check whether the combination of mask, comparison value and comparison
2580  // type are suitable.
2581  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2582  unsigned NewCCMask, ShiftVal;
2583  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2584  NewC.Op0.getOpcode() == ISD::SHL &&
2585  isSimpleShift(NewC.Op0, ShiftVal) &&
2586  (MaskVal >> ShiftVal != 0) &&
2587  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2588  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2589  MaskVal >> ShiftVal,
2590  CmpVal >> ShiftVal,
2591  SystemZICMP::Any))) {
2592  NewC.Op0 = NewC.Op0.getOperand(0);
2593  MaskVal >>= ShiftVal;
2594  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2595  NewC.Op0.getOpcode() == ISD::SRL &&
2596  isSimpleShift(NewC.Op0, ShiftVal) &&
2597  (MaskVal << ShiftVal != 0) &&
2598  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2599  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2600  MaskVal << ShiftVal,
2601  CmpVal << ShiftVal,
2602  SystemZICMP::UnsignedOnly))) {
2603  NewC.Op0 = NewC.Op0.getOperand(0);
2604  MaskVal <<= ShiftVal;
2605  } else {
2606  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2607  NewC.ICmpType);
2608  if (!NewCCMask)
2609  return;
2610  }
2611 
2612  // Go ahead and make the change.
2613  C.Opcode = SystemZISD::TM;
2614  C.Op0 = NewC.Op0;
2615  if (Mask && Mask->getZExtValue() == MaskVal)
2616  C.Op1 = SDValue(Mask, 0);
2617  else
2618  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2619  C.CCValid = SystemZ::CCMASK_TM;
2620  C.CCMask = NewCCMask;
2621 }
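// Example of the "no AND" path above: for an unsigned i64 comparison
// "x >= (1ULL << 48)", CmpVal is 1<<48, so MaskVal = -(CmpVal & -CmpVal)
// = 0xffff000000000000, which suits TMHH; getTestUnderMaskCond then maps
// CCMASK_CMP_GE to CCMASK_TM_SOME_1, i.e. "some of the top 16 bits set".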
2622 
2623 // See whether the comparison argument contains a redundant AND
2624 // and remove it if so. This sometimes happens due to the generic
2625 // BRCOND expansion.
2626 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2627  Comparison &C) {
2628  if (C.Op0.getOpcode() != ISD::AND)
2629  return;
2630  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2631  if (!Mask)
2632  return;
2633  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2634  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2635  return;
2636 
2637  C.Op0 = C.Op0.getOperand(0);
2638 }
2639 
2640 // Return a Comparison that tests the condition-code result of intrinsic
2641 // node Call against constant integer CC using comparison code Cond.
2642 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2643 // and CCValid is the set of possible condition-code results.
2644 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2645  SDValue Call, unsigned CCValid, uint64_t CC,
2646  ISD::CondCode Cond) {
2647  Comparison C(Call, SDValue(), SDValue());
2648  C.Opcode = Opcode;
2649  C.CCValid = CCValid;
2650  if (Cond == ISD::SETEQ)
2651  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2652  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2653  else if (Cond == ISD::SETNE)
2654  // ...and the inverse of that.
2655  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2656  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2657  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2658  // always true for CC>3.
2659  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2660  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2661  // ...and the inverse of that.
2662  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2663  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2664  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2665  // always true for CC>3.
2666  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2667  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2668  // ...and the inverse of that.
2669  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2670  else
2671  llvm_unreachable("Unexpected integer comparison type");
2672  C.CCMask &= CCValid;
2673  return C;
2674 }
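// The CC mask built above uses one bit per possible CC value, with bit 3
// standing for CC==0 and bit 0 for CC==3. For example, testing an intrinsic
// result for equality with CC==2 (Cond==ISD::SETEQ) yields
// C.CCMask = 1 << (3 - 2) = 0b0010, while "CC < 2" (ISD::SETULT) yields
// ~0U << (4 - 2), masked down by CCValid to 0b1100 (CC==0 or CC==1).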
2675 
2676 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2677 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2678  ISD::CondCode Cond, const SDLoc &DL,
2679  SDValue Chain = SDValue(),
2680  bool IsSignaling = false) {
2681  if (CmpOp1.getOpcode() == ISD::Constant) {
2682  assert(!Chain);
2683  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2684  unsigned Opcode, CCValid;
2685  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2686  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2687  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2688  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2689  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2690  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2691  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2692  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2693  }
2694  Comparison C(CmpOp0, CmpOp1, Chain);
2695  C.CCMask = CCMaskForCondCode(Cond);
2696  if (C.Op0.getValueType().isFloatingPoint()) {
2697  C.CCValid = SystemZ::CCMASK_FCMP;
2698  if (!C.Chain)
2699  C.Opcode = SystemZISD::FCMP;
2700  else if (!IsSignaling)
2701  C.Opcode = SystemZISD::STRICT_FCMP;
2702  else
2703  C.Opcode = SystemZISD::STRICT_FCMPS;
2704  adjustForFNeg(C);
2705  } else {
2706  assert(!C.Chain);
2707  C.CCValid = SystemZ::CCMASK_ICMP;
2708  C.Opcode = SystemZISD::ICMP;
2709  // Choose the type of comparison. Equality and inequality tests can
2710  // use either signed or unsigned comparisons. The choice also doesn't
2711  // matter if both sign bits are known to be clear. In those cases we
2712  // want to give the main isel code the freedom to choose whichever
2713  // form fits best.
2714  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2715  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2716  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2717  C.ICmpType = SystemZICMP::Any;
2718  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2719  C.ICmpType = SystemZICMP::UnsignedOnly;
2720  else
2721  C.ICmpType = SystemZICMP::SignedOnly;
2722  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2723  adjustForRedundantAnd(DAG, DL, C);
2724  adjustZeroCmp(DAG, DL, C);
2725  adjustSubwordCmp(DAG, DL, C);
2726  adjustForSubtraction(DAG, DL, C);
2727  adjustForLTGFR(C);
2728  adjustICmpTruncate(DAG, DL, C);
2729  }
2730 
2731  if (shouldSwapCmpOperands(C)) {
2732  std::swap(C.Op0, C.Op1);
2733  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2734  }
2735 
2736  adjustForTestUnderMask(DAG, DL, C);
2737  return C;
2738 }
2739 
2740 // Emit the comparison instruction described by C.
2741 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2742  if (!C.Op1.getNode()) {
2743  SDNode *Node;
2744  switch (C.Op0.getOpcode()) {
2745  case ISD::INTRINSIC_W_CHAIN:
2746  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2747  return SDValue(Node, 0);
2748  case ISD::INTRINSIC_WO_CHAIN:
2749  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2750  return SDValue(Node, Node->getNumValues() - 1);
2751  default:
2752  llvm_unreachable("Invalid comparison operands");
2753  }
2754  }
2755  if (C.Opcode == SystemZISD::ICMP)
2756  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2757  DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2758  if (C.Opcode == SystemZISD::TM) {
2759  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2760  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2761  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2762  DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2763  }
2764  if (C.Chain) {
2765  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2766  return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2767  }
2768  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2769 }
2770 
2771 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2772 // 64 bits. Extend is the extension type to use. Store the high part
2773 // in Hi and the low part in Lo.
2774 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2775  SDValue Op0, SDValue Op1, SDValue &Hi,
2776  SDValue &Lo) {
2777  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2778  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2779  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2780  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2781  DAG.getConstant(32, DL, MVT::i64));
2782  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2783  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2784 }
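// For example, with Extend == ISD::ZERO_EXTEND and 32-bit inputs
// 0x80000000 and 4, the 64-bit product is 0x200000000, so Hi receives
// 0x00000002 (bits 63..32) and Lo receives 0x00000000 (bits 31..0).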
2785 
2786 // Lower a binary operation that produces two VT results, one in each
2787 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2788 // and Opcode performs the GR128 operation. Store the even register result
2789 // in Even and the odd register result in Odd.
2790 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2791  unsigned Opcode, SDValue Op0, SDValue Op1,
2792  SDValue &Even, SDValue &Odd) {
2793  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2794  bool Is32Bit = is32Bit(VT);
2795  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2796  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2797 }
2798 
2799 // Return an i32 value that is 1 if the CC value produced by CCReg is
2800 // in the mask CCMask and 0 otherwise. CC is known to have a value
2801 // in CCValid, so other values can be ignored.
2802 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2803  unsigned CCValid, unsigned CCMask) {
2804  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2805  DAG.getConstant(0, DL, MVT::i32),
2806  DAG.getTargetConstant(CCValid, DL, MVT::i32),
2807  DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2808  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2809 }
2810 
2811 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2812 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2813 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2814 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2815 // floating-point comparisons.
2816 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2817 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2818  switch (CC) {
2819  case ISD::SETOEQ:
2820  case ISD::SETEQ:
2821  switch (Mode) {
2822  case CmpMode::Int: return SystemZISD::VICMPE;
2823  case CmpMode::FP: return SystemZISD::VFCMPE;
2824  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2825  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2826  }
2827  llvm_unreachable("Bad mode");
2828 
2829  case ISD::SETOGE:
2830  case ISD::SETGE:
2831  switch (Mode) {
2832  case CmpMode::Int: return 0;
2833  case CmpMode::FP: return SystemZISD::VFCMPHE;
2834  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2835  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2836  }
2837  llvm_unreachable("Bad mode");
2838 
2839  case ISD::SETOGT:
2840  case ISD::SETGT:
2841  switch (Mode) {
2842  case CmpMode::Int: return SystemZISD::VICMPH;
2843  case CmpMode::FP: return SystemZISD::VFCMPH;
2844  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2845  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2846  }
2847  llvm_unreachable("Bad mode");
2848 
2849  case ISD::SETUGT:
2850  switch (Mode) {
2851  case CmpMode::Int: return SystemZISD::VICMPHL;
2852  case CmpMode::FP: return 0;
2853  case CmpMode::StrictFP: return 0;
2854  case CmpMode::SignalingFP: return 0;
2855  }
2856  llvm_unreachable("Bad mode");
2857 
2858  default:
2859  return 0;
2860  }
2861 }
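// Note the asymmetry above: apart from integer SETUGT (VICMPHL), only the
// ordered/signed relations map directly to a vector compare opcode - e.g.
// SETOGT maps to VFCMPH in CmpMode::FP, while a floating-point SETUGT
// returns 0 here and is handled later via inversion or operand swapping.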
2862 
2863 // Return the SystemZISD vector comparison operation for CC or its inverse,
2864 // or 0 if neither can be done directly. Indicate in Invert whether the
2865 // result is for the inverse of CC. Mode is as above.
2866 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2867  bool &Invert) {
2868  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2869  Invert = false;
2870  return Opcode;
2871  }
2872 
2873  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2874  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2875  Invert = true;
2876  return Opcode;
2877  }
2878 
2879  return 0;
2880 }
2881 
2882 // Return a v2f64 that contains the extended form of elements Start and Start+1
2883 // of v4f32 value Op. If Chain is nonnull, return the strict form.
2884 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2885  SDValue Op, SDValue Chain) {
2886  int Mask[] = { Start, -1, Start + 1, -1 };
2887  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2888  if (Chain) {
2889  SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2890  return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2891  }
2892  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2893 }
2894 
2895 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2896 // producing a result of type VT. If Chain is nonnull, return the strict form.
2897 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2898  const SDLoc &DL, EVT VT,
2899  SDValue CmpOp0,
2900  SDValue CmpOp1,
2901  SDValue Chain) const {
2902  // There is no hardware support for v4f32 (unless we have the vector
2903  // enhancements facility 1), so extend the vector into two v2f64s
2904  // and compare those.
2905  if (CmpOp0.getValueType() == MVT::v4f32 &&
2906  !Subtarget.hasVectorEnhancements1()) {
2907  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2908  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2909  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2910  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2911  if (Chain) {
2912  SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2913  SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2914  SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2915  SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2916  SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2917  H1.getValue(1), L1.getValue(1),
2918  HRes.getValue(1), LRes.getValue(1) };
2919  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
2920  SDValue Ops[2] = { Res, NewChain };
2921  return DAG.getMergeValues(Ops, DL);
2922  }
2923  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2924  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2925  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2926  }
2927  if (Chain) {
2928  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
2929  return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
2930  }
2931  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2932 }
2933 
2934 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2935 // an integer mask of type VT. If Chain is nonnull, we have a strict
2936 // floating-point comparison. If in addition IsSignaling is true, we have
2937 // a strict signaling floating-point comparison.
2938 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2939  const SDLoc &DL, EVT VT,
2940  ISD::CondCode CC,
2941  SDValue CmpOp0,
2942  SDValue CmpOp1,
2943  SDValue Chain,
2944  bool IsSignaling) const {
2945  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2946  assert (!Chain || IsFP);
2947  assert (!IsSignaling || Chain);
2948  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
2949  Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
2950  bool Invert = false;
2951  SDValue Cmp;
2952  switch (CC) {
2953  // Handle tests for order using (or (ogt y x) (oge x y)).
2954  case ISD::SETUO:
2955  Invert = true;
2956  LLVM_FALLTHROUGH;
2957  case ISD::SETO: {
2958  assert(IsFP && "Unexpected integer comparison");
2959  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2960  DL, VT, CmpOp1, CmpOp0, Chain);
2961  SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
2962  DL, VT, CmpOp0, CmpOp1, Chain);
2963  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2964  if (Chain)
2965  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2966  LT.getValue(1), GE.getValue(1));
2967  break;
2968  }
2969 
2970  // Handle <> tests using (or (ogt y x) (ogt x y)).
2971  case ISD::SETUEQ:
2972  Invert = true;
2973  LLVM_FALLTHROUGH;
2974  case ISD::SETONE: {
2975  assert(IsFP && "Unexpected integer comparison");
2976  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2977  DL, VT, CmpOp1, CmpOp0, Chain);
2978  SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2979  DL, VT, CmpOp0, CmpOp1, Chain);
2980  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2981  if (Chain)
2982  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2983  LT.getValue(1), GT.getValue(1));
2984  break;
2985  }
2986 
2987  // Otherwise a single comparison is enough. It doesn't really
2988  // matter whether we try the inversion or the swap first, since
2989  // there are no cases where both work.
2990  default:
2991  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2992  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
2993  else {
2994  CC = ISD::getSetCCSwappedOperands(CC);
2995  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2996  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
2997  else
2998  llvm_unreachable("Unhandled comparison");
2999  }
3000  if (Chain)
3001  Chain = Cmp.getValue(1);
3002  break;
3003  }
3004  if (Invert) {
3005  SDValue Mask =
3006  DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3007  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3008  }
3009  if (Chain && Chain.getNode() != Cmp.getNode()) {
3010  SDValue Ops[2] = { Cmp, Chain };
3011  Cmp = DAG.getMergeValues(Ops, DL);
3012  }
3013  return Cmp;
3014 }
3015 
3016 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3017  SelectionDAG &DAG) const {
3018  SDValue CmpOp0 = Op.getOperand(0);
3019  SDValue CmpOp1 = Op.getOperand(1);
3020  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3021  SDLoc DL(Op);
3022  EVT VT = Op.getValueType();
3023  if (VT.isVector())
3024  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3025 
3026  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3027  SDValue CCReg = emitCmp(DAG, DL, C);
3028  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3029 }
3030 
3031 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3032  SelectionDAG &DAG,
3033  bool IsSignaling) const {
3034  SDValue Chain = Op.getOperand(0);
3035  SDValue CmpOp0 = Op.getOperand(1);
3036  SDValue CmpOp1 = Op.getOperand(2);
3037  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3038  SDLoc DL(Op);
3039  EVT VT = Op.getNode()->getValueType(0);
3040  if (VT.isVector()) {
3041  SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3042  Chain, IsSignaling);
3043  return Res.getValue(Op.getResNo());
3044  }
3045 
3046  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3047  SDValue CCReg = emitCmp(DAG, DL, C);
3048  CCReg->setFlags(Op->getFlags());
3049  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3050  SDValue Ops[2] = { Result, CCReg.getValue(1) };
3051  return DAG.getMergeValues(Ops, DL);
3052 }
3053 
3054 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3055  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3056  SDValue CmpOp0 = Op.getOperand(2);
3057  SDValue CmpOp1 = Op.getOperand(3);
3058  SDValue Dest = Op.getOperand(4);
3059  SDLoc DL(Op);
3060 
3061  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3062  SDValue CCReg = emitCmp(DAG, DL, C);
3063  return DAG.getNode(
3064  SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3065  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3066  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3067 }
3068 
3069 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3070 // allowing Pos and Neg to be wider than CmpOp.
3071 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3072  return (Neg.getOpcode() == ISD::SUB &&
3073  Neg.getOperand(0).getOpcode() == ISD::Constant &&
3074  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3075  Neg.getOperand(1) == Pos &&
3076  (Pos == CmpOp ||
3077  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3078  Pos.getOperand(0) == CmpOp)));
3079 }
3080 
3081 // Return the absolute or negative absolute of Op; IsNegative decides which.
3082 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3083  bool IsNegative) {
3084  Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3085  if (IsNegative)
3086  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3087  DAG.getConstant(0, DL, Op.getValueType()), Op);
3088  return Op;
3089 }
3090 
3091 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3092  SelectionDAG &DAG) const {
3093  SDValue CmpOp0 = Op.getOperand(0);
3094  SDValue CmpOp1 = Op.getOperand(1);
3095  SDValue TrueOp = Op.getOperand(2);
3096  SDValue FalseOp = Op.getOperand(3);
3097  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3098  SDLoc DL(Op);
3099 
3100  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3101 
3102  // Check for absolute and negative-absolute selections, including those
3103  // where the comparison value is sign-extended (for LPGFR and LNGFR).
3104  // This check supplements the one in DAGCombiner.
3105  if (C.Opcode == SystemZISD::ICMP &&
3106  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3107  C.CCMask != SystemZ::CCMASK_CMP_NE &&
3108  C.Op1.getOpcode() == ISD::Constant &&
3109  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3110  if (isAbsolute(C.Op0, TrueOp, FalseOp))
3111  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3112  if (isAbsolute(C.Op0, FalseOp, TrueOp))
3113  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3114  }
3115 
3116  SDValue CCReg = emitCmp(DAG, DL, C);
3117  SDValue Ops[] = {TrueOp, FalseOp,
3118  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3119  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3120 
3121  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3122 }
3123 
3124 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3125  SelectionDAG &DAG) const {
3126  SDLoc DL(Node);
3127  const GlobalValue *GV = Node->getGlobal();
3128  int64_t Offset = Node->getOffset();
3129  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3130  CodeModel::Model CM = DAG.getTarget().getCodeModel();
3131 
3132  SDValue Result;
3133  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3134  if (isInt<32>(Offset)) {
3135  // Assign anchors at 1<<12 byte boundaries.
3136  uint64_t Anchor = Offset & ~uint64_t(0xfff);
3137  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3138  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3139 
3140  // The offset can be folded into the address if it is aligned to a
3141  // halfword.
3142  Offset -= Anchor;
3143  if (Offset != 0 && (Offset & 1) == 0) {
3144  SDValue Full =
3145  DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3146  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3147  Offset = 0;
3148  }
3149  } else {
3150  // Conservatively load a constant offset greater than 32 bits into a
3151  // register below.
3152  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3153  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3154  }
3155  } else {
3156  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3157  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3158  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3159  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3160  }
3161 
3162  // If there was a non-zero offset that we didn't fold, create an explicit
3163  // addition for it.
3164  if (Offset != 0)
3165  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3166  DAG.getConstant(Offset, DL, PtrVT));
3167 
3168  return Result;
3169 }
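// Anchor example for the PC-relative path above: a global accessed at
// offset 0x12344 gets the anchor GV+0x12000 (Offset & ~0xfff); the
// remaining 0x344 is even (halfword-aligned), so it is folded into the
// address via PCREL_OFFSET. An odd remainder such as 0x345 would instead
// be materialized with the explicit ADD at the end of the function.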
3170 
3171 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3172  SelectionDAG &DAG,
3173  unsigned Opcode,
3174  SDValue GOTOffset) const {
3175  SDLoc DL(Node);
3176  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3177  SDValue Chain = DAG.getEntryNode();
3178  SDValue Glue;
3179 
3180  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3181  CallingConv::GHC)
3182  report_fatal_error("In GHC calling convention TLS is not supported");
3183 
3184  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3185  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3186  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3187  Glue = Chain.getValue(1);
3188  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3189  Glue = Chain.getValue(1);
3190 
3191  // The first call operand is the chain and the second is the TLS symbol.
3192  SmallVector<SDValue, 8> Ops;
3193  Ops.push_back(Chain);
3194  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3195  Node->getValueType(0),
3196  0, 0));
3197 
3198  // Add argument registers to the end of the list so that they are
3199  // known live into the call.
3200  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3201  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3202 
3203  // Add a register mask operand representing the call-preserved registers.
3204  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3205  const uint32_t *Mask =
3206  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3207  assert(Mask && "Missing call preserved mask for calling convention");
3208  Ops.push_back(DAG.getRegisterMask(Mask));
3209 
3210  // Glue the call to the argument copies.
3211  Ops.push_back(Glue);
3212 
3213  // Emit the call.
3214  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3215  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3216  Glue = Chain.getValue(1);
3217 
3218  // Copy the return value from %r2.
3219  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3220 }
3221 
3222 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3223  SelectionDAG &DAG) const {
3224  SDValue Chain = DAG.getEntryNode();
3225  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3226 
3227  // The high part of the thread pointer is in access register 0.
3228  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3229  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3230 
3231  // The low part of the thread pointer is in access register 1.
3232  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3233  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3234 
3235  // Merge them into a single 64-bit address.
3236  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3237  DAG.getConstant(32, DL, PtrVT));
3238  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3239 }
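// In other words, the thread pointer is reassembled here as
//   TP = (any_extend(%a0) << 32) | zero_extend(%a1)
// matching the convention above of keeping the 64-bit thread pointer split
// across access register 0 (high half) and access register 1 (low half).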
3240 
3241 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3242  SelectionDAG &DAG) const {
3243  if (DAG.getTarget().useEmulatedTLS())
3244  return LowerToTLSEmulatedModel(Node, DAG);
3245  SDLoc DL(Node);
3246  const GlobalValue *GV = Node->getGlobal();
3247  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3248  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3249 
3250  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3251  CallingConv::GHC)
3252  report_fatal_error("In GHC calling convention TLS is not supported");
3253 
3254  SDValue TP = lowerThreadPointer(DL, DAG);
3255 
3256  // Get the offset of GA from the thread pointer, based on the TLS model.
3257  SDValue Offset;
3258  switch (model) {
3259  case TLSModel::GeneralDynamic: {
3260  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3261  SystemZConstantPoolValue *CPV =
3262  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3263 
3264  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3265  Offset = DAG.getLoad(
3266  PtrVT, DL, DAG.getEntryNode(), Offset,
3267  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3268 
3269  // Call __tls_get_offset to retrieve the offset.
3270  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3271  break;
3272  }
3273 
3274  case TLSModel::LocalDynamic: {
3275  // Load the GOT offset of the module ID.
3276  SystemZConstantPoolValue *CPV =
3277  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3278 
3279  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3280  Offset = DAG.getLoad(
3281  PtrVT, DL, DAG.getEntryNode(), Offset,
3282  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3283 
3284  // Call __tls_get_offset to retrieve the module base offset.
3285  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3286 
3287  // Note: The SystemZLDCleanupPass will remove redundant computations
3288  // of the module base offset. Count total number of local-dynamic
3289  // accesses to trigger execution of that pass.
3290  SystemZMachineFunctionInfo* MFI =
3291  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3292  MFI->incNumLocalDynamicTLSAccesses();
3293 
3294  // Add the per-symbol offset.
3295  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3296 
3297  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3298  DTPOffset = DAG.getLoad(
3299  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3300  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3301 
3302  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3303  break;
3304  }
3305 
3306  case TLSModel::InitialExec: {
3307  // Load the offset from the GOT.
3308  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3309  SystemZII::MO_INDNTPOFF);
3310  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3311  Offset =
3312  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3313  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3314  break;
3315  }
3316 
3317  case TLSModel::LocalExec: {
3318  // Force the offset into the constant pool and load it from there.
3319  SystemZConstantPoolValue *CPV =
3320  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3321 
3322  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3323  Offset = DAG.getLoad(
3324  PtrVT, DL, DAG.getEntryNode(), Offset,
3325  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3326  break;
3327  }
3328  }
3329 
3330  // Add the base and offset together.
3331  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3332 }
3333 
3334 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3335  SelectionDAG &DAG) const {
3336  SDLoc DL(Node);
3337  const BlockAddress *BA = Node->getBlockAddress();
3338  int64_t Offset = Node->getOffset();
3339  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3340 
3341  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3342  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3343  return Result;
3344 }
3345 
3346 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3347  SelectionDAG &DAG) const {
3348  SDLoc DL(JT);
3349  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3350  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3351 
3352  // Use LARL to load the address of the table.
3353  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3354 }
3355 
3356 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3357  SelectionDAG &DAG) const {
3358  SDLoc DL(CP);
3359  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3360 
3361  SDValue Result;
3362  if (CP->isMachineConstantPoolEntry())
3363  Result =
3364  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3365  else
3366  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3367  CP->getOffset());
3368 
3369  // Use LARL to load the address of the constant pool entry.
3370  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3371 }
3372 
3373 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3374  SelectionDAG &DAG) const {
3375  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3376  MachineFunction &MF = DAG.getMachineFunction();
3377  MachineFrameInfo &MFI = MF.getFrameInfo();
3378  MFI.setFrameAddressIsTaken(true);
3379 
3380  SDLoc DL(Op);
3381  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3382  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3383 
3384  // By definition, the frame address is the address of the back chain. (In
3385  // the case of packed stack without backchain, return the address where the
3386  // backchain would have been stored. This will either be an unused space or
3387  // contain a saved register).
3388  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3389  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3390 
3391  // FIXME The frontend should detect this case.
3392  if (Depth > 0) {
3393  report_fatal_error("Unsupported stack frame traversal count");
3394  }
3395 
3396  return BackChain;
3397 }
3398 
3399 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3400  SelectionDAG &DAG) const {
3401  MachineFunction &MF = DAG.getMachineFunction();
3402  MachineFrameInfo &MFI = MF.getFrameInfo();
3403  MFI.setReturnAddressIsTaken(true);
3404 
3405  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3406  return SDValue();
3407 
3408  SDLoc DL(Op);
3409  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3410  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3411 
3412  // FIXME The frontend should detect this case.
3413  if (Depth > 0) {
3414  report_fatal_error("Unsupported stack frame traversal count");
3415  }
3416 
3417  // Return R14D, which has the return address. Mark it an implicit live-in.
3418  Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3419  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3420 }
3421 
3422 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3423  SelectionDAG &DAG) const {
3424  SDLoc DL(Op);
3425  SDValue In = Op.getOperand(0);
3426  EVT InVT = In.getValueType();
3427  EVT ResVT = Op.getValueType();
3428 
3429  // Convert loads directly. This is normally done by DAGCombiner,
3430  // but we need this case for bitcasts that are created during lowering
3431  // and which are then lowered themselves.
3432  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3433  if (ISD::isNormalLoad(LoadN)) {
3434  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3435  LoadN->getBasePtr(), LoadN->getMemOperand());
3436  // Update the chain uses.
3437  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3438  return NewLoad;
3439  }
3440 
3441  if (InVT == MVT::i32 && ResVT == MVT::f32) {
3442  SDValue In64;
3443  if (Subtarget.hasHighWord()) {
3444  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3445  MVT::i64);
3446  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3447  MVT::i64, SDValue(U64, 0), In);
3448  } else {
3449  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3450  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3451  DAG.getConstant(32, DL, MVT::i64));
3452  }
3453  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3454  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3455  DL, MVT::f32, Out64);
3456  }
3457  if (InVT == MVT::f32 && ResVT == MVT::i32) {
3458  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3459  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3460  MVT::f64, SDValue(U64, 0), In);
3461  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3462  if (Subtarget.hasHighWord())
3463  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3464  MVT::i32, Out64);
3465  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3466  DAG.getConstant(32, DL, MVT::i64));
3467  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3468  }
3469  llvm_unreachable("Unexpected bitcast combination");
3470 }
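 
 The non-high-word paths above perform the i32<->f32 bitcast by parking the 32-bit payload in the upper half of a 64-bit value (SHL by 32 on the way in, SRL by 32 plus TRUNCATE on the way out). A minimal host-side sketch of that shift arithmetic follows; the helper names are illustrative and not part of this file.
 
   #include <cstdint>
 
   // i32 -> f32 path without high-word support: ANY_EXTEND + SHL by 32.
   inline uint64_t placeInHighHalf(uint32_t In) { return uint64_t(In) << 32; }
 
   // f32 -> i32 path without high-word support: SRL by 32 + TRUNCATE.
   inline uint32_t takeFromHighHalf(uint64_t In64) { return uint32_t(In64 >> 32); }
 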
3471 
3472 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3473  SelectionDAG &DAG) const {
3474  MachineFunction &MF = DAG.getMachineFunction();
3475  SystemZMachineFunctionInfo *FuncInfo =
3476  MF.getInfo<SystemZMachineFunctionInfo>();
3477  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3478 
3479  SDValue Chain = Op.getOperand(0);
3480  SDValue Addr = Op.getOperand(1);
3481  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3482  SDLoc DL(Op);
3483 
3484  // The initial values of each field.
3485  const unsigned NumFields = 4;
3486  SDValue Fields[NumFields] = {
3487  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3488  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3489  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3490  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3491  };
3492 
3493  // Store each field into its respective slot.
3494  SDValue MemOps[NumFields];
3495  unsigned Offset = 0;
3496  for (unsigned I = 0; I < NumFields; ++I) {
3497  SDValue FieldAddr = Addr;
3498  if (Offset != 0)
3499  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3500  DAG.getIntPtrConstant(Offset, DL));
3501  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3502  MachinePointerInfo(SV, Offset));
3503  Offset += 8;
3504  }
3505  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3506 }
3507 
3508 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3509  SelectionDAG &DAG) const {
3510  SDValue Chain = Op.getOperand(0);
3511  SDValue DstPtr = Op.getOperand(1);
3512  SDValue SrcPtr = Op.getOperand(2);
3513  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3514  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3515  SDLoc DL(Op);
3516 
3517  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
3518  Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3519  /*isTailCall*/ false, MachinePointerInfo(DstSV),
3520  MachinePointerInfo(SrcSV));
3521 }
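 
 For reference, the four 8-byte fields stored by lowerVASTART correspond to the SystemZ ELF va_list record, and their total size matches the 32-byte copy above. A rough sketch of that layout, assuming an LP64 host; the field names are descriptive and not taken from this file or the ABI headers.
 
   #include <cstdint>
 
   struct VaListSketch {
     int64_t FirstGPR;        // index of the first unnamed general-register argument
     int64_t FirstFPR;        // index of the first unnamed floating-point argument
     void   *OverflowArgArea; // frame area for arguments passed on the stack
     void   *RegSaveArea;     // frame area where the argument registers were saved
   };
   // Four 8-byte slots on LP64, matching the 32-byte memcpy in lowerVACOPY.
   static_assert(sizeof(VaListSketch) == 32, "expected four 8-byte fields");
 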
3522 
3523 SDValue SystemZTargetLowering::
3524 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
3525  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3526  MachineFunction &MF = DAG.getMachineFunction();
3527  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3528  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3529 
3530  SDValue Chain = Op.getOperand(0);
3531  SDValue Size = Op.getOperand(1);
3532  SDValue Align = Op.getOperand(2);
3533  SDLoc DL(Op);
3534 
3535  // If the user has set the "no-realign-stack" function attribute, ignore
3536  // alloca alignments.
3537  uint64_t AlignVal =
3538  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3539 
3541  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3542  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3543 
3544  unsigned SPReg = getStackPointerRegisterToSaveRestore();
3545  SDValue NeededSpace = Size;
3546 
3547  // Get a reference to the stack pointer.
3548  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3549 
3550  // If we need a backchain, save it now.
3551  SDValue Backchain;
3552  if (StoreBackchain)
3553  Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3554  MachinePointerInfo());
3555 
3556  // Add extra space for alignment if needed.
3557  if (ExtraAlignSpace)
3558  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3559  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3560 
3561  // Get the new stack pointer value.
3562  SDValue NewSP;
3563  if (hasInlineStackProbe(MF)) {
3564  NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3565  DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3566  Chain = NewSP.getValue(1);
3567  }
3568  else {
3569  NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3570  // Copy the new stack pointer back.
3571  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3572  }
3573 
3574  // The allocated data lives above the 160 bytes allocated for the standard
3575  // frame, plus any outgoing stack arguments. We don't know how much that
3576  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3577  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3578  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3579 
3580  // Dynamically realign if needed.
3581  if (RequiredAlign > StackAlign) {
3582  Result =
3583  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3584  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3585  Result =
3586  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3587  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3588  }
3589 
3590  if (StoreBackchain)
3591  Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3592  MachinePointerInfo());
3593 
3594  SDValue Ops[2] = { Result, Chain };
3595  return DAG.getMergeValues(Ops, DL);
3596 }
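 
 The realignment above uses the usual over-allocate-then-mask trick: reserve RequiredAlign - StackAlign extra bytes, then round the tentative result with an ADD and an AND. A minimal sketch of that arithmetic, assuming RequiredAlign is a power of two (as alloca alignments are) and that the incoming address is already StackAlign-aligned; the helper name is illustrative.
 
   #include <cstdint>
 
   // Adding the slack and masking yields the lowest RequiredAlign-aligned
   // address that is >= Addr, mirroring the ADD + AND sequence above.
   inline uint64_t realignDynAlloc(uint64_t Addr, uint64_t RequiredAlign,
                                   uint64_t ExtraAlignSpace) {
     return (Addr + ExtraAlignSpace) & ~(RequiredAlign - 1);
   }
 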
3597 
3598 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3599  SDValue Op, SelectionDAG &DAG) const {
3600  SDLoc DL(Op);
3601 
3602  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3603 }
3604 
3605 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3606  SelectionDAG &DAG) const {
3607  EVT VT = Op.getValueType();
3608  SDLoc DL(Op);
3609  SDValue Ops[2];
3610  if (is32Bit(VT))
3611  // Just do a normal 64-bit multiplication and extract the results.
3612  // We define this so that it can be used for constant division.
3613  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3614  Op.getOperand(1), Ops[1], Ops[0]);
3615  else if (Subtarget.hasMiscellaneousExtensions2())
3616  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3617  // the high result in the even register. ISD::SMUL_LOHI is defined to
3618  // return the low half first, so the results are in reverse order.
3619  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3620  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3621  else {
3622  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3623  //
3624  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3625  //
3626  // but using the fact that the upper halves are either all zeros
3627  // or all ones:
3628  //
3629  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3630  //
3631  // and grouping the two rightmost terms together, since they are cheaper to
3632  // compute than the multiplication:
3633  //
3634  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3635  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3636  SDValue LL = Op.getOperand(0);
3637  SDValue RL = Op.getOperand(1);
3638  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3639  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3640  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3641  // the high result in the even register. ISD::SMUL_LOHI is defined to
3642  // return the low half first, so the results are in reverse order.
3643  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3644  LL, RL, Ops[1], Ops[0]);
3645  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3646  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3647  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3648  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3649  }
3650  return DAG.getMergeValues(Ops, DL);
3651 }
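 
 The identity in the comments above can be sanity-checked on a host with 128-bit arithmetic. A minimal sketch, assuming a compiler that provides __int128 (e.g. Clang or GCC); all names are illustrative and not part of this file.
 
   #include <cassert>
   #include <cstdint>
 
   // Verify: (signed) L * R == (unsigned) L * R - ((((L>>63) & R) + (L & (R>>63))) << 64),
   // where >> is an arithmetic shift, so L>>63 is all ones exactly when L is negative.
   static void checkSMulLoHiIdentity(int64_t L, int64_t R) {
     using U128 = unsigned __int128;
     U128 Signed = (U128)((__int128)L * (__int128)R);
     U128 LU = (uint64_t)L, RU = (uint64_t)R;
     U128 LH = (uint64_t)(L >> 63), RH = (uint64_t)(R >> 63);
     U128 Fixup = ((LH & RU) + (LU & RH)) << 64;
     assert(Signed == LU * RU - Fixup);
   }
 
   // e.g. checkSMulLoHiIdentity(-3, 5); checkSMulLoHiIdentity(INT64_MIN, -1);
 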
3652 
3653 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3654  SelectionDAG &DAG) const {
3655  EVT VT = Op.getValueType();
3656  SDLoc DL(Op);
3657  SDValue Ops[2];
3658  if (is32Bit(VT))
3659  // Just do a normal 64-bit multiplication and extract the results.
3660  // We define this so that it can be used for constant division.
3661  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3662  Op.getOperand(1), Ops[1], Ops[0]);
3663  else
3664  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3665  // the high result in the even register. ISD::UMUL_LOHI is defined to
3666  // return the low half first, so the results are in reverse order.
3667  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3668  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3669  return DAG.getMergeValues(Ops, DL);
3670 }
3671 
3672 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3673  SelectionDAG &DAG) const {
3674  SDValue Op0 = Op.getOperand(0);
3675  SDValue Op1 = Op.getOperand(1);
3676  EVT VT = Op.getValueType();
3677  SDLoc DL(Op);
3678 
3679  // We use DSGF for 32-bit division. This means the first operand must
3680  // always be 64-bit, and the second operand should be 32-bit whenever
3681  // that is possible, to improve performance.
3682  if (is32Bit(VT))
3683  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3684  else if (DAG.ComputeNumSignBits(Op1) > 32)
3685  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3686 
3687  // DSG(F) returns the remainder in the even register and the
3688  // quotient in the odd register.
3689  SDValue Ops[2];
3690  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3691  return DAG.getMergeValues(Ops, DL);
3692 }
3693 
3694 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3695  SelectionDAG &DAG) const {
3696  EVT VT = Op.getValueType();
3697  SDLoc DL(Op);
3698 
3699  // DL(G) returns the remainder in the even register and the
3700  // quotient in the odd register.
3701  SDValue Ops[2];
3702  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3703  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3704  return DAG.getMergeValues(Ops, DL);
3705 }
3706 
3707 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3708  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3709 
3710  // Get the known-zero masks for each operand.
3711  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3712  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3713  DAG.computeKnownBits(Ops[1])};
3714 
3715  // See if the upper 32 bits of one operand and the lower 32 bits of the
3716  // other are known zero. They are the low and high operands respectively.
3717  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3718  Known[1].Zero.getZExtValue() };
3719  unsigned High, Low;
3720  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3721  High = 1, Low = 0;
3722  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3723  High = 0, Low = 1;
3724  else
3725  return Op;
3726 
3727  SDValue LowOp = Ops[Low];
3728  SDValue HighOp = Ops[High];
3729 
3730  // If the high part is a constant, we're better off using IILH.
3731  if (HighOp.getOpcode() == ISD::Constant)
3732  return Op;
3733 
3734  // If the low part is a constant that is outside the range of LHI,
3735  // then we're better off using IILF.
3736  if (LowOp.getOpcode() == ISD::Constant) {
3737  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3738  if (!isInt<16>(Value))
3739  return Op;
3740  }
3741 
3742  // Check whether the high part is an AND that doesn't change the
3743  // high 32 bits and just masks out low bits. We can skip it if so.
3744  if (HighOp.getOpcode() == ISD::AND &&
3745  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3746  SDValue HighOp0 = HighOp.getOperand(0);
3747  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3748  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3749  HighOp = HighOp0;
3750  }
3751 
3752  // Take advantage of the fact that all GR32 operations only change the
3753  // low 32 bits by truncating Low to an i32 and inserting it directly
3754  // using a subreg. The interesting cases are those where the truncation
3755  // can be folded.
3756  SDLoc DL(Op);
3757  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3758  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3759  MVT::i64, HighOp, Low32);
3760 }
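 
 The transformation above relies on the two operands occupying disjoint 32-bit halves, so the OR degenerates into inserting the low word of one value into the other. A small sketch of that equivalence; the helper is hypothetical and not part of this file.
 
   #include <cstdint>
 
   // Assuming the low 32 bits of HighOp and the high 32 bits of LowOp are known
   // to be zero, HighOp | LowOp equals HighOp with its low word replaced by
   // LowOp, which is what the subreg_l32 insert above produces.
   inline uint64_t orViaLowWordInsert(uint64_t HighOp, uint64_t LowOp) {
     return (HighOp & 0xffffffff00000000ULL) | uint32_t(LowOp);
   }
 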
3761 
3762 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3763 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3764  SelectionDAG &DAG) const {
3765  SDNode *N = Op.getNode();
3766  SDValue LHS = N->getOperand(0);
3767  SDValue RHS = N->getOperand(1);
3768  SDLoc DL(N);
3769  unsigned BaseOp = 0;
3770  unsigned CCValid = 0;
3771  unsigned CCMask = 0;
3772 
3773  switch (Op.getOpcode()) {
3774  default: llvm_unreachable("Unknown instruction!");
3775  case ISD::SADDO:
3776  BaseOp = SystemZISD::SADDO;
3777  CCValid = SystemZ::CCMASK_ARITH;
3778  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3779  break;
3780  case ISD::SSUBO:
3781  BaseOp = SystemZISD::SSUBO;
3782  CCValid = SystemZ::CCMASK_ARITH;
3783  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3784  break;
3785  case ISD::UADDO:
3786  BaseOp = SystemZISD::UADDO;
3787  CCValid = SystemZ::CCMASK_LOGICAL;
3788  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3789  break;
3790  case ISD::USUBO:
3791  BaseOp = SystemZISD::USUBO;
3792  CCValid = SystemZ::CCMASK_LOGICAL;
3793  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3794  break;
3795  }
3796 
3797  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3798  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3799 
3800  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3801  if (N->getValueType(1) == MVT::i1)
3802  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3803 
3804  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3805 }
3806 
3807 static bool isAddCarryChain(SDValue Carry) {
3808  while (Carry.getOpcode() == ISD::ADDCARRY)
3809  Carry = Carry.getOperand(2);
3810  return Carry.getOpcode() == ISD::UADDO;
3811 }
3812 
3813 static bool isSubBorrowChain(SDValue Carry) {
3814  while (Carry.getOpcode() == ISD::SUBCARRY)
3815  Carry = Carry.getOperand(2);
3816  return Carry.getOpcode() == ISD::USUBO;
3817 }
3818 
3819 // Lower ADDCARRY/SUBCARRY nodes.
3820 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3821  SelectionDAG &DAG) const {
3822 
3823  SDNode *N = Op.getNode();
3824  MVT VT = N->getSimpleValueType(0);
3825 
3826  // Let legalize expand this if it isn't a legal type yet.
3827  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3828  return SDValue();
3829 
3830  SDValue LHS = N->getOperand(0);
3831  SDValue RHS = N->getOperand(1);
3832  SDValue Carry = Op.getOperand(2);
3833  SDLoc DL(N);
3834  unsigned BaseOp = 0;
3835  unsigned CCValid = 0;
3836  unsigned CCMask = 0;
3837 
3838  switch (Op.getOpcode()) {
3839  default: llvm_unreachable("Unknown instruction!");
3840  case ISD::ADDCARRY:
3841  if (!isAddCarryChain(Carry))
3842  return SDValue();
3843 
3844  BaseOp = SystemZISD::ADDCARRY;
3845  CCValid = SystemZ::CCMASK_LOGICAL;
3846  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3847  break;
3848  case ISD::SUBCARRY:
3849  if (!isSubBorrowChain(Carry))
3850  return SDValue();
3851 
3852  BaseOp = SystemZISD::SUBCARRY;
3853  CCValid = SystemZ::CCMASK_LOGICAL;
3854  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3855  break;
3856  }
3857 
3858  // Set the condition code from the carry flag.
3859  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3860  DAG.getConstant(CCValid, DL, MVT::i32),
3861  DAG.getConstant(CCMask, DL, MVT::i32));
3862 
3863  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3864  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3865 
3866  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3867  if (N->getValueType(1) == MVT::i1)
3868  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3869 
3870  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3871 }
3872 
3873 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3874  SelectionDAG &DAG) const {
3875  EVT VT = Op.getValueType();
3876  SDLoc DL(Op);
3877  Op = Op.getOperand(0);
3878 
3879  // Handle vector types via VPOPCT.
3880  if (VT.isVector()) {
3881  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3882  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3883  switch (VT.getScalarSizeInBits()) {
3884  case 8:
3885  break;
3886  case 16: {
3887  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3888  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3889  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3890  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3891  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3892  break;
3893  }
3894  case 32: {
3895  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3896  DAG.getConstant(0, DL, MVT::i32));
3897  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3898  break;
3899  }
3900  case 64: {
3901  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3902  DAG.getConstant(0, DL, MVT::i32));
3903  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3904  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3905  break;
3906  }
3907  default:
3908  llvm_unreachable("Unexpected type");
3909  }
3910  return Op;
3911  }
3912 
3913  // Get the known-zero mask for the operand.
3914  KnownBits Known = DAG.computeKnownBits(Op);
3915  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
3916  if (NumSignificantBits == 0)
3917  return DAG.getConstant(0, DL, VT);
3918 
3919  // Skip known-zero high parts of the operand.
3920  int64_t OrigBitSize = VT.getSizeInBits();
3921  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3922  BitSize = std::min(BitSize, OrigBitSize);
3923 
3924  // The POPCNT instruction counts the number of bits in each byte.
3925  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3926  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3927  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3928 
3929  // Add up per-byte counts in a binary tree. All bits of Op at
3930  // position larger than BitSize remain zero throughout.
3931  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3932  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3933  if (BitSize != OrigBitSize)
3934  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3935  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3936  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3937  }
3938 
3939  // Extract overall result from high byte.
3940  if (BitSize > 8)
3941  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3942  DAG.getConstant(BitSize - 8, DL, VT));
3943 
3944  return Op;
3945 }
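 
 For the scalar path, a host-side sketch of the same reduction may make the shift/add tree easier to follow: POPCNT yields one count per byte, and adding progressively shifted copies funnels all counts into the top byte. This is a sketch for a full 64-bit operand; names are illustrative and not part of this file.
 
   #include <cstdint>
 
   inline uint64_t ctpop64Sketch(uint64_t X) {
     // Per-byte popcount, standing in for the SystemZ POPCNT instruction.
     uint64_t Op = 0;
     for (int Byte = 0; Byte < 8; ++Byte) {
       uint64_t B = (X >> (8 * Byte)) & 0xff;
       uint64_t Count = 0;
       for (; B; B &= B - 1)
         ++Count;
       Op |= Count << (8 * Byte);
     }
     // Shift/add tree: after adding copies shifted by 32, 16 and 8, the top
     // byte holds the sum of all eight byte counts (the total never exceeds
     // 64, so no byte carries into the next).
     for (int I = 32; I >= 8; I /= 2)
       Op += Op << I;
     // Extract the overall count from the high byte (BitSize - 8 = 56).
     return Op >> 56;
   }
 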
3946 
3947 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3948  SelectionDAG &DAG) const {
3949  SDLoc DL(Op);
3950  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3951  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3952  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3953  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3954 
3955  // The only fence that needs an instruction is a sequentially-consistent
3956  // cross-thread fence.
3957  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3958  FenceSSID == SyncScope::System) {
3959  return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3960  Op.getOperand(0)),
3961  0);
3962  }
3963 
3964  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3965  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3966 }
3967 
3968 // Op is an atomic load. Lower it into a normal volatile load.
3969 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3970  SelectionDAG &DAG) const {
3971  auto *Node = cast<AtomicSDNode>(Op.getNode());
3972  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3973  Node->getChain(), Node->getBasePtr(),
3974  Node->getMemoryVT(), Node->getMemOperand());
3975 }
3976 
3977 // Op is an atomic store. Lower it into a normal volatile store.
3978 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3979  SelectionDAG &DAG) const {
3980  auto *Node = cast<AtomicSDNode>(Op.getNode());
3981  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3982  Node->getBasePtr(), Node->getMemoryVT(),
3983  Node->getMemOperand());
3984  // We have to enforce sequential consistency by performing a
3985  // serialization operation after the store.
3986  if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
3987  Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3988  MVT::Other, Chain), 0);
3989  return Chain;
3990 }
3991 
3992 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
3993 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
3994 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3995  SelectionDAG &DAG,
3996  unsigned Opcode) const {
3997  auto *Node = cast<AtomicSDNode>(Op.getNode());
3998 
3999  // 32-bit operations need no code outside the main loop.
4000  EVT NarrowVT = Node->getMemoryVT();
4001  EVT WideVT = MVT::i32;
4002  if (NarrowVT == WideVT)
4003  return Op;
4004 
4005  int64_t BitSize = NarrowVT.getSizeInBits();
4006  SDValue ChainIn = Node->getChain();
4007  SDValue Addr = Node->getBasePtr();
4008  SDValue Src2 = Node->getVal();
4009  MachineMemOperand *MMO = Node->getMemOperand();
4010  SDLoc DL(Node);
4011  EVT PtrVT = Addr.getValueType();
4012 
4013  // Convert atomic subtracts of constants into additions.
4014  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4015  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4016  Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4017  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4018  }
4019 
4020  // Get the address of the containing word.
4021  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4022  DAG.getConstant(-4, DL, PtrVT));