1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
17 #include "SystemZTargetMachine.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/IntrinsicsS390.h"
26 #include "llvm/Support/KnownBits.h"
27 #include <cctype>
28 #include <optional>
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "systemz-lower"
33 
34 namespace {
35 // Represents information about a comparison.
36 struct Comparison {
37  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
38  : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
39  Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
40 
41  // The operands to the comparison.
42  SDValue Op0, Op1;
43 
44  // Chain if this is a strict floating-point comparison.
45  SDValue Chain;
46 
47  // The opcode that should be used to compare Op0 and Op1.
48  unsigned Opcode;
49 
50  // A SystemZICMP value. Only used for integer comparisons.
51  unsigned ICmpType;
52 
53  // The mask of CC values that Opcode can produce.
54  unsigned CCValid;
55 
56  // The mask of CC values for which the original condition is true.
57  unsigned CCMask;
58 };
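// For illustration only (values not taken from this file): a signed
// 32-bit "a < b" feeding a branch would typically be described here with
// Opcode = SystemZISD::ICMP, ICmpType = SystemZICMP::SignedOnly,
// CCValid = SystemZ::CCMASK_ICMP and CCMask = SystemZ::CCMASK_CMP_LT.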
59 } // end anonymous namespace
60 
61 // Classify VT as either 32 or 64 bit.
62 static bool is32Bit(EVT VT) {
63  switch (VT.getSimpleVT().SimpleTy) {
64  case MVT::i32:
65  return true;
66  case MVT::i64:
67  return false;
68  default:
69  llvm_unreachable("Unsupported type");
70  }
71 }
72 
73 // Return a version of MachineOperand that can be safely used before the
74 // final use.
75 static MachineOperand earlyUseOperand(MachineOperand Op) {
76  if (Op.isReg())
77  Op.setIsKill(false);
78  return Op;
79 }
80 
81 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
82  const SystemZSubtarget &STI)
83  : TargetLowering(TM), Subtarget(STI) {
84  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
85 
86  auto *Regs = STI.getSpecialRegisters();
87 
88  // Set up the register classes.
89  if (Subtarget.hasHighWord())
90  addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
91  else
92  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
93  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
94  if (!useSoftFloat()) {
95  if (Subtarget.hasVector()) {
96  addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
97  addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
98  } else {
99  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
100  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
101  }
102  if (Subtarget.hasVectorEnhancements1())
103  addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
104  else
105  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
106 
107  if (Subtarget.hasVector()) {
108  addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
109  addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
110  addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
111  addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
112  addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
113  addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
114  }
115  }
116 
117  // Compute derived properties from the register classes
119 
120  // Set up special registers.
121  setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
122 
123  // TODO: It may be better to default to latency-oriented scheduling; however,
124  // LLVM's current latency-oriented scheduler can't handle physreg definitions
125  // such as SystemZ has with CC, so set this to the register-pressure
126  // scheduler, because it can.
128 
131 
132  // Instructions are strings of 2-byte aligned 2-byte values.
134  // For performance reasons we prefer 16-byte alignment.
136 
137  // Handle operations that are handled in a similar way for all types.
138  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
140  ++I) {
141  MVT VT = MVT::SimpleValueType(I);
142  if (isTypeLegal(VT)) {
143  // Lower SET_CC into an IPM-based sequence.
147 
148  // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
150 
151  // Lower SELECT_CC and BR_CC into separate comparisons and branches.
154  }
155  }
156 
157  // Expand jump table branches as address arithmetic followed by an
158  // indirect jump.
160 
161  // Expand BRCOND into a BR_CC (see above).
163 
164  // Handle integer types.
165  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
167  ++I) {
168  MVT VT = MVT::SimpleValueType(I);
169  if (isTypeLegal(VT)) {
171 
172  // Expand individual DIV and REMs into DIVREMs.
179 
180  // Support addition/subtraction with overflow.
183 
184  // Support addition/subtraction with carry.
187 
188  // Support carry in as value rather than glue.
191 
192  // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
193  // stores, putting a serialization instruction after the stores.
196 
197  // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
198  // available, or if the operand is constant.
200 
201  // Use POPCNT on z196 and above.
202  if (Subtarget.hasPopulationCount())
204  else
206 
207  // No special instructions for these.
210 
211  // Use *MUL_LOHI where possible instead of MULH*.
216 
217  // Only z196 and above have native support for conversions to unsigned.
218  // On z10, promoting to i64 doesn't generate an inexact condition for
219  // values that are outside the i32 range but in the i64 range, so use
220  // the default expansion.
221  if (!Subtarget.hasFPExtension())
223 
224  // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
225  // default to Expand, so need to be modified to Legal where appropriate.
227  if (Subtarget.hasFPExtension())
229 
230  // And similarly for STRICT_[SU]INT_TO_FP.
232  if (Subtarget.hasFPExtension())
234  }
235  }
236 
237  // Type legalization will convert 8- and 16-bit atomic operations into
238  // forms that operate on i32s (but still keeping the original memory VT).
239  // Lower them into full i32 operations.
251 
252  // Even though i128 is not a legal type, we still need to custom lower
253  // the atomic operations in order to exploit SystemZ instructions.
256 
257  // We can use the CC result of compare-and-swap to implement
258  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
262 
264 
265  // Traps are legal, as we will convert them to "j .+2".
267 
268  // z10 has instructions for signed but not unsigned FP conversion.
269  // Handle unsigned 32-bit types as signed 64-bit types.
270  if (!Subtarget.hasFPExtension()) {
275  }
276 
277  // We have native support for a 64-bit CTLZ, via FLOGR.
281 
282  // On z15 we have native support for a 64-bit CTPOP.
283  if (Subtarget.hasMiscellaneousExtensions3()) {
286  }
287 
288  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
290 
291  // Expand 128 bit shifts without using a libcall.
295  setLibcallName(RTLIB::SRL_I128, nullptr);
296  setLibcallName(RTLIB::SHL_I128, nullptr);
297  setLibcallName(RTLIB::SRA_I128, nullptr);
298 
299  // Handle bitcast from fp128 to i128.
301 
302  // We have native instructions for i8, i16 and i32 extensions, but not i1.
304  for (MVT VT : MVT::integer_valuetypes()) {
308  }
309 
310  // Handle the various types of symbolic address.
316 
317  // We need to handle dynamic allocations specially because of the
318  // 160-byte area at the bottom of the stack.
321 
324 
325  // Handle prefetches with PFD or PFDRL.
327 
328  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
329  // Assume by default that all vector operations need to be expanded.
330  for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
331  if (getOperationAction(Opcode, VT) == Legal)
332  setOperationAction(Opcode, VT, Expand);
333 
334  // Likewise all truncating stores and extending loads.
335  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
336  setTruncStoreAction(VT, InnerVT, Expand);
337  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
338  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
339  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
340  }
341 
342  if (isTypeLegal(VT)) {
343  // These operations are legal for anything that can be stored in a
344  // vector register, even if there is no native support for the format
345  // as such. In particular, we can do these for v4f32 even though there
346  // are no specific instructions for that format.
352 
353  // Likewise, except that we need to replace the nodes with something
354  // more specific.
357  }
358  }
359 
360  // Handle integer vector types.
362  if (isTypeLegal(VT)) {
363  // These operations have direct equivalents.
368  if (VT != MVT::v2i64)
374  if (Subtarget.hasVectorEnhancements1())
376  else
380 
381  // Convert a GPR scalar to a vector by inserting it into element 0.
383 
384  // Use a series of unpacks for extensions.
387 
388  // Detect shifts by a scalar amount and convert them into
389  // V*_BY_SCALAR.
393 
394  // At present ROTL isn't matched by DAGCombiner. ROTR should be
395  // converted into ROTL.
398 
399  // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
400  // and inverting the result as necessary.
403  if (Subtarget.hasVectorEnhancements1())
405  }
406  }
407 
408  if (Subtarget.hasVector()) {
409  // There should be no need to check for float types other than v2f64
410  // since <2 x f32> isn't a legal type.
419 
428  }
429 
430  if (Subtarget.hasVectorEnhancements2()) {
439 
448  }
449 
450  // Handle floating-point types.
451  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
453  ++I) {
454  MVT VT = MVT::SimpleValueType(I);
455  if (isTypeLegal(VT)) {
456  // We can use FI for FRINT.
458 
459  // We can use the extended form of FI for other rounding operations.
460  if (Subtarget.hasFPExtension()) {
466  }
467 
468  // No special instructions for these.
474 
475  // Special treatment.
477 
478  // Handle constrained floating-point operations.
488  if (Subtarget.hasFPExtension()) {
494  }
495  }
496  }
497 
498  // Handle floating-point vector types.
499  if (Subtarget.hasVector()) {
500  // Scalar-to-vector conversion is just a subreg.
503 
504  // Some insertions and extractions can be done directly but others
505  // need to go via integers.
510 
511  // These operations have direct equivalents.
526 
527  // Handle constrained floating-point operations.
540  }
541 
542  // The vector enhancements facility 1 has instructions for these.
543  if (Subtarget.hasVectorEnhancements1()) {
558 
563 
568 
573 
578 
583 
584  // Handle constrained floating-point operations.
597  for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
598  MVT::v4f32, MVT::v2f64 }) {
603  }
604  }
605 
606  // We only have fused f128 multiply-addition on vector registers.
607  if (!Subtarget.hasVectorEnhancements1()) {
610  }
611 
612  // We don't have a copysign instruction on vector registers.
613  if (Subtarget.hasVectorEnhancements1())
615 
616  // Needed so that we don't try to implement f128 constant loads using
617  // a load-and-extend of an f80 constant (in cases where the constant
618  // would fit in an f80).
619  for (MVT VT : MVT::fp_valuetypes())
621 
622  // We don't have extending load instructions on vector registers.
623  if (Subtarget.hasVectorEnhancements1()) {
626  }
627 
628  // Floating-point truncation and stores need to be done separately.
632 
633  // We have 64-bit FPR<->GPR moves, but need special handling for
634  // 32-bit forms.
635  if (!Subtarget.hasVector()) {
638  }
639 
640  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
641  // structure, but VAEND is a no-op.
645 
646  // Codes for which we want to perform some z-specific combinations.
650  ISD::LOAD,
651  ISD::STORE,
660  ISD::BSWAP,
661  ISD::SDIV,
662  ISD::UDIV,
663  ISD::SREM,
664  ISD::UREM,
667 
668  // Handle intrinsics.
671 
672  // We want to use MVC in preference to even a single load/store pair.
673  MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
675 
676  // The main memset sequence is a byte store followed by an MVC.
677  // Two STC or MV..I stores win over that, but the kind of fused stores
678  // generated by target-independent code don't when the byte value is
679  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
680  // than "STC;MVC". Handle the choice in target-specific code instead.
681  MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
683 
684  // Default to having -disable-strictnode-mutation on
685  IsStrictFPEnabled = true;
686 }
687 
689  return Subtarget.hasSoftFloat();
690 }
691 
693  LLVMContext &, EVT VT) const {
694  if (!VT.isVector())
695  return MVT::i32;
697 }
698 
700  const MachineFunction &MF, EVT VT) const {
701  VT = VT.getScalarType();
702 
703  if (!VT.isSimple())
704  return false;
705 
706  switch (VT.getSimpleVT().SimpleTy) {
707  case MVT::f32:
708  case MVT::f64:
709  return true;
710  case MVT::f128:
711  return Subtarget.hasVectorEnhancements1();
712  default:
713  break;
714  }
715 
716  return false;
717 }
718 
719 // Return true if the constant can be generated with a vector instruction,
720 // such as VGM, VGMB or VREPI.
722  const SystemZSubtarget &Subtarget) {
723  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
724  if (!Subtarget.hasVector() ||
725  (isFP128 && !Subtarget.hasVectorEnhancements1()))
726  return false;
727 
728  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
729  // preferred way of creating all-zero and all-one vectors so give it
730  // priority over other methods below.
731  unsigned Mask = 0;
732  unsigned I = 0;
733  for (; I < SystemZ::VectorBytes; ++I) {
734  uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
735  if (Byte == 0xff)
736  Mask |= 1ULL << I;
737  else if (Byte != 0)
738  break;
739  }
740  if (I == SystemZ::VectorBytes) {
742  OpVals.push_back(Mask);
744  return true;
745  }
746 
747  if (SplatBitSize > 64)
748  return false;
749 
750  auto tryValue = [&](uint64_t Value) -> bool {
751  // Try VECTOR REPLICATE IMMEDIATE
752  int64_t SignedValue = SignExtend64(Value, SplatBitSize);
753  if (isInt<16>(SignedValue)) {
754  OpVals.push_back(((unsigned) SignedValue));
756  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
757  SystemZ::VectorBits / SplatBitSize);
758  return true;
759  }
760  // Try VECTOR GENERATE MASK
761  unsigned Start, End;
762  if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
763  // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
764  // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
765  // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
766  OpVals.push_back(Start - (64 - SplatBitSize));
767  OpVals.push_back(End - (64 - SplatBitSize));
769  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
770  SystemZ::VectorBits / SplatBitSize);
771  return true;
772  }
773  return false;
774  };
775 
776  // First try assuming that any undefined bits above the highest set bit
777  // and below the lowest set bit are 1s. This increases the likelihood of
778  // being able to use a sign-extended element value in VECTOR REPLICATE
779  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
780  uint64_t SplatBitsZ = SplatBits.getZExtValue();
781  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
782  uint64_t Lower =
783  (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
784  uint64_t Upper =
785  (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
786  if (tryValue(SplatBitsZ | Upper | Lower))
787  return true;
788 
789  // Now try assuming that any undefined bits between the first and
790  // last defined set bits are set. This increases the chances of
791  // using a non-wraparound mask.
792  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
793  return tryValue(SplatBitsZ | Middle);
794 }
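// Illustrative example (values chosen for exposition): with
// SplatBitSize == 16, SplatBits == 0x00f0 and SplatUndef == 0xff0f, the
// undefined bits below the lowest and above the highest set bit are first
// assumed to be 1s, so tryValue() sees 0xffff; SignExtend64(0xffff, 16)
// is -1, which fits a signed 16-bit immediate and can therefore be
// materialized with VECTOR REPLICATE IMMEDIATE.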
795 
797  if (IntImm.isSingleWord()) {
798  IntBits = APInt(128, IntImm.getZExtValue());
799  IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
800  } else
801  IntBits = IntImm;
802  assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
803 
804  // Find the smallest splat.
805  SplatBits = IntImm;
806  unsigned Width = SplatBits.getBitWidth();
807  while (Width > 8) {
808  unsigned HalfSize = Width / 2;
809  APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
810  APInt LowValue = SplatBits.trunc(HalfSize);
811 
812  // If the two halves do not match, stop here.
813  if (HighValue != LowValue || 8 > HalfSize)
814  break;
815 
816  SplatBits = HighValue;
817  Width = HalfSize;
818  }
819  SplatUndef = 0;
820  SplatBitSize = Width;
821 }
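// Illustrative example: for a 128-bit immediate consisting of the 16-bit
// pattern 0x0001 repeated eight times, the halving loop above stops at
// Width == 16, so the value is recorded as a 16-bit element splat with
// SplatBits == 0x0001.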
822 
824  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
825  bool HasAnyUndefs;
826 
827  // Get IntBits by finding the 128 bit splat.
828  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
829  true);
830 
831  // Get SplatBits by finding the 8 bit or greater splat.
832  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
833  true);
834 }
835 
837  bool ForCodeSize) const {
838  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
839  if (Imm.isZero() || Imm.isNegZero())
840  return true;
841 
843 }
844 
845 /// Returns true if stack probing through inline assembly is requested.
847  // If the function specifically requests inline stack probes, emit them.
848  if (MF.getFunction().hasFnAttribute("probe-stack"))
849  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
850  "inline-asm";
851  return false;
852 }
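// For illustration, the attribute checked above appears in IR as e.g.
//
//   define void @f() "probe-stack"="inline-asm" { ... }
//
// Only the exact value "inline-asm" enables inline stack probes.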
853 
855  // We can use CGFI or CLGFI.
856  return isInt<32>(Imm) || isUInt<32>(Imm);
857 }
858 
860  // We can use ALGFI or SLGFI.
861  return isUInt<32>(Imm) || isUInt<32>(-Imm);
862 }
863 
865  EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
866  // Unaligned accesses should never be slower than the expanded version.
867  // We check specifically for aligned accesses in the few cases where
868  // they are required.
869  if (Fast)
870  *Fast = 1;
871  return true;
872 }
873 
874 // Information about the addressing mode for a memory access.
875 struct AddressingMode {
876  // True if a long displacement is supported.
877  bool LongDisplacement;
878 
879  // True if use of index register is supported.
880  bool IndexReg;
881 
882  AddressingMode(bool LongDispl, bool IdxReg) :
883  LongDisplacement(LongDispl), IndexReg(IdxReg) {}
884 };
885 
886 // Return the desired addressing mode for a Load whose only use (in the
887 // same block) is a Store.
888 static AddressingMode getLoadStoreAddrMode(bool HasVector,
889  Type *Ty) {
890  // With vector support a Load->Store combination may be combined to either
891  // an MVC or vector operations and it seems to work best to allow the
892  // vector addressing mode.
893  if (HasVector)
894  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
895 
896  // Otherwise only the MVC case is special.
897  bool MVC = Ty->isIntegerTy(8);
898  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
899 }
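// For example, an i8 load whose only use is a store is expected to become
// an MVC, so without vector support it is given neither a long
// displacement nor an index register; other integer types keep both.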
900 
901 // Return the addressing mode which seems most desirable given an LLVM
902 // Instruction pointer.
903 static AddressingMode
905  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
906  switch (II->getIntrinsicID()) {
907  default: break;
908  case Intrinsic::memset:
909  case Intrinsic::memmove:
910  case Intrinsic::memcpy:
911  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
912  }
913  }
914 
915  if (isa<LoadInst>(I) && I->hasOneUse()) {
916  auto *SingleUser = cast<Instruction>(*I->user_begin());
917  if (SingleUser->getParent() == I->getParent()) {
918  if (isa<ICmpInst>(SingleUser)) {
919  if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
920  if (C->getBitWidth() <= 64 &&
921  (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
922  // Comparison of memory with 16 bit signed / unsigned immediate
923  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
924  } else if (isa<StoreInst>(SingleUser))
925  // Load->Store
926  return getLoadStoreAddrMode(HasVector, I->getType());
927  }
928  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
929  if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
930  if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
931  // Load->Store
932  return getLoadStoreAddrMode(HasVector, LoadI->getType());
933  }
934 
935  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
936 
937  // * Use LDE instead of LE/LEY for z13 to avoid partial register
938  // dependencies (LDE only supports small offsets).
939  // * Utilize the vector registers to hold floating point
940  // values (vector load / store instructions only support small
941  // offsets).
942 
943  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
944  I->getOperand(0)->getType());
945  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
946  bool IsVectorAccess = MemAccessTy->isVectorTy();
947 
948  // A store of an extracted vector element will be combined into a VSTE type
949  // instruction.
950  if (!IsVectorAccess && isa<StoreInst>(I)) {
951  Value *DataOp = I->getOperand(0);
952  if (isa<ExtractElementInst>(DataOp))
953  IsVectorAccess = true;
954  }
955 
956  // A load which gets inserted into a vector element will be combined into a
957  // VLE type instruction.
958  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
959  User *LoadUser = *I->user_begin();
960  if (isa<InsertElementInst>(LoadUser))
961  IsVectorAccess = true;
962  }
963 
964  if (IsFPAccess || IsVectorAccess)
965  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
966  }
967 
968  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
969 }
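// For illustration: with vector support, a scalar load of a double falls
// into the IsFPAccess case above and gets AddressingMode(false, true),
// i.e. an index register is fine but only short 12-bit displacements
// should be assumed (matching the vector load/store and LDE limitations
// noted above).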
970 
972  const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
973  // Punt on globals for now, although they can be used in limited
974  // RELATIVE LONG cases.
975  if (AM.BaseGV)
976  return false;
977 
978  // Require a 20-bit signed offset.
979  if (!isInt<20>(AM.BaseOffs))
980  return false;
981 
982  bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
983  AddressingMode SupportedAM(!RequireD12, true);
984  if (I != nullptr)
985  SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
986 
987  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
988  return false;
989 
990  if (!SupportedAM.IndexReg)
991  // No indexing allowed.
992  return AM.Scale == 0;
993  else
994  // Indexing is OK but no scale factor can be applied.
995  return AM.Scale == 0 || AM.Scale == 1;
996 }
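// For example, "base + 0x100000" is rejected above because the offset
// does not fit in a signed 20-bit displacement, while "base + index" is
// accepted whenever the instruction's addressing mode allows an index
// register and AM.Scale is 0 or 1.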
997 
999  std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1000  unsigned SrcAS, const AttributeList &FuncAttributes) const {
1001  const int MVCFastLen = 16;
1002 
1003  if (Limit != ~unsigned(0)) {
1004  // Don't expand Op into scalar loads/stores in these cases:
1005  if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1006  return false; // Small memcpy: Use MVC
1007  if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1008  return false; // Small memset (first byte with STC/MVI): Use MVC
1009  if (Op.isZeroMemset())
1010  return false; // Memset zero: Use XC
1011  }
1012 
1013  return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1014  SrcAS, FuncAttributes);
1015 }
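// For illustration (assumed lowering, not taken from a regression test):
// a short fixed-length copy such as
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 16, i1 false)
//
// is left to the MVC path above instead of being expanded into scalar
// loads and stores, typically ending up as a single
// "mvc 0(16,%rDst),0(%rSrc)" (register names here are placeholders).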
1016 
1018  const AttributeList &FuncAttributes) const {
1019  return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1020 }
1021 
1023  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1024  return false;
1025  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
1026  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
1027  return FromBits > ToBits;
1028 }
1029 
1031  if (!FromVT.isInteger() || !ToVT.isInteger())
1032  return false;
1033  unsigned FromBits = FromVT.getFixedSizeInBits();
1034  unsigned ToBits = ToVT.getFixedSizeInBits();
1035  return FromBits > ToBits;
1036 }
1037 
1038 //===----------------------------------------------------------------------===//
1039 // Inline asm support
1040 //===----------------------------------------------------------------------===//
1041 
1044  if (Constraint.size() == 1) {
1045  switch (Constraint[0]) {
1046  case 'a': // Address register
1047  case 'd': // Data register (equivalent to 'r')
1048  case 'f': // Floating-point register
1049  case 'h': // High-part register
1050  case 'r': // General-purpose register
1051  case 'v': // Vector register
1052  return C_RegisterClass;
1053 
1054  case 'Q': // Memory with base and unsigned 12-bit displacement
1055  case 'R': // Likewise, plus an index
1056  case 'S': // Memory with base and signed 20-bit displacement
1057  case 'T': // Likewise, plus an index
1058  case 'm': // Equivalent to 'T'.
1059  return C_Memory;
1060 
1061  case 'I': // Unsigned 8-bit constant
1062  case 'J': // Unsigned 12-bit constant
1063  case 'K': // Signed 16-bit constant
1064  case 'L': // Signed 20-bit displacement (on all targets we support)
1065  case 'M': // 0x7fffffff
1066  return C_Immediate;
1067 
1068  default:
1069  break;
1070  }
1071  } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1072  switch (Constraint[1]) {
1073  case 'Q': // Address with base and unsigned 12-bit displacement
1074  case 'R': // Likewise, plus an index
1075  case 'S': // Address with base and signed 20-bit displacement
1076  case 'T': // Likewise, plus an index
1077  return C_Address;
1078 
1079  default:
1080  break;
1081  }
1082  }
1083  return TargetLowering::getConstraintType(Constraint);
1084 }
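// For illustration, these letters follow the usual GCC inline-asm
// conventions, e.g. a hypothetical C snippet
//
//   asm("ahi %0,%1" : "+d"(x) : "K"(amount));
//
// uses "d" to request a general-purpose register and "K" to require a
// signed 16-bit constant.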
1085 
1088  const char *constraint) const {
1089  ConstraintWeight weight = CW_Invalid;
1090  Value *CallOperandVal = info.CallOperandVal;
1091  // If we don't have a value, we can't do a match,
1092  // but allow it at the lowest weight.
1093  if (!CallOperandVal)
1094  return CW_Default;
1095  Type *type = CallOperandVal->getType();
1096  // Look at the constraint type.
1097  switch (*constraint) {
1098  default:
1100  break;
1101 
1102  case 'a': // Address register
1103  case 'd': // Data register (equivalent to 'r')
1104  case 'h': // High-part register
1105  case 'r': // General-purpose register
1106  if (CallOperandVal->getType()->isIntegerTy())
1107  weight = CW_Register;
1108  break;
1109 
1110  case 'f': // Floating-point register
1111  if (type->isFloatingPointTy())
1112  weight = CW_Register;
1113  break;
1114 
1115  case 'v': // Vector register
1116  if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1117  Subtarget.hasVector())
1118  weight = CW_Register;
1119  break;
1120 
1121  case 'I': // Unsigned 8-bit constant
1122  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1123  if (isUInt<8>(C->getZExtValue()))
1124  weight = CW_Constant;
1125  break;
1126 
1127  case 'J': // Unsigned 12-bit constant
1128  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1129  if (isUInt<12>(C->getZExtValue()))
1130  weight = CW_Constant;
1131  break;
1132 
1133  case 'K': // Signed 16-bit constant
1134  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1135  if (isInt<16>(C->getSExtValue()))
1136  weight = CW_Constant;
1137  break;
1138 
1139  case 'L': // Signed 20-bit displacement (on all targets we support)
1140  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1141  if (isInt<20>(C->getSExtValue()))
1142  weight = CW_Constant;
1143  break;
1144 
1145  case 'M': // 0x7fffffff
1146  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1147  if (C->getZExtValue() == 0x7fffffff)
1148  weight = CW_Constant;
1149  break;
1150  }
1151  return weight;
1152 }
1153 
1154 // Parse a "{tNNN}" register constraint for which the register type "t"
1155 // has already been verified. RC is the class associated with "t" and
1156 // Map maps 0-based register numbers to LLVM register numbers.
1157 static std::pair<unsigned, const TargetRegisterClass *>
1159  const unsigned *Map, unsigned Size) {
1160  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1161  if (isdigit(Constraint[2])) {
1162  unsigned Index;
1163  bool Failed =
1164  Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1165  if (!Failed && Index < Size && Map[Index])
1166  return std::make_pair(Map[Index], RC);
1167  }
1168  return std::make_pair(0U, nullptr);
1169 }
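// For illustration: for the constraint "{r5}" with a 64-bit operand, the
// caller below passes SystemZMC::GR64Regs as Map, so Index 5 resolves to
// the LLVM register SystemZ::R5D.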
1170 
1171 std::pair<unsigned, const TargetRegisterClass *>
1173  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1174  if (Constraint.size() == 1) {
1175  // GCC Constraint Letters
1176  switch (Constraint[0]) {
1177  default: break;
1178  case 'd': // Data register (equivalent to 'r')
1179  case 'r': // General-purpose register
1180  if (VT == MVT::i64)
1181  return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1182  else if (VT == MVT::i128)
1183  return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1184  return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1185 
1186  case 'a': // Address register
1187  if (VT == MVT::i64)
1188  return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1189  else if (VT == MVT::i128)
1190  return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1191  return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1192 
1193  case 'h': // High-part register (an LLVM extension)
1194  return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1195 
1196  case 'f': // Floating-point register
1197  if (!useSoftFloat()) {
1198  if (VT == MVT::f64)
1199  return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1200  else if (VT == MVT::f128)
1201  return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1202  return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1203  }
1204  break;
1205  case 'v': // Vector register
1206  if (Subtarget.hasVector()) {
1207  if (VT == MVT::f32)
1208  return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1209  if (VT == MVT::f64)
1210  return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1211  return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1212  }
1213  break;
1214  }
1215  }
1216  if (Constraint.size() > 0 && Constraint[0] == '{') {
1217  // We need to override the default register parsing for GPRs and FPRs
1218  // because the interpretation depends on VT. The internal names of
1219  // the registers are also different from the external names
1220  // (F0D and F0S instead of F0, etc.).
1221  if (Constraint[1] == 'r') {
1222  if (VT == MVT::i32)
1223  return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1224  SystemZMC::GR32Regs, 16);
1225  if (VT == MVT::i128)
1226  return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1227  SystemZMC::GR128Regs, 16);
1228  return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1229  SystemZMC::GR64Regs, 16);
1230  }
1231  if (Constraint[1] == 'f') {
1232  if (useSoftFloat())
1233  return std::make_pair(
1234  0u, static_cast<const TargetRegisterClass *>(nullptr));
1235  if (VT == MVT::f32)
1236  return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1237  SystemZMC::FP32Regs, 16);
1238  if (VT == MVT::f128)
1239  return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1240  SystemZMC::FP128Regs, 16);
1241  return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1242  SystemZMC::FP64Regs, 16);
1243  }
1244  if (Constraint[1] == 'v') {
1245  if (!Subtarget.hasVector())
1246  return std::make_pair(
1247  0u, static_cast<const TargetRegisterClass *>(nullptr));
1248  if (VT == MVT::f32)
1249  return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1250  SystemZMC::VR32Regs, 32);
1251  if (VT == MVT::f64)
1252  return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1253  SystemZMC::VR64Regs, 32);
1254  return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1255  SystemZMC::VR128Regs, 32);
1256  }
1257  }
1258  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1259 }
1260 
1261 // FIXME? Maybe this could be a TableGen attribute on some registers and
1262 // this table could be generated automatically from RegInfo.
1263 Register
1265  const MachineFunction &MF) const {
1266  const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
1267 
1268  Register Reg =
1270  .Case("r4", Subtarget->isTargetXPLINK64() ? SystemZ::R4D : 0)
1271  .Case("r15", Subtarget->isTargetELF() ? SystemZ::R15D : 0)
1272  .Default(0);
1273 
1274  if (Reg)
1275  return Reg;
1276  report_fatal_error("Invalid register name global variable");
1277 }
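// For illustration, this path backs named-register accesses such as a
// hypothetical global register variable
//
//   register long stack_ptr __asm__("r15");
//
// which reaches the backend through @llvm.read_register /
// @llvm.write_register with the register name string "r15".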
1278 
1280 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1281  std::vector<SDValue> &Ops,
1282  SelectionDAG &DAG) const {
1283  // Only support length 1 constraints for now.
1284  if (Constraint.length() == 1) {
1285  switch (Constraint[0]) {
1286  case 'I': // Unsigned 8-bit constant
1287  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1288  if (isUInt<8>(C->getZExtValue()))
1289  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1290  Op.getValueType()));
1291  return;
1292 
1293  case 'J': // Unsigned 12-bit constant
1294  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1295  if (isUInt<12>(C->getZExtValue()))
1296  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1297  Op.getValueType()));
1298  return;
1299 
1300  case 'K': // Signed 16-bit constant
1301  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1302  if (isInt<16>(C->getSExtValue()))
1303  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1304  Op.getValueType()));
1305  return;
1306 
1307  case 'L': // Signed 20-bit displacement (on all targets we support)
1308  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1309  if (isInt<20>(C->getSExtValue()))
1310  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1311  Op.getValueType()));
1312  return;
1313 
1314  case 'M': // 0x7fffffff
1315  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1316  if (C->getZExtValue() == 0x7fffffff)
1317  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1318  Op.getValueType()));
1319  return;
1320  }
1321  }
1322  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1323 }
1324 
1325 //===----------------------------------------------------------------------===//
1326 // Calling conventions
1327 //===----------------------------------------------------------------------===//
1328 
1329 #include "SystemZGenCallingConv.inc"
1330 
1332  CallingConv::ID) const {
1333  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1334  SystemZ::R14D, 0 };
1335  return ScratchRegs;
1336 }
1337 
1339  Type *ToType) const {
1340  return isTruncateFree(FromType, ToType);
1341 }
1342 
1344  return CI->isTailCall();
1345 }
1346 
1347 // We do not yet support 128-bit single-element vector types. If the user
1348 // attempts to use such types as function argument or return type, prefer
1349 // to error out instead of emitting code violating the ABI.
1350 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1351  if (ArgVT.isVector() && !VT.isVector())
1352  report_fatal_error("Unsupported vector argument or return type");
1353 }
1354 
1356  for (unsigned i = 0; i < Ins.size(); ++i)
1357  VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1358 }
1359 
1361  for (unsigned i = 0; i < Outs.size(); ++i)
1362  VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1363 }
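// For illustration: an argument of type <1 x i128> arrives here with a
// vector ArgVT but a non-vector legalized VT, so compilation stops with
// the fatal error above rather than silently emitting ABI-incompatible
// code.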
1364 
1365 // Value is a value that has been passed to us in the location described by VA
1366 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1367 // any loads onto Chain.
1369  CCValAssign &VA, SDValue Chain,
1370  SDValue Value) {
1371  // If the argument has been promoted from a smaller type, insert an
1372  // assertion to capture this.
1373  if (VA.getLocInfo() == CCValAssign::SExt)
1374  Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1375  DAG.getValueType(VA.getValVT()));
1376  else if (VA.getLocInfo() == CCValAssign::ZExt)
1377  Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1378  DAG.getValueType(VA.getValVT()));
1379 
1380  if (VA.isExtInLoc())
1381  Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1382  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1383  // If this is a short vector argument loaded from the stack,
1384  // extend from i64 to full vector size and then bitcast.
1385  assert(VA.getLocVT() == MVT::i64);
1386  assert(VA.getValVT().isVector());
1388  Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1389  } else
1390  assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1391  return Value;
1392 }
1393 
1394 // Value is a value of type VA.getValVT() that we need to copy into
1395 // the location described by VA. Return a copy of Value converted to
1396 // VA.getValVT(). The caller is responsible for handling indirect values.
1398  CCValAssign &VA, SDValue Value) {
1399  switch (VA.getLocInfo()) {
1400  case CCValAssign::SExt:
1401  return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1402  case CCValAssign::ZExt:
1403  return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1404  case CCValAssign::AExt:
1405  return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1406  case CCValAssign::BCvt: {
1407  assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1408  assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1409  VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1410  // For an f32 vararg we need to first promote it to an f64 and then
1411  // bitcast it to an i64.
1412  if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1414  MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1415  ? MVT::v2i64
1416  : VA.getLocVT();
1417  Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1418  // For ELF, this is a short vector argument to be stored to the stack,
1419  // bitcast to v2i64 and then extract first element.
1420  if (BitCastToType == MVT::v2i64)
1421  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1422  DAG.getConstant(0, DL, MVT::i32));
1423  return Value;
1424  }
1425  case CCValAssign::Full:
1426  return Value;
1427  default:
1428  llvm_unreachable("Unhandled getLocInfo()");
1429  }
1430 }
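// For illustration: an f32 vararg that has been assigned an i64 location
// takes the BCvt path above as f32 -> f64 -> i64, so it can travel in the
// 64-bit GPR or stack slot assigned to it.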
1431 
1433  SDLoc DL(In);
1435  DAG.getIntPtrConstant(0, DL));
1437  DAG.getIntPtrConstant(1, DL));
1438  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1439  MVT::Untyped, Hi, Lo);
1440  return SDValue(Pair, 0);
1441 }
1442 
1444  SDLoc DL(In);
1445  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1446  DL, MVT::i64, In);
1447  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1448  DL, MVT::i64, In);
1449  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1450 }
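// For illustration: these two helpers convert between i128 and the
// untyped GR128 register pair used for inline-asm operands; the high 64
// bits live in subreg_h64 and the low 64 bits in subreg_l64 of the pair.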
1451 
1453  SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1454  unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1455  EVT ValueVT = Val.getValueType();
1456  assert((ValueVT != MVT::i128 ||
1457  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1458  (NumParts == 2 && PartVT == MVT::i64))) &&
1459  "Unknown handling of i128 value.");
1460  if (ValueVT == MVT::i128 && NumParts == 1) {
1461  // Inline assembly operand.
1462  Parts[0] = lowerI128ToGR128(DAG, Val);
1463  return true;
1464  }
1465  return false;
1466 }
1467 
1469  SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1470  MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1471  assert((ValueVT != MVT::i128 ||
1472  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1473  (NumParts == 2 && PartVT == MVT::i64))) &&
1474  "Unknown handling of i128 value.");
1475  if (ValueVT == MVT::i128 && NumParts == 1)
1476  // Inline assembly operand.
1477  return lowerGR128ToI128(DAG, Parts[0]);
1478  return SDValue();
1479 }
1480 
1482  SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1483  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1484  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1485  MachineFunction &MF = DAG.getMachineFunction();
1486  MachineFrameInfo &MFI = MF.getFrameInfo();
1488  SystemZMachineFunctionInfo *FuncInfo =
1490  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1491  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1492 
1493  // Detect unsupported vector argument types.
1494  if (Subtarget.hasVector())
1496 
1497  // Assign locations to all of the incoming arguments.
1499  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1500  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1501 
1502  unsigned NumFixedGPRs = 0;
1503  unsigned NumFixedFPRs = 0;
1504  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1505  SDValue ArgValue;
1506  CCValAssign &VA = ArgLocs[I];
1507  EVT LocVT = VA.getLocVT();
1508  if (VA.isRegLoc()) {
1509  // Arguments passed in registers
1510  const TargetRegisterClass *RC;
1511  switch (LocVT.getSimpleVT().SimpleTy) {
1512  default:
1513  // Integers smaller than i64 should be promoted to i64.
1514  llvm_unreachable("Unexpected argument type");
1515  case MVT::i32:
1516  NumFixedGPRs += 1;
1517  RC = &SystemZ::GR32BitRegClass;
1518  break;
1519  case MVT::i64:
1520  NumFixedGPRs += 1;
1521  RC = &SystemZ::GR64BitRegClass;
1522  break;
1523  case MVT::f32:
1524  NumFixedFPRs += 1;
1525  RC = &SystemZ::FP32BitRegClass;
1526  break;
1527  case MVT::f64:
1528  NumFixedFPRs += 1;
1529  RC = &SystemZ::FP64BitRegClass;
1530  break;
1531  case MVT::f128:
1532  NumFixedFPRs += 2;
1533  RC = &SystemZ::FP128BitRegClass;
1534  break;
1535  case MVT::v16i8:
1536  case MVT::v8i16:
1537  case MVT::v4i32:
1538  case MVT::v2i64:
1539  case MVT::v4f32:
1540  case MVT::v2f64:
1541  RC = &SystemZ::VR128BitRegClass;
1542  break;
1543  }
1544 
1545  Register VReg = MRI.createVirtualRegister(RC);
1546  MRI.addLiveIn(VA.getLocReg(), VReg);
1547  ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1548  } else {
1549  assert(VA.isMemLoc() && "Argument not register or memory");
1550 
1551  // Create the frame index object for this incoming parameter.
1552  // FIXME: Pre-include call frame size in the offset; it should not
1553  // be necessary to manually add it here.
1554  int64_t ArgSPOffset = VA.getLocMemOffset();
1555  if (Subtarget.isTargetXPLINK64()) {
1556  auto &XPRegs =
1558  ArgSPOffset += XPRegs.getCallFrameSize();
1559  }
1560  int FI =
1561  MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1562 
1563  // Create the SelectionDAG nodes corresponding to a load
1564  // from this parameter. Unpromoted ints and floats are
1565  // passed as right-justified 8-byte values.
1566  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1567  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1568  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1569  DAG.getIntPtrConstant(4, DL));
1570  ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1572  }
1573 
1574  // Convert the value of the argument register into the value that's
1575  // being passed.
1576  if (VA.getLocInfo() == CCValAssign::Indirect) {
1577  InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1578  MachinePointerInfo()));
1579  // If the original argument was split (e.g. i128), we need
1580  // to load all parts of it here (using the same address).
1581  unsigned ArgIndex = Ins[I].OrigArgIndex;
1582  assert (Ins[I].PartOffset == 0);
1583  while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1584  CCValAssign &PartVA = ArgLocs[I + 1];
1585  unsigned PartOffset = Ins[I + 1].PartOffset;
1586  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1587  DAG.getIntPtrConstant(PartOffset, DL));
1588  InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1589  MachinePointerInfo()));
1590  ++I;
1591  }
1592  } else
1593  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1594  }
1595 
1596  // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
1597  if (IsVarArg && Subtarget.isTargetELF()) {
1598  // Save the number of non-varargs registers for later use by va_start, etc.
1599  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1600  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1601 
1602  // Likewise the address (in the form of a frame index) of where the
1603  // first stack vararg would be. The 1-byte size here is arbitrary.
1604  int64_t StackSize = CCInfo.getNextStackOffset();
1605  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1606 
1607  // ...and a similar frame index for the caller-allocated save area
1608  // that will be used to store the incoming registers.
1609  int64_t RegSaveOffset =
1610  -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1611  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1612  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1613 
1614  // Store the FPR varargs in the reserved frame slots. (We store the
1615  // GPRs as part of the prologue.)
1616  if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1618  for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1619  unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1620  int FI =
1621  MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1622  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1624  &SystemZ::FP64BitRegClass);
1625  SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1626  MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1628  }
1629  // Join the stores, which are independent of one another.
1630  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1631  makeArrayRef(&MemOps[NumFixedFPRs],
1632  SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1633  }
1634  }
1635 
1636  // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
1637  // register (R5)
1638  return Chain;
1639 }
1640 
1641 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1644  // Punt if there are any indirect or stack arguments, or if the call
1645  // needs the callee-saved argument register R6, or if the call uses
1646  // the callee-saved register arguments SwiftSelf and SwiftError.
1647  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1648  CCValAssign &VA = ArgLocs[I];
1649  if (VA.getLocInfo() == CCValAssign::Indirect)
1650  return false;
1651  if (!VA.isRegLoc())
1652  return false;
1653  Register Reg = VA.getLocReg();
1654  if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1655  return false;
1656  if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1657  return false;
1658  }
1659  return true;
1660 }
1661 
1662 SDValue
1664  SmallVectorImpl<SDValue> &InVals) const {
1665  SelectionDAG &DAG = CLI.DAG;
1666  SDLoc &DL = CLI.DL;
1668  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1670  SDValue Chain = CLI.Chain;
1671  SDValue Callee = CLI.Callee;
1672  bool &IsTailCall = CLI.IsTailCall;
1673  CallingConv::ID CallConv = CLI.CallConv;
1674  bool IsVarArg = CLI.IsVarArg;
1675  MachineFunction &MF = DAG.getMachineFunction();
1676  EVT PtrVT = getPointerTy(MF.getDataLayout());
1677  LLVMContext &Ctx = *DAG.getContext();
1679 
1680  // FIXME: z/OS support to be added in later.
1681  if (Subtarget.isTargetXPLINK64())
1682  IsTailCall = false;
1683 
1684  // Detect unsupported vector argument and return types.
1685  if (Subtarget.hasVector()) {
1686  VerifyVectorTypes(Outs);
1688  }
1689 
1690  // Analyze the operands of the call, assigning locations to each operand.
1692  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1693  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1694 
1695  // We don't support GuaranteedTailCallOpt, only automatically-detected
1696  // sibling calls.
1697  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1698  IsTailCall = false;
1699 
1700  // Get a count of how many bytes are to be pushed on the stack.
1701  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1702 
1703  if (Subtarget.isTargetXPLINK64())
1704  // Although the XPLINK specifications for AMODE64 state that the minimum
1705  // size of the param area is 32 bytes and no rounding is otherwise
1706  // specified, we round this area up in 64-byte increments to be compatible
1707  // with existing compilers.
1708  NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
1709 
1710  // Mark the start of the call.
1711  if (!IsTailCall)
1712  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1713 
1714  // Copy argument values to their designated locations.
1716  SmallVector<SDValue, 8> MemOpChains;
1717  SDValue StackPtr;
1718  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1719  CCValAssign &VA = ArgLocs[I];
1720  SDValue ArgValue = OutVals[I];
1721 
1722  if (VA.getLocInfo() == CCValAssign::Indirect) {
1723  // Store the argument in a stack slot and pass its address.
1724  unsigned ArgIndex = Outs[I].OrigArgIndex;
1725  EVT SlotVT;
1726  if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1727  // Allocate the full stack space for a promoted (and split) argument.
1728  Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1729  EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1730  MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1731  unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1732  SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1733  } else {
1734  SlotVT = Outs[I].ArgVT;
1735  }
1736  SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1737  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1738  MemOpChains.push_back(
1739  DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1741  // If the original argument was split (e.g. i128), we need
1742  // to store all parts of it here (and pass just one address).
1743  assert (Outs[I].PartOffset == 0);
1744  while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1745  SDValue PartValue = OutVals[I + 1];
1746  unsigned PartOffset = Outs[I + 1].PartOffset;
1747  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1748  DAG.getIntPtrConstant(PartOffset, DL));
1749  MemOpChains.push_back(
1750  DAG.getStore(Chain, DL, PartValue, Address,
1752  assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1753  SlotVT.getStoreSize()) && "Not enough space for argument part!");
1754  ++I;
1755  }
1756  ArgValue = SpillSlot;
1757  } else
1758  ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1759 
1760  if (VA.isRegLoc()) {
1761  // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1762  // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1763  // and low values.
1764  if (VA.getLocVT() == MVT::i128)
1765  ArgValue = lowerI128ToGR128(DAG, ArgValue);
1766  // Queue up the argument copies and emit them at the end.
1767  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1768  } else {
1769  assert(VA.isMemLoc() && "Argument not register or memory");
1770 
1771  // Work out the address of the stack slot. Unpromoted ints and
1772  // floats are passed as right-justified 8-byte values.
1773  if (!StackPtr.getNode())
1774  StackPtr = DAG.getCopyFromReg(Chain, DL,
1775  Regs->getStackPointerRegister(), PtrVT);
1776  unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1777  VA.getLocMemOffset();
1778  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1779  Offset += 4;
1780  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1781  DAG.getIntPtrConstant(Offset, DL));
1782 
1783  // Emit the store.
1784  MemOpChains.push_back(
1785  DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1786 
1787  // Although long doubles or vectors are passed through the stack when
1788  // they are vararg (non-fixed arguments), if a long double or vector
1789  // occupies the third and fourth slot of the argument list, GPR3 should
1790  // still shadow the third slot of the argument list.
1791  if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1792  SDValue ShadowArgValue =
1793  DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1794  DAG.getIntPtrConstant(1, DL));
1795  RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1796  }
1797  }
1798  }
1799 
1800  // Join the stores, which are independent of one another.
1801  if (!MemOpChains.empty())
1802  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1803 
1804  // Accept direct calls by converting symbolic call addresses to the
1805  // associated Target* opcodes. Force %r1 to be used for indirect
1806  // tail calls.
1807  SDValue Glue;
1808  // FIXME: Add support for XPLINK using the ADA register.
1809  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1810  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1812  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1813  Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1815  } else if (IsTailCall) {
1816  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1817  Glue = Chain.getValue(1);
1818  Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1819  }
1820 
1821  // Build a sequence of copy-to-reg nodes, chained and glued together.
1822  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1823  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1824  RegsToPass[I].second, Glue);
1825  Glue = Chain.getValue(1);
1826  }
1827 
1828  // The first call operand is the chain and the second is the target address.
1830  Ops.push_back(Chain);
1831  Ops.push_back(Callee);
1832 
1833  // Add argument registers to the end of the list so that they are
1834  // known live into the call.
1835  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1836  Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1837  RegsToPass[I].second.getValueType()));
1838 
1839  // Add a register mask operand representing the call-preserved registers.
1840  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1841  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1842  assert(Mask && "Missing call preserved mask for calling convention");
1843  Ops.push_back(DAG.getRegisterMask(Mask));
1844 
1845  // Glue the call to the argument copies, if any.
1846  if (Glue.getNode())
1847  Ops.push_back(Glue);
1848 
1849  // Emit the call.
1850  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1851  if (IsTailCall)
1852  return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1853  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1854  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1855  Glue = Chain.getValue(1);
1856 
1857  // Mark the end of the call, which is glued to the call itself.
1858  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
1859  Glue = Chain.getValue(1);
1860 
1861  // Assign locations to each value returned by this call.
1863  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1864  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1865 
1866  // Copy all of the result registers out of their specified physreg.
1867  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1868  CCValAssign &VA = RetLocs[I];
1869 
1870  // Copy the value out, gluing the copy to the end of the call sequence.
1871  SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1872  VA.getLocVT(), Glue);
1873  Chain = RetValue.getValue(1);
1874  Glue = RetValue.getValue(2);
1875 
1876  // Convert the value of the return register into the value that's
1877  // being returned.
1878  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1879  }
1880 
1881  return Chain;
1882 }
1883 
1884 // Generate a call taking the given operands as arguments and returning a
1885 // result of type RetVT.
1886 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
1887  SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
1888  ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
1889  bool DoesNotReturn, bool IsReturnValueUsed) const {
1891  Args.reserve(Ops.size());
1892 
1894  for (SDValue Op : Ops) {
1895  Entry.Node = Op;
1896  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1897  Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1898  Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1899  Args.push_back(Entry);
1900  }
1901 
1902  SDValue Callee =
1903  DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
1904 
1905  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
1906  TargetLowering::CallLoweringInfo CLI(DAG);
1907  bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
1908  CLI.setDebugLoc(DL)
1909  .setChain(Chain)
1910  .setCallee(CallConv, RetTy, Callee, std::move(Args))
1911  .setNoReturn(DoesNotReturn)
1912  .setDiscardResult(!IsReturnValueUsed)
1913  .setSExtResult(SignExtend)
1914  .setZExtResult(!SignExtend);
1915  return LowerCallTo(CLI);
1916 }
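// Illustrative note, not part of the original source: a lowering routine can
// use makeExternalCall to emit a runtime helper call.  Sketch only, with a
// hypothetical helper name and i64 operands:
//   std::pair<SDValue, SDValue> CallResult =
//       makeExternalCall(Chain, DAG, "__some_helper", MVT::i64, {Op0, Op1},
//                        CallingConv::C, /*IsSigned=*/true, DL,
//                        /*DoesNotReturn=*/false, /*IsReturnValueUsed=*/true);
//   // CallResult.first is the returned value, CallResult.second the new chain.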
1917 
1918 bool SystemZTargetLowering::
1919 CanLowerReturn(CallingConv::ID CallConv,
1920  MachineFunction &MF, bool isVarArg,
1921  const SmallVectorImpl<ISD::OutputArg> &Outs,
1922  LLVMContext &Context) const {
1923  // Detect unsupported vector return types.
1924  if (Subtarget.hasVector())
1925  VerifyVectorTypes(Outs);
1926 
1927  // Special case that we cannot easily detect in RetCC_SystemZ since
1928  // i128 is not a legal type.
1929  for (auto &Out : Outs)
1930  if (Out.ArgVT == MVT::i128)
1931  return false;
1932 
1933  SmallVector<CCValAssign, 16> RetLocs;
1934  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1935  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1936 }
1937 
1938 SDValue
1939 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1940  bool IsVarArg,
1941  const SmallVectorImpl<ISD::OutputArg> &Outs,
1942  const SmallVectorImpl<SDValue> &OutVals,
1943  const SDLoc &DL, SelectionDAG &DAG) const {
1944  MachineFunction &MF = DAG.getMachineFunction();
1945 
1946  // Detect unsupported vector return types.
1947  if (Subtarget.hasVector())
1948  VerifyVectorTypes(Outs);
1949 
1950  // Assign locations to each returned value.
1951  SmallVector<CCValAssign, 16> RetLocs;
1952  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1953  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1954 
1955  // Quick exit for void returns
1956  if (RetLocs.empty())
1957  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1958 
1959  if (CallConv == CallingConv::GHC)
1960  report_fatal_error("GHC functions return void only");
1961 
1962  // Copy the result values into the output registers.
1963  SDValue Glue;
1964  SmallVector<SDValue, 4> RetOps;
1965  RetOps.push_back(Chain);
1966  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1967  CCValAssign &VA = RetLocs[I];
1968  SDValue RetValue = OutVals[I];
1969 
1970  // Make the return register live on exit.
1971  assert(VA.isRegLoc() && "Can only return in registers!");
1972 
1973  // Promote the value as required.
1974  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1975 
1976  // Chain and glue the copies together.
1977  Register Reg = VA.getLocReg();
1978  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1979  Glue = Chain.getValue(1);
1980  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1981  }
1982 
1983  // Update chain and glue.
1984  RetOps[0] = Chain;
1985  if (Glue.getNode())
1986  RetOps.push_back(Glue);
1987 
1988  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1989 }
1990 
1991 // Return true if Op is an intrinsic node with chain that returns the CC value
1992 // as its only (other) argument. Provide the associated SystemZISD opcode and
1993 // the mask of valid CC values if so.
1994 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1995  unsigned &CCValid) {
1996  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1997  switch (Id) {
1998  case Intrinsic::s390_tbegin:
1999  Opcode = SystemZISD::TBEGIN;
2000  CCValid = SystemZ::CCMASK_TBEGIN;
2001  return true;
2002 
2003  case Intrinsic::s390_tbegin_nofloat:
2004  Opcode = SystemZISD::TBEGIN_NOFLOAT;
2005  CCValid = SystemZ::CCMASK_TBEGIN;
2006  return true;
2007 
2008  case Intrinsic::s390_tend:
2009  Opcode = SystemZISD::TEND;
2010  CCValid = SystemZ::CCMASK_TEND;
2011  return true;
2012 
2013  default:
2014  return false;
2015  }
2016 }
2017 
2018 // Return true if Op is an intrinsic node without chain that returns the
2019 // CC value as its final argument. Provide the associated SystemZISD
2020 // opcode and the mask of valid CC values if so.
2021 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2022  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2023  switch (Id) {
2024  case Intrinsic::s390_vpkshs:
2025  case Intrinsic::s390_vpksfs:
2026  case Intrinsic::s390_vpksgs:
2027  Opcode = SystemZISD::PACKS_CC;
2028  CCValid = SystemZ::CCMASK_VCMP;
2029  return true;
2030 
2031  case Intrinsic::s390_vpklshs:
2032  case Intrinsic::s390_vpklsfs:
2033  case Intrinsic::s390_vpklsgs:
2034  Opcode = SystemZISD::PACKLS_CC;
2035  CCValid = SystemZ::CCMASK_VCMP;
2036  return true;
2037 
2038  case Intrinsic::s390_vceqbs:
2039  case Intrinsic::s390_vceqhs:
2040  case Intrinsic::s390_vceqfs:
2041  case Intrinsic::s390_vceqgs:
2042  Opcode = SystemZISD::VICMPES;
2043  CCValid = SystemZ::CCMASK_VCMP;
2044  return true;
2045 
2046  case Intrinsic::s390_vchbs:
2047  case Intrinsic::s390_vchhs:
2048  case Intrinsic::s390_vchfs:
2049  case Intrinsic::s390_vchgs:
2050  Opcode = SystemZISD::VICMPHS;
2051  CCValid = SystemZ::CCMASK_VCMP;
2052  return true;
2053 
2054  case Intrinsic::s390_vchlbs:
2055  case Intrinsic::s390_vchlhs:
2056  case Intrinsic::s390_vchlfs:
2057  case Intrinsic::s390_vchlgs:
2058  Opcode = SystemZISD::VICMPHLS;
2059  CCValid = SystemZ::CCMASK_VCMP;
2060  return true;
2061 
2062  case Intrinsic::s390_vtm:
2063  Opcode = SystemZISD::VTM;
2064  CCValid = SystemZ::CCMASK_VCMP;
2065  return true;
2066 
2067  case Intrinsic::s390_vfaebs:
2068  case Intrinsic::s390_vfaehs:
2069  case Intrinsic::s390_vfaefs:
2070  Opcode = SystemZISD::VFAE_CC;
2071  CCValid = SystemZ::CCMASK_ANY;
2072  return true;
2073 
2074  case Intrinsic::s390_vfaezbs:
2075  case Intrinsic::s390_vfaezhs:
2076  case Intrinsic::s390_vfaezfs:
2077  Opcode = SystemZISD::VFAEZ_CC;
2078  CCValid = SystemZ::CCMASK_ANY;
2079  return true;
2080 
2081  case Intrinsic::s390_vfeebs:
2082  case Intrinsic::s390_vfeehs:
2083  case Intrinsic::s390_vfeefs:
2084  Opcode = SystemZISD::VFEE_CC;
2085  CCValid = SystemZ::CCMASK_ANY;
2086  return true;
2087 
2088  case Intrinsic::s390_vfeezbs:
2089  case Intrinsic::s390_vfeezhs:
2090  case Intrinsic::s390_vfeezfs:
2091  Opcode = SystemZISD::VFEEZ_CC;
2092  CCValid = SystemZ::CCMASK_ANY;
2093  return true;
2094 
2095  case Intrinsic::s390_vfenebs:
2096  case Intrinsic::s390_vfenehs:
2097  case Intrinsic::s390_vfenefs:
2098  Opcode = SystemZISD::VFENE_CC;
2099  CCValid = SystemZ::CCMASK_ANY;
2100  return true;
2101 
2102  case Intrinsic::s390_vfenezbs:
2103  case Intrinsic::s390_vfenezhs:
2104  case Intrinsic::s390_vfenezfs:
2105  Opcode = SystemZISD::VFENEZ_CC;
2106  CCValid = SystemZ::CCMASK_ANY;
2107  return true;
2108 
2109  case Intrinsic::s390_vistrbs:
2110  case Intrinsic::s390_vistrhs:
2111  case Intrinsic::s390_vistrfs:
2112  Opcode = SystemZISD::VISTR_CC;
2113  CCValid = SystemZ::CCMASK_ANY;
2114  return true;
2115 
2116  case Intrinsic::s390_vstrcbs:
2117  case Intrinsic::s390_vstrchs:
2118  case Intrinsic::s390_vstrcfs:
2119  Opcode = SystemZISD::VSTRC_CC;
2120  CCValid = SystemZ::CCMASK_ANY;
2121  return true;
2122 
2123  case Intrinsic::s390_vstrczbs:
2124  case Intrinsic::s390_vstrczhs:
2125  case Intrinsic::s390_vstrczfs:
2126  Opcode = SystemZISD::VSTRCZ_CC;
2127  CCValid = SystemZ::CCMASK_ANY;
2128  return true;
2129 
2130  case Intrinsic::s390_vstrsb:
2131  case Intrinsic::s390_vstrsh:
2132  case Intrinsic::s390_vstrsf:
2133  Opcode = SystemZISD::VSTRS_CC;
2134  CCValid = SystemZ::CCMASK_ANY;
2135  return true;
2136 
2137  case Intrinsic::s390_vstrszb:
2138  case Intrinsic::s390_vstrszh:
2139  case Intrinsic::s390_vstrszf:
2140  Opcode = SystemZISD::VSTRSZ_CC;
2141  CCValid = SystemZ::CCMASK_ANY;
2142  return true;
2143 
2144  case Intrinsic::s390_vfcedbs:
2145  case Intrinsic::s390_vfcesbs:
2146  Opcode = SystemZISD::VFCMPES;
2147  CCValid = SystemZ::CCMASK_VCMP;
2148  return true;
2149 
2150  case Intrinsic::s390_vfchdbs:
2151  case Intrinsic::s390_vfchsbs:
2152  Opcode = SystemZISD::VFCMPHS;
2153  CCValid = SystemZ::CCMASK_VCMP;
2154  return true;
2155 
2156  case Intrinsic::s390_vfchedbs:
2157  case Intrinsic::s390_vfchesbs:
2158  Opcode = SystemZISD::VFCMPHES;
2159  CCValid = SystemZ::CCMASK_VCMP;
2160  return true;
2161 
2162  case Intrinsic::s390_vftcidb:
2163  case Intrinsic::s390_vftcisb:
2164  Opcode = SystemZISD::VFTCI;
2165  CCValid = SystemZ::CCMASK_VCMP;
2166  return true;
2167 
2168  case Intrinsic::s390_tdc:
2169  Opcode = SystemZISD::TDC;
2170  CCValid = SystemZ::CCMASK_TDC;
2171  return true;
2172 
2173  default:
2174  return false;
2175  }
2176 }
2177 
2178 // Emit an intrinsic with chain and an explicit CC register result.
2179 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2180  unsigned Opcode) {
2181  // Copy all operands except the intrinsic ID.
2182  unsigned NumOps = Op.getNumOperands();
2183  SmallVector<SDValue, 6> Ops;
2184  Ops.reserve(NumOps - 1);
2185  Ops.push_back(Op.getOperand(0));
2186  for (unsigned I = 2; I < NumOps; ++I)
2187  Ops.push_back(Op.getOperand(I));
2188 
2189  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2190  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2191  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2192  SDValue OldChain = SDValue(Op.getNode(), 1);
2193  SDValue NewChain = SDValue(Intr.getNode(), 1);
2194  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2195  return Intr.getNode();
2196 }
2197 
2198 // Emit an intrinsic with an explicit CC register result.
2199 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2200  unsigned Opcode) {
2201  // Copy all operands except the intrinsic ID.
2202  unsigned NumOps = Op.getNumOperands();
2203  SmallVector<SDValue, 6> Ops;
2204  Ops.reserve(NumOps - 1);
2205  for (unsigned I = 1; I < NumOps; ++I)
2206  Ops.push_back(Op.getOperand(I));
2207 
2208  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2209  return Intr.getNode();
2210 }
2211 
2212 // CC is a comparison that will be implemented using an integer or
2213 // floating-point comparison. Return the condition code mask for
2214 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
2215 // unsigned comparisons and clear for signed ones. In the floating-point
2216 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
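// Illustrative note, not part of the original source: for example, ISD::SETUGT
// expands to CCMASK_CMP_UO | CCMASK_CMP_GT.  Integer comparisons later clear
// the UO bit again (see getCmp), while floating-point comparisons keep it with
// its usual "unordered" meaning.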
2217 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2218 #define CONV(X) \
2219  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2220  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2221  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2222 
2223  switch (CC) {
2224  default:
2225  llvm_unreachable("Invalid integer condition!");
2226 
2227  CONV(EQ);
2228  CONV(NE);
2229  CONV(GT);
2230  CONV(GE);
2231  CONV(LT);
2232  CONV(LE);
2233 
2234  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2235  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2236  }
2237 #undef CONV
2238 }
2239 
2240 // If C can be converted to a comparison against zero, adjust the operands
2241 // as necessary.
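// Illustrative note, not part of the original source: e.g. a signed "x > -1"
// is rewritten as "x >= 0" by toggling CCMASK_CMP_EQ in the mask, so the
// cheaper compare-with-zero forms can be used.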
2242 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2243  if (C.ICmpType == SystemZICMP::UnsignedOnly)
2244  return;
2245 
2246  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2247  if (!ConstOp1)
2248  return;
2249 
2250  int64_t Value = ConstOp1->getSExtValue();
2251  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2252  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2253  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2254  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2255  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2256  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2257  }
2258 }
2259 
2260 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2261 // adjust the operands as necessary.
2262 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2263  Comparison &C) {
2264  // For us to make any changes, it must be a comparison between a single-use
2265  // load and a constant.
2266  if (!C.Op0.hasOneUse() ||
2267  C.Op0.getOpcode() != ISD::LOAD ||
2268  C.Op1.getOpcode() != ISD::Constant)
2269  return;
2270 
2271  // We must have an 8- or 16-bit load.
2272  auto *Load = cast<LoadSDNode>(C.Op0);
2273  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2274  if ((NumBits != 8 && NumBits != 16) ||
2275  NumBits != Load->getMemoryVT().getStoreSizeInBits())
2276  return;
2277 
2278  // The load must be an extending one and the constant must be within the
2279  // range of the unextended value.
2280  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2281  uint64_t Value = ConstOp1->getZExtValue();
2282  uint64_t Mask = (1 << NumBits) - 1;
2283  if (Load->getExtensionType() == ISD::SEXTLOAD) {
2284  // Make sure that ConstOp1 is in range of C.Op0.
2285  int64_t SignedValue = ConstOp1->getSExtValue();
2286  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2287  return;
2288  if (C.ICmpType != SystemZICMP::SignedOnly) {
2289  // Unsigned comparison between two sign-extended values is equivalent
2290  // to unsigned comparison between two zero-extended values.
2291  Value &= Mask;
2292  } else if (NumBits == 8) {
2293  // Try to treat the comparison as unsigned, so that we can use CLI.
2294  // Adjust CCMask and Value as necessary.
2295  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2296  // Test whether the high bit of the byte is set.
2297  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2298  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2299  // Test whether the high bit of the byte is clear.
2300  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2301  else
2302  // No instruction exists for this combination.
2303  return;
2304  C.ICmpType = SystemZICMP::UnsignedOnly;
2305  }
2306  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2307  if (Value > Mask)
2308  return;
2309  // If the constant is in range, we can use any comparison.
2310  C.ICmpType = SystemZICMP::Any;
2311  } else
2312  return;
2313 
2314  // Make sure that the first operand is an i32 of the right extension type.
2315  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2316  ISD::SEXTLOAD :
2317  ISD::ZEXTLOAD);
2318  if (C.Op0.getValueType() != MVT::i32 ||
2319  Load->getExtensionType() != ExtType) {
2320  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2321  Load->getBasePtr(), Load->getPointerInfo(),
2322  Load->getMemoryVT(), Load->getAlign(),
2323  Load->getMemOperand()->getFlags());
2324  // Update the chain uses.
2325  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2326  }
2327 
2328  // Make sure that the second operand is an i32 with the right value.
2329  if (C.Op1.getValueType() != MVT::i32 ||
2330  Value != ConstOp1->getZExtValue())
2331  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2332 }
2333 
2334 // Return true if Op is either an unextended load, or a load suitable
2335 // for integer register-memory comparisons of type ICmpType.
2336 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2337  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2338  if (Load) {
2339  // There are no instructions to compare a register with a memory byte.
2340  if (Load->getMemoryVT() == MVT::i8)
2341  return false;
2342  // Otherwise decide on extension type.
2343  switch (Load->getExtensionType()) {
2344  case ISD::NON_EXTLOAD:
2345  return true;
2346  case ISD::SEXTLOAD:
2347  return ICmpType != SystemZICMP::UnsignedOnly;
2348  case ISD::ZEXTLOAD:
2349  return ICmpType != SystemZICMP::SignedOnly;
2350  default:
2351  break;
2352  }
2353  }
2354  return false;
2355 }
2356 
2357 // Return true if it is better to swap the operands of C.
2358 static bool shouldSwapCmpOperands(const Comparison &C) {
2359  // Leave f128 comparisons alone, since they have no memory forms.
2360  if (C.Op0.getValueType() == MVT::f128)
2361  return false;
2362 
2363  // Always keep a floating-point constant second, since comparisons with
2364  // zero can use LOAD TEST and comparisons with other constants make a
2365  // natural memory operand.
2366  if (isa<ConstantFPSDNode>(C.Op1))
2367  return false;
2368 
2369  // Never swap comparisons with zero since there are many ways to optimize
2370  // those later.
2371  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2372  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2373  return false;
2374 
2375  // Also keep natural memory operands second if the loaded value is
2376  // only used here. Several comparisons have memory forms.
2377  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2378  return false;
2379 
2380  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2381  // In that case we generally prefer the memory to be second.
2382  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2383  // The only exceptions are when the second operand is a constant and
2384  // we can use things like CHHSI.
2385  if (!ConstOp1)
2386  return true;
2387  // The unsigned memory-immediate instructions can handle 16-bit
2388  // unsigned integers.
2389  if (C.ICmpType != SystemZICMP::SignedOnly &&
2390  isUInt<16>(ConstOp1->getZExtValue()))
2391  return false;
2392  // The signed memory-immediate instructions can handle 16-bit
2393  // signed integers.
2394  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2395  isInt<16>(ConstOp1->getSExtValue()))
2396  return false;
2397  return true;
2398  }
2399 
2400  // Try to promote the use of CGFR and CLGFR.
2401  unsigned Opcode0 = C.Op0.getOpcode();
2402  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2403  return true;
2404  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2405  return true;
2406  if (C.ICmpType != SystemZICMP::SignedOnly &&
2407  Opcode0 == ISD::AND &&
2408  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2409  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2410  return true;
2411 
2412  return false;
2413 }
2414 
2415 // Check whether C tests for equality between X and Y and whether X - Y
2416 // or Y - X is also computed. In that case it's better to compare the
2417 // result of the subtraction against zero.
2418 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2419  Comparison &C) {
2420  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2421  C.CCMask == SystemZ::CCMASK_CMP_NE) {
2422  for (SDNode *N : C.Op0->uses()) {
2423  if (N->getOpcode() == ISD::SUB &&
2424  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2425  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2426  C.Op0 = SDValue(N, 0);
2427  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2428  return;
2429  }
2430  }
2431  }
2432 }
2433 
2434 // Check whether C compares a floating-point value with zero and if that
2435 // floating-point value is also negated. In this case we can use the
2436 // negation to set CC, so avoiding separate LOAD AND TEST and
2437 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2438 static void adjustForFNeg(Comparison &C) {
2439  // This optimization is invalid for strict comparisons, since FNEG
2440  // does not raise any exceptions.
2441  if (C.Chain)
2442  return;
2443  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2444  if (C1 && C1->isZero()) {
2445  for (SDNode *N : C.Op0->uses()) {
2446  if (N->getOpcode() == ISD::FNEG) {
2447  C.Op0 = SDValue(N, 0);
2448  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2449  return;
2450  }
2451  }
2452  }
2453 }
2454 
2455 // Check whether C compares (shl X, 32) with 0 and whether X is
2456 // also sign-extended. In that case it is better to test the result
2457 // of the sign extension using LTGFR.
2458 //
2459 // This case is important because InstCombine transforms a comparison
2460 // with (sext (trunc X)) into a comparison with (shl X, 32).
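// Illustrative note, not part of the original source: comparing (shl X, 32)
// with zero yields the same CC as comparing the sign-extended low 32 bits of X
// with zero, so an existing SIGN_EXTEND_INREG result can be tested directly
// with LOAD AND TEST (LTGFR) instead of materializing the shift.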
2461 static void adjustForLTGFR(Comparison &C) {
2462  // Check for a comparison between (shl X, 32) and 0.
2463  if (C.Op0.getOpcode() == ISD::SHL &&
2464  C.Op0.getValueType() == MVT::i64 &&
2465  C.Op1.getOpcode() == ISD::Constant &&
2466  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2467  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2468  if (C1 && C1->getZExtValue() == 32) {
2469  SDValue ShlOp0 = C.Op0.getOperand(0);
2470  // See whether X has any SIGN_EXTEND_INREG uses.
2471  for (SDNode *N : ShlOp0->uses()) {
2472  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2473  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2474  C.Op0 = SDValue(N, 0);
2475  return;
2476  }
2477  }
2478  }
2479  }
2480 }
2481 
2482 // If C compares the truncation of an extending load, try to compare
2483 // the untruncated value instead. This exposes more opportunities to
2484 // reuse CC.
2485 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2486  Comparison &C) {
2487  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2488  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2489  C.Op1.getOpcode() == ISD::Constant &&
2490  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2491  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2492  if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2493  C.Op0.getValueSizeInBits().getFixedSize()) {
2494  unsigned Type = L->getExtensionType();
2495  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2496  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2497  C.Op0 = C.Op0.getOperand(0);
2498  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2499  }
2500  }
2501  }
2502 }
2503 
2504 // Return true if shift operation N has an in-range constant shift value.
2505 // Store it in ShiftVal if so.
2506 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2507  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2508  if (!Shift)
2509  return false;
2510 
2511  uint64_t Amount = Shift->getZExtValue();
2512  if (Amount >= N.getValueSizeInBits())
2513  return false;
2514 
2515  ShiftVal = Amount;
2516  return true;
2517 }
2518 
2519 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2520 // instruction and whether the CC value is descriptive enough to handle
2521 // a comparison of type Opcode between the AND result and CmpVal.
2522 // CCMask says which comparison result is being tested and BitSize is
2523 // the number of bits in the operands. If TEST UNDER MASK can be used,
2524 // return the corresponding CC mask, otherwise return 0.
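// Illustrative note, not part of the original source: e.g. for Mask = 0x8000
// and CmpVal = 0, an equality test maps to CCMASK_TM_ALL_0 ("all selected bits
// zero") and an inequality test to CCMASK_TM_SOME_1.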
2525 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2526  uint64_t Mask, uint64_t CmpVal,
2527  unsigned ICmpType) {
2528  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2529 
2530  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2531  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2532  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2533  return 0;
2534 
2535  // Work out the masks for the lowest and highest bits.
2536  unsigned HighShift = 63 - countLeadingZeros(Mask);
2537  uint64_t High = uint64_t(1) << HighShift;
2538  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2539 
2540  // Signed ordered comparisons are effectively unsigned if the sign
2541  // bit is dropped.
2542  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2543 
2544  // Check for equality comparisons with 0, or the equivalent.
2545  if (CmpVal == 0) {
2546  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2547  return SystemZ::CCMASK_TM_ALL_0;
2548  if (CCMask == SystemZ::CCMASK_CMP_NE)
2549  return SystemZ::CCMASK_TM_SOME_1;
2550  }
2551  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2552  if (CCMask == SystemZ::CCMASK_CMP_LT)
2553  return SystemZ::CCMASK_TM_ALL_0;
2554  if (CCMask == SystemZ::CCMASK_CMP_GE)
2555  return SystemZ::CCMASK_TM_SOME_1;
2556  }
2557  if (EffectivelyUnsigned && CmpVal < Low) {
2558  if (CCMask == SystemZ::CCMASK_CMP_LE)
2559  return SystemZ::CCMASK_TM_ALL_0;
2560  if (CCMask == SystemZ::CCMASK_CMP_GT)
2561  return SystemZ::CCMASK_TM_SOME_1;
2562  }
2563 
2564  // Check for equality comparisons with the mask, or the equivalent.
2565  if (CmpVal == Mask) {
2566  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2567  return SystemZ::CCMASK_TM_ALL_1;
2568  if (CCMask == SystemZ::CCMASK_CMP_NE)
2569  return SystemZ::CCMASK_TM_SOME_0;
2570  }
2571  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2572  if (CCMask == SystemZ::CCMASK_CMP_GT)
2573  return SystemZ::CCMASK_TM_ALL_1;
2574  if (CCMask == SystemZ::CCMASK_CMP_LE)
2575  return SystemZ::CCMASK_TM_SOME_0;
2576  }
2577  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2578  if (CCMask == SystemZ::CCMASK_CMP_GE)
2579  return SystemZ::CCMASK_TM_ALL_1;
2580  if (CCMask == SystemZ::CCMASK_CMP_LT)
2581  return SystemZ::CCMASK_TM_SOME_0;
2582  }
2583 
2584  // Check for ordered comparisons with the top bit.
2585  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2586  if (CCMask == SystemZ::CCMASK_CMP_LE)
2587  return SystemZ::CCMASK_TM_MSB_0;
2588  if (CCMask == SystemZ::CCMASK_CMP_GT)
2589  return SystemZ::CCMASK_TM_MSB_1;
2590  }
2591  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2592  if (CCMask == SystemZ::CCMASK_CMP_LT)
2593  return SystemZ::CCMASK_TM_MSB_0;
2594  if (CCMask == SystemZ::CCMASK_CMP_GE)
2595  return SystemZ::CCMASK_TM_MSB_1;
2596  }
2597 
2598  // If there are just two bits, we can do equality checks for Low and High
2599  // as well.
2600  if (Mask == Low + High) {
2601  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2602  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2603  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2604  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2605  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2606  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2607  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2608  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2609  }
2610 
2611  // Looks like we've exhausted our options.
2612  return 0;
2613 }
2614 
2615 // See whether C can be implemented as a TEST UNDER MASK instruction.
2616 // Update the arguments with the TM version if so.
2617 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2618  Comparison &C) {
2619  // Check that we have a comparison with a constant.
2620  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2621  if (!ConstOp1)
2622  return;
2623  uint64_t CmpVal = ConstOp1->getZExtValue();
2624 
2625  // Check whether the nonconstant input is an AND with a constant mask.
2626  Comparison NewC(C);
2627  uint64_t MaskVal;
2628  ConstantSDNode *Mask = nullptr;
2629  if (C.Op0.getOpcode() == ISD::AND) {
2630  NewC.Op0 = C.Op0.getOperand(0);
2631  NewC.Op1 = C.Op0.getOperand(1);
2632  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2633  if (!Mask)
2634  return;
2635  MaskVal = Mask->getZExtValue();
2636  } else {
2637  // There is no instruction to compare with a 64-bit immediate
2638  // so use TMHH instead if possible. We need an unsigned ordered
2639  // comparison with an i64 immediate.
2640  if (NewC.Op0.getValueType() != MVT::i64 ||
2641  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2642  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2643  NewC.ICmpType == SystemZICMP::SignedOnly)
2644  return;
2645  // Convert LE and GT comparisons into LT and GE.
2646  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2647  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2648  if (CmpVal == uint64_t(-1))
2649  return;
2650  CmpVal += 1;
2651  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2652  }
2653  // If the low N bits of Op1 are zero then the low N bits of Op0 can
2654  // be masked off without changing the result.
2655  MaskVal = -(CmpVal & -CmpVal);
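 // Illustrative note, not part of the original source: (CmpVal & -CmpVal)
 // isolates the lowest set bit of CmpVal, so e.g. CmpVal = 0x3000 gives
 // MaskVal = 0xfffffffffffff000; everything below the lowest set bit of the
 // comparison value is masked off.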
2656  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2657  }
2658  if (!MaskVal)
2659  return;
2660 
2661  // Check whether the combination of mask, comparison value and comparison
2662  // type are suitable.
2663  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2664  unsigned NewCCMask, ShiftVal;
2665  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2666  NewC.Op0.getOpcode() == ISD::SHL &&
2667  isSimpleShift(NewC.Op0, ShiftVal) &&
2668  (MaskVal >> ShiftVal != 0) &&
2669  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2670  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2671  MaskVal >> ShiftVal,
2672  CmpVal >> ShiftVal,
2673  SystemZICMP::Any))) {
2674  NewC.Op0 = NewC.Op0.getOperand(0);
2675  MaskVal >>= ShiftVal;
2676  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2677  NewC.Op0.getOpcode() == ISD::SRL &&
2678  isSimpleShift(NewC.Op0, ShiftVal) &&
2679  (MaskVal << ShiftVal != 0) &&
2680  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2681  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2682  MaskVal << ShiftVal,
2683  CmpVal << ShiftVal,
2684  SystemZICMP::UnsignedOnly))) {
2685  NewC.Op0 = NewC.Op0.getOperand(0);
2686  MaskVal <<= ShiftVal;
2687  } else {
2688  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2689  NewC.ICmpType);
2690  if (!NewCCMask)
2691  return;
2692  }
2693 
2694  // Go ahead and make the change.
2695  C.Opcode = SystemZISD::TM;
2696  C.Op0 = NewC.Op0;
2697  if (Mask && Mask->getZExtValue() == MaskVal)
2698  C.Op1 = SDValue(Mask, 0);
2699  else
2700  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2701  C.CCValid = SystemZ::CCMASK_TM;
2702  C.CCMask = NewCCMask;
2703 }
2704 
2705 // See whether the comparison argument contains a redundant AND
2706 // and remove it if so. This sometimes happens due to the generic
2707 // BRCOND expansion.
2708 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2709  Comparison &C) {
2710  if (C.Op0.getOpcode() != ISD::AND)
2711  return;
2712  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2713  if (!Mask)
2714  return;
2715  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2716  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2717  return;
2718 
2719  C.Op0 = C.Op0.getOperand(0);
2720 }
2721 
2722 // Return a Comparison that tests the condition-code result of intrinsic
2723 // node Call against constant integer CC using comparison code Cond.
2724 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2725 // and CCValid is the set of possible condition-code results.
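// Illustrative note, not part of the original source: the mask uses one bit
// per condition-code value, bit 3 for CC 0 down to bit 0 for CC 3.  For
// example, testing "CC == 2" with SETEQ produces 1 << (3 - 2) = 0b0010, which
// is then restricted to CCValid.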
2726 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2727  SDValue Call, unsigned CCValid, uint64_t CC,
2728  ISD::CondCode Cond) {
2729  Comparison C(Call, SDValue(), SDValue());
2730  C.Opcode = Opcode;
2731  C.CCValid = CCValid;
2732  if (Cond == ISD::SETEQ)
2733  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2734  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2735  else if (Cond == ISD::SETNE)
2736  // ...and the inverse of that.
2737  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2738  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2739  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2740  // always true for CC>3.
2741  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2742  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2743  // ...and the inverse of that.
2744  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2745  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2746  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2747  // always true for CC>3.
2748  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2749  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2750  // ...and the inverse of that.
2751  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2752  else
2753  llvm_unreachable("Unexpected integer comparison type");
2754  C.CCMask &= CCValid;
2755  return C;
2756 }
2757 
2758 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2759 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2760  ISD::CondCode Cond, const SDLoc &DL,
2761  SDValue Chain = SDValue(),
2762  bool IsSignaling = false) {
2763  if (CmpOp1.getOpcode() == ISD::Constant) {
2764  assert(!Chain);
2765  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2766  unsigned Opcode, CCValid;
2767  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2768  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2769  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2770  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2771  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2772  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2773  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2774  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2775  }
2776  Comparison C(CmpOp0, CmpOp1, Chain);
2777  C.CCMask = CCMaskForCondCode(Cond);
2778  if (C.Op0.getValueType().isFloatingPoint()) {
2779  C.CCValid = SystemZ::CCMASK_FCMP;
2780  if (!C.Chain)
2781  C.Opcode = SystemZISD::FCMP;
2782  else if (!IsSignaling)
2783  C.Opcode = SystemZISD::STRICT_FCMP;
2784  else
2785  C.Opcode = SystemZISD::STRICT_FCMPS;
2786  adjustForFNeg(C);
2787  } else {
2788  assert(!C.Chain);
2789  C.CCValid = SystemZ::CCMASK_ICMP;
2790  C.Opcode = SystemZISD::ICMP;
2791  // Choose the type of comparison. Equality and inequality tests can
2792  // use either signed or unsigned comparisons. The choice also doesn't
2793  // matter if both sign bits are known to be clear. In those cases we
2794  // want to give the main isel code the freedom to choose whichever
2795  // form fits best.
2796  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2797  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2798  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2799  C.ICmpType = SystemZICMP::Any;
2800  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2801  C.ICmpType = SystemZICMP::UnsignedOnly;
2802  else
2803  C.ICmpType = SystemZICMP::SignedOnly;
2804  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2805  adjustForRedundantAnd(DAG, DL, C);
2806  adjustZeroCmp(DAG, DL, C);
2807  adjustSubwordCmp(DAG, DL, C);
2808  adjustForSubtraction(DAG, DL, C);
2809  adjustForLTGFR(C);
2810  adjustICmpTruncate(DAG, DL, C);
2811  }
2812 
2813  if (shouldSwapCmpOperands(C)) {
2814  std::swap(C.Op0, C.Op1);
2815  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2816  }
2817 
2818  adjustForTestUnderMask(DAG, DL, C);
2819  return C;
2820 }
2821 
2822 // Emit the comparison instruction described by C.
2823 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2824  if (!C.Op1.getNode()) {
2825  SDNode *Node;
2826  switch (C.Op0.getOpcode()) {
2827  case ISD::INTRINSIC_W_CHAIN:
2828  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2829  return SDValue(Node, 0);
2830  case ISD::INTRINSIC_WO_CHAIN:
2831  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2832  return SDValue(Node, Node->getNumValues() - 1);
2833  default:
2834  llvm_unreachable("Invalid comparison operands");
2835  }
2836  }
2837  if (C.Opcode == SystemZISD::ICMP)
2838  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2839  DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2840  if (C.Opcode == SystemZISD::TM) {
2841  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2842  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2843  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2844  DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2845  }
2846  if (C.Chain) {
2847  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2848  return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2849  }
2850  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2851 }
2852 
2853 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2854 // 64 bits. Extend is the extension type to use. Store the high part
2855 // in Hi and the low part in Lo.
2856 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2857  SDValue Op0, SDValue Op1, SDValue &Hi,
2858  SDValue &Lo) {
2859  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2860  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2861  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2862  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2863  DAG.getConstant(32, DL, MVT::i64));
2864  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2865  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2866 }
2867 
2868 // Lower a binary operation that produces two VT results, one in each
2869 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2870 // and Opcode performs the GR128 operation. Store the even register result
2871 // in Even and the odd register result in Odd.
2872 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2873  unsigned Opcode, SDValue Op0, SDValue Op1,
2874  SDValue &Even, SDValue &Odd) {
2875  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2876  bool Is32Bit = is32Bit(VT);
2877  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2878  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2879 }
2880 
2881 // Return an i32 value that is 1 if the CC value produced by CCReg is
2882 // in the mask CCMask and 0 otherwise. CC is known to have a value
2883 // in CCValid, so other values can be ignored.
2884 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2885  unsigned CCValid, unsigned CCMask) {
2886  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2887  DAG.getConstant(0, DL, MVT::i32),
2888  DAG.getTargetConstant(CCValid, DL, MVT::i32),
2889  DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2890  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2891 }
2892 
2893 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2894 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2895 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2896 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2897 // floating-point comparisons.
2898 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
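// Illustrative note, not part of the original source: e.g. SETOGT in
// CmpMode::FP maps to VFCMPH, while SETUGT has no direct floating-point form
// and is handled by the inversion/swap logic in the callers below.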
2899 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2900  switch (CC) {
2901  case ISD::SETOEQ:
2902  case ISD::SETEQ:
2903  switch (Mode) {
2904  case CmpMode::Int: return SystemZISD::VICMPE;
2905  case CmpMode::FP: return SystemZISD::VFCMPE;
2906  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2907  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2908  }
2909  llvm_unreachable("Bad mode");
2910 
2911  case ISD::SETOGE:
2912  case ISD::SETGE:
2913  switch (Mode) {
2914  case CmpMode::Int: return 0;
2915  case CmpMode::FP: return SystemZISD::VFCMPHE;
2916  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2917  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2918  }
2919  llvm_unreachable("Bad mode");
2920 
2921  case ISD::SETOGT:
2922  case ISD::SETGT:
2923  switch (Mode) {
2924  case CmpMode::Int: return SystemZISD::VICMPH;
2925  case CmpMode::FP: return SystemZISD::VFCMPH;
2926  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2927  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2928  }
2929  llvm_unreachable("Bad mode");
2930 
2931  case ISD::SETUGT:
2932  switch (Mode) {
2933  case CmpMode::Int: return SystemZISD::VICMPHL;
2934  case CmpMode::FP: return 0;
2935  case CmpMode::StrictFP: return 0;
2936  case CmpMode::SignalingFP: return 0;
2937  }
2938  llvm_unreachable("Bad mode");
2939 
2940  default:
2941  return 0;
2942  }
2943 }
2944 
2945 // Return the SystemZISD vector comparison operation for CC or its inverse,
2946 // or 0 if neither can be done directly. Indicate in Invert whether the
2947 // result is for the inverse of CC. Mode is as above.
2948 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2949  bool &Invert) {
2950  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2951  Invert = false;
2952  return Opcode;
2953  }
2954 
2955  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2956  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2957  Invert = true;
2958  return Opcode;
2959  }
2960 
2961  return 0;
2962 }
2963 
2964 // Return a v2f64 that contains the extended form of elements Start and Start+1
2965 // of v4f32 value Op. If Chain is nonnull, return the strict form.
2966 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2967  SDValue Op, SDValue Chain) {
2968  int Mask[] = { Start, -1, Start + 1, -1 };
2969  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2970  if (Chain) {
2971  SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2972  return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2973  }
2974  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2975 }
2976 
2977 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2978 // producing a result of type VT. If Chain is nonnull, return the strict form.
2979 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2980  const SDLoc &DL, EVT VT,
2981  SDValue CmpOp0,
2982  SDValue CmpOp1,
2983  SDValue Chain) const {
2984  // There is no hardware support for v4f32 (unless we have the vector
2985  // enhancements facility 1), so extend the vector into two v2f64s
2986  // and compare those.
2987  if (CmpOp0.getValueType() == MVT::v4f32 &&
2988  !Subtarget.hasVectorEnhancements1()) {
2989  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2990  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2991  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2992  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2993  if (Chain) {
2994  SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2995  SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2996  SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2997  SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2998  SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2999  H1.getValue(1), L1.getValue(1),
3000  HRes.getValue(1), LRes.getValue(1) };
3001  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3002  SDValue Ops[2] = { Res, NewChain };
3003  return DAG.getMergeValues(Ops, DL);
3004  }
3005  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3006  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3007  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3008  }
3009  if (Chain) {
3010  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3011  return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3012  }
3013  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3014 }
3015 
3016 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3017 // an integer mask of type VT. If Chain is nonnull, we have a strict
3018 // floating-point comparison. If in addition IsSignaling is true, we have
3019 // a strict signaling floating-point comparison.
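// Illustrative note, not part of the original source: a v4i32 SETUGT maps
// directly to VICMPHL, whereas a v2f64 SETONE is built from two VFCMPH
// comparisons ORed together (see the "<>" case below).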
3020 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3021  const SDLoc &DL, EVT VT,
3022  ISD::CondCode CC,
3023  SDValue CmpOp0,
3024  SDValue CmpOp1,
3025  SDValue Chain,
3026  bool IsSignaling) const {
3027  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3028  assert (!Chain || IsFP);
3029  assert (!IsSignaling || Chain);
3030  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3031  Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3032  bool Invert = false;
3033  SDValue Cmp;
3034  switch (CC) {
3035  // Handle tests for order using (or (ogt y x) (oge x y)).
3036  case ISD::SETUO:
3037  Invert = true;
3038  [[fallthrough]];
3039  case ISD::SETO: {
3040  assert(IsFP && "Unexpected integer comparison");
3041  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3042  DL, VT, CmpOp1, CmpOp0, Chain);
3043  SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3044  DL, VT, CmpOp0, CmpOp1, Chain);
3045  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3046  if (Chain)
3047  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3048  LT.getValue(1), GE.getValue(1));
3049  break;
3050  }
3051 
3052  // Handle <> tests using (or (ogt y x) (ogt x y)).
3053  case ISD::SETUEQ:
3054  Invert = true;
3055  [[fallthrough]];
3056  case ISD::SETONE: {
3057  assert(IsFP && "Unexpected integer comparison");
3058  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3059  DL, VT, CmpOp1, CmpOp0, Chain);
3060  SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3061  DL, VT, CmpOp0, CmpOp1, Chain);
3062  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3063  if (Chain)
3064  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3065  LT.getValue(1), GT.getValue(1));
3066  break;
3067  }
3068 
3069  // Otherwise a single comparison is enough. It doesn't really
3070  // matter whether we try the inversion or the swap first, since
3071  // there are no cases where both work.
3072  default:
3073  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3074  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3075  else {
3076  CC = ISD::getSetCCSwappedOperands(CC);
3077  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3078  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3079  else
3080  llvm_unreachable("Unhandled comparison");
3081  }
3082  if (Chain)
3083  Chain = Cmp.getValue(1);
3084  break;
3085  }
3086  if (Invert) {
3087  SDValue Mask =
3088  DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3089  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3090  }
3091  if (Chain && Chain.getNode() != Cmp.getNode()) {
3092  SDValue Ops[2] = { Cmp, Chain };
3093  Cmp = DAG.getMergeValues(Ops, DL);
3094  }
3095  return Cmp;
3096 }
3097 
3098 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3099  SelectionDAG &DAG) const {
3100  SDValue CmpOp0 = Op.getOperand(0);
3101  SDValue CmpOp1 = Op.getOperand(1);
3102  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3103  SDLoc DL(Op);
3104  EVT VT = Op.getValueType();
3105  if (VT.isVector())
3106  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3107 
3108  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3109  SDValue CCReg = emitCmp(DAG, DL, C);
3110  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3111 }
3112 
3113 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3114  SelectionDAG &DAG,
3115  bool IsSignaling) const {
3116  SDValue Chain = Op.getOperand(0);
3117  SDValue CmpOp0 = Op.getOperand(1);
3118  SDValue CmpOp1 = Op.getOperand(2);
3119  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3120  SDLoc DL(Op);
3121  EVT VT = Op.getNode()->getValueType(0);
3122  if (VT.isVector()) {
3123  SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3124  Chain, IsSignaling);
3125  return Res.getValue(Op.getResNo());
3126  }
3127 
3128  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3129  SDValue CCReg = emitCmp(DAG, DL, C);
3130  CCReg->setFlags(Op->getFlags());
3131  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3132  SDValue Ops[2] = { Result, CCReg.getValue(1) };
3133  return DAG.getMergeValues(Ops, DL);
3134 }
3135 
3136 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3137  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3138  SDValue CmpOp0 = Op.getOperand(2);
3139  SDValue CmpOp1 = Op.getOperand(3);
3140  SDValue Dest = Op.getOperand(4);
3141  SDLoc DL(Op);
3142 
3143  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3144  SDValue CCReg = emitCmp(DAG, DL, C);
3145  return DAG.getNode(
3146  SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3147  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3148  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3149 }
3150 
3151 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3152 // allowing Pos and Neg to be wider than CmpOp.
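// Illustrative note, not part of the original source: this matches selects of
// the form (x < 0) ? (0 - x) : x, including the variant where the select
// operands are sign-extended copies of the compared value.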
3153 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3154  return (Neg.getOpcode() == ISD::SUB &&
3155  Neg.getOperand(0).getOpcode() == ISD::Constant &&
3156  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3157  Neg.getOperand(1) == Pos &&
3158  (Pos == CmpOp ||
3159  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3160  Pos.getOperand(0) == CmpOp)));
3161 }
3162 
3163 // Return the absolute or negative absolute of Op; IsNegative decides which.
3164 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3165  bool IsNegative) {
3166  Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3167  if (IsNegative)
3168  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3169  DAG.getConstant(0, DL, Op.getValueType()), Op);
3170  return Op;
3171 }
3172 
3173 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3174  SelectionDAG &DAG) const {
3175  SDValue CmpOp0 = Op.getOperand(0);
3176  SDValue CmpOp1 = Op.getOperand(1);
3177  SDValue TrueOp = Op.getOperand(2);
3178  SDValue FalseOp = Op.getOperand(3);
3179  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3180  SDLoc DL(Op);
3181 
3182  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3183 
3184  // Check for absolute and negative-absolute selections, including those
3185  // where the comparison value is sign-extended (for LPGFR and LNGFR).
3186  // This check supplements the one in DAGCombiner.
3187  if (C.Opcode == SystemZISD::ICMP &&
3188  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3189  C.CCMask != SystemZ::CCMASK_CMP_NE &&
3190  C.Op1.getOpcode() == ISD::Constant &&
3191  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3192  if (isAbsolute(C.Op0, TrueOp, FalseOp))
3193  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3194  if (isAbsolute(C.Op0, FalseOp, TrueOp))
3195  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3196  }
3197 
3198  SDValue CCReg = emitCmp(DAG, DL, C);
3199  SDValue Ops[] = {TrueOp, FalseOp,
3200  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3201  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3202 
3203  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3204 }
3205 
3206 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3207  SelectionDAG &DAG) const {
3208  SDLoc DL(Node);
3209  const GlobalValue *GV = Node->getGlobal();
3210  int64_t Offset = Node->getOffset();
3211  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3212  CodeModel::Model CM = DAG.getTarget().getCodeModel();
3213 
3214  SDValue Result;
3215  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3216  if (isInt<32>(Offset)) {
3217  // Assign anchors at 1<<12 byte boundaries.
3218  uint64_t Anchor = Offset & ~uint64_t(0xfff);
3219  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3220  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3221 
3222  // The offset can be folded into the address if it is aligned to a
3223  // halfword.
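 // Illustrative note, not part of the original source: e.g. GV+0x1235 uses
 // the anchor GV+0x1000; the remaining 0x235 is odd, so it is added with the
 // explicit ADD at the end of this function, whereas GV+0x1234 folds the
 // remaining 0x234 into the PCREL_OFFSET node directly.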
3224  Offset -= Anchor;
3225  if (Offset != 0 && (Offset & 1) == 0) {
3226  SDValue Full =
3227  DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3228  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3229  Offset = 0;
3230  }
3231  } else {
3232  // Conservatively load a constant offset greater than 32 bits into a
3233  // register below.
3234  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3235  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3236  }
3237  } else {
3238  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3239  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3240  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3241  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3242  }
3243 
3244  // If there was a non-zero offset that we didn't fold, create an explicit
3245  // addition for it.
3246  if (Offset != 0)
3247  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3248  DAG.getConstant(Offset, DL, PtrVT));
3249 
3250  return Result;
3251 }
3252 
3253 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3254  SelectionDAG &DAG,
3255  unsigned Opcode,
3256  SDValue GOTOffset) const {
3257  SDLoc DL(Node);
3258  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3259  SDValue Chain = DAG.getEntryNode();
3260  SDValue Glue;
3261 
3262  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3263  CallingConv::GHC)
3264  report_fatal_error("In GHC calling convention TLS is not supported");
3265 
3266  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3267  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3268  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3269  Glue = Chain.getValue(1);
3270  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3271  Glue = Chain.getValue(1);
3272 
3273  // The first call operand is the chain and the second is the TLS symbol.
3274  SmallVector<SDValue, 8> Ops;
3275  Ops.push_back(Chain);
3276  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3277  Node->getValueType(0),
3278  0, 0));
3279 
3280  // Add argument registers to the end of the list so that they are
3281  // known live into the call.
3282  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3283  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3284 
3285  // Add a register mask operand representing the call-preserved registers.
3286  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3287  const uint32_t *Mask =
3288  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3289  assert(Mask && "Missing call preserved mask for calling convention");
3290  Ops.push_back(DAG.getRegisterMask(Mask));
3291 
3292  // Glue the call to the argument copies.
3293  Ops.push_back(Glue);
3294 
3295  // Emit the call.
3296  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3297  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3298  Glue = Chain.getValue(1);
3299 
3300  // Copy the return value from %r2.
3301  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3302 }
3303 
3304 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3305  SelectionDAG &DAG) const {
3306  SDValue Chain = DAG.getEntryNode();
3307  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3308 
3309  // The high part of the thread pointer is in access register 0.
3310  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3311  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3312 
3313  // The low part of the thread pointer is in access register 1.
3314  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3315  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
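 // Illustrative note, not part of the original source: the high half can use
 // ANY_EXTEND because the shift by 32 below discards its undefined upper bits,
 // while the low half must be zero-extended so the OR does not pick up stray
 // upper bits.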
3316 
3317  // Merge them into a single 64-bit address.
3318  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3319  DAG.getConstant(32, DL, PtrVT));
3320  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3321 }
3322 
3323 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3324  SelectionDAG &DAG) const {
3325  if (DAG.getTarget().useEmulatedTLS())
3326  return LowerToTLSEmulatedModel(Node, DAG);
3327  SDLoc DL(Node);
3328  const GlobalValue *GV = Node->getGlobal();
3329  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3330  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3331 
3332  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3333  CallingConv::GHC)
3334  report_fatal_error("In GHC calling convention TLS is not supported");
3335 
3336  SDValue TP = lowerThreadPointer(DL, DAG);
3337 
3338  // Get the offset of GA from the thread pointer, based on the TLS model.
3339  SDValue Offset;
3340  switch (model) {
3341  case TLSModel::GeneralDynamic: {
3342  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3343  SystemZConstantPoolValue *CPV =
3344  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3345 
3346  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3347  Offset = DAG.getLoad(
3348  PtrVT, DL, DAG.getEntryNode(), Offset,
3349  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3350 
3351  // Call __tls_get_offset to retrieve the offset.
3352  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3353  break;
3354  }
3355 
3356  case TLSModel::LocalDynamic: {
3357  // Load the GOT offset of the module ID.
3358  SystemZConstantPoolValue *CPV =
3359  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3360 
3361  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3362  Offset = DAG.getLoad(
3363  PtrVT, DL, DAG.getEntryNode(), Offset,
3364  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3365 
3366  // Call __tls_get_offset to retrieve the module base offset.
3367  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3368 
3369  // Note: The SystemZLDCleanupPass will remove redundant computations
3370  // of the module base offset. Count total number of local-dynamic
3371  // accesses to trigger execution of that pass.
3372  SystemZMachineFunctionInfo* MFI =
3373  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3374  MFI->incNumLocalDynamicTLSAccesses();
3375 
3376  // Add the per-symbol offset.
3377  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3378 
3379  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3380  DTPOffset = DAG.getLoad(
3381  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3382  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3383 
3384  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3385  break;
3386  }
3387 
3388  case TLSModel::InitialExec: {
3389  // Load the offset from the GOT.
3390  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3391  SystemZII::MO_INDNTPOFF);
3392  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3393  Offset =
3394  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3395  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3396  break;
3397  }
3398 
3399  case TLSModel::LocalExec: {
3400  // Force the offset into the constant pool and load it from there.
3401  SystemZConstantPoolValue *CPV =
3402  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3403 
3404  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3405  Offset = DAG.getLoad(
3406  PtrVT, DL, DAG.getEntryNode(), Offset,
3407  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3408  break;
3409  }
3410  }
3411 
3412  // Add the base and offset together.
3413  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3414 }
3415 
3416 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3417  SelectionDAG &DAG) const {
3418  SDLoc DL(Node);
3419  const BlockAddress *BA = Node->getBlockAddress();
3420  int64_t Offset = Node->getOffset();
3421  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3422 
3423  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3424  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3425  return Result;
3426 }
3427 
3428 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3429  SelectionDAG &DAG) const {
3430  SDLoc DL(JT);
3431  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3432  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3433 
3434  // Use LARL to load the address of the table.
3435  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3436 }
3437 
3438 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3439  SelectionDAG &DAG) const {
3440  SDLoc DL(CP);
3441  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3442 
3443  SDValue Result;
3444  if (CP->isMachineConstantPoolEntry())
3445  Result =
3446  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3447  else
3448  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3449  CP->getOffset());
3450 
3451  // Use LARL to load the address of the constant pool entry.
3452  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3453 }
3454 
3455 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3456  SelectionDAG &DAG) const {
3457  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3458  MachineFunction &MF = DAG.getMachineFunction();
3459  MachineFrameInfo &MFI = MF.getFrameInfo();
3460  MFI.setFrameAddressIsTaken(true);
3461 
3462  SDLoc DL(Op);
3463  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3464  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3465 
3466  // By definition, the frame address is the address of the back chain. (In
3467  // the case of a packed stack without backchain, return the address where
3468  // the backchain would have been stored. That slot is either unused space
3469  // or holds a saved register.)
3470  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3471  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3472 
3473  // FIXME The frontend should detect this case.
3474  if (Depth > 0) {
3475  report_fatal_error("Unsupported stack frame traversal count");
3476  }
3477 
3478  return BackChain;
3479 }
3480 
3481 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3482  SelectionDAG &DAG) const {
3483  MachineFunction &MF = DAG.getMachineFunction();
3484  MachineFrameInfo &MFI = MF.getFrameInfo();
3485  MFI.setReturnAddressIsTaken(true);
3486 
3487  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3488  return SDValue();
3489 
3490  SDLoc DL(Op);
3491  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3492  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3493 
3494  // FIXME The frontend should detect this case.
3495  if (Depth > 0) {
3496  report_fatal_error("Unsupported stack frame traversal count");
3497  }
3498 
3499  // Return R14D, which has the return address. Mark it an implicit live-in.
3500  Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3501  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3502 }
3503 
3504 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3505  SelectionDAG &DAG) const {
3506  SDLoc DL(Op);
3507  SDValue In = Op.getOperand(0);
3508  EVT InVT = In.getValueType();
3509  EVT ResVT = Op.getValueType();
3510 
3511  // Convert loads directly. This is normally done by DAGCombiner,
3512  // but we need this case for bitcasts that are created during lowering
3513  // and which are then lowered themselves.
3514  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3515  if (ISD::isNormalLoad(LoadN)) {
3516  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3517  LoadN->getBasePtr(), LoadN->getMemOperand());
3518  // Update the chain uses.
3519  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3520  return NewLoad;
3521  }
3522 
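  // A 32-bit float lives in the high (leftmost) 32 bits of a 64-bit
  // floating-point register on SystemZ, so the i32<->f32 conversions below
  // move the value through the upper half of a 64-bit value (subreg_h32, or
  // a shift by 32 on the GPR side) rather than through the low half.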
3523  if (InVT == MVT::i32 && ResVT == MVT::f32) {
3524  SDValue In64;
3525  if (Subtarget.hasHighWord()) {
3526  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3527  MVT::i64);
3528  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3529  MVT::i64, SDValue(U64, 0), In);
3530  } else {
3531  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3532  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3533  DAG.getConstant(32, DL, MVT::i64));
3534  }
3535  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3536  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3537  DL, MVT::f32, Out64);
3538  }
3539  if (InVT == MVT::f32 && ResVT == MVT::i32) {
3540  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3541  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3542  MVT::f64, SDValue(U64, 0), In);
3543  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3544  if (Subtarget.hasHighWord())
3545  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3546  MVT::i32, Out64);
3547  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3548  DAG.getConstant(32, DL, MVT::i64));
3549  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3550  }
3551  llvm_unreachable("Unexpected bitcast combination");
3552 }
3553 
3554 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3555  SelectionDAG &DAG) const {
3556 
3557  if (Subtarget.isTargetXPLINK64())
3558  return lowerVASTART_XPLINK(Op, DAG);
3559  else
3560  return lowerVASTART_ELF(Op, DAG);
3561 }
3562 
3563 SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3564  SelectionDAG &DAG) const {
3565  MachineFunction &MF = DAG.getMachineFunction();
3566  SystemZMachineFunctionInfo *FuncInfo =
3567  MF.getInfo<SystemZMachineFunctionInfo>();
3568 
3569  SDLoc DL(Op);
3570 
3571  // vastart just stores the address of the VarArgsFrameIndex slot into the
3572  // memory location argument.
3573  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3574  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3575  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3576  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3577  MachinePointerInfo(SV));
3578 }
3579 
3580 SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3581  SelectionDAG &DAG) const {
3582  MachineFunction &MF = DAG.getMachineFunction();
3583  SystemZMachineFunctionInfo *FuncInfo =
3584  MF.getInfo<SystemZMachineFunctionInfo>();
3585  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3586 
3587  SDValue Chain = Op.getOperand(0);
3588  SDValue Addr = Op.getOperand(1);
3589  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3590  SDLoc DL(Op);
3591 
3592  // The initial values of each field.
3593  const unsigned NumFields = 4;
3594  SDValue Fields[NumFields] = {
3595  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3596  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3597  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3598  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3599  };
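  // These four 8-byte fields correspond to the s390x ELF ABI va_list members
  // (__gpr, __fpr, __overflow_arg_area and __reg_save_area) and are stored
  // below at offsets 0, 8, 16 and 24 respectively.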
3600 
3601  // Store each field into its respective slot.
3602  SDValue MemOps[NumFields];
3603  unsigned Offset = 0;
3604  for (unsigned I = 0; I < NumFields; ++I) {
3605  SDValue FieldAddr = Addr;
3606  if (Offset != 0)
3607  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3608  DAG.getIntPtrConstant(Offset, DL));
3609  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3610  MachinePointerInfo(SV, Offset));
3611  Offset += 8;
3612  }
3613  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3614 }
3615 
3616 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3617  SelectionDAG &DAG) const {
3618  SDValue Chain = Op.getOperand(0);
3619  SDValue DstPtr = Op.getOperand(1);
3620  SDValue SrcPtr = Op.getOperand(2);
3621  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3622  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3623  SDLoc DL(Op);
3624 
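  // On ELF the va_list is the 32-byte structure built in lowerVASTART_ELF;
  // on XPLINK64 it is a single pointer-sized slot, so copy only the number
  // of bytes the target actually uses.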
3625  uint32_t Sz =
3626  Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3627  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3628  Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3629  /*isTailCall*/ false, MachinePointerInfo(DstSV),
3630  MachinePointerInfo(SrcSV));
3631 }
3632 
3633 SDValue
3634 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3635  SelectionDAG &DAG) const {
3636  if (Subtarget.isTargetXPLINK64())
3637  return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3638  else
3639  return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3640 }
3641 
3642 SDValue
3643 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3644  SelectionDAG &DAG) const {
3645  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3646  MachineFunction &MF = DAG.getMachineFunction();
3647  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3648  SDValue Chain = Op.getOperand(0);
3649  SDValue Size = Op.getOperand(1);
3650  SDValue Align = Op.getOperand(2);
3651  SDLoc DL(Op);
3652 
3653  // If the user has set the "no-realign-stack" function attribute, ignore
3654  // alloca alignments.
3655  uint64_t AlignVal =
3656  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3657 
3658  uint64_t StackAlign = TFI->getStackAlignment();
3659  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3660  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3661 
3662  SDValue NeededSpace = Size;
3663 
3664  // Add extra space for alignment if needed.
3665  EVT PtrVT = getPointerTy(MF.getDataLayout());
3666  if (ExtraAlignSpace)
3667  NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3668  DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3669 
3670  bool IsSigned = false;
3671  bool DoesNotReturn = false;
3672  bool IsReturnValueUsed = false;
3673  EVT VT = Op.getValueType();
3674  SDValue AllocaCall =
3675  makeExternalCall(Chain, DAG, "@@ALCAXP", VT, makeArrayRef(NeededSpace),
3676  CallingConv::C, IsSigned, DL, DoesNotReturn,
3677  IsReturnValueUsed)
3678  .first;
3679 
3680  // Perform a CopyFromReg from %GPR4 (the stack pointer register), chained
3681  // and glued to the end of the call so that it cannot be separated from
3682  // the call sequence.
3683  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3684  Register SPReg = Regs.getStackPointerRegister();
3685  Chain = AllocaCall.getValue(1);
3686  SDValue Glue = AllocaCall.getValue(2);
3687  SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
3688  Chain = NewSPRegNode.getValue(1);
3689 
3690  MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
3691  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
3692  SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
3693 
3694  // Dynamically realign if needed.
3695  if (ExtraAlignSpace) {
3696  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3697  DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3698  Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
3699  DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
3700  }
3701 
3702  SDValue Ops[2] = {Result, Chain};
3703  return DAG.getMergeValues(Ops, DL);
3704 }
3705 
3706 SDValue
3707 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
3708  SelectionDAG &DAG) const {
3709  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3710  MachineFunction &MF = DAG.getMachineFunction();
3711  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3712  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3713 
3714  SDValue Chain = Op.getOperand(0);
3715  SDValue Size = Op.getOperand(1);
3716  SDValue Align = Op.getOperand(2);
3717  SDLoc DL(Op);
3718 
3719  // If the user has set the "no-realign-stack" function attribute, ignore
3720  // alloca alignments.
3721  uint64_t AlignVal =
3722  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3723 
3724  uint64_t StackAlign = TFI->getStackAlignment();
3725  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3726  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3727 
3728  Register SPReg = getStackPointerRegisterToSaveRestore();
3729  SDValue NeededSpace = Size;
3730 
3731  // Get a reference to the stack pointer.
3732  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3733 
3734  // If we need a backchain, save it now.
3735  SDValue Backchain;
3736  if (StoreBackchain)
3737  Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3738  MachinePointerInfo());
3739 
3740  // Add extra space for alignment if needed.
3741  if (ExtraAlignSpace)
3742  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3743  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3744 
3745  // Get the new stack pointer value.
3746  SDValue NewSP;
3747  if (hasInlineStackProbe(MF)) {
3748  NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3749  DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3750  Chain = NewSP.getValue(1);
3751  }
3752  else {
3753  NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3754  // Copy the new stack pointer back.
3755  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3756  }
3757 
3758  // The allocated data lives above the 160 bytes allocated for the standard
3759  // frame, plus any outgoing stack arguments. We don't know how much that
3760  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3761  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3762  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3763 
3764  // Dynamically realign if needed.
3765  if (RequiredAlign > StackAlign) {
3766  Result =
3767  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3768  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3769  Result =
3770  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3771  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3772  }
3773 
3774  if (StoreBackchain)
3775  Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3776  MachinePointerInfo());
3777 
3778  SDValue Ops[2] = { Result, Chain };
3779  return DAG.getMergeValues(Ops, DL);
3780 }
3781 
3782 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3783  SDValue Op, SelectionDAG &DAG) const {
3784  SDLoc DL(Op);
3785 
3786  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3787 }
3788 
3789 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3790  SelectionDAG &DAG) const {
3791  EVT VT = Op.getValueType();
3792  SDLoc DL(Op);
3793  SDValue Ops[2];
3794  if (is32Bit(VT))
3795  // Just do a normal 64-bit multiplication and extract the results.
3796  // We define this so that it can be used for constant division.
3797  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3798  Op.getOperand(1), Ops[1], Ops[0]);
3799  else if (Subtarget.hasMiscellaneousExtensions2())
3800  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3801  // the high result in the even register. ISD::SMUL_LOHI is defined to
3802  // return the low half first, so the results are in reverse order.
3803  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3804  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3805  else {
3806  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3807  //
3808  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3809  //
3810  // but using the fact that the upper halves are either all zeros
3811  // or all ones:
3812  //
3813  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3814  //
3815  // and grouping the right terms together since they are quicker than the
3816  // multiplication:
3817  //
3818  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
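  // (Since lh and rh are arithmetic right shifts by 63 bits, each is either
  //  0 or all ones; modulo 2^64 that gives lh * rl == -(lh & rl) and
  //  ll * rh == -(ll & rh), which is what justifies replacing the shifted
  //  products above with ANDs and a single subtraction.)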
3819  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3820  SDValue LL = Op.getOperand(0);
3821  SDValue RL = Op.getOperand(1);
3822  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3823  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3824  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3825  // the high result in the even register. ISD::SMUL_LOHI is defined to
3826  // return the low half first, so the results are in reverse order.
3827  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3828  LL, RL, Ops[1], Ops[0]);
3829  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3830  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3831  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3832  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3833  }
3834  return DAG.getMergeValues(Ops, DL);
3835 }
3836 
3837 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3838  SelectionDAG &DAG) const {
3839  EVT VT = Op.getValueType();
3840  SDLoc DL(Op);
3841  SDValue Ops[2];
3842  if (is32Bit(VT))
3843  // Just do a normal 64-bit multiplication and extract the results.
3844  // We define this so that it can be used for constant division.
3845  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3846  Op.getOperand(1), Ops[1], Ops[0]);
3847  else
3848  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3849  // the high result in the even register. ISD::UMUL_LOHI is defined to
3850  // return the low half first, so the results are in reverse order.
3851  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3852  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3853  return DAG.getMergeValues(Ops, DL);
3854 }
3855 
3856 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3857  SelectionDAG &DAG) const {
3858  SDValue Op0 = Op.getOperand(0);
3859  SDValue Op1 = Op.getOperand(1);
3860  EVT VT = Op.getValueType();
3861  SDLoc DL(Op);
3862 
3863  // We use DSGF for 32-bit division. This means the first operand must
3864  // always be 64-bit, and the second operand should be 32-bit whenever
3865  // that is possible, to improve performance.
3866  if (is32Bit(VT))
3867  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3868  else if (DAG.ComputeNumSignBits(Op1) > 32)
3869  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3870 
3871  // DSG(F) returns the remainder in the even register and the
3872  // quotient in the odd register.
3873  SDValue Ops[2];
3874  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3875  return DAG.getMergeValues(Ops, DL);
3876 }
3877 
3878 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3879  SelectionDAG &DAG) const {
3880  EVT VT = Op.getValueType();
3881  SDLoc DL(Op);
3882 
3883  // DL(G) returns the remainder in the even register and the
3884  // quotient in the odd register.
3885  SDValue Ops[2];
3886  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3887  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3888  return DAG.getMergeValues(Ops, DL);
3889 }
3890 
3891 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3892  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3893 
3894  // Get the known-zero masks for each operand.
3895  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3896  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3897  DAG.computeKnownBits(Ops[1])};
3898 
3899  // See if the upper 32 bits of one operand and the lower 32 bits of the
3900  // other are known zero. They are the low and high operands respectively.
3901  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3902  Known[1].Zero.getZExtValue() };
3903  unsigned High, Low;
3904  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3905  High = 1, Low = 0;
3906  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3907  High = 0, Low = 1;
3908  else
3909  return Op;
3910 
3911  SDValue LowOp = Ops[Low];
3912  SDValue HighOp = Ops[High];
3913 
3914  // If the high part is a constant, we're better off using IILH.
3915  if (HighOp.getOpcode() == ISD::Constant)
3916  return Op;
3917 
3918  // If the low part is a constant that is outside the range of LHI,
3919  // then we're better off using IILF.
3920  if (LowOp.getOpcode() == ISD::Constant) {
3921  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3922  if (!isInt<16>(Value))
3923  return Op;
3924  }
3925 
3926  // Check whether the high part is an AND that doesn't change the
3927  // high 32 bits and just masks out low bits. We can skip it if so.
3928  if (HighOp.getOpcode() == ISD::AND &&
3929  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3930  SDValue HighOp0 = HighOp.getOperand(0);
3931  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3932  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3933  HighOp = HighOp0;
3934  }
3935 
3936  // Take advantage of the fact that all GR32 operations only change the
3937  // low 32 bits by truncating Low to an i32 and inserting it directly
3938  // using a subreg. The interesting cases are those where the truncation
3939  // can be folded.
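  // For example, if HighOp has known-zero low 32 bits and LowOp is a
  // zero-extended 32-bit value, the OR reduces to inserting that 32-bit
  // value into the low word of HighOp, and the truncation below folds away.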
3940  SDLoc DL(Op);
3941  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3942  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3943  MVT::i64, HighOp, Low32);
3944 }
3945 
3946 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3947 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3948  SelectionDAG &DAG) const {
3949  SDNode *N = Op.getNode();
3950  SDValue LHS = N->getOperand(0);
3951  SDValue RHS = N->getOperand(1);
3952  SDLoc DL(N);
3953  unsigned BaseOp = 0;
3954  unsigned CCValid = 0;
3955  unsigned CCMask = 0;
3956 
3957  switch (Op.getOpcode()) {
3958  default: llvm_unreachable("Unknown instruction!");
3959  case ISD::SADDO:
3960  BaseOp = SystemZISD::SADDO;
3961  CCValid = SystemZ::CCMASK_ARITH;
3962  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3963  break;
3964  case ISD::SSUBO:
3965  BaseOp = SystemZISD::SSUBO;
3966  CCValid = SystemZ::CCMASK_ARITH;
3967  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3968  break;
3969  case ISD::UADDO:
3970  BaseOp = SystemZISD::UADDO;
3971  CCValid = SystemZ::CCMASK_LOGICAL;
3972  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3973  break;
3974  case ISD::USUBO:
3975  BaseOp = SystemZISD::USUBO;
3976  CCValid = SystemZ::CCMASK_LOGICAL;
3977  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3978  break;
3979  }
3980 
3981  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3982  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3983 
3984  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3985  if (N->getValueType(1) == MVT::i1)
3986  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3987 
3988  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3989 }
3990 
3991 static bool isAddCarryChain(SDValue Carry) {
3992  while (Carry.getOpcode() == ISD::ADDCARRY)
3993  Carry = Carry.getOperand(2);
3994  return Carry.getOpcode() == ISD::UADDO;
3995 }
3996 
3997 static bool isSubBorrowChain(SDValue Carry) {
3998  while (Carry.getOpcode() == ISD::SUBCARRY)
3999  Carry = Carry.getOperand(2);
4000  return Carry.getOpcode() == ISD::USUBO;
4001 }
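// These helpers only accept a carry/borrow that, through any chain of
// ADDCARRY/SUBCARRY nodes, originates from a UADDO/USUBO, i.e. from nodes
// that are themselves lowered to CC-setting operations here, so that the
// GET_CCMASK emitted in lowerADDSUBCARRY can be reduced to a direct use
// of CC.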
4002 
4003 // Lower ADDCARRY/SUBCARRY nodes.
4004 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
4005  SelectionDAG &DAG) const {
4006 
4007  SDNode *N = Op.getNode();
4008  MVT VT = N->getSimpleValueType(0);
4009 
4010  // Let legalize expand this if it isn't a legal type yet.
4011  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4012  return SDValue();
4013 
4014  SDValue LHS = N->getOperand(0);
4015  SDValue RHS = N->getOperand(1);
4016  SDValue Carry = Op.getOperand(2);
4017  SDLoc DL(N);
4018  unsigned BaseOp = 0;
4019  unsigned CCValid = 0;
4020  unsigned CCMask = 0;
4021 
4022  switch (Op.getOpcode()) {
4023  default: llvm_unreachable("Unknown instruction!");
4024  case ISD::ADDCARRY:
4025  if (!isAddCarryChain(Carry))
4026  return SDValue();
4027 
4028  BaseOp = SystemZISD::ADDCARRY;
4029  CCValid = SystemZ::CCMASK_LOGICAL;
4030  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4031  break;
4032  case ISD::SUBCARRY:
4033  if (!isSubBorrowChain(Carry))
4034  return SDValue();
4035 
4036  BaseOp = SystemZISD::SUBCARRY;
4037  CCValid = SystemZ::CCMASK_LOGICAL;
4038  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4039  break;
4040  }
4041 
4042  // Set the condition code from the carry flag.
4043  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4044  DAG.getConstant(CCValid, DL, MVT::i32),
4045  DAG.getConstant(CCMask, DL, MVT::i32));
4046 
4047  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4048  SDValue Result = DAG.