1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
17 #include "SystemZTargetMachine.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/IntrinsicsS390.h"
26 #include "llvm/Support/KnownBits.h"
27 #include <cctype>
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "systemz-lower"
32 
33 namespace {
34 // Represents information about a comparison.
35 struct Comparison {
36  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
37  : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
38  Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
39 
40  // The operands to the comparison.
41  SDValue Op0, Op1;
42 
43  // Chain if this is a strict floating-point comparison.
44  SDValue Chain;
45 
46  // The opcode that should be used to compare Op0 and Op1.
47  unsigned Opcode;
48 
49  // A SystemZICMP value. Only used for integer comparisons.
50  unsigned ICmpType;
51 
52  // The mask of CC values that Opcode can produce.
53  unsigned CCValid;
54 
55  // The mask of CC values for which the original condition is true.
56  unsigned CCMask;
57 };
58 } // end anonymous namespace
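// A minimal standalone sketch of how a 4-bit CC mask such as CCValid/CCMask
// above relates to concrete condition-code values; the helper name is
// hypothetical and not part of this file. It assumes the usual SystemZ
// convention that CC value N corresponds to mask bit 1 << (3 - N), as in the
// machine's 4-bit branch-mask field.
static bool ccSelectsValue(unsigned CCMask, unsigned CC /* 0..3 */) {
  return (CCMask & (1u << (3 - CC))) != 0;
}
// For an integer compare (CC 0 = equal, 1 = low, 2 = high), a mask of 0b1000
// selects only "equal", while 0b0110 selects "less or greater", i.e.
// not-equal within a CCValid of 0b1110.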
59 
60 // Classify VT as either 32 or 64 bit.
61 static bool is32Bit(EVT VT) {
62  switch (VT.getSimpleVT().SimpleTy) {
63  case MVT::i32:
64  return true;
65  case MVT::i64:
66  return false;
67  default:
68  llvm_unreachable("Unsupported type");
69  }
70 }
71 
72 // Return a version of MachineOperand that can be safely used before the
73 // final use.
75  if (Op.isReg())
76  Op.setIsKill(false);
77  return Op;
78 }
79 
81  const SystemZSubtarget &STI)
82  : TargetLowering(TM), Subtarget(STI) {
83  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
84 
85  auto *Regs = STI.getSpecialRegisters();
86 
87  // Set up the register classes.
88  if (Subtarget.hasHighWord())
89  addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
90  else
91  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
92  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
93  if (!useSoftFloat()) {
94  if (Subtarget.hasVector()) {
95  addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
96  addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
97  } else {
98  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
99  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
100  }
101  if (Subtarget.hasVectorEnhancements1())
102  addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
103  else
104  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
105 
106  if (Subtarget.hasVector()) {
107  addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
108  addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
109  addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
110  addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
111  addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
112  addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
113  }
114  }
115 
116  // Compute derived properties from the register classes
118 
119  // Set up special registers.
120  setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
121 
 122  // TODO: It may be better to default to latency-oriented scheduling; however,
 123  // LLVM's current latency-oriented scheduler can't handle physreg definitions
 124  // such as SystemZ's CC register, so use the register-pressure scheduler,
 125  // which can.
127 
130 
131  // Instructions are strings of 2-byte aligned 2-byte values.
133  // For performance reasons we prefer 16-byte alignment.
135 
136  // Handle operations that are handled in a similar way for all types.
137  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
139  ++I) {
140  MVT VT = MVT::SimpleValueType(I);
141  if (isTypeLegal(VT)) {
142  // Lower SET_CC into an IPM-based sequence.
146 
147  // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
149 
150  // Lower SELECT_CC and BR_CC into separate comparisons and branches.
153  }
154  }
155 
156  // Expand jump table branches as address arithmetic followed by an
157  // indirect jump.
159 
160  // Expand BRCOND into a BR_CC (see above).
162 
163  // Handle integer types.
164  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
166  ++I) {
167  MVT VT = MVT::SimpleValueType(I);
168  if (isTypeLegal(VT)) {
170 
171  // Expand individual DIV and REMs into DIVREMs.
178 
179  // Support addition/subtraction with overflow.
182 
183  // Support addition/subtraction with carry.
186 
187  // Support carry in as value rather than glue.
190 
191  // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
192  // stores, putting a serialization instruction after the stores.
195 
196  // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
197  // available, or if the operand is constant.
199 
200  // Use POPCNT on z196 and above.
201  if (Subtarget.hasPopulationCount())
203  else
205 
206  // No special instructions for these.
209 
210  // Use *MUL_LOHI where possible instead of MULH*.
215 
216  // Only z196 and above have native support for conversions to unsigned.
217  // On z10, promoting to i64 doesn't generate an inexact condition for
218  // values that are outside the i32 range but in the i64 range, so use
219  // the default expansion.
220  if (!Subtarget.hasFPExtension())
222 
223  // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
224  // default to Expand, so need to be modified to Legal where appropriate.
226  if (Subtarget.hasFPExtension())
228 
229  // And similarly for STRICT_[SU]INT_TO_FP.
231  if (Subtarget.hasFPExtension())
233  }
234  }
235 
236  // Type legalization will convert 8- and 16-bit atomic operations into
237  // forms that operate on i32s (but still keeping the original memory VT).
238  // Lower them into full i32 operations.
250 
251  // Even though i128 is not a legal type, we still need to custom lower
252  // the atomic operations in order to exploit SystemZ instructions.
255 
256  // We can use the CC result of compare-and-swap to implement
257  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
261 
263 
264  // Traps are legal, as we will convert them to "j .+2".
266 
267  // z10 has instructions for signed but not unsigned FP conversion.
268  // Handle unsigned 32-bit types as signed 64-bit types.
269  if (!Subtarget.hasFPExtension()) {
274  }
275 
276  // We have native support for a 64-bit CTLZ, via FLOGR.
280 
281  // On z15 we have native support for a 64-bit CTPOP.
282  if (Subtarget.hasMiscellaneousExtensions3()) {
285  }
286 
287  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
289 
290  // Expand 128 bit shifts without using a libcall.
294  setLibcallName(RTLIB::SRL_I128, nullptr);
295  setLibcallName(RTLIB::SHL_I128, nullptr);
296  setLibcallName(RTLIB::SRA_I128, nullptr);
297 
298  // Handle bitcast from fp128 to i128.
300 
301  // We have native instructions for i8, i16 and i32 extensions, but not i1.
303  for (MVT VT : MVT::integer_valuetypes()) {
307  }
308 
309  // Handle the various types of symbolic address.
315 
316  // We need to handle dynamic allocations specially because of the
317  // 160-byte area at the bottom of the stack.
320 
323 
324  // Handle prefetches with PFD or PFDRL.
326 
327  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
328  // Assume by default that all vector operations need to be expanded.
329  for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
330  if (getOperationAction(Opcode, VT) == Legal)
331  setOperationAction(Opcode, VT, Expand);
332 
333  // Likewise all truncating stores and extending loads.
334  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
335  setTruncStoreAction(VT, InnerVT, Expand);
336  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
337  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
338  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
339  }
340 
341  if (isTypeLegal(VT)) {
342  // These operations are legal for anything that can be stored in a
343  // vector register, even if there is no native support for the format
344  // as such. In particular, we can do these for v4f32 even though there
345  // are no specific instructions for that format.
351 
352  // Likewise, except that we need to replace the nodes with something
353  // more specific.
356  }
357  }
358 
359  // Handle integer vector types.
361  if (isTypeLegal(VT)) {
362  // These operations have direct equivalents.
367  if (VT != MVT::v2i64)
373  if (Subtarget.hasVectorEnhancements1())
375  else
379 
380  // Convert a GPR scalar to a vector by inserting it into element 0.
382 
383  // Use a series of unpacks for extensions.
386 
387  // Detect shifts by a scalar amount and convert them into
388  // V*_BY_SCALAR.
392 
393  // At present ROTL isn't matched by DAGCombiner. ROTR should be
394  // converted into ROTL.
397 
398  // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
399  // and inverting the result as necessary.
402  if (Subtarget.hasVectorEnhancements1())
404  }
405  }
406 
407  if (Subtarget.hasVector()) {
408  // There should be no need to check for float types other than v2f64
409  // since <2 x f32> isn't a legal type.
418 
427  }
428 
429  if (Subtarget.hasVectorEnhancements2()) {
438 
447  }
448 
449  // Handle floating-point types.
450  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
452  ++I) {
453  MVT VT = MVT::SimpleValueType(I);
454  if (isTypeLegal(VT)) {
455  // We can use FI for FRINT.
457 
458  // We can use the extended form of FI for other rounding operations.
459  if (Subtarget.hasFPExtension()) {
465  }
466 
467  // No special instructions for these.
473 
474  // Special treatment.
476 
477  // Handle constrained floating-point operations.
487  if (Subtarget.hasFPExtension()) {
493  }
494  }
495  }
496 
497  // Handle floating-point vector types.
498  if (Subtarget.hasVector()) {
499  // Scalar-to-vector conversion is just a subreg.
502 
503  // Some insertions and extractions can be done directly but others
504  // need to go via integers.
509 
510  // These operations have direct equivalents.
525 
526  // Handle constrained floating-point operations.
539  }
540 
541  // The vector enhancements facility 1 has instructions for these.
542  if (Subtarget.hasVectorEnhancements1()) {
557 
562 
567 
572 
577 
582 
583  // Handle constrained floating-point operations.
596  for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
597  MVT::v4f32, MVT::v2f64 }) {
602  }
603  }
604 
605  // We only have fused f128 multiply-addition on vector registers.
606  if (!Subtarget.hasVectorEnhancements1()) {
609  }
610 
611  // We don't have a copysign instruction on vector registers.
612  if (Subtarget.hasVectorEnhancements1())
614 
615  // Needed so that we don't try to implement f128 constant loads using
 616  // a load-and-extend of an f80 constant (in cases where the constant
617  // would fit in an f80).
618  for (MVT VT : MVT::fp_valuetypes())
620 
621  // We don't have extending load instruction on vector registers.
622  if (Subtarget.hasVectorEnhancements1()) {
625  }
626 
627  // Floating-point truncation and stores need to be done separately.
631 
632  // We have 64-bit FPR<->GPR moves, but need special handling for
633  // 32-bit forms.
634  if (!Subtarget.hasVector()) {
637  }
638 
639  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
640  // structure, but VAEND is a no-op.
644 
645  // Codes for which we want to perform some z-specific combinations.
649  ISD::LOAD,
650  ISD::STORE,
659  ISD::BSWAP,
660  ISD::SDIV,
661  ISD::UDIV,
662  ISD::SREM,
663  ISD::UREM,
666 
667  // Handle intrinsics.
670 
671  // We want to use MVC in preference to even a single load/store pair.
672  MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
674 
675  // The main memset sequence is a byte store followed by an MVC.
676  // Two STC or MV..I stores win over that, but the kind of fused stores
677  // generated by target-independent code don't when the byte value is
678  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
679  // than "STC;MVC". Handle the choice in target-specific code instead.
680  MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
682 
683  // Default to having -disable-strictnode-mutation on
684  IsStrictFPEnabled = true;
685 }
686 
688  return Subtarget.hasSoftFloat();
689 }
690 
692  LLVMContext &, EVT VT) const {
693  if (!VT.isVector())
694  return MVT::i32;
696 }
697 
699  const MachineFunction &MF, EVT VT) const {
700  VT = VT.getScalarType();
701 
702  if (!VT.isSimple())
703  return false;
704 
705  switch (VT.getSimpleVT().SimpleTy) {
706  case MVT::f32:
707  case MVT::f64:
708  return true;
709  case MVT::f128:
710  return Subtarget.hasVectorEnhancements1();
711  default:
712  break;
713  }
714 
715  return false;
716 }
717 
718 // Return true if the constant can be generated with a vector instruction,
719 // such as VGM, VGMB or VREPI.
721  const SystemZSubtarget &Subtarget) {
722  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
723  if (!Subtarget.hasVector() ||
724  (isFP128 && !Subtarget.hasVectorEnhancements1()))
725  return false;
726 
727  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
728  // preferred way of creating all-zero and all-one vectors so give it
729  // priority over other methods below.
730  unsigned Mask = 0;
731  unsigned I = 0;
732  for (; I < SystemZ::VectorBytes; ++I) {
733  uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
734  if (Byte == 0xff)
735  Mask |= 1ULL << I;
736  else if (Byte != 0)
737  break;
738  }
739  if (I == SystemZ::VectorBytes) {
741  OpVals.push_back(Mask);
743  return true;
744  }
745 
746  if (SplatBitSize > 64)
747  return false;
748 
749  auto tryValue = [&](uint64_t Value) -> bool {
750  // Try VECTOR REPLICATE IMMEDIATE
751  int64_t SignedValue = SignExtend64(Value, SplatBitSize);
752  if (isInt<16>(SignedValue)) {
753  OpVals.push_back(((unsigned) SignedValue));
755  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
756  SystemZ::VectorBits / SplatBitSize);
757  return true;
758  }
759  // Try VECTOR GENERATE MASK
760  unsigned Start, End;
761  if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
762  // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
763  // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
 764  // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
765  OpVals.push_back(Start - (64 - SplatBitSize));
766  OpVals.push_back(End - (64 - SplatBitSize));
768  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
769  SystemZ::VectorBits / SplatBitSize);
770  return true;
771  }
772  return false;
773  };
774 
775  // First try assuming that any undefined bits above the highest set bit
776  // and below the lowest set bit are 1s. This increases the likelihood of
777  // being able to use a sign-extended element value in VECTOR REPLICATE
778  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
779  uint64_t SplatBitsZ = SplatBits.getZExtValue();
780  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
781  uint64_t Lower =
782  (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
783  uint64_t Upper =
784  (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
785  if (tryValue(SplatBitsZ | Upper | Lower))
786  return true;
787 
788  // Now try assuming that any undefined bits between the first and
789  // last defined set bits are set. This increases the chances of
790  // using a non-wraparound mask.
791  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
792  return tryValue(SplatBitsZ | Middle);
793 }
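// A minimal standalone sketch of the undef-bit widening used above, restated
// on plain 64-bit values; the helper name is hypothetical and the code
// assumes a GCC/Clang-style compiler for the bit-scan builtins. Undefined
// bits below the lowest set bit and above the highest set bit are treated as
// 1s, which makes a sign-extendable VECTOR REPLICATE IMMEDIATE value or a
// wraparound VECTOR GENERATE MASK more likely to match.
#include <cstdint>
static uint64_t fillOuterUndefBits(uint64_t SplatBits, uint64_t SplatUndef) {
  if (SplatBits == 0)
    return SplatBits;                              // no set bit to anchor on
  int FirstSet = __builtin_ctzll(SplatBits);       // index of lowest set bit
  int LastSet = 63 - __builtin_clzll(SplatBits);   // index of highest set bit
  uint64_t Lower = SplatUndef & ((uint64_t(1) << FirstSet) - 1);
  uint64_t Upper = SplatUndef & ~((uint64_t(1) << LastSet) - 1);
  return SplatBits | Upper | Lower;
}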
794 
796  if (IntImm.isSingleWord()) {
797  IntBits = APInt(128, IntImm.getZExtValue());
798  IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
799  } else
800  IntBits = IntImm;
801  assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
802 
803  // Find the smallest splat.
804  SplatBits = IntImm;
805  unsigned Width = SplatBits.getBitWidth();
806  while (Width > 8) {
807  unsigned HalfSize = Width / 2;
808  APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
809  APInt LowValue = SplatBits.trunc(HalfSize);
810 
811  // If the two halves do not match, stop here.
812  if (HighValue != LowValue || 8 > HalfSize)
813  break;
814 
815  SplatBits = HighValue;
816  Width = HalfSize;
817  }
818  SplatUndef = 0;
819  SplatBitSize = Width;
820 }
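// A minimal standalone sketch of the splat-narrowing loop above, restated on
// a plain 64-bit value; the helper name is hypothetical. Starting from the
// full width, keep halving while both halves are identical and the half is
// still at least 8 bits wide; the result is the narrowest element size that
// replicates to the original value.
#include <cstdint>
static unsigned smallestSplatWidth(uint64_t Bits, unsigned Width) {
  while (Width > 8) {
    unsigned Half = Width / 2;                     // Half <= 32 here
    uint64_t HalfMask = (uint64_t(1) << Half) - 1;
    uint64_t Low = Bits & HalfMask;
    uint64_t High = (Bits >> Half) & HalfMask;
    if (High != Low || Half < 8)
      break;
    Bits = Low;
    Width = Half;
  }
  return Width;
}
// e.g. smallestSplatWidth(0x0101010101010101, 64) == 8 and
//      smallestSplatWidth(0x1234123412341234, 64) == 16.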
821 
823  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
824  bool HasAnyUndefs;
825 
826  // Get IntBits by finding the 128 bit splat.
827  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
828  true);
829 
830  // Get SplatBits by finding the 8 bit or greater splat.
831  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
832  true);
833 }
834 
836  bool ForCodeSize) const {
837  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
838  if (Imm.isZero() || Imm.isNegZero())
839  return true;
840 
842 }
843 
844 /// Returns true if stack probing through inline assembly is requested.
846  // If the function specifically requests inline stack probes, emit them.
847  if (MF.getFunction().hasFnAttribute("probe-stack"))
848  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
849  "inline-asm";
850  return false;
851 }
852 
854  // We can use CGFI or CLGFI.
855  return isInt<32>(Imm) || isUInt<32>(Imm);
856 }
857 
859  // We can use ALGFI or SLGFI.
860  return isUInt<32>(Imm) || isUInt<32>(-Imm);
861 }
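// A minimal standalone sketch of the 32-bit range checks behind the two
// predicates above; the helper names are hypothetical. A compare immediate is
// legal if it fits a signed (CGFI) or unsigned (CLGFI) 32-bit field, and an
// add immediate is legal if either it or its negation fits an unsigned 32-bit
// field (ALGFI/SLGFI).
#include <cstdint>
static bool fitsSigned32(int64_t Imm) {
  return Imm >= -(int64_t(1) << 31) && Imm < (int64_t(1) << 31);
}
static bool fitsUnsigned32(int64_t Imm) {
  return Imm >= 0 && Imm < (int64_t(1) << 32);
}
// e.g. an add of -1 is encodable because 1 fits in 32 unsigned bits, whereas
// an add of 0x100000000 is not encodable either way.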
862 
864  EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
865  // Unaligned accesses should never be slower than the expanded version.
866  // We check specifically for aligned accesses in the few cases where
867  // they are required.
868  if (Fast)
869  *Fast = true;
870  return true;
871 }
872 
873 // Information about the addressing mode for a memory access.
875  // True if a long displacement is supported.
877 
878  // True if use of index register is supported.
879  bool IndexReg;
880 
881  AddressingMode(bool LongDispl, bool IdxReg) :
882  LongDisplacement(LongDispl), IndexReg(IdxReg) {}
883 };
884 
 885 // Return the desired addressing mode for a Load whose only use (in the
 886 // same block) is a Store.
887 static AddressingMode getLoadStoreAddrMode(bool HasVector,
888  Type *Ty) {
889  // With vector support a Load->Store combination may be combined to either
890  // an MVC or vector operations and it seems to work best to allow the
891  // vector addressing mode.
892  if (HasVector)
893  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
894 
895  // Otherwise only the MVC case is special.
896  bool MVC = Ty->isIntegerTy(8);
897  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
898 }
899 
900 // Return the addressing mode which seems most desirable given an LLVM
901 // Instruction pointer.
902 static AddressingMode
904  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
905  switch (II->getIntrinsicID()) {
906  default: break;
907  case Intrinsic::memset:
908  case Intrinsic::memmove:
909  case Intrinsic::memcpy:
910  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
911  }
912  }
913 
914  if (isa<LoadInst>(I) && I->hasOneUse()) {
915  auto *SingleUser = cast<Instruction>(*I->user_begin());
916  if (SingleUser->getParent() == I->getParent()) {
917  if (isa<ICmpInst>(SingleUser)) {
918  if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
919  if (C->getBitWidth() <= 64 &&
920  (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
921  // Comparison of memory with 16 bit signed / unsigned immediate
922  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
923  } else if (isa<StoreInst>(SingleUser))
924  // Load->Store
925  return getLoadStoreAddrMode(HasVector, I->getType());
926  }
927  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
928  if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
929  if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
930  // Load->Store
931  return getLoadStoreAddrMode(HasVector, LoadI->getType());
932  }
933 
934  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
935 
936  // * Use LDE instead of LE/LEY for z13 to avoid partial register
937  // dependencies (LDE only supports small offsets).
938  // * Utilize the vector registers to hold floating point
939  // values (vector load / store instructions only support small
940  // offsets).
941 
942  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
943  I->getOperand(0)->getType());
944  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
945  bool IsVectorAccess = MemAccessTy->isVectorTy();
946 
947  // A store of an extracted vector element will be combined into a VSTE type
948  // instruction.
949  if (!IsVectorAccess && isa<StoreInst>(I)) {
950  Value *DataOp = I->getOperand(0);
951  if (isa<ExtractElementInst>(DataOp))
952  IsVectorAccess = true;
953  }
954 
955  // A load which gets inserted into a vector element will be combined into a
956  // VLE type instruction.
957  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
958  User *LoadUser = *I->user_begin();
959  if (isa<InsertElementInst>(LoadUser))
960  IsVectorAccess = true;
961  }
962 
963  if (IsFPAccess || IsVectorAccess)
964  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
965  }
966 
967  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
968 }
969 
971  const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
972  // Punt on globals for now, although they can be used in limited
973  // RELATIVE LONG cases.
974  if (AM.BaseGV)
975  return false;
976 
977  // Require a 20-bit signed offset.
978  if (!isInt<20>(AM.BaseOffs))
979  return false;
980 
981  bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
982  AddressingMode SupportedAM(!RequireD12, true);
983  if (I != nullptr)
984  SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
985 
986  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
987  return false;
988 
989  if (!SupportedAM.IndexReg)
990  // No indexing allowed.
991  return AM.Scale == 0;
992  else
993  // Indexing is OK but no scale factor can be applied.
994  return AM.Scale == 0 || AM.Scale == 1;
995 }
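// A minimal standalone sketch of the displacement rules applied above; the
// helper name is hypothetical. Every base+displacement addressing form
// accepts at most a signed 20-bit offset, and contexts restricted to the
// short encoding (for example vector memory operations) only accept an
// unsigned 12-bit offset.
#include <cstdint>
static bool displacementIsLegal(int64_t Offs, bool LongDisplSupported) {
  bool FitsSigned20 = Offs >= -(int64_t(1) << 19) && Offs < (int64_t(1) << 19);
  bool FitsUnsigned12 = Offs >= 0 && Offs < (int64_t(1) << 12);
  return LongDisplSupported ? FitsSigned20 : FitsUnsigned12;
}
// e.g. an offset of 4092 is fine in either mode, 4096 needs the long 20-bit
// form, and anything outside [-524288, 524287] needs separate address
// arithmetic.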
996 
998  std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
999  unsigned SrcAS, const AttributeList &FuncAttributes) const {
1000  const int MVCFastLen = 16;
1001 
1002  if (Limit != ~unsigned(0)) {
1003  // Don't expand Op into scalar loads/stores in these cases:
1004  if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1005  return false; // Small memcpy: Use MVC
1006  if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1007  return false; // Small memset (first byte with STC/MVI): Use MVC
1008  if (Op.isZeroMemset())
1009  return false; // Memset zero: Use XC
1010  }
1011 
1012  return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1013  SrcAS, FuncAttributes);
1014 }
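// A minimal standalone sketch of the size thresholds used above; the helper
// name is hypothetical and the real check also consults the MemOp's overlap
// and alignment properties. Below MVCFastLen bytes the target keeps the
// operation as a single MVC (or XC for a zeroing memset) instead of expanding
// it into individual loads and stores; a memset first materializes one byte
// with STC/MVI, so only Size - 1 bytes are left for the MVC.
static bool preferMVCOverScalarExpansion(unsigned Size, bool IsMemcpy,
                                         bool IsMemset, bool IsZeroMemset) {
  const unsigned MVCFastLen = 16;
  if (IsMemcpy && Size <= MVCFastLen)
    return true;
  if (IsMemset && Size != 0 && Size - 1 <= MVCFastLen)
    return true;
  if (IsZeroMemset)
    return true;
  return false;
}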
1015 
1017  const AttributeList &FuncAttributes) const {
1018  return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1019 }
1020 
1022  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1023  return false;
1024  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
1025  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
1026  return FromBits > ToBits;
1027 }
1028 
1030  if (!FromVT.isInteger() || !ToVT.isInteger())
1031  return false;
1032  unsigned FromBits = FromVT.getFixedSizeInBits();
1033  unsigned ToBits = ToVT.getFixedSizeInBits();
1034  return FromBits > ToBits;
1035 }
1036 
1037 //===----------------------------------------------------------------------===//
1038 // Inline asm support
1039 //===----------------------------------------------------------------------===//
1040 
1043  if (Constraint.size() == 1) {
1044  switch (Constraint[0]) {
1045  case 'a': // Address register
1046  case 'd': // Data register (equivalent to 'r')
1047  case 'f': // Floating-point register
1048  case 'h': // High-part register
1049  case 'r': // General-purpose register
1050  case 'v': // Vector register
1051  return C_RegisterClass;
1052 
1053  case 'Q': // Memory with base and unsigned 12-bit displacement
1054  case 'R': // Likewise, plus an index
1055  case 'S': // Memory with base and signed 20-bit displacement
1056  case 'T': // Likewise, plus an index
1057  case 'm': // Equivalent to 'T'.
1058  return C_Memory;
1059 
1060  case 'I': // Unsigned 8-bit constant
1061  case 'J': // Unsigned 12-bit constant
1062  case 'K': // Signed 16-bit constant
1063  case 'L': // Signed 20-bit displacement (on all targets we support)
1064  case 'M': // 0x7fffffff
1065  return C_Immediate;
1066 
1067  default:
1068  break;
1069  }
1070  } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1071  switch (Constraint[1]) {
1072  case 'Q': // Address with base and unsigned 12-bit displacement
1073  case 'R': // Likewise, plus an index
1074  case 'S': // Address with base and signed 20-bit displacement
1075  case 'T': // Likewise, plus an index
1076  return C_Address;
1077 
1078  default:
1079  break;
1080  }
1081  }
1082  return TargetLowering::getConstraintType(Constraint);
1083 }
1084 
1087  const char *constraint) const {
1088  ConstraintWeight weight = CW_Invalid;
1089  Value *CallOperandVal = info.CallOperandVal;
1090  // If we don't have a value, we can't do a match,
1091  // but allow it at the lowest weight.
1092  if (!CallOperandVal)
1093  return CW_Default;
1094  Type *type = CallOperandVal->getType();
1095  // Look at the constraint type.
1096  switch (*constraint) {
1097  default:
1099  break;
1100 
1101  case 'a': // Address register
1102  case 'd': // Data register (equivalent to 'r')
1103  case 'h': // High-part register
1104  case 'r': // General-purpose register
1105  if (CallOperandVal->getType()->isIntegerTy())
1106  weight = CW_Register;
1107  break;
1108 
1109  case 'f': // Floating-point register
1110  if (type->isFloatingPointTy())
1111  weight = CW_Register;
1112  break;
1113 
1114  case 'v': // Vector register
1115  if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1116  Subtarget.hasVector())
1117  weight = CW_Register;
1118  break;
1119 
1120  case 'I': // Unsigned 8-bit constant
1121  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1122  if (isUInt<8>(C->getZExtValue()))
1123  weight = CW_Constant;
1124  break;
1125 
1126  case 'J': // Unsigned 12-bit constant
1127  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1128  if (isUInt<12>(C->getZExtValue()))
1129  weight = CW_Constant;
1130  break;
1131 
1132  case 'K': // Signed 16-bit constant
1133  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1134  if (isInt<16>(C->getSExtValue()))
1135  weight = CW_Constant;
1136  break;
1137 
1138  case 'L': // Signed 20-bit displacement (on all targets we support)
1139  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1140  if (isInt<20>(C->getSExtValue()))
1141  weight = CW_Constant;
1142  break;
1143 
1144  case 'M': // 0x7fffffff
1145  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1146  if (C->getZExtValue() == 0x7fffffff)
1147  weight = CW_Constant;
1148  break;
1149  }
1150  return weight;
1151 }
1152 
1153 // Parse a "{tNNN}" register constraint for which the register type "t"
1154 // has already been verified. MC is the class associated with "t" and
1155 // Map maps 0-based register numbers to LLVM register numbers.
1156 static std::pair<unsigned, const TargetRegisterClass *>
1158  const unsigned *Map, unsigned Size) {
1159  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1160  if (isdigit(Constraint[2])) {
1161  unsigned Index;
1162  bool Failed =
1163  Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1164  if (!Failed && Index < Size && Map[Index])
1165  return std::make_pair(Map[Index], RC);
1166  }
1167  return std::make_pair(0U, nullptr);
1168 }
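// A minimal standalone sketch of what the "{tNNN}" parsing above amounts to,
// on std::string instead of StringRef; the helper name is hypothetical. The
// caller has already checked the leading "{" and the register-type letter;
// the digits between position 2 and the closing '}' form the 0-based register
// number, which is then bounds-checked against the register map.
#include <cctype>
#include <string>
static int parseBracedRegisterIndex(const std::string &Constraint,
                                    unsigned MapSize) {
  if (Constraint.size() < 4 || Constraint.back() != '}' ||
      !std::isdigit(static_cast<unsigned char>(Constraint[2])))
    return -1;
  unsigned Index = 0;
  for (std::string::size_type I = 2; I + 1 < Constraint.size(); ++I) {
    char C = Constraint[I];
    if (!std::isdigit(static_cast<unsigned char>(C)))
      return -1;
    Index = Index * 10 + unsigned(C - '0');
  }
  return Index < MapSize ? int(Index) : -1;
}
// e.g. parseBracedRegisterIndex("{r5}", 16) == 5 and "{v31}" with a 32-entry
// map yields 31, while "{r99}" with a 16-entry map is rejected.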
1169 
1170 std::pair<unsigned, const TargetRegisterClass *>
1172  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1173  if (Constraint.size() == 1) {
1174  // GCC Constraint Letters
1175  switch (Constraint[0]) {
1176  default: break;
1177  case 'd': // Data register (equivalent to 'r')
1178  case 'r': // General-purpose register
1179  if (VT == MVT::i64)
1180  return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1181  else if (VT == MVT::i128)
1182  return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1183  return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1184 
1185  case 'a': // Address register
1186  if (VT == MVT::i64)
1187  return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1188  else if (VT == MVT::i128)
1189  return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1190  return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1191 
1192  case 'h': // High-part register (an LLVM extension)
1193  return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1194 
1195  case 'f': // Floating-point register
1196  if (!useSoftFloat()) {
1197  if (VT == MVT::f64)
1198  return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1199  else if (VT == MVT::f128)
1200  return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1201  return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1202  }
1203  break;
1204  case 'v': // Vector register
1205  if (Subtarget.hasVector()) {
1206  if (VT == MVT::f32)
1207  return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1208  if (VT == MVT::f64)
1209  return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1210  return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1211  }
1212  break;
1213  }
1214  }
1215  if (Constraint.size() > 0 && Constraint[0] == '{') {
1216  // We need to override the default register parsing for GPRs and FPRs
1217  // because the interpretation depends on VT. The internal names of
1218  // the registers are also different from the external names
1219  // (F0D and F0S instead of F0, etc.).
1220  if (Constraint[1] == 'r') {
1221  if (VT == MVT::i32)
1222  return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1223  SystemZMC::GR32Regs, 16);
1224  if (VT == MVT::i128)
1225  return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1226  SystemZMC::GR128Regs, 16);
1227  return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1228  SystemZMC::GR64Regs, 16);
1229  }
1230  if (Constraint[1] == 'f') {
1231  if (useSoftFloat())
1232  return std::make_pair(
1233  0u, static_cast<const TargetRegisterClass *>(nullptr));
1234  if (VT == MVT::f32)
1235  return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1236  SystemZMC::FP32Regs, 16);
1237  if (VT == MVT::f128)
1238  return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1239  SystemZMC::FP128Regs, 16);
1240  return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1241  SystemZMC::FP64Regs, 16);
1242  }
1243  if (Constraint[1] == 'v') {
1244  if (!Subtarget.hasVector())
1245  return std::make_pair(
1246  0u, static_cast<const TargetRegisterClass *>(nullptr));
1247  if (VT == MVT::f32)
1248  return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1249  SystemZMC::VR32Regs, 32);
1250  if (VT == MVT::f64)
1251  return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1252  SystemZMC::VR64Regs, 32);
1253  return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1254  SystemZMC::VR128Regs, 32);
1255  }
1256  }
1257  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1258 }
1259 
1260 // FIXME? Maybe this could be a TableGen attribute on some registers and
1261 // this table could be generated automatically from RegInfo.
1262 Register
1264  const MachineFunction &MF) const {
1265  const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
1266 
1267  Register Reg =
1269  .Case("r4", Subtarget->isTargetXPLINK64() ? SystemZ::R4D : 0)
1270  .Case("r15", Subtarget->isTargetELF() ? SystemZ::R15D : 0)
1271  .Default(0);
1272 
1273  if (Reg)
1274  return Reg;
1275  report_fatal_error("Invalid register name global variable");
1276 }
1277 
1279 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1280  std::vector<SDValue> &Ops,
1281  SelectionDAG &DAG) const {
1282  // Only support length 1 constraints for now.
1283  if (Constraint.length() == 1) {
1284  switch (Constraint[0]) {
1285  case 'I': // Unsigned 8-bit constant
1286  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1287  if (isUInt<8>(C->getZExtValue()))
1288  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1289  Op.getValueType()));
1290  return;
1291 
1292  case 'J': // Unsigned 12-bit constant
1293  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1294  if (isUInt<12>(C->getZExtValue()))
1295  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1296  Op.getValueType()));
1297  return;
1298 
1299  case 'K': // Signed 16-bit constant
1300  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1301  if (isInt<16>(C->getSExtValue()))
1302  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1303  Op.getValueType()));
1304  return;
1305 
1306  case 'L': // Signed 20-bit displacement (on all targets we support)
1307  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1308  if (isInt<20>(C->getSExtValue()))
1309  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1310  Op.getValueType()));
1311  return;
1312 
1313  case 'M': // 0x7fffffff
1314  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1315  if (C->getZExtValue() == 0x7fffffff)
1316  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1317  Op.getValueType()));
1318  return;
1319  }
1320  }
1321  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1322 }
1323 
1324 //===----------------------------------------------------------------------===//
1325 // Calling conventions
1326 //===----------------------------------------------------------------------===//
1327 
1328 #include "SystemZGenCallingConv.inc"
1329 
1331  CallingConv::ID) const {
1332  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1333  SystemZ::R14D, 0 };
1334  return ScratchRegs;
1335 }
1336 
1338  Type *ToType) const {
1339  return isTruncateFree(FromType, ToType);
1340 }
1341 
1343  return CI->isTailCall();
1344 }
1345 
1346 // We do not yet support 128-bit single-element vector types. If the user
 1347 // attempts to use such types as function arguments or return types, prefer
 1348 // to error out instead of emitting code that violates the ABI.
1349 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1350  if (ArgVT.isVector() && !VT.isVector())
1351  report_fatal_error("Unsupported vector argument or return type");
1352 }
1353 
1355  for (unsigned i = 0; i < Ins.size(); ++i)
1356  VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1357 }
1358 
1360  for (unsigned i = 0; i < Outs.size(); ++i)
1361  VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1362 }
1363 
1364 // Value is a value that has been passed to us in the location described by VA
1365 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1366 // any loads onto Chain.
1368  CCValAssign &VA, SDValue Chain,
1369  SDValue Value) {
1370  // If the argument has been promoted from a smaller type, insert an
1371  // assertion to capture this.
1372  if (VA.getLocInfo() == CCValAssign::SExt)
1373  Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1374  DAG.getValueType(VA.getValVT()));
1375  else if (VA.getLocInfo() == CCValAssign::ZExt)
1376  Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1377  DAG.getValueType(VA.getValVT()));
1378 
1379  if (VA.isExtInLoc())
1380  Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1381  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1382  // If this is a short vector argument loaded from the stack,
1383  // extend from i64 to full vector size and then bitcast.
1384  assert(VA.getLocVT() == MVT::i64);
1385  assert(VA.getValVT().isVector());
1387  Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1388  } else
1389  assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1390  return Value;
1391 }
1392 
1393 // Value is a value of type VA.getValVT() that we need to copy into
1394 // the location described by VA. Return a copy of Value converted to
1395 // VA.getValVT(). The caller is responsible for handling indirect values.
1397  CCValAssign &VA, SDValue Value) {
1398  switch (VA.getLocInfo()) {
1399  case CCValAssign::SExt:
1400  return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1401  case CCValAssign::ZExt:
1402  return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1403  case CCValAssign::AExt:
1404  return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1405  case CCValAssign::BCvt: {
1406  assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1407  assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
1408  VA.getValVT() == MVT::f128);
1409  MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1410  ? MVT::v2i64
1411  : VA.getLocVT();
1412  Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1413  // For ELF, this is a short vector argument to be stored to the stack,
1414  // bitcast to v2i64 and then extract first element.
1415  if (BitCastToType == MVT::v2i64)
1416  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1417  DAG.getConstant(0, DL, MVT::i32));
1418  return Value;
1419  }
1420  case CCValAssign::Full:
1421  return Value;
1422  default:
1423  llvm_unreachable("Unhandled getLocInfo()");
1424  }
1425 }
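// A minimal standalone sketch of what the BCvt cases above amount to at the
// value level; the helper name is hypothetical. Moving an f64 into a 64-bit
// integer location keeps the IEEE-754 bit pattern unchanged, i.e. it is a bit
// reinterpretation (a memcpy-style bit cast in plain C++), not a numeric
// conversion.
#include <cstdint>
#include <cstring>
static uint64_t bitsOfDouble(double D) {
  uint64_t Bits;
  static_assert(sizeof(Bits) == sizeof(D), "f64 and i64 sizes must match");
  std::memcpy(&Bits, &D, sizeof(Bits));
  return Bits;
}
// e.g. bitsOfDouble(1.0) == 0x3ff0000000000000; converting that i64 back the
// same way recovers exactly 1.0, with no rounding involved.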
1426 
1428  SDLoc DL(In);
1430  DAG.getIntPtrConstant(0, DL));
1432  DAG.getIntPtrConstant(1, DL));
1433  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1434  MVT::Untyped, Hi, Lo);
1435  return SDValue(Pair, 0);
1436 }
1437 
1439  SDLoc DL(In);
1440  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1441  DL, MVT::i64, In);
1442  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1443  DL, MVT::i64, In);
1444  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1445 }
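// A minimal standalone sketch of the pairing performed by the two helpers
// above, assuming a compiler that provides __int128; the helper names are
// hypothetical. A GR128 register pair simply holds the high and low 64-bit
// halves of the 128-bit value.
#include <cstdint>
static void splitI128(unsigned __int128 V, uint64_t &Hi, uint64_t &Lo) {
  Lo = static_cast<uint64_t>(V);
  Hi = static_cast<uint64_t>(V >> 64);
}
static unsigned __int128 joinI128(uint64_t Hi, uint64_t Lo) {
  return (static_cast<unsigned __int128>(Hi) << 64) | Lo;
}
// splitI128 followed by joinI128 is the identity, mirroring how
// lowerI128ToGR128 and lowerGR128ToI128 undo each other.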
1446 
1448  SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1449  unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
1450  EVT ValueVT = Val.getValueType();
1451  assert((ValueVT != MVT::i128 ||
1452  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1453  (NumParts == 2 && PartVT == MVT::i64))) &&
1454  "Unknown handling of i128 value.");
1455  if (ValueVT == MVT::i128 && NumParts == 1) {
1456  // Inline assembly operand.
1457  Parts[0] = lowerI128ToGR128(DAG, Val);
1458  return true;
1459  }
1460  return false;
1461 }
1462 
1464  SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1465  MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
1466  assert((ValueVT != MVT::i128 ||
1467  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1468  (NumParts == 2 && PartVT == MVT::i64))) &&
1469  "Unknown handling of i128 value.");
1470  if (ValueVT == MVT::i128 && NumParts == 1)
1471  // Inline assembly operand.
1472  return lowerGR128ToI128(DAG, Parts[0]);
1473  return SDValue();
1474 }
1475 
1477  SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1478  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1479  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1480  MachineFunction &MF = DAG.getMachineFunction();
1481  MachineFrameInfo &MFI = MF.getFrameInfo();
1483  SystemZMachineFunctionInfo *FuncInfo =
1485  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1486  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1487 
1488  // Detect unsupported vector argument types.
1489  if (Subtarget.hasVector())
1491 
1492  // Assign locations to all of the incoming arguments.
1494  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1495  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1496 
1497  unsigned NumFixedGPRs = 0;
1498  unsigned NumFixedFPRs = 0;
1499  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1500  SDValue ArgValue;
1501  CCValAssign &VA = ArgLocs[I];
1502  EVT LocVT = VA.getLocVT();
1503  if (VA.isRegLoc()) {
1504  // Arguments passed in registers
1505  const TargetRegisterClass *RC;
1506  switch (LocVT.getSimpleVT().SimpleTy) {
1507  default:
1508  // Integers smaller than i64 should be promoted to i64.
1509  llvm_unreachable("Unexpected argument type");
1510  case MVT::i32:
1511  NumFixedGPRs += 1;
1512  RC = &SystemZ::GR32BitRegClass;
1513  break;
1514  case MVT::i64:
1515  NumFixedGPRs += 1;
1516  RC = &SystemZ::GR64BitRegClass;
1517  break;
1518  case MVT::f32:
1519  NumFixedFPRs += 1;
1520  RC = &SystemZ::FP32BitRegClass;
1521  break;
1522  case MVT::f64:
1523  NumFixedFPRs += 1;
1524  RC = &SystemZ::FP64BitRegClass;
1525  break;
1526  case MVT::f128:
1527  NumFixedFPRs += 2;
1528  RC = &SystemZ::FP128BitRegClass;
1529  break;
1530  case MVT::v16i8:
1531  case MVT::v8i16:
1532  case MVT::v4i32:
1533  case MVT::v2i64:
1534  case MVT::v4f32:
1535  case MVT::v2f64:
1536  RC = &SystemZ::VR128BitRegClass;
1537  break;
1538  }
1539 
1540  Register VReg = MRI.createVirtualRegister(RC);
1541  MRI.addLiveIn(VA.getLocReg(), VReg);
1542  ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1543  } else {
1544  assert(VA.isMemLoc() && "Argument not register or memory");
1545 
1546  // Create the frame index object for this incoming parameter.
1547  // FIXME: Pre-include call frame size in the offset, should not
1548  // need to manually add it here.
1549  int64_t ArgSPOffset = VA.getLocMemOffset();
1550  if (Subtarget.isTargetXPLINK64()) {
1551  auto &XPRegs =
1553  ArgSPOffset += XPRegs.getCallFrameSize();
1554  }
1555  int FI =
1556  MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1557 
1558  // Create the SelectionDAG nodes corresponding to a load
1559  // from this parameter. Unpromoted ints and floats are
1560  // passed as right-justified 8-byte values.
1561  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1562  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1563  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1564  DAG.getIntPtrConstant(4, DL));
1565  ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1567  }
1568 
1569  // Convert the value of the argument register into the value that's
1570  // being passed.
1571  if (VA.getLocInfo() == CCValAssign::Indirect) {
1572  InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1573  MachinePointerInfo()));
1574  // If the original argument was split (e.g. i128), we need
1575  // to load all parts of it here (using the same address).
1576  unsigned ArgIndex = Ins[I].OrigArgIndex;
1577  assert (Ins[I].PartOffset == 0);
1578  while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1579  CCValAssign &PartVA = ArgLocs[I + 1];
1580  unsigned PartOffset = Ins[I + 1].PartOffset;
1581  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1582  DAG.getIntPtrConstant(PartOffset, DL));
1583  InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1584  MachinePointerInfo()));
1585  ++I;
1586  }
1587  } else
1588  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1589  }
1590 
1591  // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
1592  if (IsVarArg && Subtarget.isTargetELF()) {
1593  // Save the number of non-varargs registers for later use by va_start, etc.
1594  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1595  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1596 
1597  // Likewise the address (in the form of a frame index) of where the
1598  // first stack vararg would be. The 1-byte size here is arbitrary.
1599  int64_t StackSize = CCInfo.getNextStackOffset();
1600  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1601 
1602  // ...and a similar frame index for the caller-allocated save area
1603  // that will be used to store the incoming registers.
1604  int64_t RegSaveOffset =
1605  -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1606  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1607  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1608 
1609  // Store the FPR varargs in the reserved frame slots. (We store the
1610  // GPRs as part of the prologue.)
1611  if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1613  for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1614  unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1615  int FI =
1616  MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1617  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1619  &SystemZ::FP64BitRegClass);
1620  SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1621  MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1623  }
1624  // Join the stores, which are independent of one another.
1625  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1626  makeArrayRef(&MemOps[NumFixedFPRs],
1627  SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1628  }
1629  }
1630 
1631  // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
1632  // register (R5)
1633  return Chain;
1634 }
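// A minimal standalone sketch of the "+ 4" slot adjustment used above for
// unpromoted 32-bit arguments; the helper name is hypothetical. On this
// big-endian target a 4-byte value occupies the right-justified half of its
// 8-byte stack slot, so its effective address is the slot address plus 4.
#include <cstdint>
static int64_t addressWithinSlot(int64_t SlotAddress, unsigned ValueBytes) {
  const unsigned SlotBytes = 8;
  return SlotAddress + (SlotBytes - ValueBytes); // right-justified value
}
// e.g. an i32 or f32 whose slot starts at offset 160 is loaded from offset
// 164, while an i64 or f64 uses the slot address unchanged.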
1635 
1636 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1639  // Punt if there are any indirect or stack arguments, or if the call
1640  // needs the callee-saved argument register R6, or if the call uses
1641  // the callee-saved register arguments SwiftSelf and SwiftError.
1642  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1643  CCValAssign &VA = ArgLocs[I];
1644  if (VA.getLocInfo() == CCValAssign::Indirect)
1645  return false;
1646  if (!VA.isRegLoc())
1647  return false;
1648  Register Reg = VA.getLocReg();
1649  if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1650  return false;
1651  if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1652  return false;
1653  }
1654  return true;
1655 }
1656 
1657 SDValue
1659  SmallVectorImpl<SDValue> &InVals) const {
1660  SelectionDAG &DAG = CLI.DAG;
1661  SDLoc &DL = CLI.DL;
1663  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1665  SDValue Chain = CLI.Chain;
1666  SDValue Callee = CLI.Callee;
1667  bool &IsTailCall = CLI.IsTailCall;
1668  CallingConv::ID CallConv = CLI.CallConv;
1669  bool IsVarArg = CLI.IsVarArg;
1670  MachineFunction &MF = DAG.getMachineFunction();
1671  EVT PtrVT = getPointerTy(MF.getDataLayout());
1672  LLVMContext &Ctx = *DAG.getContext();
1674 
1675  // FIXME: z/OS support to be added in later.
1676  if (Subtarget.isTargetXPLINK64())
1677  IsTailCall = false;
1678 
1679  // Detect unsupported vector argument and return types.
1680  if (Subtarget.hasVector()) {
1681  VerifyVectorTypes(Outs);
1683  }
1684 
1685  // Analyze the operands of the call, assigning locations to each operand.
1687  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1688  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1689 
1690  // We don't support GuaranteedTailCallOpt, only automatically-detected
1691  // sibling calls.
1692  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1693  IsTailCall = false;
1694 
1695  // Get a count of how many bytes are to be pushed on the stack.
1696  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1697 
1698  if (Subtarget.isTargetXPLINK64())
 1699  // Although the XPLINK specifications for AMODE64 state that the minimum
 1700  // size of the param area is 32 bytes and no rounding is otherwise
 1701  // specified, we round this area up in 64-byte increments to be compatible
 1702  // with existing compilers.
1703  NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
1704 
1705  // Mark the start of the call.
1706  if (!IsTailCall)
1707  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1708 
1709  // Copy argument values to their designated locations.
1711  SmallVector<SDValue, 8> MemOpChains;
1712  SDValue StackPtr;
1713  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1714  CCValAssign &VA = ArgLocs[I];
1715  SDValue ArgValue = OutVals[I];
1716 
1717  if (VA.getLocInfo() == CCValAssign::Indirect) {
1718  // Store the argument in a stack slot and pass its address.
1719  unsigned ArgIndex = Outs[I].OrigArgIndex;
1720  EVT SlotVT;
1721  if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1722  // Allocate the full stack space for a promoted (and split) argument.
1723  Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1724  EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1725  MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1726  unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1727  SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1728  } else {
1729  SlotVT = Outs[I].ArgVT;
1730  }
1731  SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1732  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1733  MemOpChains.push_back(
1734  DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1736  // If the original argument was split (e.g. i128), we need
1737  // to store all parts of it here (and pass just one address).
1738  assert (Outs[I].PartOffset == 0);
1739  while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1740  SDValue PartValue = OutVals[I + 1];
1741  unsigned PartOffset = Outs[I + 1].PartOffset;
1742  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1743  DAG.getIntPtrConstant(PartOffset, DL));
1744  MemOpChains.push_back(
1745  DAG.getStore(Chain, DL, PartValue, Address,
1747  assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1748  SlotVT.getStoreSize()) && "Not enough space for argument part!");
1749  ++I;
1750  }
1751  ArgValue = SpillSlot;
1752  } else
1753  ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1754 
1755  if (VA.isRegLoc()) {
1756  // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1757  // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1758  // and low values.
1759  if (VA.getLocVT() == MVT::i128)
1760  ArgValue = lowerI128ToGR128(DAG, ArgValue);
1761  // Queue up the argument copies and emit them at the end.
1762  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1763  } else {
1764  assert(VA.isMemLoc() && "Argument not register or memory");
1765 
1766  // Work out the address of the stack slot. Unpromoted ints and
1767  // floats are passed as right-justified 8-byte values.
1768  if (!StackPtr.getNode())
1769  StackPtr = DAG.getCopyFromReg(Chain, DL,
1770  Regs->getStackPointerRegister(), PtrVT);
1771  unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1772  VA.getLocMemOffset();
1773  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1774  Offset += 4;
1775  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1776  DAG.getIntPtrConstant(Offset, DL));
1777 
1778  // Emit the store.
1779  MemOpChains.push_back(
1780  DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1781 
1782  // Although long doubles or vectors are passed through the stack when
1783  // they are vararg (non-fixed arguments), if a long double or vector
 1784  // occupies the third and fourth slot of the argument list, GPR3 should
1785  // still shadow the third slot of the argument list.
1786  if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1787  SDValue ShadowArgValue =
1788  DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1789  DAG.getIntPtrConstant(1, DL));
1790  RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1791  }
1792  }
1793  }
1794 
1795  // Join the stores, which are independent of one another.
1796  if (!MemOpChains.empty())
1797  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1798 
1799  // Accept direct calls by converting symbolic call addresses to the
1800  // associated Target* opcodes. Force %r1 to be used for indirect
1801  // tail calls.
1802  SDValue Glue;
1803  // FIXME: Add support for XPLINK using the ADA register.
1804  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1805  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1807  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1808  Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1810  } else if (IsTailCall) {
1811  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1812  Glue = Chain.getValue(1);
1813  Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1814  }
1815 
1816  // Build a sequence of copy-to-reg nodes, chained and glued together.
1817  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1818  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1819  RegsToPass[I].second, Glue);
1820  Glue = Chain.getValue(1);
1821  }
1822 
1823  // The first call operand is the chain and the second is the target address.
1825  Ops.push_back(Chain);
1826  Ops.push_back(Callee);
1827 
1828  // Add argument registers to the end of the list so that they are
1829  // known live into the call.
1830  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1831  Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1832  RegsToPass[I].second.getValueType()));
1833 
1834  // Add a register mask operand representing the call-preserved registers.
1835  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1836  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1837  assert(Mask && "Missing call preserved mask for calling convention");
1838  Ops.push_back(DAG.getRegisterMask(Mask));
1839 
1840  // Glue the call to the argument copies, if any.
1841  if (Glue.getNode())
1842  Ops.push_back(Glue);
1843 
1844  // Emit the call.
1845  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1846  if (IsTailCall)
1847  return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1848  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1849  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1850  Glue = Chain.getValue(1);
1851 
1852  // Mark the end of the call, which is glued to the call itself.
1853  Chain = DAG.getCALLSEQ_END(Chain,
1854  DAG.getConstant(NumBytes, DL, PtrVT, true),
1855  DAG.getConstant(0, DL, PtrVT, true),
1856  Glue, DL);
1857  Glue = Chain.getValue(1);
1858 
1859  // Assign locations to each value returned by this call.
1861  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1862  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1863 
1864  // Copy all of the result registers out of their specified physreg.
1865  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1866  CCValAssign &VA = RetLocs[I];
1867 
1868  // Copy the value out, gluing the copy to the end of the call sequence.
1869  SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1870  VA.getLocVT(), Glue);
1871  Chain = RetValue.getValue(1);
1872  Glue = RetValue.getValue(2);
1873 
1874  // Convert the value of the return register into the value that's
1875  // being returned.
1876  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1877  }
1878 
1879  return Chain;
1880 }
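// A minimal standalone sketch of the XPLINK64 parameter-area rounding used in
// LowerCall above; the helper name is hypothetical. The area is rounded up to
// a multiple of 64 bytes and never drops below 64 bytes, matching existing
// compilers.
static unsigned xplinkParamAreaSize(unsigned NumBytes) {
  unsigned RoundedUp = (NumBytes + 63) & ~63u;
  return RoundedUp < 64 ? 64 : RoundedUp;
}
// e.g. 0 or 40 argument bytes both reserve 64 bytes, while 65 bytes reserve
// 128.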
1881 
1882 // Generate a call taking the given operands as arguments and returning a
1883 // result of type RetVT.
1884 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
1885  SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
1886  ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
1887  bool DoesNotReturn, bool IsReturnValueUsed) const {
1889  Args.reserve(Ops.size());
1890 
1892  for (SDValue Op : Ops) {
1893  Entry.Node = Op;
1894  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1895  Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1896  Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1897  Args.push_back(Entry);
1898  }
1899 
1900  SDValue Callee =
1901  DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
1902 
1903  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
1905  bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
1906  CLI.setDebugLoc(DL)
1907  .setChain(Chain)
1908  .setCallee(CallConv, RetTy, Callee, std::move(Args))
1909  .setNoReturn(DoesNotReturn)
1910  .setDiscardResult(!IsReturnValueUsed)
1911  .setSExtResult(SignExtend)
1912  .setZExtResult(!SignExtend);
1913  return LowerCallTo(CLI);
1914 }
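// For illustration, a lowering routine could wrap a runtime helper with
// makeExternalCall roughly like this (the symbol name and operands here are
// purely illustrative, not something this file actually calls):
//
//   SDValue Ops[] = {LHS, RHS};
//   std::pair<SDValue, SDValue> CallResult =
//       makeExternalCall(Chain, DAG, "__example_helper", MVT::i64, Ops,
//                        CallingConv::C, /*IsSigned=*/true, DL,
//                        /*DoesNotReturn=*/false, /*IsReturnValueUsed=*/true);
//   // CallResult.first is the returned value, CallResult.second the chain.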
1915 
 1916 bool SystemZTargetLowering::
 1917 CanLowerReturn(CallingConv::ID CallConv,
1918  MachineFunction &MF, bool isVarArg,
1919  const SmallVectorImpl<ISD::OutputArg> &Outs,
1920  LLVMContext &Context) const {
1921  // Detect unsupported vector return types.
1922  if (Subtarget.hasVector())
1923  VerifyVectorTypes(Outs);
1924 
1925  // Special case that we cannot easily detect in RetCC_SystemZ since
1926  // i128 is not a legal type.
1927  for (auto &Out : Outs)
1928  if (Out.ArgVT == MVT::i128)
1929  return false;
1930 
 1931  SmallVector<CCValAssign, 16> RetLocs;
1932  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1933  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1934 }
1935 
1936 SDValue
 1937 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1938  bool IsVarArg,
1939  const SmallVectorImpl<ISD::OutputArg> &Outs,
1940  const SmallVectorImpl<SDValue> &OutVals,
1941  const SDLoc &DL, SelectionDAG &DAG) const {
1942  MachineFunction &MF = DAG.getMachineFunction();
1943 
1944  // Detect unsupported vector return types.
1945  if (Subtarget.hasVector())
1946  VerifyVectorTypes(Outs);
1947 
1948  // Assign locations to each returned value.
 1949  SmallVector<CCValAssign, 16> RetLocs;
1950  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1951  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1952 
1953  // Quick exit for void returns
1954  if (RetLocs.empty())
1955  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1956 
1957  if (CallConv == CallingConv::GHC)
1958  report_fatal_error("GHC functions return void only");
1959 
1960  // Copy the result values into the output registers.
1961  SDValue Glue;
1962  SmallVector<SDValue, 4> RetOps;
1963  RetOps.push_back(Chain);
1964  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1965  CCValAssign &VA = RetLocs[I];
1966  SDValue RetValue = OutVals[I];
1967 
1968  // Make the return register live on exit.
1969  assert(VA.isRegLoc() && "Can only return in registers!");
1970 
1971  // Promote the value as required.
1972  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1973 
1974  // Chain and glue the copies together.
1975  Register Reg = VA.getLocReg();
1976  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1977  Glue = Chain.getValue(1);
1978  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1979  }
1980 
1981  // Update chain and glue.
1982  RetOps[0] = Chain;
1983  if (Glue.getNode())
1984  RetOps.push_back(Glue);
1985 
1986  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1987 }
1988 
1989 // Return true if Op is an intrinsic node with chain that returns the CC value
1990 // as its only (other) argument. Provide the associated SystemZISD opcode and
1991 // the mask of valid CC values if so.
1992 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1993  unsigned &CCValid) {
1994  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1995  switch (Id) {
1996  case Intrinsic::s390_tbegin:
1997  Opcode = SystemZISD::TBEGIN;
1998  CCValid = SystemZ::CCMASK_TBEGIN;
1999  return true;
2000 
2001  case Intrinsic::s390_tbegin_nofloat:
2002  Opcode = SystemZISD::TBEGIN_NOFLOAT;
2003  CCValid = SystemZ::CCMASK_TBEGIN;
2004  return true;
2005 
2006  case Intrinsic::s390_tend:
2007  Opcode = SystemZISD::TEND;
2008  CCValid = SystemZ::CCMASK_TEND;
2009  return true;
2010 
2011  default:
2012  return false;
2013  }
2014 }
2015 
2016 // Return true if Op is an intrinsic node without chain that returns the
2017 // CC value as its final argument. Provide the associated SystemZISD
2018 // opcode and the mask of valid CC values if so.
2019 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2020  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2021  switch (Id) {
2022  case Intrinsic::s390_vpkshs:
2023  case Intrinsic::s390_vpksfs:
2024  case Intrinsic::s390_vpksgs:
2025  Opcode = SystemZISD::PACKS_CC;
2026  CCValid = SystemZ::CCMASK_VCMP;
2027  return true;
2028 
2029  case Intrinsic::s390_vpklshs:
2030  case Intrinsic::s390_vpklsfs:
2031  case Intrinsic::s390_vpklsgs:
2032  Opcode = SystemZISD::PACKLS_CC;
2033  CCValid = SystemZ::CCMASK_VCMP;
2034  return true;
2035 
2036  case Intrinsic::s390_vceqbs:
2037  case Intrinsic::s390_vceqhs:
2038  case Intrinsic::s390_vceqfs:
2039  case Intrinsic::s390_vceqgs:
2040  Opcode = SystemZISD::VICMPES;
2041  CCValid = SystemZ::CCMASK_VCMP;
2042  return true;
2043 
2044  case Intrinsic::s390_vchbs:
2045  case Intrinsic::s390_vchhs:
2046  case Intrinsic::s390_vchfs:
2047  case Intrinsic::s390_vchgs:
2048  Opcode = SystemZISD::VICMPHS;
2049  CCValid = SystemZ::CCMASK_VCMP;
2050  return true;
2051 
2052  case Intrinsic::s390_vchlbs:
2053  case Intrinsic::s390_vchlhs:
2054  case Intrinsic::s390_vchlfs:
2055  case Intrinsic::s390_vchlgs:
2056  Opcode = SystemZISD::VICMPHLS;
2057  CCValid = SystemZ::CCMASK_VCMP;
2058  return true;
2059 
2060  case Intrinsic::s390_vtm:
2061  Opcode = SystemZISD::VTM;
2062  CCValid = SystemZ::CCMASK_VCMP;
2063  return true;
2064 
2065  case Intrinsic::s390_vfaebs:
2066  case Intrinsic::s390_vfaehs:
2067  case Intrinsic::s390_vfaefs:
2068  Opcode = SystemZISD::VFAE_CC;
2069  CCValid = SystemZ::CCMASK_ANY;
2070  return true;
2071 
2072  case Intrinsic::s390_vfaezbs:
2073  case Intrinsic::s390_vfaezhs:
2074  case Intrinsic::s390_vfaezfs:
2075  Opcode = SystemZISD::VFAEZ_CC;
2076  CCValid = SystemZ::CCMASK_ANY;
2077  return true;
2078 
2079  case Intrinsic::s390_vfeebs:
2080  case Intrinsic::s390_vfeehs:
2081  case Intrinsic::s390_vfeefs:
2082  Opcode = SystemZISD::VFEE_CC;
2083  CCValid = SystemZ::CCMASK_ANY;
2084  return true;
2085 
2086  case Intrinsic::s390_vfeezbs:
2087  case Intrinsic::s390_vfeezhs:
2088  case Intrinsic::s390_vfeezfs:
2089  Opcode = SystemZISD::VFEEZ_CC;
2090  CCValid = SystemZ::CCMASK_ANY;
2091  return true;
2092 
2093  case Intrinsic::s390_vfenebs:
2094  case Intrinsic::s390_vfenehs:
2095  case Intrinsic::s390_vfenefs:
2096  Opcode = SystemZISD::VFENE_CC;
2097  CCValid = SystemZ::CCMASK_ANY;
2098  return true;
2099 
2100  case Intrinsic::s390_vfenezbs:
2101  case Intrinsic::s390_vfenezhs:
2102  case Intrinsic::s390_vfenezfs:
2103  Opcode = SystemZISD::VFENEZ_CC;
2104  CCValid = SystemZ::CCMASK_ANY;
2105  return true;
2106 
2107  case Intrinsic::s390_vistrbs:
2108  case Intrinsic::s390_vistrhs:
2109  case Intrinsic::s390_vistrfs:
2110  Opcode = SystemZISD::VISTR_CC;
 2111  CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2112  return true;
2113 
2114  case Intrinsic::s390_vstrcbs:
2115  case Intrinsic::s390_vstrchs:
2116  case Intrinsic::s390_vstrcfs:
2117  Opcode = SystemZISD::VSTRC_CC;
2118  CCValid = SystemZ::CCMASK_ANY;
2119  return true;
2120 
2121  case Intrinsic::s390_vstrczbs:
2122  case Intrinsic::s390_vstrczhs:
2123  case Intrinsic::s390_vstrczfs:
2124  Opcode = SystemZISD::VSTRCZ_CC;
2125  CCValid = SystemZ::CCMASK_ANY;
2126  return true;
2127 
2128  case Intrinsic::s390_vstrsb:
2129  case Intrinsic::s390_vstrsh:
2130  case Intrinsic::s390_vstrsf:
2131  Opcode = SystemZISD::VSTRS_CC;
2132  CCValid = SystemZ::CCMASK_ANY;
2133  return true;
2134 
2135  case Intrinsic::s390_vstrszb:
2136  case Intrinsic::s390_vstrszh:
2137  case Intrinsic::s390_vstrszf:
2138  Opcode = SystemZISD::VSTRSZ_CC;
2139  CCValid = SystemZ::CCMASK_ANY;
2140  return true;
2141 
2142  case Intrinsic::s390_vfcedbs:
2143  case Intrinsic::s390_vfcesbs:
2144  Opcode = SystemZISD::VFCMPES;
2145  CCValid = SystemZ::CCMASK_VCMP;
2146  return true;
2147 
2148  case Intrinsic::s390_vfchdbs:
2149  case Intrinsic::s390_vfchsbs:
2150  Opcode = SystemZISD::VFCMPHS;
2151  CCValid = SystemZ::CCMASK_VCMP;
2152  return true;
2153 
2154  case Intrinsic::s390_vfchedbs:
2155  case Intrinsic::s390_vfchesbs:
2156  Opcode = SystemZISD::VFCMPHES;
2157  CCValid = SystemZ::CCMASK_VCMP;
2158  return true;
2159 
2160  case Intrinsic::s390_vftcidb:
2161  case Intrinsic::s390_vftcisb:
2162  Opcode = SystemZISD::VFTCI;
2163  CCValid = SystemZ::CCMASK_VCMP;
2164  return true;
2165 
2166  case Intrinsic::s390_tdc:
2167  Opcode = SystemZISD::TDC;
2168  CCValid = SystemZ::CCMASK_TDC;
2169  return true;
2170 
2171  default:
2172  return false;
2173  }
2174 }
2175 
2176 // Emit an intrinsic with chain and an explicit CC register result.
 2177 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2178  unsigned Opcode) {
2179  // Copy all operands except the intrinsic ID.
2180  unsigned NumOps = Op.getNumOperands();
2182  Ops.reserve(NumOps - 1);
2183  Ops.push_back(Op.getOperand(0));
2184  for (unsigned I = 2; I < NumOps; ++I)
2185  Ops.push_back(Op.getOperand(I));
2186 
2187  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2188  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2189  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2190  SDValue OldChain = SDValue(Op.getNode(), 1);
2191  SDValue NewChain = SDValue(Intr.getNode(), 1);
2192  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2193  return Intr.getNode();
2194 }
2195 
2196 // Emit an intrinsic with an explicit CC register result.
 2197 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2198  unsigned Opcode) {
2199  // Copy all operands except the intrinsic ID.
2200  unsigned NumOps = Op.getNumOperands();
2202  Ops.reserve(NumOps - 1);
2203  for (unsigned I = 1; I < NumOps; ++I)
2204  Ops.push_back(Op.getOperand(I));
2205 
2206  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2207  return Intr.getNode();
2208 }
2209 
2210 // CC is a comparison that will be implemented using an integer or
2211 // floating-point comparison. Return the condition code mask for
2212 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
2213 // unsigned comparisons and clear for signed ones. In the floating-point
2214 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2215 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2216 #define CONV(X) \
2217  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2218  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2219  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2220 
2221  switch (CC) {
2222  default:
2223  llvm_unreachable("Invalid integer condition!");
2224 
2225  CONV(EQ);
2226  CONV(NE);
2227  CONV(GT);
2228  CONV(GE);
2229  CONV(LT);
2230  CONV(LE);
2231 
2232  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2233  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2234  }
2235 #undef CONV
2236 }
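// For example, ISD::SETUGT maps to
//   SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_GT
// via the SETU##X arm of CONV, while ISD::SETGT and ISD::SETOGT both map to
// SystemZ::CCMASK_CMP_GT alone; integer callers later clear CCMASK_CMP_UO
// again, since "unordered" has no meaning for integer comparisons.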
2237 
2238 // If C can be converted to a comparison against zero, adjust the operands
2239 // as necessary.
2240 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2241  if (C.ICmpType == SystemZICMP::UnsignedOnly)
2242  return;
2243 
2244  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2245  if (!ConstOp1)
2246  return;
2247 
2248  int64_t Value = ConstOp1->getSExtValue();
2249  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2250  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2251  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2252  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2253  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2254  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2255  }
2256 }
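// For example, a signed "X > -1" (CCMASK_CMP_GT against constant -1) becomes
// "X >= 0" here: the CC mask gains the equality bit and the constant operand
// is replaced by zero, which comparisons against zero can later exploit
// (e.g. via LOAD AND TEST).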
2257 
2258 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2259 // adjust the operands as necessary.
2260 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2261  Comparison &C) {
 2262  // For us to make any changes, it must be a comparison between a single-use
2263  // load and a constant.
2264  if (!C.Op0.hasOneUse() ||
2265  C.Op0.getOpcode() != ISD::LOAD ||
2266  C.Op1.getOpcode() != ISD::Constant)
2267  return;
2268 
2269  // We must have an 8- or 16-bit load.
2270  auto *Load = cast<LoadSDNode>(C.Op0);
2271  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2272  if ((NumBits != 8 && NumBits != 16) ||
2273  NumBits != Load->getMemoryVT().getStoreSizeInBits())
2274  return;
2275 
2276  // The load must be an extending one and the constant must be within the
2277  // range of the unextended value.
2278  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2279  uint64_t Value = ConstOp1->getZExtValue();
2280  uint64_t Mask = (1 << NumBits) - 1;
2281  if (Load->getExtensionType() == ISD::SEXTLOAD) {
2282  // Make sure that ConstOp1 is in range of C.Op0.
2283  int64_t SignedValue = ConstOp1->getSExtValue();
2284  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2285  return;
2286  if (C.ICmpType != SystemZICMP::SignedOnly) {
2287  // Unsigned comparison between two sign-extended values is equivalent
2288  // to unsigned comparison between two zero-extended values.
2289  Value &= Mask;
2290  } else if (NumBits == 8) {
2291  // Try to treat the comparison as unsigned, so that we can use CLI.
2292  // Adjust CCMask and Value as necessary.
2293  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2294  // Test whether the high bit of the byte is set.
2295  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2296  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2297  // Test whether the high bit of the byte is clear.
2298  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2299  else
2300  // No instruction exists for this combination.
2301  return;
2302  C.ICmpType = SystemZICMP::UnsignedOnly;
2303  }
2304  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2305  if (Value > Mask)
2306  return;
2307  // If the constant is in range, we can use any comparison.
2308  C.ICmpType = SystemZICMP::Any;
2309  } else
2310  return;
2311 
2312  // Make sure that the first operand is an i32 of the right extension type.
2313  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2314  ISD::SEXTLOAD :
2315  ISD::ZEXTLOAD);
2316  if (C.Op0.getValueType() != MVT::i32 ||
2317  Load->getExtensionType() != ExtType) {
2318  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2319  Load->getBasePtr(), Load->getPointerInfo(),
2320  Load->getMemoryVT(), Load->getAlign(),
2321  Load->getMemOperand()->getFlags());
2322  // Update the chain uses.
2323  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2324  }
2325 
2326  // Make sure that the second operand is an i32 with the right value.
2327  if (C.Op1.getValueType() != MVT::i32 ||
2328  Value != ConstOp1->getZExtValue())
2329  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2330 }
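// As an example, comparing an 8-bit zero-extending load against the constant
// 200 is rewritten so the load becomes an i32 extending load and the constant
// stays within the unextended range; the comparison can then be selected as
// CLI (compare logical immediate) against the memory byte.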
2331 
2332 // Return true if Op is either an unextended load, or a load suitable
2333 // for integer register-memory comparisons of type ICmpType.
2334 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2335  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2336  if (Load) {
2337  // There are no instructions to compare a register with a memory byte.
2338  if (Load->getMemoryVT() == MVT::i8)
2339  return false;
2340  // Otherwise decide on extension type.
2341  switch (Load->getExtensionType()) {
2342  case ISD::NON_EXTLOAD:
2343  return true;
2344  case ISD::SEXTLOAD:
2345  return ICmpType != SystemZICMP::UnsignedOnly;
2346  case ISD::ZEXTLOAD:
2347  return ICmpType != SystemZICMP::SignedOnly;
2348  default:
2349  break;
2350  }
2351  }
2352  return false;
2353 }
2354 
2355 // Return true if it is better to swap the operands of C.
2356 static bool shouldSwapCmpOperands(const Comparison &C) {
2357  // Leave f128 comparisons alone, since they have no memory forms.
2358  if (C.Op0.getValueType() == MVT::f128)
2359  return false;
2360 
2361  // Always keep a floating-point constant second, since comparisons with
2362  // zero can use LOAD TEST and comparisons with other constants make a
2363  // natural memory operand.
2364  if (isa<ConstantFPSDNode>(C.Op1))
2365  return false;
2366 
2367  // Never swap comparisons with zero since there are many ways to optimize
2368  // those later.
2369  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2370  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2371  return false;
2372 
2373  // Also keep natural memory operands second if the loaded value is
2374  // only used here. Several comparisons have memory forms.
2375  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2376  return false;
2377 
2378  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2379  // In that case we generally prefer the memory to be second.
2380  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2381  // The only exceptions are when the second operand is a constant and
2382  // we can use things like CHHSI.
2383  if (!ConstOp1)
2384  return true;
2385  // The unsigned memory-immediate instructions can handle 16-bit
2386  // unsigned integers.
2387  if (C.ICmpType != SystemZICMP::SignedOnly &&
2388  isUInt<16>(ConstOp1->getZExtValue()))
2389  return false;
2390  // The signed memory-immediate instructions can handle 16-bit
2391  // signed integers.
2392  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2393  isInt<16>(ConstOp1->getSExtValue()))
2394  return false;
2395  return true;
2396  }
2397 
2398  // Try to promote the use of CGFR and CLGFR.
2399  unsigned Opcode0 = C.Op0.getOpcode();
2400  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2401  return true;
2402  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2403  return true;
2404  if (C.ICmpType != SystemZICMP::SignedOnly &&
2405  Opcode0 == ISD::AND &&
2406  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2407  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2408  return true;
2409 
2410  return false;
2411 }
2412 
2413 // Check whether C tests for equality between X and Y and whether X - Y
2414 // or Y - X is also computed. In that case it's better to compare the
2415 // result of the subtraction against zero.
2416 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2417  Comparison &C) {
2418  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2419  C.CCMask == SystemZ::CCMASK_CMP_NE) {
2420  for (SDNode *N : C.Op0->uses()) {
2421  if (N->getOpcode() == ISD::SUB &&
2422  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2423  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2424  C.Op0 = SDValue(N, 0);
2425  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2426  return;
2427  }
2428  }
2429  }
2430 }
2431 
 2432 // Check whether C compares a floating-point value with zero and whether that
 2433 // floating-point value is also negated. In this case we can use the
 2434 // negation to set CC, thus avoiding separate LOAD AND TEST and
 2435 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2436 static void adjustForFNeg(Comparison &C) {
2437  // This optimization is invalid for strict comparisons, since FNEG
2438  // does not raise any exceptions.
2439  if (C.Chain)
2440  return;
2441  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2442  if (C1 && C1->isZero()) {
2443  for (SDNode *N : C.Op0->uses()) {
2444  if (N->getOpcode() == ISD::FNEG) {
2445  C.Op0 = SDValue(N, 0);
2446  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2447  return;
2448  }
2449  }
2450  }
2451 }
2452 
2453 // Check whether C compares (shl X, 32) with 0 and whether X is
2454 // also sign-extended. In that case it is better to test the result
2455 // of the sign extension using LTGFR.
2456 //
2457 // This case is important because InstCombine transforms a comparison
2458 // with (sext (trunc X)) into a comparison with (shl X, 32).
2459 static void adjustForLTGFR(Comparison &C) {
2460  // Check for a comparison between (shl X, 32) and 0.
2461  if (C.Op0.getOpcode() == ISD::SHL &&
2462  C.Op0.getValueType() == MVT::i64 &&
2463  C.Op1.getOpcode() == ISD::Constant &&
2464  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2465  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2466  if (C1 && C1->getZExtValue() == 32) {
2467  SDValue ShlOp0 = C.Op0.getOperand(0);
2468  // See whether X has any SIGN_EXTEND_INREG uses.
2469  for (SDNode *N : ShlOp0->uses()) {
2470  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2471  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2472  C.Op0 = SDValue(N, 0);
2473  return;
2474  }
2475  }
2476  }
2477  }
2478 }
2479 
2480 // If C compares the truncation of an extending load, try to compare
2481 // the untruncated value instead. This exposes more opportunities to
2482 // reuse CC.
2483 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2484  Comparison &C) {
2485  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2486  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2487  C.Op1.getOpcode() == ISD::Constant &&
2488  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2489  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2490  if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2491  C.Op0.getValueSizeInBits().getFixedSize()) {
2492  unsigned Type = L->getExtensionType();
2493  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2494  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2495  C.Op0 = C.Op0.getOperand(0);
2496  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2497  }
2498  }
2499  }
2500 }
2501 
2502 // Return true if shift operation N has an in-range constant shift value.
2503 // Store it in ShiftVal if so.
2504 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2505  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2506  if (!Shift)
2507  return false;
2508 
2509  uint64_t Amount = Shift->getZExtValue();
2510  if (Amount >= N.getValueSizeInBits())
2511  return false;
2512 
2513  ShiftVal = Amount;
2514  return true;
2515 }
2516 
2517 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2518 // instruction and whether the CC value is descriptive enough to handle
2519 // a comparison of type Opcode between the AND result and CmpVal.
2520 // CCMask says which comparison result is being tested and BitSize is
2521 // the number of bits in the operands. If TEST UNDER MASK can be used,
2522 // return the corresponding CC mask, otherwise return 0.
2523 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2524  uint64_t Mask, uint64_t CmpVal,
2525  unsigned ICmpType) {
2526  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2527 
2528  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
 2529  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
 2530  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2531  return 0;
2532 
2533  // Work out the masks for the lowest and highest bits.
2534  unsigned HighShift = 63 - countLeadingZeros(Mask);
2535  uint64_t High = uint64_t(1) << HighShift;
 2536  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2537 
2538  // Signed ordered comparisons are effectively unsigned if the sign
2539  // bit is dropped.
2540  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2541 
2542  // Check for equality comparisons with 0, or the equivalent.
2543  if (CmpVal == 0) {
2544  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2545  return SystemZ::CCMASK_TM_ALL_0;
2546  if (CCMask == SystemZ::CCMASK_CMP_NE)
 2547  return SystemZ::CCMASK_TM_SOME_1;
2548  }
2549  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2550  if (CCMask == SystemZ::CCMASK_CMP_LT)
2551  return SystemZ::CCMASK_TM_ALL_0;
2552  if (CCMask == SystemZ::CCMASK_CMP_GE)
 2553  return SystemZ::CCMASK_TM_SOME_1;
2554  }
2555  if (EffectivelyUnsigned && CmpVal < Low) {
2556  if (CCMask == SystemZ::CCMASK_CMP_LE)
2557  return SystemZ::CCMASK_TM_ALL_0;
2558  if (CCMask == SystemZ::CCMASK_CMP_GT)
 2559  return SystemZ::CCMASK_TM_SOME_1;
2560  }
2561 
2562  // Check for equality comparisons with the mask, or the equivalent.
2563  if (CmpVal == Mask) {
2564  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2565  return SystemZ::CCMASK_TM_ALL_1;
2566  if (CCMask == SystemZ::CCMASK_CMP_NE)
 2567  return SystemZ::CCMASK_TM_SOME_0;
2568  }
2569  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2570  if (CCMask == SystemZ::CCMASK_CMP_GT)
2571  return SystemZ::CCMASK_TM_ALL_1;
2572  if (CCMask == SystemZ::CCMASK_CMP_LE)
 2573  return SystemZ::CCMASK_TM_SOME_0;
2574  }
2575  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2576  if (CCMask == SystemZ::CCMASK_CMP_GE)
2577  return SystemZ::CCMASK_TM_ALL_1;
2578  if (CCMask == SystemZ::CCMASK_CMP_LT)
 2579  return SystemZ::CCMASK_TM_SOME_0;
2580  }
2581 
2582  // Check for ordered comparisons with the top bit.
2583  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2584  if (CCMask == SystemZ::CCMASK_CMP_LE)
2585  return SystemZ::CCMASK_TM_MSB_0;
2586  if (CCMask == SystemZ::CCMASK_CMP_GT)
2587  return SystemZ::CCMASK_TM_MSB_1;
2588  }
2589  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2590  if (CCMask == SystemZ::CCMASK_CMP_LT)
2591  return SystemZ::CCMASK_TM_MSB_0;
2592  if (CCMask == SystemZ::CCMASK_CMP_GE)
2593  return SystemZ::CCMASK_TM_MSB_1;
2594  }
2595 
2596  // If there are just two bits, we can do equality checks for Low and High
2597  // as well.
2598  if (Mask == Low + High) {
 2599  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
 2600  return SystemZ::CCMASK_TM_MIXED_MSB_0;
 2601  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
 2602  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
 2603  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
 2604  return SystemZ::CCMASK_TM_MIXED_MSB_1;
 2605  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
 2606  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
 2607  }
2608 
2609  // Looks like we've exhausted our options.
2610  return 0;
2611 }
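// A worked example: for "(X & 0x8000) != 0" the caller passes Mask == 0x8000
// and CmpVal == 0 with CCMask == CCMASK_CMP_NE. The single mask bit fits
// TMLL, and the function returns CCMASK_TM_SOME_1, i.e. "branch if at least
// one of the tested bits is 1".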
2612 
2613 // See whether C can be implemented as a TEST UNDER MASK instruction.
2614 // Update the arguments with the TM version if so.
2615 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2616  Comparison &C) {
2617  // Check that we have a comparison with a constant.
2618  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2619  if (!ConstOp1)
2620  return;
2621  uint64_t CmpVal = ConstOp1->getZExtValue();
2622 
2623  // Check whether the nonconstant input is an AND with a constant mask.
2624  Comparison NewC(C);
2625  uint64_t MaskVal;
2626  ConstantSDNode *Mask = nullptr;
2627  if (C.Op0.getOpcode() == ISD::AND) {
2628  NewC.Op0 = C.Op0.getOperand(0);
2629  NewC.Op1 = C.Op0.getOperand(1);
2630  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2631  if (!Mask)
2632  return;
2633  MaskVal = Mask->getZExtValue();
2634  } else {
2635  // There is no instruction to compare with a 64-bit immediate
2636  // so use TMHH instead if possible. We need an unsigned ordered
2637  // comparison with an i64 immediate.
2638  if (NewC.Op0.getValueType() != MVT::i64 ||
2639  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2640  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2641  NewC.ICmpType == SystemZICMP::SignedOnly)
2642  return;
2643  // Convert LE and GT comparisons into LT and GE.
2644  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2645  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2646  if (CmpVal == uint64_t(-1))
2647  return;
2648  CmpVal += 1;
2649  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2650  }
 2651  // If the low N bits of Op1 are zero then the low N bits of Op0 can
2652  // be masked off without changing the result.
2653  MaskVal = -(CmpVal & -CmpVal);
2654  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2655  }
2656  if (!MaskVal)
2657  return;
2658 
2659  // Check whether the combination of mask, comparison value and comparison
2660  // type are suitable.
2661  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2662  unsigned NewCCMask, ShiftVal;
2663  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2664  NewC.Op0.getOpcode() == ISD::SHL &&
2665  isSimpleShift(NewC.Op0, ShiftVal) &&
2666  (MaskVal >> ShiftVal != 0) &&
2667  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2668  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2669  MaskVal >> ShiftVal,
2670  CmpVal >> ShiftVal,
2671  SystemZICMP::Any))) {
2672  NewC.Op0 = NewC.Op0.getOperand(0);
2673  MaskVal >>= ShiftVal;
2674  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2675  NewC.Op0.getOpcode() == ISD::SRL &&
2676  isSimpleShift(NewC.Op0, ShiftVal) &&
2677  (MaskVal << ShiftVal != 0) &&
2678  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2679  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2680  MaskVal << ShiftVal,
2681  CmpVal << ShiftVal,
 2682  SystemZICMP::UnsignedOnly))) {
2683  NewC.Op0 = NewC.Op0.getOperand(0);
2684  MaskVal <<= ShiftVal;
2685  } else {
2686  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2687  NewC.ICmpType);
2688  if (!NewCCMask)
2689  return;
2690  }
2691 
2692  // Go ahead and make the change.
2693  C.Opcode = SystemZISD::TM;
2694  C.Op0 = NewC.Op0;
2695  if (Mask && Mask->getZExtValue() == MaskVal)
2696  C.Op1 = SDValue(Mask, 0);
2697  else
2698  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2699  C.CCValid = SystemZ::CCMASK_TM;
2700  C.CCMask = NewCCMask;
2701 }
2702 
2703 // See whether the comparison argument contains a redundant AND
2704 // and remove it if so. This sometimes happens due to the generic
2705 // BRCOND expansion.
2706 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2707  Comparison &C) {
2708  if (C.Op0.getOpcode() != ISD::AND)
2709  return;
2710  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2711  if (!Mask)
2712  return;
2713  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2714  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2715  return;
2716 
2717  C.Op0 = C.Op0.getOperand(0);
2718 }
2719 
2720 // Return a Comparison that tests the condition-code result of intrinsic
2721 // node Call against constant integer CC using comparison code Cond.
2722 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2723 // and CCValid is the set of possible condition-code results.
2724 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2725  SDValue Call, unsigned CCValid, uint64_t CC,
2726  ISD::CondCode Cond) {
2727  Comparison C(Call, SDValue(), SDValue());
2728  C.Opcode = Opcode;
2729  C.CCValid = CCValid;
2730  if (Cond == ISD::SETEQ)
2731  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2732  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2733  else if (Cond == ISD::SETNE)
2734  // ...and the inverse of that.
2735  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2736  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2737  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2738  // always true for CC>3.
2739  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2740  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2741  // ...and the inverse of that.
2742  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2743  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2744  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2745  // always true for CC>3.
2746  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2747  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2748  // ...and the inverse of that.
2749  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2750  else
2751  llvm_unreachable("Unexpected integer comparison type");
2752  C.CCMask &= CCValid;
2753  return C;
2754 }
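// This follows the usual SystemZ encoding in which CC values 0..3 correspond
// to mask bits 8, 4, 2 and 1. For instance, testing an intrinsic result for
// "CC == 2" with SETEQ yields 1 << (3 - 2) == 2, and the SETNE form is simply
// the complement of that within CCValid.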
2755 
 2756 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2757 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2758  ISD::CondCode Cond, const SDLoc &DL,
2759  SDValue Chain = SDValue(),
2760  bool IsSignaling = false) {
2761  if (CmpOp1.getOpcode() == ISD::Constant) {
2762  assert(!Chain);
2763  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2764  unsigned Opcode, CCValid;
2765  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2766  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2767  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2768  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2769  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2770  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2771  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2772  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2773  }
2774  Comparison C(CmpOp0, CmpOp1, Chain);
2775  C.CCMask = CCMaskForCondCode(Cond);
2776  if (C.Op0.getValueType().isFloatingPoint()) {
2777  C.CCValid = SystemZ::CCMASK_FCMP;
2778  if (!C.Chain)
2779  C.Opcode = SystemZISD::FCMP;
2780  else if (!IsSignaling)
2781  C.Opcode = SystemZISD::STRICT_FCMP;
2782  else
2783  C.Opcode = SystemZISD::STRICT_FCMPS;
2784  adjustForFNeg(C);
2785  } else {
2786  assert(!C.Chain);
2787  C.CCValid = SystemZ::CCMASK_ICMP;
2788  C.Opcode = SystemZISD::ICMP;
2789  // Choose the type of comparison. Equality and inequality tests can
2790  // use either signed or unsigned comparisons. The choice also doesn't
2791  // matter if both sign bits are known to be clear. In those cases we
2792  // want to give the main isel code the freedom to choose whichever
2793  // form fits best.
2794  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2795  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2796  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2797  C.ICmpType = SystemZICMP::Any;
2798  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2799  C.ICmpType = SystemZICMP::UnsignedOnly;
2800  else
2801  C.ICmpType = SystemZICMP::SignedOnly;
2802  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2803  adjustForRedundantAnd(DAG, DL, C);
2804  adjustZeroCmp(DAG, DL, C);
2805  adjustSubwordCmp(DAG, DL, C);
2806  adjustForSubtraction(DAG, DL, C);
2807  adjustForLTGFR(C);
2808  adjustICmpTruncate(DAG, DL, C);
2809  }
2810 
2811  if (shouldSwapCmpOperands(C)) {
2812  std::swap(C.Op0, C.Op1);
2813  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2814  }
2815 
2816  adjustForTestUnderMask(DAG, DL, C);
2817  return C;
2818 }
2819 
2820 // Emit the comparison instruction described by C.
2821 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2822  if (!C.Op1.getNode()) {
2823  SDNode *Node;
2824  switch (C.Op0.getOpcode()) {
 2825  case ISD::INTRINSIC_W_CHAIN:
2826  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2827  return SDValue(Node, 0);
 2828  case ISD::INTRINSIC_WO_CHAIN:
2829  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2830  return SDValue(Node, Node->getNumValues() - 1);
2831  default:
2832  llvm_unreachable("Invalid comparison operands");
2833  }
2834  }
2835  if (C.Opcode == SystemZISD::ICMP)
2836  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2837  DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2838  if (C.Opcode == SystemZISD::TM) {
2839  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2840  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2841  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2842  DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2843  }
2844  if (C.Chain) {
2845  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2846  return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2847  }
2848  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2849 }
2850 
2851 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2852 // 64 bits. Extend is the extension type to use. Store the high part
2853 // in Hi and the low part in Lo.
2854 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2855  SDValue Op0, SDValue Op1, SDValue &Hi,
2856  SDValue &Lo) {
2857  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2858  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2859  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2860  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2861  DAG.getConstant(32, DL, MVT::i64));
2862  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2863  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2864 }
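// In other words, the 32-bit [SU]MUL_LOHI is modelled roughly as
//
//   i64 Prod = extend(Op0) * extend(Op1);  // sign or zero extension per Extend
//   Hi = truncate(Prod >> 32);
//   Lo = truncate(Prod);
//
// where the choice of extension decides between SMUL_LOHI and UMUL_LOHI.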
2865 
2866 // Lower a binary operation that produces two VT results, one in each
2867 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2868 // and Opcode performs the GR128 operation. Store the even register result
2869 // in Even and the odd register result in Odd.
2870 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2871  unsigned Opcode, SDValue Op0, SDValue Op1,
2872  SDValue &Even, SDValue &Odd) {
2873  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2874  bool Is32Bit = is32Bit(VT);
2875  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2876  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2877 }
2878 
2879 // Return an i32 value that is 1 if the CC value produced by CCReg is
2880 // in the mask CCMask and 0 otherwise. CC is known to have a value
2881 // in CCValid, so other values can be ignored.
2882 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2883  unsigned CCValid, unsigned CCMask) {
2884  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2885  DAG.getConstant(0, DL, MVT::i32),
2886  DAG.getTargetConstant(CCValid, DL, MVT::i32),
2887  DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2888  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2889 }
2890 
 2891 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2892 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2893 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2894 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2895 // floating-point comparisons.
2896 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
 2897 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2898  switch (CC) {
2899  case ISD::SETOEQ:
2900  case ISD::SETEQ:
2901  switch (Mode) {
2902  case CmpMode::Int: return SystemZISD::VICMPE;
2903  case CmpMode::FP: return SystemZISD::VFCMPE;
 2904  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
 2905  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2906  }
2907  llvm_unreachable("Bad mode");
2908 
2909  case ISD::SETOGE:
2910  case ISD::SETGE:
2911  switch (Mode) {
2912  case CmpMode::Int: return 0;
2913  case CmpMode::FP: return SystemZISD::VFCMPHE;
 2914  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
 2915  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2916  }
2917  llvm_unreachable("Bad mode");
2918 
2919  case ISD::SETOGT:
2920  case ISD::SETGT:
2921  switch (Mode) {
2922  case CmpMode::Int: return SystemZISD::VICMPH;
2923  case CmpMode::FP: return SystemZISD::VFCMPH;
 2924  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
 2925  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2926  }
2927  llvm_unreachable("Bad mode");
2928 
2929  case ISD::SETUGT:
2930  switch (Mode) {
2931  case CmpMode::Int: return SystemZISD::VICMPHL;
2932  case CmpMode::FP: return 0;
2933  case CmpMode::StrictFP: return 0;
2934  case CmpMode::SignalingFP: return 0;
2935  }
2936  llvm_unreachable("Bad mode");
2937 
2938  default:
2939  return 0;
2940  }
2941 }
2942 
2943 // Return the SystemZISD vector comparison operation for CC or its inverse,
2944 // or 0 if neither can be done directly. Indicate in Invert whether the
2945 // result is for the inverse of CC. Mode is as above.
 2946 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
 2947  bool &Invert) {
2948  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2949  Invert = false;
2950  return Opcode;
2951  }
2952 
 2953  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2954  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2955  Invert = true;
2956  return Opcode;
2957  }
2958 
2959  return 0;
2960 }
2961 
2962 // Return a v2f64 that contains the extended form of elements Start and Start+1
2963 // of v4f32 value Op. If Chain is nonnull, return the strict form.
2964 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2965  SDValue Op, SDValue Chain) {
2966  int Mask[] = { Start, -1, Start + 1, -1 };
 2967  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2968  if (Chain) {
 2969  SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2970  return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2971  }
2972  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2973 }
2974 
2975 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2976 // producing a result of type VT. If Chain is nonnull, return the strict form.
2977 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2978  const SDLoc &DL, EVT VT,
2979  SDValue CmpOp0,
2980  SDValue CmpOp1,
2981  SDValue Chain) const {
2982  // There is no hardware support for v4f32 (unless we have the vector
2983  // enhancements facility 1), so extend the vector into two v2f64s
2984  // and compare those.
2985  if (CmpOp0.getValueType() == MVT::v4f32 &&
2986  !Subtarget.hasVectorEnhancements1()) {
2987  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2988  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2989  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2990  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2991  if (Chain) {
 2992  SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2993  SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2994  SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2995  SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2996  SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2997  H1.getValue(1), L1.getValue(1),
2998  HRes.getValue(1), LRes.getValue(1) };
2999  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3000  SDValue Ops[2] = { Res, NewChain };
3001  return DAG.getMergeValues(Ops, DL);
3002  }
3003  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3004  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3005  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3006  }
3007  if (Chain) {
3008  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3009  return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3010  }
3011  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3012 }
3013 
3014 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3015 // an integer mask of type VT. If Chain is nonnull, we have a strict
3016 // floating-point comparison. If in addition IsSignaling is true, we have
3017 // a strict signaling floating-point comparison.
3018 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3019  const SDLoc &DL, EVT VT,
3020  ISD::CondCode CC,
3021  SDValue CmpOp0,
3022  SDValue CmpOp1,
3023  SDValue Chain,
3024  bool IsSignaling) const {
3025  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3026  assert (!Chain || IsFP);
3027  assert (!IsSignaling || Chain);
3028  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3029  Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3030  bool Invert = false;
3031  SDValue Cmp;
3032  switch (CC) {
3033  // Handle tests for order using (or (ogt y x) (oge x y)).
3034  case ISD::SETUO:
3035  Invert = true;
 3036  LLVM_FALLTHROUGH;
3037  case ISD::SETO: {
3038  assert(IsFP && "Unexpected integer comparison");
3039  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3040  DL, VT, CmpOp1, CmpOp0, Chain);
3041  SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3042  DL, VT, CmpOp0, CmpOp1, Chain);
3043  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3044  if (Chain)
3045  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3046  LT.getValue(1), GE.getValue(1));
3047  break;
3048  }
3049 
3050  // Handle <> tests using (or (ogt y x) (ogt x y)).
3051  case ISD::SETUEQ:
3052  Invert = true;
 3053  LLVM_FALLTHROUGH;
3054  case ISD::SETONE: {
3055  assert(IsFP && "Unexpected integer comparison");
3056  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3057  DL, VT, CmpOp1, CmpOp0, Chain);
3058  SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3059  DL, VT, CmpOp0, CmpOp1, Chain);
3060  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3061  if (Chain)
3062  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3063  LT.getValue(1), GT.getValue(1));
3064  break;
3065  }
3066 
3067  // Otherwise a single comparison is enough. It doesn't really
3068  // matter whether we try the inversion or the swap first, since
3069  // there are no cases where both work.
3070  default:
3071  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3072  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3073  else {
 3074  CC = ISD::getSetCCSwappedOperands(CC);
3075  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3076  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3077  else
3078  llvm_unreachable("Unhandled comparison");
3079  }
3080  if (Chain)
3081  Chain = Cmp.getValue(1);
3082  break;
3083  }
3084  if (Invert) {
3085  SDValue Mask =
3086  DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3087  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3088  }
3089  if (Chain && Chain.getNode() != Cmp.getNode()) {
3090  SDValue Ops[2] = { Cmp, Chain };
3091  Cmp = DAG.getMergeValues(Ops, DL);
3092  }
3093  return Cmp;
3094 }
3095 
3096 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3097  SelectionDAG &DAG) const {
3098  SDValue CmpOp0 = Op.getOperand(0);
3099  SDValue CmpOp1 = Op.getOperand(1);
3100  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3101  SDLoc DL(Op);
3102  EVT VT = Op.getValueType();
3103  if (VT.isVector())
3104  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3105 
3106  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3107  SDValue CCReg = emitCmp(DAG, DL, C);
3108  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3109 }
3110 
3111 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3112  SelectionDAG &DAG,
3113  bool IsSignaling) const {
3114  SDValue Chain = Op.getOperand(0);
3115  SDValue CmpOp0 = Op.getOperand(1);
3116  SDValue CmpOp1 = Op.getOperand(2);
3117  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3118  SDLoc DL(Op);
3119  EVT VT = Op.getNode()->getValueType(0);
3120  if (VT.isVector()) {
3121  SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3122  Chain, IsSignaling);
3123  return Res.getValue(Op.getResNo());
3124  }
3125 
3126  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3127  SDValue CCReg = emitCmp(DAG, DL, C);
3128  CCReg->setFlags(Op->getFlags());
3129  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3130  SDValue Ops[2] = { Result, CCReg.getValue(1) };
3131  return DAG.getMergeValues(Ops, DL);
3132 }
3133 
3134 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3135  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3136  SDValue CmpOp0 = Op.getOperand(2);
3137  SDValue CmpOp1 = Op.getOperand(3);
3138  SDValue Dest = Op.getOperand(4);
3139  SDLoc DL(Op);
3140 
3141  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3142  SDValue CCReg = emitCmp(DAG, DL, C);
3143  return DAG.getNode(
3144  SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3145  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3146  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3147 }
3148 
3149 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3150 // allowing Pos and Neg to be wider than CmpOp.
3151 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3152  return (Neg.getOpcode() == ISD::SUB &&
3153  Neg.getOperand(0).getOpcode() == ISD::Constant &&
3154  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3155  Neg.getOperand(1) == Pos &&
3156  (Pos == CmpOp ||
3157  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3158  Pos.getOperand(0) == CmpOp)));
3159 }
3160 
3161 // Return the absolute or negative absolute of Op; IsNegative decides which.
 3162 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3163  bool IsNegative) {
3164  Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3165  if (IsNegative)
3166  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3167  DAG.getConstant(0, DL, Op.getValueType()), Op);
3168  return Op;
3169 }
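// Together with isAbsolute above, this lets lowerSELECT_CC turn patterns like
//   (select_cc X, 0, X, (sub 0, X), setge)  ->  |X|
//   (select_cc X, 0, X, (sub 0, X), setlt)  ->  -|X|
// into an ABS node (optionally re-negated), which can later be selected to
// LOAD POSITIVE / LOAD NEGATIVE style instructions.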
3170 
3171 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3172  SelectionDAG &DAG) const {
3173  SDValue CmpOp0 = Op.getOperand(0);
3174  SDValue CmpOp1 = Op.getOperand(1);
3175  SDValue TrueOp = Op.getOperand(2);
3176  SDValue FalseOp = Op.getOperand(3);
3177  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3178  SDLoc DL(Op);
3179 
3180  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3181 
3182  // Check for absolute and negative-absolute selections, including those
3183  // where the comparison value is sign-extended (for LPGFR and LNGFR).
3184  // This check supplements the one in DAGCombiner.
3185  if (C.Opcode == SystemZISD::ICMP &&
3186  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3187  C.CCMask != SystemZ::CCMASK_CMP_NE &&
3188  C.Op1.getOpcode() == ISD::Constant &&
3189  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3190  if (isAbsolute(C.Op0, TrueOp, FalseOp))
3191  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3192  if (isAbsolute(C.Op0, FalseOp, TrueOp))
3193  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3194  }
3195 
3196  SDValue CCReg = emitCmp(DAG, DL, C);
3197  SDValue Ops[] = {TrueOp, FalseOp,
3198  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3199  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3200 
3201  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3202 }
3203 
3204 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3205  SelectionDAG &DAG) const {
3206  SDLoc DL(Node);
3207  const GlobalValue *GV = Node->getGlobal();
3208  int64_t Offset = Node->getOffset();
3209  EVT PtrVT = getPointerTy(DAG.getDataLayout());
 3210  CodeModel::Model CM = DAG.getTarget().getCodeModel();
3211 
3212  SDValue Result;
3213  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3214  if (isInt<32>(Offset)) {
3215  // Assign anchors at 1<<12 byte boundaries.
3216  uint64_t Anchor = Offset & ~uint64_t(0xfff);
3217  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3218  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3219 
3220  // The offset can be folded into the address if it is aligned to a
3221  // halfword.
3222  Offset -= Anchor;
3223  if (Offset != 0 && (Offset & 1) == 0) {
3224  SDValue Full =
3225  DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3226  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3227  Offset = 0;
3228  }
3229  } else {
3230  // Conservatively load a constant offset greater than 32 bits into a
3231  // register below.
3232  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3233  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3234  }
3235  } else {
3236  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3237  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3238  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
 3239  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3240  }
3241 
3242  // If there was a non-zero offset that we didn't fold, create an explicit
3243  // addition for it.
3244  if (Offset != 0)
3245  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3246  DAG.getConstant(Offset, DL, PtrVT));
3247 
3248  return Result;
3249 }
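// For example, with an in-range offset such as GV + 0x1232 the anchor is
// GV + 0x1000 (the offset rounded down to a 4K boundary) and the remaining
// 0x232 is even, so it is folded into a PCREL_OFFSET node; an odd remainder
// such as 0x233 is instead left to the explicit ADD of the constant.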
3250 
3251 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3252  SelectionDAG &DAG,
3253  unsigned Opcode,
3254  SDValue GOTOffset) const {
3255  SDLoc DL(Node);
3256  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3257  SDValue Chain = DAG.getEntryNode();
3258  SDValue Glue;
3259 
 3260  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
 3261  CallingConv::GHC)
3262  report_fatal_error("In GHC calling convention TLS is not supported");
3263 
3264  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3265  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3266  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3267  Glue = Chain.getValue(1);
3268  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3269  Glue = Chain.getValue(1);
3270 
3271  // The first call operand is the chain and the second is the TLS symbol.
3273  Ops.push_back(Chain);
3274  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3275  Node->getValueType(0),
3276  0, 0));
3277 
3278  // Add argument registers to the end of the list so that they are
3279  // known live into the call.
3280  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3281  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3282 
3283  // Add a register mask operand representing the call-preserved registers.
3284  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3285  const uint32_t *Mask =
 3286  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3287  assert(Mask && "Missing call preserved mask for calling convention");
3288  Ops.push_back(DAG.getRegisterMask(Mask));
3289 
3290  // Glue the call to the argument copies.
3291  Ops.push_back(Glue);
3292 
3293  // Emit the call.
3294  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3295  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3296  Glue = Chain.getValue(1);
3297 
3298  // Copy the return value from %r2.
3299  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3300 }
3301 
3302 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3303  SelectionDAG &DAG) const {
3304  SDValue Chain = DAG.getEntryNode();
3305  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3306 
3307  // The high part of the thread pointer is in access register 0.
3308  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3309  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3310 
3311  // The low part of the thread pointer is in access register 1.
3312  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3313  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3314 
3315  // Merge them into a single 64-bit address.
3316  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3317  DAG.getConstant(32, DL, PtrVT));
3318  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3319 }
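// On z/Architecture the 64-bit thread pointer is kept split across access
// registers %a0 (high 32 bits) and %a1 (low 32 bits), so the DAG built here
// typically selects to a sequence along the lines of
//   ear  %r1, %a0
//   sllg %r1, %r1, 32
//   ear  %r1, %a1
// (the register choice is up to the allocator; the instructions are shown
// only for illustration).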
3320 
3321 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3322  SelectionDAG &DAG) const {
3323  if (DAG.getTarget().useEmulatedTLS())
3324  return LowerToTLSEmulatedModel(Node, DAG);
3325  SDLoc DL(Node);
3326  const GlobalValue *GV = Node->getGlobal();
3327  EVT PtrVT = getPointerTy(DAG.getDataLayout());
 3328  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3329 
 3330  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
 3331  CallingConv::GHC)
3332  report_fatal_error("In GHC calling convention TLS is not supported");
3333 
3334  SDValue TP = lowerThreadPointer(DL, DAG);
3335 
3336  // Get the offset of GA from the thread pointer, based on the TLS model.
3337  SDValue Offset;
3338  switch (model) {
3339  case TLSModel::GeneralDynamic: {
3340  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
 3341  SystemZConstantPoolValue *CPV =
 3342  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3343 
3344  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3345  Offset = DAG.getLoad(
3346  PtrVT, DL, DAG.getEntryNode(), Offset,
 3347  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3348 
3349  // Call __tls_get_offset to retrieve the offset.
3350  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3351  break;
3352  }
3353 
3354  case TLSModel::LocalDynamic: {
3355  // Load the GOT offset of the module ID.
 3356  SystemZConstantPoolValue *CPV =
 3357  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3358 
3359  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3360  Offset = DAG.getLoad(
3361  PtrVT, DL, DAG.getEntryNode(), Offset,
 3362  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3363 
3364  // Call __tls_get_offset to retrieve the module base offset.
3365  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3366 
3367  // Note: The SystemZLDCleanupPass will remove redundant computations
3368  // of the module base offset. Count total number of local-dynamic
3369  // accesses to trigger execution of that pass.
 3370  SystemZMachineFunctionInfo* MFI =
 3371  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
 3372  MFI->incNumLocalDynamicTLSAccesses();
3373 
3374  // Add the per-symbol offset.
 3375  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3376 
3377  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3378  DTPOffset = DAG.getLoad(
3379  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
 3380  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3381 
3382  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3383  break;
3384  }
3385 
3386  case TLSModel::InitialExec: {
3387  // Load the offset from the GOT.
3388  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
 3389  SystemZII::MO_INDNTPOFF);
3390  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3391  Offset =
3392  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
 3393  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3394  break;
3395  }
3396 
3397  case TLSModel::LocalExec: {
3398  // Force the offset into the constant pool and load it from there.
 3399  SystemZConstantPoolValue *CPV =
 3400  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3401 
3402  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3403  Offset = DAG.getLoad(
3404  PtrVT, DL, DAG.getEntryNode(), Offset,
 3405  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3406  break;
3407  }
3408  }
3409 
3410  // Add the base and offset together.
3411  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3412 }
3413 
3414 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3415  SelectionDAG &DAG) const {
3416  SDLoc DL(Node);
3417  const BlockAddress *BA = Node->getBlockAddress();
3418  int64_t Offset = Node->getOffset();
3419  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3420 
3421  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3422  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3423  return Result;
3424 }
3425 
3426 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3427  SelectionDAG &DAG) const {
3428  SDLoc DL(JT);
3429  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3430  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3431 
3432  // Use LARL to load the address of the table.
3433  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3434 }
3435 
3436 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3437  SelectionDAG &DAG) const {
3438  SDLoc DL(CP);
3439  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3440 
3441  SDValue Result;
3442  if (CP->isMachineConstantPoolEntry())
3443  Result =
3444  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3445  else
3446  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3447  CP->getOffset());
3448 
3449  // Use LARL to load the address of the constant pool entry.
3450  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3451 }
3452 
3453 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3454  SelectionDAG &DAG) const {
3455  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3456  MachineFunction &MF = DAG.getMachineFunction();
3457  MachineFrameInfo &MFI = MF.getFrameInfo();
3458  MFI.setFrameAddressIsTaken(true);
3459 
3460  SDLoc DL(Op);
3461  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3462  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3463 
3464  // By definition, the frame address is the address of the back chain. (In
3465  // the case of a packed stack without a backchain, return the address where
3466  // the backchain would have been stored; this will either be unused space or
3467  // contain a saved register.)
3468  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3469  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3470 
3471  // FIXME The frontend should detect this case.
3472  if (Depth > 0) {
3473  report_fatal_error("Unsupported stack frame traversal count");
3474  }
3475 
3476  return BackChain;
3477 }
3478 
3479 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3480  SelectionDAG &DAG) const {
3481  MachineFunction &MF = DAG.getMachineFunction();
3482  MachineFrameInfo &MFI = MF.getFrameInfo();
3483  MFI.setReturnAddressIsTaken(true);
3484 
3485  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3486  return SDValue();
3487 
3488  SDLoc DL(Op);
3489  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3490  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3491 
3492  // FIXME The frontend should detect this case.
3493  if (Depth > 0) {
3494  report_fatal_error("Unsupported stack frame traversal count");
3495  }
3496 
3497  // Return R14D, which has the return address. Mark it an implicit live-in.
3498  Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3499  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3500 }
3501 
3502 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3503  SelectionDAG &DAG) const {
3504  SDLoc DL(Op);
3505  SDValue In = Op.getOperand(0);
3506  EVT InVT = In.getValueType();
3507  EVT ResVT = Op.getValueType();
3508 
3509  // Convert loads directly. This is normally done by DAGCombiner,
3510  // but we need this case for bitcasts that are created during lowering
3511  // and which are then lowered themselves.
3512  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3513  if (ISD::isNormalLoad(LoadN)) {
3514  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3515  LoadN->getBasePtr(), LoadN->getMemOperand());
3516  // Update the chain uses.
3517  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3518  return NewLoad;
3519  }
3520 
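  // Note: an f32 value occupies the high 32 bits of a 64-bit FPR, which is
  // why both directions of the i32<->f32 bitcast below go through an i64/f64
  // pair, moving the payload into or out of bits 63..32 via subreg_h32.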
3521  if (InVT == MVT::i32 && ResVT == MVT::f32) {
3522  SDValue In64;
3523  if (Subtarget.hasHighWord()) {
3524  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3525  MVT::i64);
3526  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3527  MVT::i64, SDValue(U64, 0), In);
3528  } else {
3529  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3530  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3531  DAG.getConstant(32, DL, MVT::i64));
3532  }
3533  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3534  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3535  DL, MVT::f32, Out64);
3536  }
3537  if (InVT == MVT::f32 && ResVT == MVT::i32) {
3538  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3539  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3540  MVT::f64, SDValue(U64, 0), In);
3541  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3542  if (Subtarget.hasHighWord())
3543  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3544  MVT::i32, Out64);
3545  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3546  DAG.getConstant(32, DL, MVT::i64));
3547  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3548  }
3549  llvm_unreachable("Unexpected bitcast combination");
3550 }
3551 
3552 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3553  SelectionDAG &DAG) const {
3554 
3555  if (Subtarget.isTargetXPLINK64())
3556  return lowerVASTART_XPLINK(Op, DAG);
3557  else
3558  return lowerVASTART_ELF(Op, DAG);
3559 }
3560 
3561 SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3562  SelectionDAG &DAG) const {
3563  MachineFunction &MF = DAG.getMachineFunction();
3564  SystemZMachineFunctionInfo *FuncInfo =
3565  MF.getInfo<SystemZMachineFunctionInfo>();
3566 
3567  SDLoc DL(Op);
3568 
3569  // vastart just stores the address of the VarArgsFrameIndex slot into the
3570  // memory location argument.
3571  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3572  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3573  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3574  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3575  MachinePointerInfo(SV));
3576 }
3577 
3578 SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3579  SelectionDAG &DAG) const {
3580  MachineFunction &MF = DAG.getMachineFunction();
3581  SystemZMachineFunctionInfo *FuncInfo =
3582  MF.getInfo<SystemZMachineFunctionInfo>();
3583  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3584 
3585  SDValue Chain = Op.getOperand(0);
3586  SDValue Addr = Op.getOperand(1);
3587  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3588  SDLoc DL(Op);
3589 
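  // (On the SystemZ ELF ABI, va_list is roughly a four-field structure:
  // { gpr count, fpr count, overflow_arg_area, reg_save_area }, each field
  // taking an 8-byte slot; the stores below populate those four slots.)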
3590  // The initial values of each field.
3591  const unsigned NumFields = 4;
3592  SDValue Fields[NumFields] = {
3593  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3594  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3595  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3596  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3597  };
3598 
3599  // Store each field into its respective slot.
3600  SDValue MemOps[NumFields];
3601  unsigned Offset = 0;
3602  for (unsigned I = 0; I < NumFields; ++I) {
3603  SDValue FieldAddr = Addr;
3604  if (Offset != 0)
3605  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3606  DAG.getIntPtrConstant(Offset, DL));
3607  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3608  MachinePointerInfo(SV, Offset));
3609  Offset += 8;
3610  }
3611  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3612 }
3613 
3614 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3615  SelectionDAG &DAG) const {
3616  SDValue Chain = Op.getOperand(0);
3617  SDValue DstPtr = Op.getOperand(1);
3618  SDValue SrcPtr = Op.getOperand(2);
3619  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3620  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3621  SDLoc DL(Op);
3622 
3623  uint32_t Sz =
3624  Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3625  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3626  Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3627  /*isTailCall*/ false, MachinePointerInfo(DstSV),
3628  MachinePointerInfo(SrcSV));
3629 }
3630 
3631 SDValue
3632 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3633  SelectionDAG &DAG) const {
3634  if (Subtarget.isTargetXPLINK64())
3635  return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3636  else
3637  return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3638 }
3639 
3640 SDValue
3641 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3642  SelectionDAG &DAG) const {
3643  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3644  MachineFunction &MF = DAG.getMachineFunction();
3645  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3646  SDValue Chain = Op.getOperand(0);
3647  SDValue Size = Op.getOperand(1);
3648  SDValue Align = Op.getOperand(2);
3649  SDLoc DL(Op);
3650 
3651  // If the user has set the "no-realign-stack" function attribute, ignore
3652  // alloca alignments.
3653  uint64_t AlignVal =
3654  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3655 
3656  uint64_t StackAlign = TFI->getStackAlignment();
3657  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3658  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3659 
3660  SDValue NeededSpace = Size;
3661 
3662  // Add extra space for alignment if needed.
3663  EVT PtrVT = getPointerTy(MF.getDataLayout());
3664  if (ExtraAlignSpace)
3665  NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3666  DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3667 
3668  bool IsSigned = false;
3669  bool DoesNotReturn = false;
3670  bool IsReturnValueUsed = false;
3671  EVT VT = Op.getValueType();
3672  SDValue AllocaCall =
3673  makeExternalCall(Chain, DAG, "@@ALCAXP", VT, makeArrayRef(NeededSpace),
3674  CallingConv::C, IsSigned, DL, DoesNotReturn,
3675  IsReturnValueUsed)
3676  .first;
3677 
3678  // Perform a CopyFromReg from %GPR4 (the stack pointer register). Chain and
3679  // glue it to the end of the call in order to ensure it isn't broken up from
3680  // the call sequence.
3681  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3682  Register SPReg = Regs.getStackPointerRegister();
3683  Chain = AllocaCall.getValue(1);
3684  SDValue Glue = AllocaCall.getValue(2);
3685  SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
3686  Chain = NewSPRegNode.getValue(1);
3687 
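  // As in the ELF path below, the dynamically allocated data sits above the
  // (not yet known) outgoing argument area, so emit an ADJDYNALLOC
  // placeholder that is replaced with the real offset once the frame layout
  // is known.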
3688  MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
3689  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
3690  SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
3691 
3692  // Dynamically realign if needed.
3693  if (ExtraAlignSpace) {
3694  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3695  DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3696  Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
3697  DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
3698  }
3699 
3700  SDValue Ops[2] = {Result, Chain};
3701  return DAG.getMergeValues(Ops, DL);
3702 }
3703 
3704 SDValue
3705 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
3706  SelectionDAG &DAG) const {
3707  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3708  MachineFunction &MF = DAG.getMachineFunction();
3709  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3710  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3711 
3712  SDValue Chain = Op.getOperand(0);
3713  SDValue Size = Op.getOperand(1);
3714  SDValue Align = Op.getOperand(2);
3715  SDLoc DL(Op);
3716 
3717  // If the user has set the "no-realign-stack" function attribute, ignore
3718  // alloca alignments.
3719  uint64_t AlignVal =
3720  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3721 
3722  uint64_t StackAlign = TFI->getStackAlignment();
3723  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3724  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3725 
3726  Register SPReg = getStackPointerRegisterToSaveRestore();
3727  SDValue NeededSpace = Size;
3728 
3729  // Get a reference to the stack pointer.
3730  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3731 
3732  // If we need a backchain, save it now.
3733  SDValue Backchain;
3734  if (StoreBackchain)
3735  Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3736  MachinePointerInfo());
3737 
3738  // Add extra space for alignment if needed.
3739  if (ExtraAlignSpace)
3740  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3741  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3742 
3743  // Get the new stack pointer value.
3744  SDValue NewSP;
3745  if (hasInlineStackProbe(MF)) {
3746  NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3747  DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3748  Chain = NewSP.getValue(1);
3749  }
3750  else {
3751  NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3752  // Copy the new stack pointer back.
3753  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3754  }
3755 
3756  // The allocated data lives above the 160 bytes allocated for the standard
3757  // frame, plus any outgoing stack arguments. We don't know how much that
3758  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3759  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3760  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3761 
3762  // Dynamically realign if needed.
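  // (For example, with a 32-byte-aligned alloca and an 8-byte stack
  // alignment, 24 extra bytes were reserved above; adding them here and then
  // clearing the low bits rounds Result up to the next 32-byte boundary.)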
3763  if (RequiredAlign > StackAlign) {
3764  Result =
3765  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3766  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3767  Result =
3768  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3769  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3770  }
3771 
3772  if (StoreBackchain)
3773  Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3774  MachinePointerInfo());
3775 
3776  SDValue Ops[2] = { Result, Chain };
3777  return DAG.getMergeValues(Ops, DL);
3778 }
3779 
3780 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3781  SDValue Op, SelectionDAG &DAG) const {
3782  SDLoc DL(Op);
3783 
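  // The dynamic area starts above the frame's register save and outgoing
  // argument space; ADJDYNALLOC stands for that offset until the final frame
  // layout is known.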
3784  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3785 }
3786 
3787 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3788  SelectionDAG &DAG) const {
3789  EVT VT = Op.getValueType();
3790  SDLoc DL(Op);
3791  SDValue Ops[2];
3792  if (is32Bit(VT))
3793  // Just do a normal 64-bit multiplication and extract the results.
3794  // We define this so that it can be used for constant division.
3795  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3796  Op.getOperand(1), Ops[1], Ops[0]);
3797  else if (Subtarget.hasMiscellaneousExtensions2())
3798  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3799  // the high result in the even register. ISD::SMUL_LOHI is defined to
3800  // return the low half first, so the results are in reverse order.
3801  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3802  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3803  else {
3804  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3805  //
3806  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3807  //
3808  // but using the fact that the upper halves lh and rh are either all zeros
3809  // or all ones (so lh * rl == -(lh & rl) and ll * rh == -(ll & rh)):
3810  //
3811  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3812  //
3813  // and grouping the rightmost terms together, since they are cheaper to
3814  // compute than the multiplication:
3815  //
3816  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3817  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3818  SDValue LL = Op.getOperand(0);
3819  SDValue RL = Op.getOperand(1);
3820  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3821  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3822  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3823  // the high result in the even register. ISD::SMUL_LOHI is defined to
3824  // return the low half first, so the results are in reverse order.
3825  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3826  LL, RL, Ops[1], Ops[0]);
3827  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3828  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3829  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3830  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3831  }
3832  return DAG.getMergeValues(Ops, DL);
3833 }
3834 
3835 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3836  SelectionDAG &DAG) const {
3837  EVT VT = Op.getValueType();
3838  SDLoc DL(Op);
3839  SDValue Ops[2];
3840  if (is32Bit(VT))
3841  // Just do a normal 64-bit multiplication and extract the results.
3842  // We define this so that it can be used for constant division.
3843  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3844  Op.getOperand(1), Ops[1], Ops[0]);
3845  else
3846  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3847  // the high result in the even register. ISD::UMUL_LOHI is defined to
3848  // return the low half first, so the results are in reverse order.
3849  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3850  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3851  return DAG.getMergeValues(Ops, DL);
3852 }
3853 
3854 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3855  SelectionDAG &DAG) const {
3856  SDValue Op0 = Op.getOperand(0);
3857  SDValue Op1 = Op.getOperand(1);
3858  EVT VT = Op.getValueType();
3859  SDLoc DL(Op);
3860 
3861  // We use DSGF for 32-bit division. This means the first operand must
3862  // always be 64-bit, and the second operand should be 32-bit whenever
3863  // that is possible, to improve performance.
3864  if (is32Bit(VT))
3865  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3866  else if (DAG.ComputeNumSignBits(Op1) > 32)
3867  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3868 
3869  // DSG(F) returns the remainder in the even register and the
3870  // quotient in the odd register.
3871  SDValue Ops[2];
3872  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3873  return DAG.getMergeValues(Ops, DL);
3874 }
3875 
3876 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3877  SelectionDAG &DAG) const {
3878  EVT VT = Op.getValueType();
3879  SDLoc DL(Op);
3880 
3881  // DL(G) returns the remainder in the even register and the
3882  // quotient in the odd register.
3883  SDValue Ops[2];
3884  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3885  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3886  return DAG.getMergeValues(Ops, DL);
3887 }
3888 
3889 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3890  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3891 
3892  // Get the known-zero masks for each operand.
3893  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3894  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3895  DAG.computeKnownBits(Ops[1])};
3896 
3897  // See if the upper 32 bits of one operand and the lower 32 bits of the
3898  // other are known zero. They are the low and high operands respectively.
3899  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3900  Known[1].Zero.getZExtValue() };
3901  unsigned High, Low;
3902  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3903  High = 1, Low = 0;
3904  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3905  High = 0, Low = 1;
3906  else
3907  return Op;
3908 
3909  SDValue LowOp = Ops[Low];
3910  SDValue HighOp = Ops[High];
3911 
3912  // If the high part is a constant, we're better off using IILH.
3913  if (HighOp.getOpcode() == ISD::Constant)
3914  return Op;
3915 
3916  // If the low part is a constant that is outside the range of LHI,
3917  // then we're better off using IILF.
3918  if (LowOp.getOpcode() == ISD::Constant) {
3919  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3920  if (!isInt<16>(Value))
3921  return Op;
3922  }
3923 
3924  // Check whether the high part is an AND that doesn't change the
3925  // high 32 bits and just masks out low bits. We can skip it if so.
3926  if (HighOp.getOpcode() == ISD::AND &&
3927  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3928  SDValue HighOp0 = HighOp.getOperand(0);
3929  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3930  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3931  HighOp = HighOp0;
3932  }
3933 
3934  // Take advantage of the fact that all GR32 operations only change the
3935  // low 32 bits by truncating Low to an i32 and inserting it directly
3936  // using a subreg. The interesting cases are those where the truncation
3937  // can be folded.
3938  SDLoc DL(Op);
3939  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3940  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3941  MVT::i64, HighOp, Low32);
3942 }
3943 
3944 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3945 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3946  SelectionDAG &DAG) const {
3947  SDNode *N = Op.getNode();
3948  SDValue LHS = N->getOperand(0);
3949  SDValue RHS = N->getOperand(1);
3950  SDLoc DL(N);
3951  unsigned BaseOp = 0;
3952  unsigned CCValid = 0;
3953  unsigned CCMask = 0;
3954 
3955  switch (Op.getOpcode()) {
3956  default: llvm_unreachable("Unknown instruction!");
3957  case ISD::SADDO:
3958  BaseOp = SystemZISD::SADDO;
3959  CCValid = SystemZ::CCMASK_ARITH;
3960  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3961  break;
3962  case ISD::SSUBO:
3963  BaseOp = SystemZISD::SSUBO;
3964  CCValid = SystemZ::CCMASK_ARITH;
3965  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3966  break;
3967  case ISD::UADDO:
3968  BaseOp = SystemZISD::UADDO;
3969  CCValid = SystemZ::CCMASK_LOGICAL;
3970  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3971  break;
3972  case ISD::USUBO:
3973  BaseOp = SystemZISD::USUBO;
3974  CCValid = SystemZ::CCMASK_LOGICAL;
3975  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3976  break;
3977  }
3978 
3979  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3980  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3981 
3982  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3983  if (N->getValueType(1) == MVT::i1)
3984  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3985 
3986  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3987 }
3988 
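  // The two helpers below check whether a chained carry (resp. borrow) value
  // ultimately originates from a UADDO (resp. USUBO) node; only then is it in
  // the CC form that the ADDCARRY/SUBCARRY lowering can consume directly.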
3989 static bool isAddCarryChain(SDValue Carry) {
3990  while (Carry.getOpcode() == ISD::ADDCARRY)
3991  Carry = Carry.getOperand(2);
3992  return Carry.getOpcode() == ISD::UADDO;
3993 }
3994 
3995 static bool isSubBorrowChain(SDValue Carry) {
3996  while (Carry.getOpcode() == ISD::SUBCARRY)
3997  Carry = Carry.getOperand(2);
3998  return Carry.getOpcode() == ISD::USUBO;
3999 }
4000 
4001 // Lower ADDCARRY/SUBCARRY nodes.
4002 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
4003  SelectionDAG &DAG) const {
4004 
4005  SDNode *N = Op.getNode();
4006  MVT VT = N->getSimpleValueType(0);
4007 
4008  // Let legalize expand this if it isn't a legal type yet.
4009  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4010  return SDValue();
4011 
4012  SDValue LHS = N->getOperand(0);
4013  SDValue RHS = N->getOperand(1);
4014  SDValue Carry = Op.getOperand(2);
4015  SDLoc DL(N);
4016  unsigned BaseOp = 0;
4017  unsigned CCValid = 0;
4018  unsigned CCMask = 0;
4019 
4020  switch (Op.getOpcode()) {
4021  default: llvm_unreachable("Unknown instruction!");
4022  case ISD::ADDCARRY:
4023  if (!isAddCarryChain(Carry))
4024  return SDValue();
4025 
4026  BaseOp = SystemZISD::ADDCARRY;
4027  CCValid = SystemZ::CCMASK_LOGICAL;
4028  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4029  break;
4030  case ISD::SUBCARRY:
4031  if (!isSubBorrowChain(Carry))
4032  return SDValue();
4033 
4034  BaseOp = SystemZISD::SUBCARRY;
4035  CCValid = SystemZ::CCMASK_LOGICAL;
4036  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4037  break;
4038  }
4039 
4040  // Set the condition code from the carry flag.
4041  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4042  DAG.getConstant(CCValid, DL, MVT::i32),
4043  DAG.getConstant(CCMask, DL, MVT::i32));
4044 
4045  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4046  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4047 
4048  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4049  if (N->getValueType(1) == MVT::i1)
4050  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4051 
4052  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4053 }
4054 
4055 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4056  SelectionDAG &DAG) const {
4057  EVT VT = Op.getValueType();
4058  SDLoc DL(