LLVM  9.0.0svn
SystemZISelLowering.cpp
1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
17 #include "SystemZTargetMachine.h"
22 #include "llvm/IR/Intrinsics.h"
23 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/Support/KnownBits.h"
26 #include <cctype>
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "systemz-lower"
31 
32 namespace {
33 // Represents information about a comparison.
34 struct Comparison {
35  Comparison(SDValue Op0In, SDValue Op1In)
36  : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
37 
38  // The operands to the comparison.
39  SDValue Op0, Op1;
40 
41  // The opcode that should be used to compare Op0 and Op1.
42  unsigned Opcode;
43 
44  // A SystemZICMP value. Only used for integer comparisons.
45  unsigned ICmpType;
46 
47  // The mask of CC values that Opcode can produce.
48  unsigned CCValid;
49 
50  // The mask of CC values for which the original condition is true.
51  unsigned CCMask;
52 };
53 } // end anonymous namespace
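The CCValid / CCMask fields above use the SystemZ convention in which the four possible condition-code values 0-3 are represented by the bits 8, 4, 2 and 1 of a 4-bit mask, matching the mask field of branch-on-condition instructions. A minimal, self-contained sketch of that convention (illustrative only, not part of this file):

#include <cassert>

// Mask bit selecting a given CC value: CC 0 -> 8, CC 1 -> 4, CC 2 -> 2, CC 3 -> 1.
static unsigned ccMaskBit(unsigned CC) {
  assert(CC < 4 && "SystemZ CC is a 2-bit value");
  return 1u << (3 - CC);
}

int main() {
  // For an integer compare, CC 0 means "equal", CC 1 "first operand low" and
  // CC 2 "first operand high", so a branch on "less than or equal" tests the
  // mask bits for CC 0 and CC 1.
  unsigned CCMaskLE = ccMaskBit(0) | ccMaskBit(1);
  assert(CCMaskLE == 12);
  return 0;
}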
54 
55 // Classify VT as either 32 or 64 bit.
56 static bool is32Bit(EVT VT) {
57  switch (VT.getSimpleVT().SimpleTy) {
58  case MVT::i32:
59  return true;
60  case MVT::i64:
61  return false;
62  default:
63  llvm_unreachable("Unsupported type");
64  }
65 }
66 
67 // Return a version of MachineOperand that can be safely used before the
68 // final use.
69 static MachineOperand earlyUseOperand(MachineOperand Op) {
70  if (Op.isReg())
71  Op.setIsKill(false);
72  return Op;
73 }
74 
75 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
76  const SystemZSubtarget &STI)
77  : TargetLowering(TM), Subtarget(STI) {
78  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
79 
80  // Set up the register classes.
81  if (Subtarget.hasHighWord())
82  addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
83  else
84  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
85  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
86  if (Subtarget.hasVector()) {
87  addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
88  addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
89  } else {
90  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
91  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
92  }
93  if (Subtarget.hasVectorEnhancements1())
94  addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
95  else
96  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
97 
98  if (Subtarget.hasVector()) {
99  addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
100  addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
101  addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
102  addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
103  addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
104  addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
105  }
106 
107  // Compute derived properties from the register classes
109 
110  // Set up special registers.
112 
113  // TODO: It may be better to default to latency-oriented scheduling, however
114  // LLVM's current latency-oriented scheduler can't handle physreg definitions
115  // such as SystemZ has with CC, so set this to the register-pressure
116  // scheduler, because it can.
118 
121 
122  // Instructions are strings of 2-byte aligned 2-byte values.
124  // For performance reasons we prefer 16-byte alignment.
126 
127  // Handle operations that are handled in a similar way for all types.
128  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
130  ++I) {
131  MVT VT = MVT::SimpleValueType(I);
132  if (isTypeLegal(VT)) {
133  // Lower SET_CC into an IPM-based sequence.
135 
136  // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
138 
139  // Lower SELECT_CC and BR_CC into separate comparisons and branches.
142  }
143  }
144 
145  // Expand jump table branches as address arithmetic followed by an
146  // indirect jump.
148 
149  // Expand BRCOND into a BR_CC (see above).
151 
152  // Handle integer types.
153  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
155  ++I) {
156  MVT VT = MVT::SimpleValueType(I);
157  if (isTypeLegal(VT)) {
158  // Expand individual DIV and REMs into DIVREMs.
165 
166  // Support addition/subtraction with overflow.
169 
170  // Support addition/subtraction with carry.
173 
174  // Support carry in as value rather than glue.
177 
178  // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
179  // stores, putting a serialization instruction after the stores.
182 
183  // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
184  // available, or if the operand is constant.
186 
187  // Use POPCNT on z196 and above.
188  if (Subtarget.hasPopulationCount())
190  else
192 
193  // No special instructions for these.
196 
197  // Use *MUL_LOHI where possible instead of MULH*.
202 
203  // Only z196 and above have native support for conversions to unsigned.
204  // On z10, promoting to i64 doesn't generate an inexact condition for
205  // values that are outside the i32 range but in the i64 range, so use
206  // the default expansion.
207  if (!Subtarget.hasFPExtension())
209  }
210  }
211 
212  // Type legalization will convert 8- and 16-bit atomic operations into
213  // forms that operate on i32s (but still keeping the original memory VT).
214  // Lower them into full i32 operations.
226 
227  // Even though i128 is not a legal type, we still need to custom lower
228  // the atomic operations in order to exploit SystemZ instructions.
231 
232  // We can use the CC result of compare-and-swap to implement
233  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
237 
239 
240  // Traps are legal, as we will convert them to "j .+2".
242 
243  // z10 has instructions for signed but not unsigned FP conversion.
244  // Handle unsigned 32-bit types as signed 64-bit types.
245  if (!Subtarget.hasFPExtension()) {
248  }
249 
250  // We have native support for a 64-bit CTLZ, via FLOGR.
254 
255  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
257 
258  // FIXME: Can we support these natively?
262 
263  // We have native instructions for i8, i16 and i32 extensions, but not i1.
265  for (MVT VT : MVT::integer_valuetypes()) {
269  }
270 
271  // Handle the various types of symbolic address.
277 
278  // We need to handle dynamic allocations specially because of the
279  // 160-byte area at the bottom of the stack.
282 
283  // Use custom expanders so that we can force the function to use
284  // a frame pointer.
287 
288  // Handle prefetches with PFD or PFDRL.
290 
291  for (MVT VT : MVT::vector_valuetypes()) {
292  // Assume by default that all vector operations need to be expanded.
293  for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
294  if (getOperationAction(Opcode, VT) == Legal)
295  setOperationAction(Opcode, VT, Expand);
296 
297  // Likewise all truncating stores and extending loads.
298  for (MVT InnerVT : MVT::vector_valuetypes()) {
299  setTruncStoreAction(VT, InnerVT, Expand);
300  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
301  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
302  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
303  }
304 
305  if (isTypeLegal(VT)) {
306  // These operations are legal for anything that can be stored in a
307  // vector register, even if there is no native support for the format
308  // as such. In particular, we can do these for v4f32 even though there
309  // are no specific instructions for that format.
315 
316  // Likewise, except that we need to replace the nodes with something
317  // more specific.
320  }
321  }
322 
323  // Handle integer vector types.
324  for (MVT VT : MVT::integer_vector_valuetypes()) {
325  if (isTypeLegal(VT)) {
326  // These operations have direct equivalents.
331  if (VT != MVT::v2i64)
336  if (Subtarget.hasVectorEnhancements1())
338  else
342 
343  // Convert a GPR scalar to a vector by inserting it into element 0.
345 
346  // Use a series of unpacks for extensions.
349 
350  // Detect shifts by a scalar amount and convert them into
351  // V*_BY_SCALAR.
355 
356  // At present ROTL isn't matched by DAGCombiner. ROTR should be
357  // converted into ROTL.
360 
361  // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
362  // and inverting the result as necessary.
364  }
365  }
366 
367  if (Subtarget.hasVector()) {
368  // There should be no need to check for float types other than v2f64
369  // since <2 x f32> isn't a legal type.
378  }
379 
380  // Handle floating-point types.
381  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
383  ++I) {
384  MVT VT = MVT::SimpleValueType(I);
385  if (isTypeLegal(VT)) {
386  // We can use FI for FRINT.
388 
389  // We can use the extended form of FI for other rounding operations.
390  if (Subtarget.hasFPExtension()) {
396  }
397 
398  // No special instructions for these.
404  }
405  }
406 
407  // Handle floating-point vector types.
408  if (Subtarget.hasVector()) {
409  // Scalar-to-vector conversion is just a subreg.
412 
413  // Some insertions and extractions can be done directly but others
414  // need to go via integers.
419 
420  // These operations have direct equivalents.
435  }
436 
437  // The vector enhancements facility 1 has instructions for these.
438  if (Subtarget.hasVectorEnhancements1()) {
453 
458 
463 
468 
473 
478  }
479 
480  // We have fused multiply-addition for f32 and f64 but not f128.
483  if (Subtarget.hasVectorEnhancements1())
485  else
487 
488  // We don't have a copysign instruction on vector registers.
489  if (Subtarget.hasVectorEnhancements1())
491 
492  // Needed so that we don't try to implement f128 constant loads using
493  // a load-and-extend of an f80 constant (in cases where the constant
494  // would fit in an f80).
495  for (MVT VT : MVT::fp_valuetypes())
497 
498  // We don't have an extending load instruction on vector registers.
499  if (Subtarget.hasVectorEnhancements1()) {
502  }
503 
504  // Floating-point truncation and stores need to be done separately.
508 
509  // We have 64-bit FPR<->GPR moves, but need special handling for
510  // 32-bit forms.
511  if (!Subtarget.hasVector()) {
514  }
515 
516  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
517  // structure, but VAEND is a no-op.
521 
522  // Codes for which we want to perform some z-specific combinations.
536 
537  // Handle intrinsics.
540 
541  // We want to use MVC in preference to even a single load/store pair.
542  MaxStoresPerMemcpy = 0;
544 
545  // The main memset sequence is a byte store followed by an MVC.
546  // Two STC or MV..I stores win over that, but the kind of fused stores
547  // generated by target-independent code don't when the byte value is
548  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
549  // than "STC;MVC". Handle the choice in target-specific code instead.
550  MaxStoresPerMemset = 0;
552 }
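Most of the constructor above is a long sequence of setOperationAction, setTruncStoreAction and setLoadExtAction calls (many of them elided in this listing). Conceptually they populate a per-target table keyed by (opcode, value type) that the SelectionDAG legalizer consults later. A rough, self-contained sketch of that idea; the names and values here are illustrative and are not LLVM's API:

#include <cassert>
#include <map>
#include <utility>

enum class Action { Legal, Expand, Custom, Promote };

// A toy action table: unlisted (opcode, type) pairs default to Legal.
struct ToyActionTable {
  std::map<std::pair<int, int>, Action> Table;
  void set(int Opcode, int VT, Action A) { Table[{Opcode, VT}] = A; }
  Action get(int Opcode, int VT) const {
    auto It = Table.find({Opcode, VT});
    return It == Table.end() ? Action::Legal : It->second;
  }
};

int main() {
  enum { ISD_SETCC = 1, ISD_SELECT = 2 };
  enum { MVT_i32 = 1, MVT_i64 = 2 };
  ToyActionTable T;
  T.set(ISD_SETCC, MVT_i32, Action::Custom);  // lowered by target code
  T.set(ISD_SELECT, MVT_i32, Action::Expand); // rewritten into SELECT_CC
  assert(T.get(ISD_SETCC, MVT_i32) == Action::Custom);
  assert(T.get(ISD_SELECT, MVT_i64) == Action::Legal); // untouched entries stay Legal
  return 0;
}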
553 
554 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
555  LLVMContext &, EVT VT) const {
556  if (!VT.isVector())
557  return MVT::i32;
558  return VT.changeVectorElementTypeToInteger();
559 }
560 
561 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
562  VT = VT.getScalarType();
563 
564  if (!VT.isSimple())
565  return false;
566 
567  switch (VT.getSimpleVT().SimpleTy) {
568  case MVT::f32:
569  case MVT::f64:
570  return true;
571  case MVT::f128:
572  return Subtarget.hasVectorEnhancements1();
573  default:
574  break;
575  }
576 
577  return false;
578 }
579 
580 // Return true if the constant can be generated with a vector instruction,
581 // such as VGM, VGMB or VREPI.
582 bool SystemZVectorConstantInfo::isVectorConstantLegal(
583  const SystemZSubtarget &Subtarget) {
584  const SystemZInstrInfo *TII =
585  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
586  if (!Subtarget.hasVector() ||
587  (isFP128 && !Subtarget.hasVectorEnhancements1()))
588  return false;
589 
590  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
591  // preferred way of creating all-zero and all-one vectors so give it
592  // priority over other methods below.
593  unsigned Mask = 0;
594  unsigned I = 0;
595  for (; I < SystemZ::VectorBytes; ++I) {
596  uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
597  if (Byte == 0xff)
598  Mask |= 1ULL << I;
599  else if (Byte != 0)
600  break;
601  }
602  if (I == SystemZ::VectorBytes) {
603  Opcode = SystemZISD::BYTE_MASK;
604  OpVals.push_back(Mask);
605  VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
606  return true;
607  }
608 
609  if (SplatBitSize > 64)
610  return false;
611 
612  auto tryValue = [&](uint64_t Value) -> bool {
613  // Try VECTOR REPLICATE IMMEDIATE
614  int64_t SignedValue = SignExtend64(Value, SplatBitSize);
615  if (isInt<16>(SignedValue)) {
616  OpVals.push_back(((unsigned) SignedValue));
617  Opcode = SystemZISD::REPLICATE;
618  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
619  SystemZ::VectorBits / SplatBitSize);
620  return true;
621  }
622  // Try VECTOR GENERATE MASK
623  unsigned Start, End;
624  if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
625  // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
626  // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
627  // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
628  OpVals.push_back(Start - (64 - SplatBitSize));
629  OpVals.push_back(End - (64 - SplatBitSize));
630  Opcode = SystemZISD::ROTATE_MASK;
631  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
632  SystemZ::VectorBits / SplatBitSize);
633  return true;
634  }
635  return false;
636  };
637 
638  // First try assuming that any undefined bits above the highest set bit
639  // and below the lowest set bit are 1s. This increases the likelihood of
640  // being able to use a sign-extended element value in VECTOR REPLICATE
641  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
642  uint64_t SplatBitsZ = SplatBits.getZExtValue();
643  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
644  uint64_t Lower =
645  (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
646  uint64_t Upper =
647  (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
648  if (tryValue(SplatBitsZ | Upper | Lower))
649  return true;
650 
651  // Now try assuming that any undefined bits between the first and
652  // last defined set bits are set. This increases the chances of
653  // using a non-wraparound mask.
654  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
655  return tryValue(SplatBitsZ | Middle);
656 }
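As a concrete illustration of the VECTOR GENERATE BYTE MASK path above: every one of the 16 vector bytes must be either 0x00 or 0xff, and the resulting mask has one bit per all-ones byte. A standalone sketch of that check on a plain byte array (illustrative only; the real code works on an APInt):

#include <cassert>
#include <cstdint>

// Return true and set Mask if every byte of the 16-byte constant is either
// 0x00 or 0xff; bit I of Mask corresponds to byte I.
static bool getByteMask(const uint8_t (&Bytes)[16], unsigned &Mask) {
  Mask = 0;
  for (unsigned I = 0; I < 16; ++I) {
    if (Bytes[I] == 0xff)
      Mask |= 1u << I;
    else if (Bytes[I] != 0)
      return false;
  }
  return true;
}

int main() {
  uint8_t AllOnes[16];
  for (uint8_t &B : AllOnes)
    B = 0xff;
  unsigned Mask;
  assert(getByteMask(AllOnes, Mask) && Mask == 0xffff);

  uint8_t Mixed[16] = {0xff, 0x00, 0xff, 0x00}; // remaining bytes are zero
  assert(getByteMask(Mixed, Mask) && Mask == 0x5);
  return 0;
}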
657 
658 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
659  IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
660  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
661 
662  // Find the smallest splat.
663  SplatBits = FPImm.bitcastToAPInt();
664  unsigned Width = SplatBits.getBitWidth();
665  while (Width > 8) {
666  unsigned HalfSize = Width / 2;
667  APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
668  APInt LowValue = SplatBits.trunc(HalfSize);
669 
670  // If the two halves do not match, stop here.
671  if (HighValue != LowValue || 8 > HalfSize)
672  break;
673 
674  SplatBits = HighValue;
675  Width = HalfSize;
676  }
677  SplatUndef = 0;
678  SplatBitSize = Width;
679 }
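The loop above repeatedly halves the width while the two halves are identical, which reduces a repeating bit pattern to its smallest splat element of at least 8 bits. A standalone sketch of the same idea on a 64-bit value (illustrative only; the real code uses APInt and may start from 128 bits):

#include <cassert>
#include <cstdint>

// Reduce a repeating 64-bit pattern to its smallest splat element of at
// least 8 bits, returning the element width in bits.
static unsigned smallestSplat(uint64_t Bits, uint64_t &Splat) {
  unsigned Width = 64;
  while (Width > 8) {
    unsigned HalfSize = Width / 2;
    uint64_t High = Bits >> HalfSize;
    uint64_t Low = Bits & ((uint64_t(1) << HalfSize) - 1);
    if (High != Low)
      break;
    Bits = Low;
    Width = HalfSize;
  }
  Splat = Bits;
  return Width;
}

int main() {
  uint64_t Splat;
  // 0xA5 repeated eight times reduces to the 8-bit element 0xA5.
  assert(smallestSplat(0xA5A5A5A5A5A5A5A5ULL, Splat) == 8 && Splat == 0xA5);
  // 0x0001 repeated four times reduces to the 16-bit element 0x0001.
  assert(smallestSplat(0x0001000100010001ULL, Splat) == 16 && Splat == 1);
  return 0;
}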
680 
681 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
682  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
683  bool HasAnyUndefs;
684 
685  // Get IntBits by finding the 128 bit splat.
686  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
687  true);
688 
689  // Get SplatBits by finding the 8 bit or greater splat.
690  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
691  true);
692 }
693 
695  bool ForCodeSize) const {
696  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
697  if (Imm.isZero() || Imm.isNegZero())
698  return true;
699 
700  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
701 }
702 
704  // We can use CGFI or CLGFI.
705  return isInt<32>(Imm) || isUInt<32>(Imm);
706 }
707 
709  // We can use ALGFI or SLGFI.
710  return isUInt<32>(Imm) || isUInt<32>(-Imm);
711 }
712 
714  unsigned,
715  unsigned,
716  bool *Fast) const {
717  // Unaligned accesses should never be slower than the expanded version.
718  // We check specifically for aligned accesses in the few cases where
719  // they are required.
720  if (Fast)
721  *Fast = true;
722  return true;
723 }
724 
725 // Information about the addressing mode for a memory access.
726 struct AddressingMode {
727  // True if a long displacement is supported.
728  bool LongDisplacement;
729 
730  // True if use of index register is supported.
731  bool IndexReg;
732 
733  AddressingMode(bool LongDispl, bool IdxReg) :
734  LongDisplacement(LongDispl), IndexReg(IdxReg) {}
735 };
736 
737 // Return the desired addressing mode for a Load which has only one use (in
738 // the same block) which is a Store.
739 static AddressingMode getLoadStoreAddrMode(bool HasVector,
740  Type *Ty) {
741  // With vector support a Load->Store combination may be combined to either
742  // an MVC or vector operations and it seems to work best to allow the
743  // vector addressing mode.
744  if (HasVector)
745  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
746 
747  // Otherwise only the MVC case is special.
748  bool MVC = Ty->isIntegerTy(8);
749  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
750 }
751 
752 // Return the addressing mode which seems most desirable given an LLVM
753 // Instruction pointer.
754 static AddressingMode
756  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
757  switch (II->getIntrinsicID()) {
758  default: break;
759  case Intrinsic::memset:
760  case Intrinsic::memmove:
761  case Intrinsic::memcpy:
762  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
763  }
764  }
765 
766  if (isa<LoadInst>(I) && I->hasOneUse()) {
767  auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
768  if (SingleUser->getParent() == I->getParent()) {
769  if (isa<ICmpInst>(SingleUser)) {
770  if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
771  if (C->getBitWidth() <= 64 &&
772  (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
773  // Comparison of memory with 16 bit signed / unsigned immediate
774  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
775  } else if (isa<StoreInst>(SingleUser))
776  // Load->Store
777  return getLoadStoreAddrMode(HasVector, I->getType());
778  }
779  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
780  if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
781  if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
782  // Load->Store
783  return getLoadStoreAddrMode(HasVector, LoadI->getType());
784  }
785 
786  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
787 
788  // * Use LDE instead of LE/LEY for z13 to avoid partial register
789  // dependencies (LDE only supports small offsets).
790  // * Utilize the vector registers to hold floating point
791  // values (vector load / store instructions only support small
792  // offsets).
793 
794  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
795  I->getOperand(0)->getType());
796  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
797  bool IsVectorAccess = MemAccessTy->isVectorTy();
798 
799  // A store of an extracted vector element will be combined into a VSTE type
800  // instruction.
801  if (!IsVectorAccess && isa<StoreInst>(I)) {
802  Value *DataOp = I->getOperand(0);
803  if (isa<ExtractElementInst>(DataOp))
804  IsVectorAccess = true;
805  }
806 
807  // A load which gets inserted into a vector element will be combined into a
808  // VLE type instruction.
809  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
810  User *LoadUser = *I->user_begin();
811  if (isa<InsertElementInst>(LoadUser))
812  IsVectorAccess = true;
813  }
814 
815  if (IsFPAccess || IsVectorAccess)
816  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
817  }
818 
819  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
820 }
821 
823  const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
824  // Punt on globals for now, although they can be used in limited
825  // RELATIVE LONG cases.
826  if (AM.BaseGV)
827  return false;
828 
829  // Require a 20-bit signed offset.
830  if (!isInt<20>(AM.BaseOffs))
831  return false;
832 
833  AddressingMode SupportedAM(true, true);
834  if (I != nullptr)
835  SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
836 
837  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
838  return false;
839 
840  if (!SupportedAM.IndexReg)
841  // No indexing allowed.
842  return AM.Scale == 0;
843  else
844  // Indexing is OK but no scale factor can be applied.
845  return AM.Scale == 0 || AM.Scale == 1;
846 }
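The displacement checks above mirror the two SystemZ memory-instruction formats: the short forms take an unsigned 12-bit displacement, while the long-displacement ('Y'-suffixed) forms take a signed 20-bit one. A self-contained sketch of the two range tests, equivalent to the isUInt<12> and isInt<20> checks used above:

#include <cassert>
#include <cstdint>

static bool fitsUnsigned12(int64_t Disp) {
  return Disp >= 0 && Disp < (1 << 12);
}

static bool fitsSigned20(int64_t Disp) {
  return Disp >= -(1 << 19) && Disp < (1 << 19);
}

int main() {
  assert(fitsUnsigned12(4095) && !fitsUnsigned12(4096) && !fitsUnsigned12(-1));
  assert(fitsSigned20(-524288) && fitsSigned20(524287) && !fitsSigned20(524288));
  return 0;
}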
847 
849  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
850  return false;
851  unsigned FromBits = FromType->getPrimitiveSizeInBits();
852  unsigned ToBits = ToType->getPrimitiveSizeInBits();
853  return FromBits > ToBits;
854 }
855 
857  if (!FromVT.isInteger() || !ToVT.isInteger())
858  return false;
859  unsigned FromBits = FromVT.getSizeInBits();
860  unsigned ToBits = ToVT.getSizeInBits();
861  return FromBits > ToBits;
862 }
863 
864 //===----------------------------------------------------------------------===//
865 // Inline asm support
866 //===----------------------------------------------------------------------===//
867 
870  if (Constraint.size() == 1) {
871  switch (Constraint[0]) {
872  case 'a': // Address register
873  case 'd': // Data register (equivalent to 'r')
874  case 'f': // Floating-point register
875  case 'h': // High-part register
876  case 'r': // General-purpose register
877  case 'v': // Vector register
878  return C_RegisterClass;
879 
880  case 'Q': // Memory with base and unsigned 12-bit displacement
881  case 'R': // Likewise, plus an index
882  case 'S': // Memory with base and signed 20-bit displacement
883  case 'T': // Likewise, plus an index
884  case 'm': // Equivalent to 'T'.
885  return C_Memory;
886 
887  case 'I': // Unsigned 8-bit constant
888  case 'J': // Unsigned 12-bit constant
889  case 'K': // Signed 16-bit constant
890  case 'L': // Signed 20-bit displacement (on all targets we support)
891  case 'M': // 0x7fffffff
892  return C_Other;
893 
894  default:
895  break;
896  }
897  }
898  return TargetLowering::getConstraintType(Constraint);
899 }
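For context, these constraint letters reach the backend from GCC-style extended inline asm in user code. A hypothetical example (assuming the usual s390x GNU assembler syntax; it is not taken from this file and only compiles for a SystemZ target) that exercises the 'd' register constraint handled above:

// Add two 64-bit values with an explicit AGR instruction. "=d" and "d"
// request general-purpose (data) registers; "0" ties the first input to the
// output register.
static long addWithAgr(long a, long b) {
  long result;
  asm("agr %0,%2" : "=d"(result) : "0"(a), "d"(b));
  return result;
}

int main() { return addWithAgr(40, 2) == 42 ? 0 : 1; }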
900 
903  const char *constraint) const {
904  ConstraintWeight weight = CW_Invalid;
905  Value *CallOperandVal = info.CallOperandVal;
906  // If we don't have a value, we can't do a match,
907  // but allow it at the lowest weight.
908  if (!CallOperandVal)
909  return CW_Default;
910  Type *type = CallOperandVal->getType();
911  // Look at the constraint type.
912  switch (*constraint) {
913  default:
914  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
915  break;
916 
917  case 'a': // Address register
918  case 'd': // Data register (equivalent to 'r')
919  case 'h': // High-part register
920  case 'r': // General-purpose register
921  if (CallOperandVal->getType()->isIntegerTy())
922  weight = CW_Register;
923  break;
924 
925  case 'f': // Floating-point register
926  if (type->isFloatingPointTy())
927  weight = CW_Register;
928  break;
929 
930  case 'v': // Vector register
931  if ((type->isVectorTy() || type->isFloatingPointTy()) &&
932  Subtarget.hasVector())
933  weight = CW_Register;
934  break;
935 
936  case 'I': // Unsigned 8-bit constant
937  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
938  if (isUInt<8>(C->getZExtValue()))
939  weight = CW_Constant;
940  break;
941 
942  case 'J': // Unsigned 12-bit constant
943  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
944  if (isUInt<12>(C->getZExtValue()))
945  weight = CW_Constant;
946  break;
947 
948  case 'K': // Signed 16-bit constant
949  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
950  if (isInt<16>(C->getSExtValue()))
951  weight = CW_Constant;
952  break;
953 
954  case 'L': // Signed 20-bit displacement (on all targets we support)
955  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
956  if (isInt<20>(C->getSExtValue()))
957  weight = CW_Constant;
958  break;
959 
960  case 'M': // 0x7fffffff
961  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
962  if (C->getZExtValue() == 0x7fffffff)
963  weight = CW_Constant;
964  break;
965  }
966  return weight;
967 }
968 
969 // Parse a "{tNNN}" register constraint for which the register type "t"
971 // has already been verified. RC is the class associated with "t" and
971 // Map maps 0-based register numbers to LLVM register numbers.
972 static std::pair<unsigned, const TargetRegisterClass *>
973 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
974  const unsigned *Map, unsigned Size) {
975  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
976  if (isdigit(Constraint[2])) {
977  unsigned Index;
978  bool Failed =
979  Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
980  if (!Failed && Index < Size && Map[Index])
981  return std::make_pair(Map[Index], RC);
982  }
983  return std::make_pair(0U, nullptr);
984 }
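For example, a constraint such as "{r5}" arrives here with the digits starting at Constraint[2]; slicing off the braces and the type letter yields the 0-based index 5, which is then translated through the Map table (GR64Regs, FP64Regs, and so on) into an LLVM register number. A small standalone sketch of just the digit-extraction step (illustrative only):

#include <cassert>
#include <cctype>
#include <string>

// Extract the register index from a "{tNNN}" constraint, or return -1 on
// malformed input. This mirrors the slice-and-parse step above, without the
// register-class mapping.
static int parseIndex(const std::string &Constraint) {
  if (Constraint.size() < 4 || Constraint.front() != '{' ||
      Constraint.back() != '}')
    return -1;
  std::string Digits = Constraint.substr(2, Constraint.size() - 3);
  for (char C : Digits)
    if (!std::isdigit(static_cast<unsigned char>(C)))
      return -1;
  return std::stoi(Digits);
}

int main() {
  assert(parseIndex("{r5}") == 5);
  assert(parseIndex("{f12}") == 12);
  assert(parseIndex("{rX}") == -1);
  return 0;
}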
985 
986 std::pair<unsigned, const TargetRegisterClass *>
988  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
989  if (Constraint.size() == 1) {
990  // GCC Constraint Letters
991  switch (Constraint[0]) {
992  default: break;
993  case 'd': // Data register (equivalent to 'r')
994  case 'r': // General-purpose register
995  if (VT == MVT::i64)
996  return std::make_pair(0U, &SystemZ::GR64BitRegClass);
997  else if (VT == MVT::i128)
998  return std::make_pair(0U, &SystemZ::GR128BitRegClass);
999  return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1000 
1001  case 'a': // Address register
1002  if (VT == MVT::i64)
1003  return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1004  else if (VT == MVT::i128)
1005  return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1006  return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1007 
1008  case 'h': // High-part register (an LLVM extension)
1009  return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1010 
1011  case 'f': // Floating-point register
1012  if (VT == MVT::f64)
1013  return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1014  else if (VT == MVT::f128)
1015  return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1016  return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1017 
1018  case 'v': // Vector register
1019  if (Subtarget.hasVector()) {
1020  if (VT == MVT::f32)
1021  return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1022  if (VT == MVT::f64)
1023  return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1024  return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1025  }
1026  break;
1027  }
1028  }
1029  if (Constraint.size() > 0 && Constraint[0] == '{') {
1030  // We need to override the default register parsing for GPRs and FPRs
1031  // because the interpretation depends on VT. The internal names of
1032  // the registers are also different from the external names
1033  // (F0D and F0S instead of F0, etc.).
1034  if (Constraint[1] == 'r') {
1035  if (VT == MVT::i32)
1036  return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1037  SystemZMC::GR32Regs, 16);
1038  if (VT == MVT::i128)
1039  return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1040  SystemZMC::GR128Regs, 16);
1041  return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1042  SystemZMC::GR64Regs, 16);
1043  }
1044  if (Constraint[1] == 'f') {
1045  if (VT == MVT::f32)
1046  return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1047  SystemZMC::FP32Regs, 16);
1048  if (VT == MVT::f128)
1049  return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1050  SystemZMC::FP128Regs, 16);
1051  return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1052  SystemZMC::FP64Regs, 16);
1053  }
1054  if (Constraint[1] == 'v') {
1055  if (VT == MVT::f32)
1056  return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1057  SystemZMC::VR32Regs, 32);
1058  if (VT == MVT::f64)
1059  return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1060  SystemZMC::VR64Regs, 32);
1061  return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1062  SystemZMC::VR128Regs, 32);
1063  }
1064  }
1065  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1066 }
1067 
1069 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1070  std::vector<SDValue> &Ops,
1071  SelectionDAG &DAG) const {
1072  // Only support length 1 constraints for now.
1073  if (Constraint.length() == 1) {
1074  switch (Constraint[0]) {
1075  case 'I': // Unsigned 8-bit constant
1076  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1077  if (isUInt<8>(C->getZExtValue()))
1078  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1079  Op.getValueType()));
1080  return;
1081 
1082  case 'J': // Unsigned 12-bit constant
1083  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1084  if (isUInt<12>(C->getZExtValue()))
1085  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1086  Op.getValueType()));
1087  return;
1088 
1089  case 'K': // Signed 16-bit constant
1090  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1091  if (isInt<16>(C->getSExtValue()))
1092  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1093  Op.getValueType()));
1094  return;
1095 
1096  case 'L': // Signed 20-bit displacement (on all targets we support)
1097  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1098  if (isInt<20>(C->getSExtValue()))
1099  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1100  Op.getValueType()));
1101  return;
1102 
1103  case 'M': // 0x7fffffff
1104  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1105  if (C->getZExtValue() == 0x7fffffff)
1106  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1107  Op.getValueType()));
1108  return;
1109  }
1110  }
1111  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1112 }
1113 
1114 //===----------------------------------------------------------------------===//
1115 // Calling conventions
1116 //===----------------------------------------------------------------------===//
1117 
1118 #include "SystemZGenCallingConv.inc"
1119 
1121  CallingConv::ID) const {
1122  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1123  SystemZ::R14D, 0 };
1124  return ScratchRegs;
1125 }
1126 
1128  Type *ToType) const {
1129  return isTruncateFree(FromType, ToType);
1130 }
1131 
1133  return CI->isTailCall();
1134 }
1135 
1136 // We do not yet support 128-bit single-element vector types. If the user
1137 // attempts to use such types as function argument or return type, prefer
1138 // to error out instead of emitting code violating the ABI.
1139 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1140  if (ArgVT.isVector() && !VT.isVector())
1141  report_fatal_error("Unsupported vector argument or return type");
1142 }
1143 
1145  for (unsigned i = 0; i < Ins.size(); ++i)
1146  VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1147 }
1148 
1150  for (unsigned i = 0; i < Outs.size(); ++i)
1151  VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1152 }
1153 
1154 // Value is a value that has been passed to us in the location described by VA
1155 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1156 // any loads onto Chain.
1158  CCValAssign &VA, SDValue Chain,
1159  SDValue Value) {
1160  // If the argument has been promoted from a smaller type, insert an
1161  // assertion to capture this.
1162  if (VA.getLocInfo() == CCValAssign::SExt)
1163  Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1164  DAG.getValueType(VA.getValVT()));
1165  else if (VA.getLocInfo() == CCValAssign::ZExt)
1166  Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1167  DAG.getValueType(VA.getValVT()));
1168 
1169  if (VA.isExtInLoc())
1170  Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1171  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1172  // If this is a short vector argument loaded from the stack,
1173  // extend from i64 to full vector size and then bitcast.
1174  assert(VA.getLocVT() == MVT::i64);
1175  assert(VA.getValVT().isVector());
1176  Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1177  Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1178  } else
1179  assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1180  return Value;
1181 }
1182 
1183 // Value is a value of type VA.getValVT() that we need to copy into
1184 // the location described by VA. Return a copy of Value converted to
1185 // VA.getLocVT(). The caller is responsible for handling indirect values.
1187  CCValAssign &VA, SDValue Value) {
1188  switch (VA.getLocInfo()) {
1189  case CCValAssign::SExt:
1190  return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1191  case CCValAssign::ZExt:
1192  return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1193  case CCValAssign::AExt:
1194  return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1195  case CCValAssign::BCvt:
1196  // If this is a short vector argument to be stored to the stack,
1197  // bitcast to v2i64 and then extract first element.
1198  assert(VA.getLocVT() == MVT::i64);
1199  assert(VA.getValVT().isVector());
1200  Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
1201  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1202  DAG.getConstant(0, DL, MVT::i32));
1203  case CCValAssign::Full:
1204  return Value;
1205  default:
1206  llvm_unreachable("Unhandled getLocInfo()");
1207  }
1208 }
1209 
1211  SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1212  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1213  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1214  MachineFunction &MF = DAG.getMachineFunction();
1215  MachineFrameInfo &MFI = MF.getFrameInfo();
1217  SystemZMachineFunctionInfo *FuncInfo =
1219  auto *TFL =
1220  static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
1221  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1222 
1223  // Detect unsupported vector argument types.
1224  if (Subtarget.hasVector())
1225  VerifyVectorTypes(Ins);
1226 
1227  // Assign locations to all of the incoming arguments.
1229  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1230  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1231 
1232  unsigned NumFixedGPRs = 0;
1233  unsigned NumFixedFPRs = 0;
1234  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1235  SDValue ArgValue;
1236  CCValAssign &VA = ArgLocs[I];
1237  EVT LocVT = VA.getLocVT();
1238  if (VA.isRegLoc()) {
1239  // Arguments passed in registers
1240  const TargetRegisterClass *RC;
1241  switch (LocVT.getSimpleVT().SimpleTy) {
1242  default:
1243  // Integers smaller than i64 should be promoted to i64.
1244  llvm_unreachable("Unexpected argument type");
1245  case MVT::i32:
1246  NumFixedGPRs += 1;
1247  RC = &SystemZ::GR32BitRegClass;
1248  break;
1249  case MVT::i64:
1250  NumFixedGPRs += 1;
1251  RC = &SystemZ::GR64BitRegClass;
1252  break;
1253  case MVT::f32:
1254  NumFixedFPRs += 1;
1255  RC = &SystemZ::FP32BitRegClass;
1256  break;
1257  case MVT::f64:
1258  NumFixedFPRs += 1;
1259  RC = &SystemZ::FP64BitRegClass;
1260  break;
1261  case MVT::v16i8:
1262  case MVT::v8i16:
1263  case MVT::v4i32:
1264  case MVT::v2i64:
1265  case MVT::v4f32:
1266  case MVT::v2f64:
1267  RC = &SystemZ::VR128BitRegClass;
1268  break;
1269  }
1270 
1271  unsigned VReg = MRI.createVirtualRegister(RC);
1272  MRI.addLiveIn(VA.getLocReg(), VReg);
1273  ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1274  } else {
1275  assert(VA.isMemLoc() && "Argument not register or memory");
1276 
1277  // Create the frame index object for this incoming parameter.
1278  int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1279  VA.getLocMemOffset(), true);
1280 
1281  // Create the SelectionDAG nodes corresponding to a load
1282  // from this parameter. Unpromoted ints and floats are
1283  // passed as right-justified 8-byte values.
1284  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1285  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1286  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1287  DAG.getIntPtrConstant(4, DL));
1288  ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1290  }
1291 
1292  // Convert the value of the argument register into the value that's
1293  // being passed.
1294  if (VA.getLocInfo() == CCValAssign::Indirect) {
1295  InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1296  MachinePointerInfo()));
1297  // If the original argument was split (e.g. i128), we need
1298  // to load all parts of it here (using the same address).
1299  unsigned ArgIndex = Ins[I].OrigArgIndex;
1300  assert (Ins[I].PartOffset == 0);
1301  while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1302  CCValAssign &PartVA = ArgLocs[I + 1];
1303  unsigned PartOffset = Ins[I + 1].PartOffset;
1304  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1305  DAG.getIntPtrConstant(PartOffset, DL));
1306  InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1307  MachinePointerInfo()));
1308  ++I;
1309  }
1310  } else
1311  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1312  }
1313 
1314  if (IsVarArg) {
1315  // Save the number of non-varargs registers for later use by va_start, etc.
1316  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1317  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1318 
1319  // Likewise the address (in the form of a frame index) of where the
1320  // first stack vararg would be. The 1-byte size here is arbitrary.
1321  int64_t StackSize = CCInfo.getNextStackOffset();
1322  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1323 
1324  // ...and a similar frame index for the caller-allocated save area
1325  // that will be used to store the incoming registers.
1326  int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
1327  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1328  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1329 
1330  // Store the FPR varargs in the reserved frame slots. (We store the
1331  // GPRs as part of the prologue.)
1332  if (NumFixedFPRs < SystemZ::NumArgFPRs) {
1333  SDValue MemOps[SystemZ::NumArgFPRs];
1334  for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
1335  unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
1336  int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
1337  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1338  unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
1339  &SystemZ::FP64BitRegClass);
1340  SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1341  MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1343  }
1344  // Join the stores, which are independent of one another.
1345  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1346  makeArrayRef(&MemOps[NumFixedFPRs],
1347  SystemZ::NumArgFPRs-NumFixedFPRs));
1348  }
1349  }
1350 
1351  return Chain;
1352 }
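The register classes chosen above follow the s390x ELF calling convention, in which (as far as I recall; the authoritative definition is the CC_SystemZ table generated into SystemZGenCallingConv.inc, included earlier in this file) the first five integer arguments travel in r2 through r6 and the first four floating-point arguments in f0, f2, f4 and f6, with further arguments going to the stack. A rough sketch of that assignment for integer arguments (illustrative, not the real calling-convention code):

#include <cassert>
#include <string>

// Return the location of the Nth (0-based) fixed integer argument: one of
// the five GPR argument registers r2-r6, or "stack" once those are used up.
static std::string intArgLocation(unsigned N) {
  static const char *const GPRArgs[] = {"r2", "r3", "r4", "r5", "r6"};
  return N < 5 ? GPRArgs[N] : "stack";
}

int main() {
  assert(intArgLocation(0) == "r2");
  assert(intArgLocation(4) == "r6");
  assert(intArgLocation(5) == "stack");
  return 0;
}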
1353 
1354 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1357  // Punt if there are any indirect or stack arguments, or if the call
1358  // needs the callee-saved argument register R6, or if the call uses
1359  // the callee-saved register arguments SwiftSelf and SwiftError.
1360  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1361  CCValAssign &VA = ArgLocs[I];
1362  if (VA.getLocInfo() == CCValAssign::Indirect)
1363  return false;
1364  if (!VA.isRegLoc())
1365  return false;
1366  unsigned Reg = VA.getLocReg();
1367  if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1368  return false;
1369  if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1370  return false;
1371  }
1372  return true;
1373 }
1374 
1375 SDValue
1377  SmallVectorImpl<SDValue> &InVals) const {
1378  SelectionDAG &DAG = CLI.DAG;
1379  SDLoc &DL = CLI.DL;
1381  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1383  SDValue Chain = CLI.Chain;
1384  SDValue Callee = CLI.Callee;
1385  bool &IsTailCall = CLI.IsTailCall;
1386  CallingConv::ID CallConv = CLI.CallConv;
1387  bool IsVarArg = CLI.IsVarArg;
1388  MachineFunction &MF = DAG.getMachineFunction();
1389  EVT PtrVT = getPointerTy(MF.getDataLayout());
1390 
1391  // Detect unsupported vector argument and return types.
1392  if (Subtarget.hasVector()) {
1393  VerifyVectorTypes(Outs);
1394  VerifyVectorTypes(Ins);
1395  }
1396 
1397  // Analyze the operands of the call, assigning locations to each operand.
1399  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1400  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1401 
1402  // We don't support GuaranteedTailCallOpt, only automatically-detected
1403  // sibling calls.
1404  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1405  IsTailCall = false;
1406 
1407  // Get a count of how many bytes are to be pushed on the stack.
1408  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1409 
1410  // Mark the start of the call.
1411  if (!IsTailCall)
1412  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1413 
1414  // Copy argument values to their designated locations.
1416  SmallVector<SDValue, 8> MemOpChains;
1417  SDValue StackPtr;
1418  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1419  CCValAssign &VA = ArgLocs[I];
1420  SDValue ArgValue = OutVals[I];
1421 
1422  if (VA.getLocInfo() == CCValAssign::Indirect) {
1423  // Store the argument in a stack slot and pass its address.
1424  SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
1425  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1426  MemOpChains.push_back(
1427  DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1429  // If the original argument was split (e.g. i128), we need
1430  // to store all parts of it here (and pass just one address).
1431  unsigned ArgIndex = Outs[I].OrigArgIndex;
1432  assert (Outs[I].PartOffset == 0);
1433  while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1434  SDValue PartValue = OutVals[I + 1];
1435  unsigned PartOffset = Outs[I + 1].PartOffset;
1436  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1437  DAG.getIntPtrConstant(PartOffset, DL));
1438  MemOpChains.push_back(
1439  DAG.getStore(Chain, DL, PartValue, Address,
1441  ++I;
1442  }
1443  ArgValue = SpillSlot;
1444  } else
1445  ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1446 
1447  if (VA.isRegLoc())
1448  // Queue up the argument copies and emit them at the end.
1449  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1450  else {
1451  assert(VA.isMemLoc() && "Argument not register or memory");
1452 
1453  // Work out the address of the stack slot. Unpromoted ints and
1454  // floats are passed as right-justified 8-byte values.
1455  if (!StackPtr.getNode())
1456  StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
1458  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1459  Offset += 4;
1460  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1461  DAG.getIntPtrConstant(Offset, DL));
1462 
1463  // Emit the store.
1464  MemOpChains.push_back(
1465  DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1466  }
1467  }
1468 
1469  // Join the stores, which are independent of one another.
1470  if (!MemOpChains.empty())
1471  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1472 
1473  // Accept direct calls by converting symbolic call addresses to the
1474  // associated Target* opcodes. Force %r1 to be used for indirect
1475  // tail calls.
1476  SDValue Glue;
1477  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1478  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1479  Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1480  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1481  Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1482  Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1483  } else if (IsTailCall) {
1484  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1485  Glue = Chain.getValue(1);
1486  Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1487  }
1488 
1489  // Build a sequence of copy-to-reg nodes, chained and glued together.
1490  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1491  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1492  RegsToPass[I].second, Glue);
1493  Glue = Chain.getValue(1);
1494  }
1495 
1496  // The first call operand is the chain and the second is the target address.
1498  Ops.push_back(Chain);
1499  Ops.push_back(Callee);
1500 
1501  // Add argument registers to the end of the list so that they are
1502  // known live into the call.
1503  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1504  Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1505  RegsToPass[I].second.getValueType()));
1506 
1507  // Add a register mask operand representing the call-preserved registers.
1508  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1509  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1510  assert(Mask && "Missing call preserved mask for calling convention");
1511  Ops.push_back(DAG.getRegisterMask(Mask));
1512 
1513  // Glue the call to the argument copies, if any.
1514  if (Glue.getNode())
1515  Ops.push_back(Glue);
1516 
1517  // Emit the call.
1518  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1519  if (IsTailCall)
1520  return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1521  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1522  Glue = Chain.getValue(1);
1523 
1524  // Mark the end of the call, which is glued to the call itself.
1525  Chain = DAG.getCALLSEQ_END(Chain,
1526  DAG.getConstant(NumBytes, DL, PtrVT, true),
1527  DAG.getConstant(0, DL, PtrVT, true),
1528  Glue, DL);
1529  Glue = Chain.getValue(1);
1530 
1531  // Assign locations to each value returned by this call.
1533  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1534  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1535 
1536  // Copy all of the result registers out of their specified physreg.
1537  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1538  CCValAssign &VA = RetLocs[I];
1539 
1540  // Copy the value out, gluing the copy to the end of the call sequence.
1541  SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1542  VA.getLocVT(), Glue);
1543  Chain = RetValue.getValue(1);
1544  Glue = RetValue.getValue(2);
1545 
1546  // Convert the value of the return register into the value that's
1547  // being returned.
1548  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1549  }
1550 
1551  return Chain;
1552 }
1553 
1556  MachineFunction &MF, bool isVarArg,
1557  const SmallVectorImpl<ISD::OutputArg> &Outs,
1558  LLVMContext &Context) const {
1559  // Detect unsupported vector return types.
1560  if (Subtarget.hasVector())
1561  VerifyVectorTypes(Outs);
1562 
1563  // Special case that we cannot easily detect in RetCC_SystemZ since
1564  // i128 is not a legal type.
1565  for (auto &Out : Outs)
1566  if (Out.ArgVT == MVT::i128)
1567  return false;
1568 
1570  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1571  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1572 }
1573 
1574 SDValue
1576  bool IsVarArg,
1577  const SmallVectorImpl<ISD::OutputArg> &Outs,
1578  const SmallVectorImpl<SDValue> &OutVals,
1579  const SDLoc &DL, SelectionDAG &DAG) const {
1580  MachineFunction &MF = DAG.getMachineFunction();
1581 
1582  // Detect unsupported vector return types.
1583  if (Subtarget.hasVector())
1584  VerifyVectorTypes(Outs);
1585 
1586  // Assign locations to each returned value.
1588  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1589  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1590 
1591  // Quick exit for void returns
1592  if (RetLocs.empty())
1593  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1594 
1595  // Copy the result values into the output registers.
1596  SDValue Glue;
1597  SmallVector<SDValue, 4> RetOps;
1598  RetOps.push_back(Chain);
1599  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1600  CCValAssign &VA = RetLocs[I];
1601  SDValue RetValue = OutVals[I];
1602 
1603  // Make the return register live on exit.
1604  assert(VA.isRegLoc() && "Can only return in registers!");
1605 
1606  // Promote the value as required.
1607  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1608 
1609  // Chain and glue the copies together.
1610  unsigned Reg = VA.getLocReg();
1611  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1612  Glue = Chain.getValue(1);
1613  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1614  }
1615 
1616  // Update chain and glue.
1617  RetOps[0] = Chain;
1618  if (Glue.getNode())
1619  RetOps.push_back(Glue);
1620 
1621  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1622 }
1623 
1624 // Return true if Op is an intrinsic node with chain that returns the CC value
1625 // as its only (other) argument. Provide the associated SystemZISD opcode and
1626 // the mask of valid CC values if so.
1627 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1628  unsigned &CCValid) {
1629  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1630  switch (Id) {
1631  case Intrinsic::s390_tbegin:
1632  Opcode = SystemZISD::TBEGIN;
1633  CCValid = SystemZ::CCMASK_TBEGIN;
1634  return true;
1635 
1636  case Intrinsic::s390_tbegin_nofloat:
1637  Opcode = SystemZISD::TBEGIN_NOFLOAT;
1638  CCValid = SystemZ::CCMASK_TBEGIN;
1639  return true;
1640 
1641  case Intrinsic::s390_tend:
1642  Opcode = SystemZISD::TEND;
1643  CCValid = SystemZ::CCMASK_TEND;
1644  return true;
1645 
1646  default:
1647  return false;
1648  }
1649 }
1650 
1651 // Return true if Op is an intrinsic node without chain that returns the
1652 // CC value as its final argument. Provide the associated SystemZISD
1653 // opcode and the mask of valid CC values if so.
1654 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1655  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1656  switch (Id) {
1657  case Intrinsic::s390_vpkshs:
1658  case Intrinsic::s390_vpksfs:
1659  case Intrinsic::s390_vpksgs:
1660  Opcode = SystemZISD::PACKS_CC;
1661  CCValid = SystemZ::CCMASK_VCMP;
1662  return true;
1663 
1664  case Intrinsic::s390_vpklshs:
1665  case Intrinsic::s390_vpklsfs:
1666  case Intrinsic::s390_vpklsgs:
1667  Opcode = SystemZISD::PACKLS_CC;
1668  CCValid = SystemZ::CCMASK_VCMP;
1669  return true;
1670 
1671  case Intrinsic::s390_vceqbs:
1672  case Intrinsic::s390_vceqhs:
1673  case Intrinsic::s390_vceqfs:
1674  case Intrinsic::s390_vceqgs:
1675  Opcode = SystemZISD::VICMPES;
1676  CCValid = SystemZ::CCMASK_VCMP;
1677  return true;
1678 
1679  case Intrinsic::s390_vchbs:
1680  case Intrinsic::s390_vchhs:
1681  case Intrinsic::s390_vchfs:
1682  case Intrinsic::s390_vchgs:
1683  Opcode = SystemZISD::VICMPHS;
1684  CCValid = SystemZ::CCMASK_VCMP;
1685  return true;
1686 
1687  case Intrinsic::s390_vchlbs:
1688  case Intrinsic::s390_vchlhs:
1689  case Intrinsic::s390_vchlfs:
1690  case Intrinsic::s390_vchlgs:
1691  Opcode = SystemZISD::VICMPHLS;
1692  CCValid = SystemZ::CCMASK_VCMP;
1693  return true;
1694 
1695  case Intrinsic::s390_vtm:
1696  Opcode = SystemZISD::VTM;
1697  CCValid = SystemZ::CCMASK_VCMP;
1698  return true;
1699 
1700  case Intrinsic::s390_vfaebs:
1701  case Intrinsic::s390_vfaehs:
1702  case Intrinsic::s390_vfaefs:
1703  Opcode = SystemZISD::VFAE_CC;
1704  CCValid = SystemZ::CCMASK_ANY;
1705  return true;
1706 
1707  case Intrinsic::s390_vfaezbs:
1708  case Intrinsic::s390_vfaezhs:
1709  case Intrinsic::s390_vfaezfs:
1710  Opcode = SystemZISD::VFAEZ_CC;
1711  CCValid = SystemZ::CCMASK_ANY;
1712  return true;
1713 
1714  case Intrinsic::s390_vfeebs:
1715  case Intrinsic::s390_vfeehs:
1716  case Intrinsic::s390_vfeefs:
1717  Opcode = SystemZISD::VFEE_CC;
1718  CCValid = SystemZ::CCMASK_ANY;
1719  return true;
1720 
1721  case Intrinsic::s390_vfeezbs:
1722  case Intrinsic::s390_vfeezhs:
1723  case Intrinsic::s390_vfeezfs:
1724  Opcode = SystemZISD::VFEEZ_CC;
1725  CCValid = SystemZ::CCMASK_ANY;
1726  return true;
1727 
1728  case Intrinsic::s390_vfenebs:
1729  case Intrinsic::s390_vfenehs:
1730  case Intrinsic::s390_vfenefs:
1731  Opcode = SystemZISD::VFENE_CC;
1732  CCValid = SystemZ::CCMASK_ANY;
1733  return true;
1734 
1735  case Intrinsic::s390_vfenezbs:
1736  case Intrinsic::s390_vfenezhs:
1737  case Intrinsic::s390_vfenezfs:
1738  Opcode = SystemZISD::VFENEZ_CC;
1739  CCValid = SystemZ::CCMASK_ANY;
1740  return true;
1741 
1742  case Intrinsic::s390_vistrbs:
1743  case Intrinsic::s390_vistrhs:
1744  case Intrinsic::s390_vistrfs:
1745  Opcode = SystemZISD::VISTR_CC;
1747  return true;
1748 
1749  case Intrinsic::s390_vstrcbs:
1750  case Intrinsic::s390_vstrchs:
1751  case Intrinsic::s390_vstrcfs:
1752  Opcode = SystemZISD::VSTRC_CC;
1753  CCValid = SystemZ::CCMASK_ANY;
1754  return true;
1755 
1756  case Intrinsic::s390_vstrczbs:
1757  case Intrinsic::s390_vstrczhs:
1758  case Intrinsic::s390_vstrczfs:
1759  Opcode = SystemZISD::VSTRCZ_CC;
1760  CCValid = SystemZ::CCMASK_ANY;
1761  return true;
1762 
1763  case Intrinsic::s390_vfcedbs:
1764  case Intrinsic::s390_vfcesbs:
1765  Opcode = SystemZISD::VFCMPES;
1766  CCValid = SystemZ::CCMASK_VCMP;
1767  return true;
1768 
1769  case Intrinsic::s390_vfchdbs:
1770  case Intrinsic::s390_vfchsbs:
1771  Opcode = SystemZISD::VFCMPHS;
1772  CCValid = SystemZ::CCMASK_VCMP;
1773  return true;
1774 
1775  case Intrinsic::s390_vfchedbs:
1776  case Intrinsic::s390_vfchesbs:
1777  Opcode = SystemZISD::VFCMPHES;
1778  CCValid = SystemZ::CCMASK_VCMP;
1779  return true;
1780 
1781  case Intrinsic::s390_vftcidb:
1782  case Intrinsic::s390_vftcisb:
1783  Opcode = SystemZISD::VFTCI;
1784  CCValid = SystemZ::CCMASK_VCMP;
1785  return true;
1786 
1787  case Intrinsic::s390_tdc:
1788  Opcode = SystemZISD::TDC;
1789  CCValid = SystemZ::CCMASK_TDC;
1790  return true;
1791 
1792  default:
1793  return false;
1794  }
1795 }
1796 
1797 // Emit an intrinsic with chain and an explicit CC register result.
1799  unsigned Opcode) {
1800  // Copy all operands except the intrinsic ID.
1801  unsigned NumOps = Op.getNumOperands();
1803  Ops.reserve(NumOps - 1);
1804  Ops.push_back(Op.getOperand(0));
1805  for (unsigned I = 2; I < NumOps; ++I)
1806  Ops.push_back(Op.getOperand(I));
1807 
1808  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
1809  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
1810  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
1811  SDValue OldChain = SDValue(Op.getNode(), 1);
1812  SDValue NewChain = SDValue(Intr.getNode(), 1);
1813  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
1814  return Intr.getNode();
1815 }
1816 
1817 // Emit an intrinsic with an explicit CC register result.
1819  unsigned Opcode) {
1820  // Copy all operands except the intrinsic ID.
1821  unsigned NumOps = Op.getNumOperands();
1823  Ops.reserve(NumOps - 1);
1824  for (unsigned I = 1; I < NumOps; ++I)
1825  Ops.push_back(Op.getOperand(I));
1826 
1827  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
1828  return Intr.getNode();
1829 }
1830 
1831 // CC is a comparison that will be implemented using an integer or
1832 // floating-point comparison. Return the condition code mask for
1833 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
1834 // unsigned comparisons and clear for signed ones. In the floating-point
1835 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
1836 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
1837 #define CONV(X) \
1838  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
1839  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
1840  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
1841 
1842  switch (CC) {
1843  default:
1844  llvm_unreachable("Invalid integer condition!");
1845 
1846  CONV(EQ);
1847  CONV(NE);
1848  CONV(GT);
1849  CONV(GE);
1850  CONV(LT);
1851  CONV(LE);
1852 
1853  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
1854  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
1855  }
1856 #undef CONV
1857 }
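To make the CONV macro concrete: for GT it expands to three cases, so SETGT and SETOGT both map to CCMASK_CMP_GT, while SETUGT maps to CCMASK_CMP_UO | CCMASK_CMP_GT; per the comment above, the extra bit marks the comparison as unsigned in the integer case and means "or unordered" in the floating-point case. A tiny standalone model of that mapping (the mask values are illustrative placeholders, not the real SystemZ constants):

#include <cassert>

// Illustrative stand-ins for the SystemZ::CCMASK_CMP_* constants.
enum : unsigned { CMP_GT = 2, CMP_UO = 1 };

enum class Cond { SETGT, SETOGT, SETUGT };

static unsigned ccMaskFor(Cond CC) {
  switch (CC) {
  case Cond::SETGT:  // signed / don't-care
  case Cond::SETOGT: // ordered greater-than
    return CMP_GT;
  case Cond::SETUGT: // unsigned (integer) or unordered-or-greater (FP)
    return CMP_UO | CMP_GT;
  }
  return 0;
}

int main() {
  assert(ccMaskFor(Cond::SETGT) == ccMaskFor(Cond::SETOGT));
  assert(ccMaskFor(Cond::SETUGT) == (CMP_UO | CMP_GT));
  return 0;
}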
1858 
1859 // If C can be converted to a comparison against zero, adjust the operands
1860 // as necessary.
1861 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
1862  if (C.ICmpType == SystemZICMP::UnsignedOnly)
1863  return;
1864 
1865  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
1866  if (!ConstOp1)
1867  return;
1868 
1869  int64_t Value = ConstOp1->getSExtValue();
1870  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
1871  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
1872  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
1873  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
1874  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
1875  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
1876  }
1877 }
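// Illustrative sketch (annotation, not part of the original file): XOR-ing
// CCMASK_CMP_EQ into the mask is what lets a compare against +/-1 become a
// compare against zero.
//
//   before: C.Op1 == -1, C.CCMask == CCMASK_CMP_GT                  // "x > -1"
//   after : C.Op1 ==  0, C.CCMask == CCMASK_CMP_GT | CCMASK_CMP_EQ  // "x >= 0"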
1878 
1879 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
1880 // adjust the operands as necessary.
1881 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
1882  Comparison &C) {
1883  // For us to make any changes, it must be a comparison between a single-use
1884  // load and a constant.
1885  if (!C.Op0.hasOneUse() ||
1886  C.Op0.getOpcode() != ISD::LOAD ||
1887  C.Op1.getOpcode() != ISD::Constant)
1888  return;
1889 
1890  // We must have an 8- or 16-bit load.
1891  auto *Load = cast<LoadSDNode>(C.Op0);
1892  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
1893  if (NumBits != 8 && NumBits != 16)
1894  return;
1895 
1896  // The load must be an extending one and the constant must be within the
1897  // range of the unextended value.
1898  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
1899  uint64_t Value = ConstOp1->getZExtValue();
1900  uint64_t Mask = (1 << NumBits) - 1;
1901  if (Load->getExtensionType() == ISD::SEXTLOAD) {
1902  // Make sure that ConstOp1 is in range of C.Op0.
1903  int64_t SignedValue = ConstOp1->getSExtValue();
1904  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
1905  return;
1906  if (C.ICmpType != SystemZICMP::SignedOnly) {
1907  // Unsigned comparison between two sign-extended values is equivalent
1908  // to unsigned comparison between two zero-extended values.
1909  Value &= Mask;
1910  } else if (NumBits == 8) {
1911  // Try to treat the comparison as unsigned, so that we can use CLI.
1912  // Adjust CCMask and Value as necessary.
1913  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
1914  // Test whether the high bit of the byte is set.
1915  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
1916  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
1917  // Test whether the high bit of the byte is clear.
1918  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
1919  else
1920  // No instruction exists for this combination.
1921  return;
1922  C.ICmpType = SystemZICMP::UnsignedOnly;
1923  }
1924  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
1925  if (Value > Mask)
1926  return;
1927  // If the constant is in range, we can use any comparison.
1928  C.ICmpType = SystemZICMP::Any;
1929  } else
1930  return;
1931 
1932  // Make sure that the first operand is an i32 of the right extension type.
1933  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
1934  ISD::SEXTLOAD :
1935  ISD::ZEXTLOAD);
1936  if (C.Op0.getValueType() != MVT::i32 ||
1937  Load->getExtensionType() != ExtType) {
1938  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
1939  Load->getBasePtr(), Load->getPointerInfo(),
1940  Load->getMemoryVT(), Load->getAlignment(),
1941  Load->getMemOperand()->getFlags());
1942  // Update the chain uses.
1943  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
1944  }
1945 
1946  // Make sure that the second operand is an i32 with the right value.
1947  if (C.Op1.getValueType() != MVT::i32 ||
1948  Value != ConstOp1->getZExtValue())
1949  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
1950 }
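// Illustrative sketch (annotation, not part of the original file): one of the
// rewrites performed above for an 8-bit sign-extending load, so that CLI can
// be used.
//
//   before: sign-extended byte x, signed test   "x < 0"
//   after : zero-extended byte x, unsigned test "x > 127"   // i.e. sign bit set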
1951 
1952 // Return true if Op is either an unextended load, or a load suitable
1953 // for integer register-memory comparisons of type ICmpType.
1954 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
1955  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
1956  if (Load) {
1957  // There are no instructions to compare a register with a memory byte.
1958  if (Load->getMemoryVT() == MVT::i8)
1959  return false;
1960  // Otherwise decide on extension type.
1961  switch (Load->getExtensionType()) {
1962  case ISD::NON_EXTLOAD:
1963  return true;
1964  case ISD::SEXTLOAD:
1965  return ICmpType != SystemZICMP::UnsignedOnly;
1966  case ISD::ZEXTLOAD:
1967  return ICmpType != SystemZICMP::SignedOnly;
1968  default:
1969  break;
1970  }
1971  }
1972  return false;
1973 }
1974 
1975 // Return true if it is better to swap the operands of C.
1976 static bool shouldSwapCmpOperands(const Comparison &C) {
1977  // Leave f128 comparisons alone, since they have no memory forms.
1978  if (C.Op0.getValueType() == MVT::f128)
1979  return false;
1980 
1981  // Always keep a floating-point constant second, since comparisons with
1982  // zero can use LOAD TEST and comparisons with other constants make a
1983  // natural memory operand.
1984  if (isa<ConstantFPSDNode>(C.Op1))
1985  return false;
1986 
1987  // Never swap comparisons with zero since there are many ways to optimize
1988  // those later.
1989  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
1990  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
1991  return false;
1992 
1993  // Also keep natural memory operands second if the loaded value is
1994  // only used here. Several comparisons have memory forms.
1995  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
1996  return false;
1997 
1998  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
1999  // In that case we generally prefer the memory to be second.
2000  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2001  // The only exceptions are when the second operand is a constant and
2002  // we can use things like CHHSI.
2003  if (!ConstOp1)
2004  return true;
2005  // The unsigned memory-immediate instructions can handle 16-bit
2006  // unsigned integers.
2007  if (C.ICmpType != SystemZICMP::SignedOnly &&
2008  isUInt<16>(ConstOp1->getZExtValue()))
2009  return false;
2010  // The signed memory-immediate instructions can handle 16-bit
2011  // signed integers.
2012  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2013  isInt<16>(ConstOp1->getSExtValue()))
2014  return false;
2015  return true;
2016  }
2017 
2018  // Try to promote the use of CGFR and CLGFR.
2019  unsigned Opcode0 = C.Op0.getOpcode();
2020  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2021  return true;
2022  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2023  return true;
2024  if (C.ICmpType != SystemZICMP::SignedOnly &&
2025  Opcode0 == ISD::AND &&
2026  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2027  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2028  return true;
2029 
2030  return false;
2031 }
2032 
2033 // Return a version of comparison CC mask CCMask in which the LT and GT
2034 // actions are swapped.
2035 static unsigned reverseCCMask(unsigned CCMask) {
2036  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
2037  (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
2038  (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
2039  (CCMask & SystemZ::CCMASK_CMP_UO));
2040 }
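// Illustrative sketch (annotation, not part of the original file): only the
// LT and GT bits are exchanged, so composite masks follow along.
//
//   reverseCCMask(SystemZ::CCMASK_CMP_LT) == SystemZ::CCMASK_CMP_GT
//   reverseCCMask(SystemZ::CCMASK_CMP_LE) == SystemZ::CCMASK_CMP_GE   // EQ bit kept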
2041 
2042 // Check whether C tests for equality between X and Y and whether X - Y
2043 // or Y - X is also computed. In that case it's better to compare the
2044 // result of the subtraction against zero.
2045 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2046  Comparison &C) {
2047  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2048  C.CCMask == SystemZ::CCMASK_CMP_NE) {
2049  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2050  SDNode *N = *I;
2051  if (N->getOpcode() == ISD::SUB &&
2052  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2053  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2054  C.Op0 = SDValue(N, 0);
2055  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2056  return;
2057  }
2058  }
2059  }
2060 }
2061 
2062 // Check whether C compares a floating-point value with zero and if that
2063 // floating-point value is also negated. In this case we can use the
2064 // negation to set CC, so avoiding separate LOAD AND TEST and
2065 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2066 static void adjustForFNeg(Comparison &C) {
2067  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2068  if (C1 && C1->isZero()) {
2069  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2070  SDNode *N = *I;
2071  if (N->getOpcode() == ISD::FNEG) {
2072  C.Op0 = SDValue(N, 0);
2073  C.CCMask = reverseCCMask(C.CCMask);
2074  return;
2075  }
2076  }
2077  }
2078 }
2079 
2080 // Check whether C compares (shl X, 32) with 0 and whether X is
2081 // also sign-extended. In that case it is better to test the result
2082 // of the sign extension using LTGFR.
2083 //
2084 // This case is important because InstCombine transforms a comparison
2085 // with (sext (trunc X)) into a comparison with (shl X, 32).
2086 static void adjustForLTGFR(Comparison &C) {
2087  // Check for a comparison between (shl X, 32) and 0.
2088  if (C.Op0.getOpcode() == ISD::SHL &&
2089  C.Op0.getValueType() == MVT::i64 &&
2090  C.Op1.getOpcode() == ISD::Constant &&
2091  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2092  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2093  if (C1 && C1->getZExtValue() == 32) {
2094  SDValue ShlOp0 = C.Op0.getOperand(0);
2095  // See whether X has any SIGN_EXTEND_INREG uses.
2096  for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
2097  SDNode *N = *I;
2098  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2099  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2100  C.Op0 = SDValue(N, 0);
2101  return;
2102  }
2103  }
2104  }
2105  }
2106 }
2107 
2108 // If C compares the truncation of an extending load, try to compare
2109 // the untruncated value instead. This exposes more opportunities to
2110 // reuse CC.
2111 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2112  Comparison &C) {
2113  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2114  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2115  C.Op1.getOpcode() == ISD::Constant &&
2116  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2117  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2118  if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
2119  unsigned Type = L->getExtensionType();
2120  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2121  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2122  C.Op0 = C.Op0.getOperand(0);
2123  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2124  }
2125  }
2126  }
2127 }
2128 
2129 // Return true if shift operation N has an in-range constant shift value.
2130 // Store it in ShiftVal if so.
2131 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2132  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2133  if (!Shift)
2134  return false;
2135 
2136  uint64_t Amount = Shift->getZExtValue();
2137  if (Amount >= N.getValueSizeInBits())
2138  return false;
2139 
2140  ShiftVal = Amount;
2141  return true;
2142 }
2143 
2144 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2145 // instruction and whether the CC value is descriptive enough to handle
2146 // a comparison of type Opcode between the AND result and CmpVal.
2147  // a comparison of type ICmpType between the AND result and CmpVal.
2148 // the number of bits in the operands. If TEST UNDER MASK can be used,
2149 // return the corresponding CC mask, otherwise return 0.
2150 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2151  uint64_t Mask, uint64_t CmpVal,
2152  unsigned ICmpType) {
2153  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2154 
2155  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2156  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2157  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2158  return 0;
2159 
2160  // Work out the masks for the lowest and highest bits.
2161  unsigned HighShift = 63 - countLeadingZeros(Mask);
2162  uint64_t High = uint64_t(1) << HighShift;
2163  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2164 
2165  // Signed ordered comparisons are effectively unsigned if the sign
2166  // bit is dropped.
2167  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2168 
2169  // Check for equality comparisons with 0, or the equivalent.
2170  if (CmpVal == 0) {
2171  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2172  return SystemZ::CCMASK_TM_ALL_0;
2173  if (CCMask == SystemZ::CCMASK_CMP_NE)
2174  return SystemZ::CCMASK_TM_SOME_1;
2175  }
2176  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2177  if (CCMask == SystemZ::CCMASK_CMP_LT)
2178  return SystemZ::CCMASK_TM_ALL_0;
2179  if (CCMask == SystemZ::CCMASK_CMP_GE)
2180  return SystemZ::CCMASK_TM_SOME_1;
2181  }
2182  if (EffectivelyUnsigned && CmpVal < Low) {
2183  if (CCMask == SystemZ::CCMASK_CMP_LE)
2184  return SystemZ::CCMASK_TM_ALL_0;
2185  if (CCMask == SystemZ::CCMASK_CMP_GT)
2186  return SystemZ::CCMASK_TM_SOME_1;
2187  }
2188 
2189  // Check for equality comparisons with the mask, or the equivalent.
2190  if (CmpVal == Mask) {
2191  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2192  return SystemZ::CCMASK_TM_ALL_1;
2193  if (CCMask == SystemZ::CCMASK_CMP_NE)
2194  return SystemZ::CCMASK_TM_SOME_0;
2195  }
2196  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2197  if (CCMask == SystemZ::CCMASK_CMP_GT)
2198  return SystemZ::CCMASK_TM_ALL_1;
2199  if (CCMask == SystemZ::CCMASK_CMP_LE)
2200  return SystemZ::CCMASK_TM_SOME_0;
2201  }
2202  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2203  if (CCMask == SystemZ::CCMASK_CMP_GE)
2204  return SystemZ::CCMASK_TM_ALL_1;
2205  if (CCMask == SystemZ::CCMASK_CMP_LT)
2206  return SystemZ::CCMASK_TM_SOME_0;
2207  }
2208 
2209  // Check for ordered comparisons with the top bit.
2210  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2211  if (CCMask == SystemZ::CCMASK_CMP_LE)
2212  return SystemZ::CCMASK_TM_MSB_0;
2213  if (CCMask == SystemZ::CCMASK_CMP_GT)
2214  return SystemZ::CCMASK_TM_MSB_1;
2215  }
2216  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2217  if (CCMask == SystemZ::CCMASK_CMP_LT)
2218  return SystemZ::CCMASK_TM_MSB_0;
2219  if (CCMask == SystemZ::CCMASK_CMP_GE)
2220  return SystemZ::CCMASK_TM_MSB_1;
2221  }
2222 
2223  // If there are just two bits, we can do equality checks for Low and High
2224  // as well.
2225  if (Mask == Low + High) {
2226  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2227  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2228  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2229  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2230  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2231  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2232  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2233  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2234  }
2235 
2236  // Looks like we've exhausted our options.
2237  return 0;
2238 }
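// Illustrative sketch (annotation, not part of the original file; assumed
// operand values): a single-bit mask that fits TMLL, tested for equality
// with zero, maps to "all masked bits zero".
//
//   getTestUnderMaskCond(64, SystemZ::CCMASK_CMP_EQ, 0x8000, 0, SystemZICMP::Any)
//     == SystemZ::CCMASK_TM_ALL_0        // implements "(x & 0x8000) == 0"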
2239 
2240 // See whether C can be implemented as a TEST UNDER MASK instruction.
2241 // Update the arguments with the TM version if so.
2242 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2243  Comparison &C) {
2244  // Check that we have a comparison with a constant.
2245  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2246  if (!ConstOp1)
2247  return;
2248  uint64_t CmpVal = ConstOp1->getZExtValue();
2249 
2250  // Check whether the nonconstant input is an AND with a constant mask.
2251  Comparison NewC(C);
2252  uint64_t MaskVal;
2253  ConstantSDNode *Mask = nullptr;
2254  if (C.Op0.getOpcode() == ISD::AND) {
2255  NewC.Op0 = C.Op0.getOperand(0);
2256  NewC.Op1 = C.Op0.getOperand(1);
2257  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2258  if (!Mask)
2259  return;
2260  MaskVal = Mask->getZExtValue();
2261  } else {
2262  // There is no instruction to compare with a 64-bit immediate
2263  // so use TMHH instead if possible. We need an unsigned ordered
2264  // comparison with an i64 immediate.
2265  if (NewC.Op0.getValueType() != MVT::i64 ||
2266  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2267  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2268  NewC.ICmpType == SystemZICMP::SignedOnly)
2269  return;
2270  // Convert LE and GT comparisons into LT and GE.
2271  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2272  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2273  if (CmpVal == uint64_t(-1))
2274  return;
2275  CmpVal += 1;
2276  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2277  }
2278  // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2279  // be masked off without changing the result.
2280  MaskVal = -(CmpVal & -CmpVal);
2281  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2282  }
2283  if (!MaskVal)
2284  return;
2285 
2286  // Check whether the combination of mask, comparison value and comparison
2287  // type are suitable.
2288  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2289  unsigned NewCCMask, ShiftVal;
2290  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2291  NewC.Op0.getOpcode() == ISD::SHL &&
2292  isSimpleShift(NewC.Op0, ShiftVal) &&
2293  (MaskVal >> ShiftVal != 0) &&
2294  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2295  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2296  MaskVal >> ShiftVal,
2297  CmpVal >> ShiftVal,
2298  SystemZICMP::Any))) {
2299  NewC.Op0 = NewC.Op0.getOperand(0);
2300  MaskVal >>= ShiftVal;
2301  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2302  NewC.Op0.getOpcode() == ISD::SRL &&
2303  isSimpleShift(NewC.Op0, ShiftVal) &&
2304  (MaskVal << ShiftVal != 0) &&
2305  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2306  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2307  MaskVal << ShiftVal,
2308  CmpVal << ShiftVal,
2309  SystemZICMP::UnsignedOnly))) {
2310  NewC.Op0 = NewC.Op0.getOperand(0);
2311  MaskVal <<= ShiftVal;
2312  } else {
2313  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2314  NewC.ICmpType);
2315  if (!NewCCMask)
2316  return;
2317  }
2318 
2319  // Go ahead and make the change.
2320  C.Opcode = SystemZISD::TM;
2321  C.Op0 = NewC.Op0;
2322  if (Mask && Mask->getZExtValue() == MaskVal)
2323  C.Op1 = SDValue(Mask, 0);
2324  else
2325  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2326  C.CCValid = SystemZ::CCMASK_TM;
2327  C.CCMask = NewCCMask;
2328 }
2329 
2330 // See whether the comparison argument contains a redundant AND
2331 // and remove it if so. This sometimes happens due to the generic
2332 // BRCOND expansion.
2333 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2334  Comparison &C) {
2335  if (C.Op0.getOpcode() != ISD::AND)
2336  return;
2337  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2338  if (!Mask)
2339  return;
2340  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2341  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2342  return;
2343 
2344  C.Op0 = C.Op0.getOperand(0);
2345 }
2346 
2347 // Return a Comparison that tests the condition-code result of intrinsic
2348 // node Call against constant integer CC using comparison code Cond.
2349 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2350 // and CCValid is the set of possible condition-code results.
2351 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2352  SDValue Call, unsigned CCValid, uint64_t CC,
2353  ISD::CondCode Cond) {
2354  Comparison C(Call, SDValue());
2355  C.Opcode = Opcode;
2356  C.CCValid = CCValid;
2357  if (Cond == ISD::SETEQ)
2358  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2359  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2360  else if (Cond == ISD::SETNE)
2361  // ...and the inverse of that.
2362  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2363  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2364  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2365  // always true for CC>3.
2366  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2367  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2368  // ...and the inverse of that.
2369  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2370  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2371  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2372  // always true for CC>3.
2373  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2374  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2375  // ...and the inverse of that.
2376  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2377  else
2378  llvm_unreachable("Unexpected integer comparison type");
2379  C.CCMask &= CCValid;
2380  return C;
2381 }
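// Illustrative sketch (annotation, not part of the original file): with the
// 4-bit CC encoding used here (bit 3 selects CC==0 ... bit 0 selects CC==3),
// a SETEQ test against CC == 2 keeps only the CC2 bit before masking:
//
//   C.CCMask = 1 << (3 - 2);   // == 2, then C.CCMask &= CCValid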
2382 
2383 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2384 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2385  ISD::CondCode Cond, const SDLoc &DL) {
2386  if (CmpOp1.getOpcode() == ISD::Constant) {
2387  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2388  unsigned Opcode, CCValid;
2389  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2390  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2391  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2392  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2393  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2394  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2395  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2396  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2397  }
2398  Comparison C(CmpOp0, CmpOp1);
2399  C.CCMask = CCMaskForCondCode(Cond);
2400  if (C.Op0.getValueType().isFloatingPoint()) {
2401  C.CCValid = SystemZ::CCMASK_FCMP;
2402  C.Opcode = SystemZISD::FCMP;
2403  adjustForFNeg(C);
2404  } else {
2405  C.CCValid = SystemZ::CCMASK_ICMP;
2406  C.Opcode = SystemZISD::ICMP;
2407  // Choose the type of comparison. Equality and inequality tests can
2408  // use either signed or unsigned comparisons. The choice also doesn't
2409  // matter if both sign bits are known to be clear. In those cases we
2410  // want to give the main isel code the freedom to choose whichever
2411  // form fits best.
2412  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2413  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2414  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2415  C.ICmpType = SystemZICMP::Any;
2416  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2417  C.ICmpType = SystemZICMP::UnsignedOnly;
2418  else
2419  C.ICmpType = SystemZICMP::SignedOnly;
2420  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2421  adjustForRedundantAnd(DAG, DL, C);
2422  adjustZeroCmp(DAG, DL, C);
2423  adjustSubwordCmp(DAG, DL, C);
2424  adjustForSubtraction(DAG, DL, C);
2425  adjustForLTGFR(C);
2426  adjustICmpTruncate(DAG, DL, C);
2427  }
2428 
2429  if (shouldSwapCmpOperands(C)) {
2430  std::swap(C.Op0, C.Op1);
2431  C.CCMask = reverseCCMask(C.CCMask);
2432  }
2433 
2434  adjustForTestUnderMask(DAG, DL, C);
2435  return C;
2436 }
2437 
2438 // Emit the comparison instruction described by C.
2439 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2440  if (!C.Op1.getNode()) {
2441  SDNode *Node;
2442  switch (C.Op0.getOpcode()) {
2443  case ISD::INTRINSIC_W_CHAIN:
2444  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2445  return SDValue(Node, 0);
2446  case ISD::INTRINSIC_WO_CHAIN:
2447  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2448  return SDValue(Node, Node->getNumValues() - 1);
2449  default:
2450  llvm_unreachable("Invalid comparison operands");
2451  }
2452  }
2453  if (C.Opcode == SystemZISD::ICMP)
2454  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2455  DAG.getConstant(C.ICmpType, DL, MVT::i32));
2456  if (C.Opcode == SystemZISD::TM) {
2457  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2458  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2459  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2460  DAG.getConstant(RegisterOnly, DL, MVT::i32));
2461  }
2462  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2463 }
2464 
2465 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2466 // 64 bits. Extend is the extension type to use. Store the high part
2467 // in Hi and the low part in Lo.
2468 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2469  SDValue Op0, SDValue Op1, SDValue &Hi,
2470  SDValue &Lo) {
2471  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2472  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2473  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2474  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2475  DAG.getConstant(32, DL, MVT::i64));
2476  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2477  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2478 }
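// Illustrative sketch (annotation, not part of the original file): for i32
// operands a and b the helper above computes, in effect,
//
//   Prod = ext64(a) * ext64(b);   // Extend selects sign or zero extension
//   Hi   = trunc32(Prod >> 32);
//   Lo   = trunc32(Prod);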
2479 
2480 // Lower a binary operation that produces two VT results, one in each
2481 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2482 // and Opcode performs the GR128 operation. Store the even register result
2483 // in Even and the odd register result in Odd.
2484 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2485  unsigned Opcode, SDValue Op0, SDValue Op1,
2486  SDValue &Even, SDValue &Odd) {
2487  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2488  bool Is32Bit = is32Bit(VT);
2489  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2490  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2491 }
2492 
2493 // Return an i32 value that is 1 if the CC value produced by CCReg is
2494 // in the mask CCMask and 0 otherwise. CC is known to have a value
2495 // in CCValid, so other values can be ignored.
2496 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2497  unsigned CCValid, unsigned CCMask) {
2498  SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
2499  DAG.getConstant(0, DL, MVT::i32),
2500  DAG.getConstant(CCValid, DL, MVT::i32),
2501  DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
2502  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2503 }
2504 
2505 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2506 // be done directly. IsFP is true if CC is for a floating-point rather than
2507 // integer comparison.
2508 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
2509  switch (CC) {
2510  case ISD::SETOEQ:
2511  case ISD::SETEQ:
2512  return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
2513 
2514  case ISD::SETOGE:
2515  case ISD::SETGE:
2516  return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
2517 
2518  case ISD::SETOGT:
2519  case ISD::SETGT:
2520  return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
2521 
2522  case ISD::SETUGT:
2523  return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
2524 
2525  default:
2526  return 0;
2527  }
2528 }
2529 
2530 // Return the SystemZISD vector comparison operation for CC or its inverse,
2531 // or 0 if neither can be done directly. Indicate in Invert whether the
2532 // result is for the inverse of CC. IsFP is true if CC is for a
2533 // floating-point rather than integer comparison.
2534 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
2535  bool &Invert) {
2536  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2537  Invert = false;
2538  return Opcode;
2539  }
2540 
2541  CC = ISD::getSetCCInverse(CC, !IsFP);
2542  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2543  Invert = true;
2544  return Opcode;
2545  }
2546 
2547  return 0;
2548 }
2549 
2550 // Return a v2f64 that contains the extended form of elements Start and Start+1
2551 // of v4f32 value Op.
2552 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2553  SDValue Op) {
2554  int Mask[] = { Start, -1, Start + 1, -1 };
2555  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2556  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2557 }
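// Illustrative sketch (annotation, not part of the original file): with
// Start == 0 the shuffle mask is { 0, -1, 1, -1 }, placing elements 0 and 1
// of the v4f32 input in the even lanes so that VEXTEND can widen them into
// the two f64 elements of the result.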
2558 
2559 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2560 // producing a result of type VT.
2561 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2562  const SDLoc &DL, EVT VT,
2563  SDValue CmpOp0,
2564  SDValue CmpOp1) const {
2565  // There is no hardware support for v4f32 (unless we have the vector
2566  // enhancements facility 1), so extend the vector into two v2f64s
2567  // and compare those.
2568  if (CmpOp0.getValueType() == MVT::v4f32 &&
2569  !Subtarget.hasVectorEnhancements1()) {
2570  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
2571  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
2572  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
2573  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
2574  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2575  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2576  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2577  }
2578  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2579 }
2580 
2581 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2582 // an integer mask of type VT.
2583 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2584  const SDLoc &DL, EVT VT,
2585  ISD::CondCode CC,
2586  SDValue CmpOp0,
2587  SDValue CmpOp1) const {
2588  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2589  bool Invert = false;
2590  SDValue Cmp;
2591  switch (CC) {
2592  // Handle tests for order using (or (ogt y x) (oge x y)).
2593  case ISD::SETUO:
2594  Invert = true;
2595  LLVM_FALLTHROUGH;
2596  case ISD::SETO: {
2597  assert(IsFP && "Unexpected integer comparison");
2598  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2599  SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
2600  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2601  break;
2602  }
2603 
2604  // Handle <> tests using (or (ogt y x) (ogt x y)).
2605  case ISD::SETUEQ:
2606  Invert = true;
2607  LLVM_FALLTHROUGH;
2608  case ISD::SETONE: {
2609  assert(IsFP && "Unexpected integer comparison");
2610  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2611  SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
2612  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2613  break;
2614  }
2615 
2616  // Otherwise a single comparison is enough. It doesn't really
2617  // matter whether we try the inversion or the swap first, since
2618  // there are no cases where both work.
2619  default:
2620  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2621  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
2622  else {
2623  CC = ISD::getSetCCSwappedOperands(CC);
2624  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2625  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
2626  else
2627  llvm_unreachable("Unhandled comparison");
2628  }
2629  break;
2630  }
2631  if (Invert) {
2632  SDValue Mask =
2633  DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
2634  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2635  }
2636  return Cmp;
2637 }
2638 
2639 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2640  SelectionDAG &DAG) const {
2641  SDValue CmpOp0 = Op.getOperand(0);
2642  SDValue CmpOp1 = Op.getOperand(1);
2643  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2644  SDLoc DL(Op);
2645  EVT VT = Op.getValueType();
2646  if (VT.isVector())
2647  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2648 
2649  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2650  SDValue CCReg = emitCmp(DAG, DL, C);
2651  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2652 }
2653 
2654 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2655  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2656  SDValue CmpOp0 = Op.getOperand(2);
2657  SDValue CmpOp1 = Op.getOperand(3);
2658  SDValue Dest = Op.getOperand(4);
2659  SDLoc DL(Op);
2660 
2661  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2662  SDValue CCReg = emitCmp(DAG, DL, C);
2663  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
2664  Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
2665  DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
2666 }
2667 
2668 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2669 // allowing Pos and Neg to be wider than CmpOp.
2670 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
2671  return (Neg.getOpcode() == ISD::SUB &&
2672  Neg.getOperand(0).getOpcode() == ISD::Constant &&
2673  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
2674  Neg.getOperand(1) == Pos &&
2675  (Pos == CmpOp ||
2676  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
2677  Pos.getOperand(0) == CmpOp)));
2678 }
2679 
2680 // Return the absolute or negative absolute of Op; IsNegative decides which.
2681 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
2682  bool IsNegative) {
2683  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
2684  if (IsNegative)
2685  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
2686  DAG.getConstant(0, DL, Op.getValueType()), Op);
2687  return Op;
2688 }
2689 
2690 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
2691  SelectionDAG &DAG) const {
2692  SDValue CmpOp0 = Op.getOperand(0);
2693  SDValue CmpOp1 = Op.getOperand(1);
2694  SDValue TrueOp = Op.getOperand(2);
2695  SDValue FalseOp = Op.getOperand(3);
2696  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2697  SDLoc DL(Op);
2698 
2699  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2700 
2701  // Check for absolute and negative-absolute selections, including those
2702  // where the comparison value is sign-extended (for LPGFR and LNGFR).
2703  // This check supplements the one in DAGCombiner.
2704  if (C.Opcode == SystemZISD::ICMP &&
2705  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
2706  C.CCMask != SystemZ::CCMASK_CMP_NE &&
2707  C.Op1.getOpcode() == ISD::Constant &&
2708  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2709  if (isAbsolute(C.Op0, TrueOp, FalseOp))
2710  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
2711  if (isAbsolute(C.Op0, FalseOp, TrueOp))
2712  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
2713  }
2714 
2715  SDValue CCReg = emitCmp(DAG, DL, C);
2716  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
2717  DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};
2718 
2719  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
2720 }
2721 
2722 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
2723  SelectionDAG &DAG) const {
2724  SDLoc DL(Node);
2725  const GlobalValue *GV = Node->getGlobal();
2726  int64_t Offset = Node->getOffset();
2727  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2728  CodeModel::Model CM = DAG.getTarget().getCodeModel();
2729 
2730  SDValue Result;
2731  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
2732  // Assign anchors at 1<<12 byte boundaries.
2733  uint64_t Anchor = Offset & ~uint64_t(0xfff);
2734  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
2735  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2736 
2737  // The offset can be folded into the address if it is aligned to a halfword.
2738  Offset -= Anchor;
2739  if (Offset != 0 && (Offset & 1) == 0) {
2740  SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
2741  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
2742  Offset = 0;
2743  }
2744  } else {
2745  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
2746  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2747  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2748  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2749  }
2750 
2751  // If there was a non-zero offset that we didn't fold, create an explicit
2752  // addition for it.
2753  if (Offset != 0)
2754  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
2755  DAG.getConstant(Offset, DL, PtrVT));
2756 
2757  return Result;
2758 }
2759 
2760 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
2761  SelectionDAG &DAG,
2762  unsigned Opcode,
2763  SDValue GOTOffset) const {
2764  SDLoc DL(Node);
2765  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2766  SDValue Chain = DAG.getEntryNode();
2767  SDValue Glue;
2768 
2769  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
2770  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2771  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
2772  Glue = Chain.getValue(1);
2773  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
2774  Glue = Chain.getValue(1);
2775 
2776  // The first call operand is the chain and the second is the TLS symbol.
2777  SmallVector<SDValue, 8> Ops;
2778  Ops.push_back(Chain);
2779  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
2780  Node->getValueType(0),
2781  0, 0));
2782 
2783  // Add argument registers to the end of the list so that they are
2784  // known live into the call.
2785  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
2786  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
2787 
2788  // Add a register mask operand representing the call-preserved registers.
2789  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2790  const uint32_t *Mask =
2791  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
2792  assert(Mask && "Missing call preserved mask for calling convention");
2793  Ops.push_back(DAG.getRegisterMask(Mask));
2794 
2795  // Glue the call to the argument copies.
2796  Ops.push_back(Glue);
2797 
2798  // Emit the call.
2799  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2800  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
2801  Glue = Chain.getValue(1);
2802 
2803  // Copy the return value from %r2.
2804  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
2805 }
2806 
2807 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
2808  SelectionDAG &DAG) const {
2809  SDValue Chain = DAG.getEntryNode();
2810  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2811 
2812  // The high part of the thread pointer is in access register 0.
2813  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
2814  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
2815 
2816  // The low part of the thread pointer is in access register 1.
2817  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
2818  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
2819 
2820  // Merge them into a single 64-bit address.
2821  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
2822  DAG.getConstant(32, DL, PtrVT));
2823  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
2824 }
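// Illustrative sketch (annotation, not part of the original file): the value
// built above is
//
//   TP = (anyext64(A0) << 32) | zext64(A1);
//
// i.e. access registers 0 and 1 concatenated into one 64-bit thread pointer.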
2825 
2826 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
2827  SelectionDAG &DAG) const {
2828  if (DAG.getTarget().useEmulatedTLS())
2829  return LowerToTLSEmulatedModel(Node, DAG);
2830  SDLoc DL(Node);
2831  const GlobalValue *GV = Node->getGlobal();
2832  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2833  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
2834 
2835  SDValue TP = lowerThreadPointer(DL, DAG);
2836 
2837  // Get the offset of GA from the thread pointer, based on the TLS model.
2838  SDValue Offset;
2839  switch (model) {
2840  case TLSModel::GeneralDynamic: {
2841  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
2842  SystemZConstantPoolValue *CPV =
2843  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
2844 
2845  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2846  Offset = DAG.getLoad(
2847  PtrVT, DL, DAG.getEntryNode(), Offset,
2848  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2849 
2850  // Call __tls_get_offset to retrieve the offset.
2851  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
2852  break;
2853  }
2854 
2855  case TLSModel::LocalDynamic: {
2856  // Load the GOT offset of the module ID.
2857  SystemZConstantPoolValue *CPV =
2858  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
2859 
2860  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2861  Offset = DAG.getLoad(
2862  PtrVT, DL, DAG.getEntryNode(), Offset,
2863  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2864 
2865  // Call __tls_get_offset to retrieve the module base offset.
2866  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
2867 
2868  // Note: The SystemZLDCleanupPass will remove redundant computations
2869  // of the module base offset. Count total number of local-dynamic
2870  // accesses to trigger execution of that pass.
2871  SystemZMachineFunctionInfo* MFI =
2872  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
2873  MFI->incNumLocalDynamicTLSAccesses();
2874 
2875  // Add the per-symbol offset.
2876  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
2877 
2878  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
2879  DTPOffset = DAG.getLoad(
2880  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
2881  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2882 
2883  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
2884  break;
2885  }
2886 
2887  case TLSModel::InitialExec: {
2888  // Load the offset from the GOT.
2889  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2890  SystemZII::MO_INDNTPOFF);
2891  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
2892  Offset =
2893  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
2894  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2895  break;
2896  }
2897 
2898  case TLSModel::LocalExec: {
2899  // Force the offset into the constant pool and load it from there.
2900  SystemZConstantPoolValue *CPV =
2901  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
2902 
2903  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2904  Offset = DAG.getLoad(
2905  PtrVT, DL, DAG.getEntryNode(), Offset,
2906  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2907  break;
2908  }
2909  }
2910 
2911  // Add the base and offset together.
2912  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
2913 }
2914 
2915 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
2916  SelectionDAG &DAG) const {
2917  SDLoc DL(Node);
2918  const BlockAddress *BA = Node->getBlockAddress();
2919  int64_t Offset = Node->getOffset();
2920  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2921 
2922  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
2923  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2924  return Result;
2925 }
2926 
2927 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
2928  SelectionDAG &DAG) const {
2929  SDLoc DL(JT);
2930  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2931  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2932 
2933  // Use LARL to load the address of the table.
2934  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2935 }
2936 
2937 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
2938  SelectionDAG &DAG) const {
2939  SDLoc DL(CP);
2940  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2941 
2942  SDValue Result;
2943  if (CP->isMachineConstantPoolEntry())
2944  Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2945  CP->getAlignment());
2946  else
2947  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2948  CP->getAlignment(), CP->getOffset());
2949 
2950  // Use LARL to load the address of the constant pool entry.
2951  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2952 }
2953 
2954 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
2955  SelectionDAG &DAG) const {
2956  MachineFunction &MF = DAG.getMachineFunction();
2957  MachineFrameInfo &MFI = MF.getFrameInfo();
2958  MFI.setFrameAddressIsTaken(true);
2959 
2960  SDLoc DL(Op);
2961  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2962  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2963 
2964  // If the back chain frame index has not been allocated yet, do so.
2965  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
2966  int BackChainIdx = FI->getFramePointerSaveIndex();
2967  if (!BackChainIdx) {
2968  // By definition, the frame address is the address of the back chain.
2969  BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
2970  FI->setFramePointerSaveIndex(BackChainIdx);
2971  }
2972  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
2973 
2974  // FIXME The frontend should detect this case.
2975  if (Depth > 0) {
2976  report_fatal_error("Unsupported stack frame traversal count");
2977  }
2978 
2979  return BackChain;
2980 }
2981 
2982 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
2983  SelectionDAG &DAG) const {
2984  MachineFunction &MF = DAG.getMachineFunction();
2985  MachineFrameInfo &MFI = MF.getFrameInfo();
2986  MFI.setReturnAddressIsTaken(true);
2987 
2988  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
2989  return SDValue();
2990 
2991  SDLoc DL(Op);
2992  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2993  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2994 
2995  // FIXME The frontend should detect this case.
2996  if (Depth > 0) {
2997  report_fatal_error("Unsupported stack frame traversal count");
2998  }
2999 
3000  // Return R14D, which has the return address. Mark it an implicit live-in.
3001  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3002  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3003 }
3004 
3005 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3006  SelectionDAG &DAG) const {
3007  SDLoc DL(Op);
3008  SDValue In = Op.getOperand(0);
3009  EVT InVT = In.getValueType();
3010  EVT ResVT = Op.getValueType();
3011 
3012  // Convert loads directly. This is normally done by DAGCombiner,
3013  // but we need this case for bitcasts that are created during lowering
3014  // and which are then lowered themselves.
3015  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3016  if (ISD::isNormalLoad(LoadN)) {
3017  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3018  LoadN->getBasePtr(), LoadN->getMemOperand());
3019  // Update the chain uses.
3020  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3021  return NewLoad;
3022  }
3023 
3024  if (InVT == MVT::i32 && ResVT == MVT::f32) {
3025  SDValue In64;
3026  if (Subtarget.hasHighWord()) {
3027  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3028  MVT::i64);
3029  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3030  MVT::i64, SDValue(U64, 0), In);
3031  } else {
3032  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3033  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3034  DAG.getConstant(32, DL, MVT::i64));
3035  }
3036  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3037  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3038  DL, MVT::f32, Out64);
3039  }
3040  if (InVT == MVT::f32 && ResVT == MVT::i32) {
3041  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3042  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3043  MVT::f64, SDValue(U64, 0), In);
3044  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3045  if (Subtarget.hasHighWord())
3046  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3047  MVT::i32, Out64);
3048  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3049  DAG.getConstant(32, DL, MVT::i64));
3050  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3051  }
3052  llvm_unreachable("Unexpected bitcast combination");
3053 }
3054 
3055 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3056  SelectionDAG &DAG) const {
3057  MachineFunction &MF = DAG.getMachineFunction();
3058  SystemZMachineFunctionInfo *FuncInfo =
3059  MF.getInfo<SystemZMachineFunctionInfo>();
3060  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3061 
3062  SDValue Chain = Op.getOperand(0);
3063  SDValue Addr = Op.getOperand(1);
3064  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3065  SDLoc DL(Op);
3066 
3067  // The initial values of each field.
3068  const unsigned NumFields = 4;
3069  SDValue Fields[NumFields] = {
3070  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3071  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3072  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3073  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3074  };
3075 
3076  // Store each field into its respective slot.
3077  SDValue MemOps[NumFields];
3078  unsigned Offset = 0;
3079  for (unsigned I = 0; I < NumFields; ++I) {
3080  SDValue FieldAddr = Addr;
3081  if (Offset != 0)
3082  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3083  DAG.getIntPtrConstant(Offset, DL));
3084  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3085  MachinePointerInfo(SV, Offset));
3086  Offset += 8;
3087  }
3088  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3089 }
3090 
3091 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3092  SelectionDAG &DAG) const {
3093  SDValue Chain = Op.getOperand(0);
3094  SDValue DstPtr = Op.getOperand(1);
3095  SDValue SrcPtr = Op.getOperand(2);
3096  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3097  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3098  SDLoc DL(Op);
3099 
3100  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
3101  /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
3102  /*isTailCall*/false,
3103  MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
3104 }
3105 
3106 SDValue SystemZTargetLowering::
3107 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
3108  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3109  MachineFunction &MF = DAG.getMachineFunction();
3110  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3111  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3112 
3113  SDValue Chain = Op.getOperand(0);
3114  SDValue Size = Op.getOperand(1);
3115  SDValue Align = Op.getOperand(2);
3116  SDLoc DL(Op);
3117 
3118  // If the user has set the no alignment function attribute, ignore
3119  // alloca alignments.
3120  uint64_t AlignVal = (RealignOpt ?
3121  dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3122 
3123  uint64_t StackAlign = TFI->getStackAlignment();
3124  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3125  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3126 
3127  unsigned SPReg = getStackPointerRegisterToSaveRestore();
3128  SDValue NeededSpace = Size;
3129 
3130  // Get a reference to the stack pointer.
3131  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3132 
3133  // If we need a backchain, save it now.
3134  SDValue Backchain;
3135  if (StoreBackchain)
3136  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3137 
3138  // Add extra space for alignment if needed.
3139  if (ExtraAlignSpace)
3140  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3141  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3142 
3143  // Get the new stack pointer value.
3144  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3145 
3146  // Copy the new stack pointer back.
3147  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3148 
3149  // The allocated data lives above the 160 bytes allocated for the standard
3150  // frame, plus any outgoing stack arguments. We don't know how much that
3151  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3152  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3153  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3154 
3155  // Dynamically realign if needed.
3156  if (RequiredAlign > StackAlign) {
3157  Result =
3158  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3159  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3160  Result =
3161  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3162  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3163  }
3164 
3165  if (StoreBackchain)
3166  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3167 
3168  SDValue Ops[2] = { Result, Chain };
3169  return DAG.getMergeValues(Ops, DL);
3170 }
3171 
3172 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3173  SDValue Op, SelectionDAG &DAG) const {
3174  SDLoc DL(Op);
3175 
3176  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3177 }
3178 
3179 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3180  SelectionDAG &DAG) const {
3181  EVT VT = Op.getValueType();
3182  SDLoc DL(Op);
3183  SDValue Ops[2];
3184  if (is32Bit(VT))
3185  // Just do a normal 64-bit multiplication and extract the results.
3186  // We define this so that it can be used for constant division.
3187  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3188  Op.getOperand(1), Ops[1], Ops[0]);
3189  else if (Subtarget.hasMiscellaneousExtensions2())
3190  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3191  // the high result in the even register. ISD::SMUL_LOHI is defined to
3192  // return the low half first, so the results are in reverse order.
3193  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3194  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3195  else {
3196  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3197  //
3198  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3199  //
3200  // but using the fact that the upper halves are either all zeros
3201  // or all ones:
3202  //
3203  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3204  //
3205  // and grouping the right terms together since they are quicker than the
3206  // multiplication:
3207  //
3208  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3209  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3210  SDValue LL = Op.getOperand(0);
3211  SDValue RL = Op.getOperand(1);
3212  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3213  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3214  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3215  // the high result in the even register. ISD::SMUL_LOHI is defined to
3216  // return the low half first, so the results are in reverse order.
3217  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3218  LL, RL, Ops[1], Ops[0]);
3219  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3220  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3221  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3222  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3223  }
3224  return DAG.getMergeValues(Ops, DL);
3225 }
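// Illustrative sketch (annotation, not part of the original file): a worked
// instance of the identity used above, with assumed 64-bit inputs ll = -1,
// rl = 1.
//
//   lh = ll >> 63 = -1,  rh = rl >> 63 = 0
//   UMUL_LOHI(ll, rl)      -> Hi = 0, Lo = 0xffffffffffffffff
//   (lh & rl) + (ll & rh)  == 1
//   Hi - 1 == -1, so the signed 128-bit product is -1, as expected.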
3226 
3227 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3228  SelectionDAG &DAG) const {
3229  EVT VT = Op.getValueType();
3230  SDLoc DL(Op);
3231  SDValue Ops[2];
3232  if (is32Bit(VT))
3233  // Just do a normal 64-bit multiplication and extract the results.
3234  // We define this so that it can be used for constant division.
3235  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3236  Op.getOperand(1), Ops[1], Ops[0]);
3237  else
3238  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3239  // the high result in the even register. ISD::UMUL_LOHI is defined to
3240  // return the low half first, so the results are in reverse order.
3241  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3242  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3243  return DAG.getMergeValues(Ops, DL);
3244 }
3245 
3246 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3247  SelectionDAG &DAG) const {
3248  SDValue Op0 = Op.getOperand(0);
3249  SDValue Op1 = Op.getOperand(1);
3250  EVT VT = Op.getValueType();
3251  SDLoc DL(Op);
3252 
3253  // We use DSGF for 32-bit division. This means the first operand must
3254  // always be 64-bit, and the second operand should be 32-bit whenever
3255  // that is possible, to improve performance.
3256  if (is32Bit(VT))
3257  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3258  else if (DAG.ComputeNumSignBits(Op1) > 32)
3259  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3260 
3261  // DSG(F) returns the remainder in the even register and the
3262  // quotient in the odd register.
3263  SDValue Ops[2];
3264  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3265  return DAG.getMergeValues(Ops, DL);
3266 }
3267 
3268 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3269  SelectionDAG &DAG) const {
3270  EVT VT = Op.getValueType();
3271  SDLoc DL(Op);
3272 
3273  // DL(G) returns the remainder in the even register and the
3274  // quotient in the odd register.
3275  SDValue Ops[2];
3276  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3277  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3278  return DAG.getMergeValues(Ops, DL);
3279 }
3280 
3281 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3282  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3283 
3284  // Get the known-zero masks for each operand.
3285  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3286  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3287  DAG.computeKnownBits(Ops[1])};
3288 
3289  // See if the upper 32 bits of one operand and the lower 32 bits of the
3290  // other are known zero. They are the low and high operands respectively.
3291  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3292  Known[1].Zero.getZExtValue() };
3293  unsigned High, Low;
3294  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3295  High = 1, Low = 0;
3296  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3297  High = 0, Low = 1;
3298  else
3299  return Op;
3300 
3301  SDValue LowOp = Ops[Low];
3302  SDValue HighOp = Ops[High];
3303 
3304  // If the high part is a constant, we're better off using IILH.
3305  if (HighOp.getOpcode() == ISD::Constant)
3306  return Op;
3307 
3308  // If the low part is a constant that is outside the range of LHI,
3309  // then we're better off using IILF.
3310  if (LowOp.getOpcode() == ISD::Constant) {
3311  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3312  if (!isInt<16>(Value))
3313  return Op;
3314  }
3315 
3316  // Check whether the high part is an AND that doesn't change the
3317  // high 32 bits and just masks out low bits. We can skip it if so.
3318  if (HighOp.getOpcode() == ISD::AND &&
3319  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3320  SDValue HighOp0 = HighOp.getOperand(0);
3321  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3322  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3323  HighOp = HighOp0;
3324  }
3325 
3326  // Take advantage of the fact that all GR32 operations only change the
3327  // low 32 bits by truncating Low to an i32 and inserting it directly
3328  // using a subreg. The interesting cases are those where the truncation
3329  // can be folded.
3330  SDLoc DL(Op);
3331  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3332  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3333  MVT::i64, HighOp, Low32);
3334 }
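
// A minimal standalone sketch of the trick above, assuming <cstdint> and
// <cassert>; the helper name is hypothetical and not part of this lowering.
// When the low 32 bits of one OR operand and the high 32 bits of the other
// are known zero, the OR reduces to inserting the low word of one value into
// the other, which is what the subreg_l32 insertion expresses.
static uint64_t orAsLowWordInsertion(uint64_t HighOnly, uint64_t LowOnly) {
  assert((HighOnly & 0xffffffffULL) == 0 && "low 32 bits must be known zero");
  assert((LowOnly >> 32) == 0 && "high 32 bits must be known zero");
  // The insertion form of the OR.
  return HighOnly | uint64_t(uint32_t(LowOnly));
}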
3335 
3336 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3337 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3338  SelectionDAG &DAG) const {
3339  SDNode *N = Op.getNode();
3340  SDValue LHS = N->getOperand(0);
3341  SDValue RHS = N->getOperand(1);
3342  SDLoc DL(N);
3343  unsigned BaseOp = 0;
3344  unsigned CCValid = 0;
3345  unsigned CCMask = 0;
3346 
3347  switch (Op.getOpcode()) {
3348  default: llvm_unreachable("Unknown instruction!");
3349  case ISD::SADDO:
3350  BaseOp = SystemZISD::SADDO;
3351  CCValid = SystemZ::CCMASK_ARITH;
3352  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3353  break;
3354  case ISD::SSUBO:
3355  BaseOp = SystemZISD::SSUBO;
3356  CCValid = SystemZ::CCMASK_ARITH;
3357  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3358  break;
3359  case ISD::UADDO:
3360  BaseOp = SystemZISD::UADDO;
3361  CCValid = SystemZ::CCMASK_LOGICAL;
3362  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3363  break;
3364  case ISD::USUBO:
3365  BaseOp = SystemZISD::USUBO;
3366  CCValid = SystemZ::CCMASK_LOGICAL;
3367  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3368  break;
3369  }
3370 
3371  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3372  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3373 
3374  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3375  if (N->getValueType(1) == MVT::i1)
3376  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3377 
3378  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3379 }
3380 
3381 // Lower ADDCARRY/SUBCARRY nodes.
3382 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3383  SelectionDAG &DAG) const {
3384 
3385  SDNode *N = Op.getNode();
3386  MVT VT = N->getSimpleValueType(0);
3387 
3388  // Let legalize expand this if it isn't a legal type yet.
3389  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3390  return SDValue();
3391 
3392  SDValue LHS = N->getOperand(0);
3393  SDValue RHS = N->getOperand(1);
3394  SDValue Carry = Op.getOperand(2);
3395  SDLoc DL(N);
3396  unsigned BaseOp = 0;
3397  unsigned CCValid = 0;
3398  unsigned CCMask = 0;
3399 
3400  switch (Op.getOpcode()) {
3401  default: llvm_unreachable("Unknown instruction!");
3402  case ISD::ADDCARRY:
3403  BaseOp = SystemZISD::ADDCARRY;
3404  CCValid = SystemZ::CCMASK_LOGICAL;
3405  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3406  break;
3407  case ISD::SUBCARRY:
3408  BaseOp = SystemZISD::SUBCARRY;
3409  CCValid = SystemZ::CCMASK_LOGICAL;
3410  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3411  break;
3412  }
3413 
3414  // Set the condition code from the carry flag.
3415  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3416  DAG.getConstant(CCValid, DL, MVT::i32),
3417  DAG.getConstant(CCMask, DL, MVT::i32));
3418 
3419  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3420  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3421 
3422  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3423  if (N->getValueType(1) == MVT::i1)
3424  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3425 
3426  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3427 }
3428 
3429 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3430  SelectionDAG &DAG) const {
3431  EVT VT = Op.getValueType();
3432  SDLoc DL(Op);
3433  Op = Op.getOperand(0);
3434 
3435  // Handle vector types via VPOPCT.
3436  if (VT.isVector()) {
3437  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3438  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3439  switch (VT.getScalarSizeInBits()) {
3440  case 8:
3441  break;
3442  case 16: {
3443  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3444  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3445  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3446  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3447  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3448  break;
3449  }
3450  case 32: {
3451  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3452  DAG.getConstant(0, DL, MVT::i32));
3453  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3454  break;
3455  }
3456  case 64: {
3457  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3458  DAG.getConstant(0, DL, MVT::i32));
3459  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3460  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3461  break;
3462  }
3463  default:
3464  llvm_unreachable("Unexpected type");
3465  }
3466  return Op;
3467  }
3468 
3469  // Get the known-zero mask for the operand.
3470  KnownBits Known = DAG.computeKnownBits(Op);
3471  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
3472  if (NumSignificantBits == 0)
3473  return DAG.getConstant(0, DL, VT);
3474 
3475  // Skip known-zero high parts of the operand.
3476  int64_t OrigBitSize = VT.getSizeInBits();
3477  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3478  BitSize = std::min(BitSize, OrigBitSize);
3479 
3480  // The POPCNT instruction counts the number of bits in each byte.
3481  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3482  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3483  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3484 
3485  // Add up per-byte counts in a binary tree. All bits of Op at
3486  // position larger than BitSize remain zero throughout.
3487  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3488  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3489  if (BitSize != OrigBitSize)
3490  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3491  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3492  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3493  }
3494 
3495  // Extract overall result from high byte.
3496  if (BitSize > 8)
3497  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3498  DAG.getConstant(BitSize - 8, DL, VT));
3499 
3500  return Op;
3501 }
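
// A standalone sketch of the same binary-tree reduction on a plain 64-bit
// value, assuming <cstdint>; the helper name is hypothetical. It assumes the
// per-byte population counts have already been formed, as POPCNT does above,
// and mirrors the full-width case (BitSize == 64).
static uint64_t sumBytePopcounts(uint64_t PerByteCounts) {
  // Shift-and-add so that the total accumulates into the top byte.
  for (unsigned Shift = 32; Shift >= 8; Shift /= 2)
    PerByteCounts += PerByteCounts << Shift;
  // Extract the overall result from the high byte.
  return PerByteCounts >> 56;
}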
3502 
3503 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3504  SelectionDAG &DAG) const {
3505  SDLoc DL(Op);
3506  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3507  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3508  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3509  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3510 
3511  // The only fence that needs an instruction is a sequentially-consistent
3512  // cross-thread fence.
3513  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3514  FenceSSID == SyncScope::System) {
3515  return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3516  Op.getOperand(0)),
3517  0);
3518  }
3519 
3520  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3521  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3522 }
3523 
3524 // Op is an atomic load. Lower it into a normal volatile load.
3525 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3526  SelectionDAG &DAG) const {
3527  auto *Node = cast<AtomicSDNode>(Op.getNode());
3528  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3529  Node->getChain(), Node->getBasePtr(),
3530  Node->getMemoryVT(), Node->getMemOperand());
3531 }
3532 
3533 // Op is an atomic store. Lower it into a normal volatile store.
3534 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3535  SelectionDAG &DAG) const {
3536  auto *Node = cast<AtomicSDNode>(Op.getNode());
3537  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3538  Node->getBasePtr(), Node->getMemoryVT(),
3539  Node->getMemOperand());
3540  // We have to enforce sequential consistency by performing a
3541  // serialization operation after the store.
3542  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
3543  Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3544  MVT::Other, Chain), 0);
3545  return Chain;
3546 }
3547 
3548 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the first
3549 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
3550 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3551  SelectionDAG &DAG,
3552  unsigned Opcode) const {
3553  auto *Node = cast<AtomicSDNode>(Op.getNode());
3554 
3555  // 32-bit operations need no code outside the main loop.
3556  EVT NarrowVT = Node->getMemoryVT();
3557  EVT WideVT = MVT::i32;
3558  if (NarrowVT == WideVT)
3559  return Op;
3560 
3561  int64_t BitSize = NarrowVT.getSizeInBits();
3562  SDValue ChainIn = Node->getChain();
3563  SDValue Addr = Node->getBasePtr();
3564  SDValue Src2 = Node->getVal();
3565  MachineMemOperand *MMO = Node->getMemOperand();
3566  SDLoc DL(Node);
3567  EVT PtrVT = Addr.getValueType();
3568 
3569  // Convert atomic subtracts of constants into additions.
3570  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3571  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3573  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3574  }
3575 
3576  // Get the address of the containing word.
3577  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3578  DAG.getConstant(-4, DL, PtrVT));
3579 
3580  // Get the number of bits that the word must be rotated left in order
3581  // to bring the field to the top bits of a GR32.
3582  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3583  DAG.getConstant(3, DL, PtrVT));
3584  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3585 
3586  // Get the complementing shift amount, for rotating a field in the top
3587  // bits back to its proper position.
3588  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3589  DAG.getConstant(0, DL, WideVT), BitShift);
3590 
3591  // Extend the source operand to 32 bits and prepare it for the inner loop.
3592  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3593  // operations require the source to be shifted in advance. (This shift
3594  // can be folded if the source is constant.) For AND and NAND, the lower
3595  // bits must be set, while for other opcodes they should be left clear.
3596  if (Opcode != SystemZISD::ATOMIC_SWAPW)
3597  Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3598  DAG.getConstant(32 - BitSize, DL, WideVT));
3599  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3600  Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3601  Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3602  DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3603 
3604  // Construct the ATOMIC_LOADW_* node.
3605  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3606  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
3607  DAG.getConstant(BitSize, DL, WideVT) };
3608  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
3609  NarrowVT, MMO);
3610 
3611  // Rotate the result of the final CS so that the field is in the lower
3612  // bits of a GR32, then truncate it.
3613  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
3614  DAG.getConstant(BitSize, DL, WideVT));
3615  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
3616 
3617  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
3618  return DAG.getMergeValues(RetOps, DL);
3619 }
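
// A standalone sketch of the address arithmetic above, assuming <cstdint>;
// the struct and helper names are hypothetical. The containing aligned word
// is rotated left by BitShift so the narrow field sits in the top bits of a
// GR32, updated there, and rotated back by NegBitShift.
struct SubwordAtomicAccess {
  uint64_t AlignedAddr; // address of the containing 4-byte word (Addr & -4)
  unsigned BitShift;    // rotate-left amount that brings the field to the top
  unsigned NegBitShift; // complementary rotate that puts the field back
};

static SubwordAtomicAccess computeSubwordAccess(uint64_t Addr) {
  SubwordAtomicAccess A;
  A.AlignedAddr = Addr & ~uint64_t(3);
  A.BitShift = unsigned(Addr % 4) * 8;     // (Addr << 3), taken modulo 32
  A.NegBitShift = (32 - A.BitShift) % 32;  // 0 - BitShift as a rotate count
  return A;
}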
3620 
3621 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
3622 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3623 // operations into additions.
3624 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
3625  SelectionDAG &DAG) const {
3626  auto *Node = cast<AtomicSDNode>(Op.getNode());
3627  EVT MemVT = Node->getMemoryVT();
3628  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
3629  // A full-width operation.
3630  assert(Op.getValueType() == MemVT && "Mismatched VTs");
3631  SDValue Src2 = Node->getVal();
3632  SDValue NegSrc2;
3633  SDLoc DL(Src2);
3634 
3635  if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
3636  // Use an addition if the operand is constant and either LAA(G) is
3637  // available or the negative value is in the range of A(G)FHI.
3638  int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
3639  if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
3640  NegSrc2 = DAG.getConstant(Value, DL, MemVT);
3641  } else if (Subtarget.hasInterlockedAccess1())
3642  // Use LAA(G) if available.
3643  NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
3644  Src2);
3645 
3646  if (NegSrc2.getNode())
3647  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
3648  Node->getChain(), Node->getBasePtr(), NegSrc2,
3649  Node->getMemOperand());
3650 
3651  // Use the node as-is.
3652  return Op;
3653  }
3654 
3655  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
3656 }
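
// For example, a 32-bit atomicrmw sub of the constant 1 becomes an
// ATOMIC_LOAD_ADD of -1 here; the addition can then be selected to LAA when
// the interlocked-access facility is available, or folded as an immediate
// into the compare-and-swap loop otherwise.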
3657 
3658 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
3659 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
3660  SelectionDAG &DAG) const {
3661  auto *Node = cast<AtomicSDNode>(Op.getNode());
3662  SDValue ChainIn = Node->getOperand(0);
3663  SDValue Addr = Node->getOperand(1);
3664  SDValue CmpVal = Node->getOperand(2);
3665  SDValue SwapVal = Node->getOperand(3);
3666  MachineMemOperand *MMO = Node->getMemOperand();
3667  SDLoc DL(Node);
3668 
3669  // We have native support for 32-bit and 64-bit compare and swap, but we
3670  // still need to expand extracting the "success" result from the CC.
3671  EVT NarrowVT = Node->getMemoryVT();
3672  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
3673  if (NarrowVT == WideVT) {
3674  SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3675  SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
3676  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
3677  DL, Tys, Ops, NarrowVT, MMO);
3678  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3679  SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
3680 
3681  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3682  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3683  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3684  return SDValue();
3685  }
3686 
3687  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
3688  // via a fullword ATOMIC_CMP_SWAPW operation.
3689  int64_t BitSize = NarrowVT.getSizeInBits();
3690  EVT PtrVT = Addr.getValueType();
3691 
3692  // Get the address of the containing word.
3693  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3694  DAG.getConstant(-4, DL, PtrVT));
3695 
3696  // Get the number of bits that the word must be rotated left in order
3697  // to bring the field to the top bits of a GR32.
3698  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3699  DAG.getConstant(3, DL, PtrVT));
3700  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3701 
3702  // Get the complementing shift amount, for rotating a field in the top
3703  // bits back to its proper position.
3704  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3705  DAG.getConstant(0, DL, WideVT), BitShift);
3706 
3707  // Construct the ATOMIC_CMP_SWAPW node.
3708  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3709  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
3710  NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
3711  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
3712  VTList, Ops, NarrowVT, MMO);
3713  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3714  SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
3715 
3716  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3717  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3718  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3719  return SDValue();
3720 }
3721 
3722 MachineMemOperand::Flags
3723 SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
3724  // Because of how we convert atomic_load and atomic_store to normal loads and
3725  // stores in the DAG, we need to ensure that the MMOs are marked volatile
3726  // since DAGCombine hasn't been updated to account for atomic but
3727  // non-volatile loads. (See D57601)
3728  if (auto *SI = dyn_cast<StoreInst>(&I))
3729  if (SI->isAtomic())
3730  return MachineMemOperand::MOVolatile;
3731  if (auto *LI = dyn_cast<LoadInst>(&I))
3732  if (LI->isAtomic())
3733  return MachineMemOperand::MOVolatile;
3734  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
3735  if (AI->isAtomic())
3736  return MachineMemOperand::MOVolatile;
3737  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
3738  if (AI->isAtomic())
3739  return MachineMemOperand::MOVolatile;
3740  return MachineMemOperand::MONone;
3741 }
3742 
3743 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
3744  SelectionDAG &DAG) const {
3745  MachineFunction &MF = DAG.getMachineFunction();
3746  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3747  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
3748  SystemZ::R15D, Op.getValueType());
3749 }
3750 
3751 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
3752  SelectionDAG &DAG) const {
3753  MachineFunction &MF = DAG.getMachineFunction();
3754  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3755  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3756 
3757  SDValue Chain = Op.getOperand(0);
3758  SDValue NewSP = Op.getOperand(1);
3759  SDValue Backchain;
3760  SDLoc DL(Op);
3761 
3762  if (StoreBackchain) {
3763  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
3764  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3765  }
3766 
3767  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
3768 
3769  if (StoreBackchain)
3770  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3771 
3772  return Chain;
3773 }
3774 
3775 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
3776  SelectionDAG &DAG) const {
3777  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3778  if (!IsData)
3779  // Just preserve the chain.
3780  return Op.getOperand(0);
3781 
3782  SDLoc DL(Op);
3783  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3784  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
3785  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
3786  SDValue Ops[] = {
3787  Op.getOperand(0),
3788  DAG.getConstant(Code, DL, MVT::i32),
3789  Op.getOperand(1)
3790  };
3791  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
3792  Node->getVTList(), Ops,
3793  Node->getMemoryVT(), Node->getMemOperand());
3794 }
3795 
3796 // Convert condition code in CCReg to an i32 value.
3797 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
3798  SDLoc DL(CCReg);
3799  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
3800  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
3801  DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
3802 }
3803 
3804 SDValue
3805 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3806  SelectionDAG &DAG) const {
3807  unsigned Opcode, CCValid;
3808  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
3809  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
3810  SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
3811  SDValue CC = getCCResult(DAG, SDValue(Node, 0));
3812  DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
3813  return SDValue();
3814  }
3815 
3816  return SDValue();
3817 }
3818 
3819 SDValue
3820 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3821  SelectionDAG &DAG) const {
3822  unsigned Opcode, CCValid;
3823  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
3824  SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
3825  if (Op->getNumValues() == 1)
3826  return getCCResult(DAG, SDValue(Node, 0));
3827  assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
3828  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
3829  SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
3830  }
3831 
3832  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3833  switch (Id) {
3834  case Intrinsic::thread_pointer:
3835  return lowerThreadPointer(SDLoc(Op), DAG);
3836 
3837  case Intrinsic::s390_vpdi:
3838  return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
3839  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3840 
3841  case Intrinsic::s390_vperm:
3842  return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
3843  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3844 
3845  case Intrinsic::s390_vuphb:
3846  case Intrinsic::s390_vuphh:
3847  case Intrinsic::s390_vuphf:
3848  return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
3849  Op.getOperand(1));
3850 
3851  case Intrinsic::s390_vuplhb:
3852  case Intrinsic::s390_vuplhh:
3853  case Intrinsic::s390_vuplhf:
3854  return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
3855  Op.getOperand(1));
3856 
3857  case Intrinsic::s390_vuplb:
3858  case Intrinsic::s390_vuplhw:
3859  case Intrinsic::s390_vuplf:
3860  return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
3861  Op.getOperand(1));
3862 
3863  case Intrinsic::s390_vupllb:
3864  case Intrinsic::s390_vupllh:
3865  case Intrinsic::s390_vupllf:
3866  return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
3867  Op.getOperand(1));
3868 
3869  case Intrinsic::s390_vsumb:
3870  case Intrinsic::s390_vsumh:
3871  case Intrinsic::s390_vsumgh:
3872  case Intrinsic::s390_vsumgf:
3873  case Intrinsic::s390_vsumqf:
3874  case Intrinsic::s390_vsumqg:
3875  return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
3876  Op.getOperand(1), Op.getOperand(2));
3877  }
3878 
3879  return SDValue();
3880 }
3881 
3882 namespace {
3883 // Says that SystemZISD operation Opcode can be used to perform the equivalent
3884 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
3885 // Operand is the constant third operand, otherwise it is the number of
3886 // bytes in each element of the result.
3887 struct Permute {
3888  unsigned Opcode;
3889  unsigned Operand;
3890  unsigned char Bytes[SystemZ::VectorBytes];
3891 };
3892 }
3893 
3894 static const Permute PermuteForms[] = {
3895  // VMRHG
3896  { SystemZISD::MERGE_HIGH, 8,
3897  { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
3898  // VMRHF
3899  { SystemZISD::MERGE_HIGH, 4,
3900  { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
3901  // VMRHH
3902  { SystemZISD::MERGE_HIGH, 2,
3903  { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
3904  // VMRHB
3905  { SystemZISD::MERGE_HIGH, 1,
3906  { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
3907  // VMRLG
3908  { SystemZISD::MERGE_LOW, 8,
3909  { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
3910  // VMRLF
3911  { SystemZISD::MERGE_LOW, 4,
3912  { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
3913  // VMRLH
3914  { SystemZISD::MERGE_LOW, 2,
3915  { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
3916  // VMRLB
3917  { SystemZISD::MERGE_LOW, 1,
3918  { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
3919  // VPKG
3920  { SystemZISD::PACK, 4,
3921  { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
3922  // VPKF
3923  { SystemZISD::PACK, 2,
3924  { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
3925  // VPKH
3926  { SystemZISD::PACK, 1,
3927  { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
3928  // VPDI V1, V2, 4 (low half of V1, high half of V2)
3929  { SystemZISD::PERMUTE_DWORDS, 4,
3930  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
3931  // VPDI V1, V2, 1 (high half of V1, low half of V2)
3932  { SystemZISD::PERMUTE_DWORDS, 1,
3933  { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
3934 };
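
// A short sketch of how these byte tables are read, assuming <cstdint>; the
// helper name is hypothetical. Selector values 0-15 pick bytes from the first
// operand and 16-31 pick bytes from the second, exactly as VPERM does.
static void applyPermuteBytes(const unsigned char (&Sel)[SystemZ::VectorBytes],
                              const uint8_t *Op0, const uint8_t *Op1,
                              uint8_t *Result) {
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
    Result[I] = Sel[I] < SystemZ::VectorBytes
                    ? Op0[Sel[I]]
                    : Op1[Sel[I] - SystemZ::VectorBytes];
}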
3935 
3936 // Called after matching a vector shuffle against a particular pattern.
3937 // Both the original shuffle and the pattern have two vector operands.
3938 // OpNos[0] is the operand of the original shuffle that should be used for
3939 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
3940 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
3941 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
3942 // for operands 0 and 1 of the pattern.
3943 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
3944  if (OpNos[0] < 0) {
3945  if (OpNos[1] < 0)
3946  return false;
3947  OpNo0 = OpNo1 = OpNos[1];
3948  } else if (OpNos[1] < 0) {
3949  OpNo0 = OpNo1 = OpNos[0];
3950  } else {
3951  OpNo0 = OpNos[0];
3952  OpNo1 = OpNos[1];
3953  }
3954  return true;
3955 }
3956 
3957 // Bytes is a VPERM-like permute vector, except that -1 is used for
3958 // undefined bytes. Return true if the VPERM can be implemented using P.
3959 // When returning true set OpNo0 to the VPERM operand that should be
3960 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
3961 //
3962 // For example, if swapping the VPERM operands allows P to match, OpNo0
3963 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
3964 // operand, but rewriting it to use two duplicated operands allows it to
3965 // match P, then OpNo0 and OpNo1 will be the same.
3966 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
3967  unsigned &OpNo0, unsigned &OpNo1) {
3968  int OpNos[] = { -1, -1 };
3969  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
3970  int Elt = Bytes[I];
3971  if (Elt >= 0) {
3972  // Make sure that the two permute vectors use the same suboperand
3973  // byte number. Only the operand numbers (the high bits) are
3974  // allowed to differ.
3975  if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
3976  return false;
3977  int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
3978  int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
3979  // Make sure that the operand mappings are consistent with previous
3980  // elements.
3981  if (OpNos[ModelOpNo] == 1 - RealOpNo)
3982  return false;
3983  OpNos[ModelOpNo] = RealOpNo;
3984  }
3985  }
3986  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3987 }
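
// For example, VMRHG's table above is { 0..7, 16..23 }. A permute vector of
// { 16..23, 0..7 } uses the same byte positions with the inputs swapped, so
// matchPermute succeeds with OpNo0 = 1 and OpNo1 = 0.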
3988 
3989 // As above, but search for a matching permute.
3990 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
3991  unsigned &OpNo0, unsigned &OpNo1) {
3992  for (auto &P : PermuteForms)
3993  if (matchPermute(Bytes, P, OpNo0, OpNo1))
3994  return &P;
3995  return nullptr;
3996 }
3997 
3998 // Bytes is a VPERM-like permute vector, except that -1 is used for
3999 // undefined bytes. This permute is an operand of an outer permute.
4000 // See whether redistributing the -1 bytes gives a shuffle that can be
4001 // implemented using P. If so, set Transform to a VPERM-like permute vector
4002 // that, when applied to the result of P, gives the original permute in Bytes.
4003 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4004  const Permute &P,
4005  SmallVectorImpl<int> &Transform) {
4006  unsigned To = 0;
4007  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4008  int Elt = Bytes[From];
4009  if (Elt < 0)
4010  // Byte number From of the result is undefined.
4011  Transform[From] = -1;
4012  else {
4013  while (P.Bytes[To] != Elt) {
4014  To += 1;
4015  if (To == SystemZ::VectorBytes)
4016  return false;
4017  }
4018  Transform[From] = To;
4019  }
4020  }
4021  return true;
4022 }
4023 
4024 // As above, but search for a matching permute.
4025 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4026  SmallVectorImpl<int> &Transform) {
4027  for (auto &P : PermuteForms)
4028  if (matchDoublePermute(Bytes, P, Transform))
4029  return &P;
4030  return nullptr;
4031 }
4032 
4033 // Convert the mask of the given shuffle op into a byte-level mask,
4034 // as if it had type vNi8.
4035 static bool getVPermMask(SDValue ShuffleOp,
4036  SmallVectorImpl<int> &Bytes) {
4037  EVT VT = ShuffleOp.getValueType();
4038  unsigned NumElements = VT.getVectorNumElements();
4039  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4040 
4041  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4042  Bytes.resize(NumElements * BytesPerElement, -1);
4043  for (unsigned I = 0; I < NumElements; ++I) {
4044  int Index = VSN->getMaskElt(I);
4045  if (Index >= 0)
4046  for (unsigned J = 0; J < BytesPerElement; ++J)
4047  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4048  }
4049  return true;
4050  }
4051  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4052  isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4053  unsigned Index = ShuffleOp.getConstantOperandVal(1);
4054  Bytes.resize(NumElements * BytesPerElement, -1);
4055  for (unsigned I = 0; I < NumElements; ++I)
4056  for (unsigned J = 0; J < BytesPerElement; ++J)
4057  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4058  return true;
4059  }
4060  return false;
4061 }
4062 
4063 // Bytes is a VPERM-like permute vector, except that -1 is used for
4064 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
4065 // the result come from a contiguous sequence of bytes from one input.
4066 // Set Base to the selector for the first byte if so.
4067 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4068  unsigned BytesPerElement, int &Base) {
4069  Base = -1;
4070  for (unsigned I = 0; I < BytesPerElement; ++I) {
4071  if (Bytes[Start + I] >= 0) {
4072  unsigned Elem = Bytes[Start + I];
4073  if (Base < 0) {
4074  Base = Elem - I;
4075  // Make sure the bytes would come from one input operand.
4076  if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4077  return false;
4078  } else if (unsigned(Base) != Elem - I)
4079  return false;
4080  }
4081  }
4082  return true;
4083 }
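
// For example, with BytesPerElement = 4, the selectors { 20, 21, 22, 23 } give
// Base = 20 (bytes 4-7 of the second input), whereas { 20, 21, 6, 7 } fails
// because the bytes are not contiguous within one operand.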
4084 
4085 // Bytes is a VPERM-like permute vector, except that -1 is used for
4086 // undefined bytes. Return true if it can be performed using VSLDI.
4087 // When returning true, set StartIndex to the shift amount and OpNo0
4088 // and OpNo1 to the VPERM operands that should be used as the first
4089 // and second shift operand respectively.
4090 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4091  unsigned &StartIndex, unsigned &OpNo0,
4092  unsigned &OpNo1) {
4093  int OpNos[] = { -1, -1 };
4094  int Shift = -1;
4095  for (unsigned I = 0; I < 16; ++I) {
4096  int Index = Bytes[I];
4097  if (Index >= 0) {
4098  int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4099  int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4100  int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4101  if (Shift < 0)
4102  Shift = ExpectedShift;
4103  else if (Shift != ExpectedShift)
4104  return false;
4105  // Make sure that the operand mappings are consistent with previous
4106  // elements.
4107  if (OpNos[ModelOpNo] == 1 - RealOpNo)
4108  return false;
4109  OpNos[ModelOpNo] = RealOpNo;
4110  }
4111  }
4112  StartIndex = Shift;
4113  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4114 }
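
// For example, Bytes = { 3, 4, ..., 15, 16, 17, 18 } shifts the concatenated
// operands left by three bytes, so this returns true with StartIndex = 3,
// OpNo0 = 0 and OpNo1 = 1.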
4115 
4116 // Create a node that performs P on operands Op0 and Op1, casting the
4117 // operands to the appropriate type. The type of the result is determined by P.
4118 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4119  const Permute &P, SDValue Op0, SDValue Op1) {
4120  // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4121  // elements of a PACK are twice as wide as the outputs.
4122  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4123  P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4124  P.Operand);
4125  // Cast both operands to the appropriate type.
4126  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4127  SystemZ::VectorBytes / InBytes);
4128  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4129  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4130  SDValue Op;
4131  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4132  SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
4133  Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4134  } else if (P.Opcode == SystemZISD::PACK) {
4135  MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4136  SystemZ::VectorBytes / P.Operand);
4137  Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4138  } else {
4139  Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4140  }
4141  return Op;
4142 }
4143 
4144 // Bytes is a VPERM-like permute vector, except that -1 is used for
4145 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4146 // VSLDI or VPERM.
4147 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4148  SDValue *Ops,
4149  const SmallVectorImpl<int> &Bytes) {
4150  for (unsigned I = 0; I < 2; ++I)
4151  Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4152 
4153  // First see whether VSLDI can be used.
4154  unsigned StartIndex, OpNo0, OpNo1;
4155  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4156  return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4157  Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
4158 
4159  // Fall back on VPERM. Construct an SDNode for the permute vector.
4160  SDValue IndexNodes[SystemZ::VectorBytes];
4161  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4162  if (Bytes[I] >= 0)
4163  IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4164  else
4165  IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4166  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4167  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
4168 }
4169 
4170 namespace {
4171 // Describes a general N-operand vector shuffle.
4172 struct GeneralShuffle {
4173  GeneralShuffle(EVT vt) : VT(vt) {}
4174  void addUndef();
4175  bool add(SDValue, unsigned);
4176  SDValue getNode(SelectionDAG &, const SDLoc &);
4177 
4178  // The operands of the shuffle.
4179  SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4180 
4181  // Index I is -1 if byte I of the result is undefined. Otherwise the
4182  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4183  // Bytes[I] / SystemZ::VectorBytes.
4184  SmallVector<int, SystemZ::VectorBytes> Bytes;
4185 
4186  // The type of the shuffle result.
4187  EVT VT;
4188 };
4189 }
4190 
4191 // Add an extra undefined element to the shuffle.
4192 void GeneralShuffle::addUndef() {
4193  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4194  for (unsigned I = 0; I < BytesPerElement; ++I)
4195  Bytes.push_back(-1);
4196 }
4197 
4198 // Add an extra element to the shuffle, taking it from element Elem of Op.
4199 // A null Op indicates a vector input whose value will be calculated later;
4200 // there is at most one such input per shuffle and it always has the same
4201 // type as the result. Aborts and returns false if the source vector elements
4202 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4203 // LLVM they become implicitly extended, but this is rare and not optimized.
4204 bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4205  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4206 
4207  // The source vector can have wider elements than the result,
4208  // either through an explicit TRUNCATE or because of type legalization.
4209  // We want the least significant part.
4210  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4211  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4212 
4213  // Return false if the source elements are smaller than their destination
4214  // elements.
4215  if (FromBytesPerElement < BytesPerElement)
4216  return false;
4217 
4218  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4219  (FromBytesPerElement - BytesPerElement));
4220 
4221  // Look through things like shuffles and bitcasts.
4222  while (Op.getNode()) {
4223  if (Op.getOpcode() == ISD::BITCAST)
4224  Op = Op.getOperand(0);
4225  else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4226  // See whether the bytes we need come from a contiguous part of one
4227  // operand.
4228  SmallVector<int, SystemZ::VectorBytes> OpBytes;
4229  if (!getVPermMask(Op, OpBytes))
4230  break;
4231  int NewByte;
4232  if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4233  break;
4234  if (NewByte < 0) {
4235  addUndef();
4236  return true;
4237  }
4238  Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4239  Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4240  } else if (Op.isUndef()) {
4241  addUndef();
4242  return true;
4243  } else
4244  break;
4245  }
4246 
4247  // Make sure that the source of the extraction is in Ops.
4248  unsigned OpNo = 0;
4249  for (; OpNo < Ops.size(); ++OpNo)
4250  if (Ops[OpNo] == Op)
4251  break;
4252  if (OpNo == Ops.size())
4253  Ops.push_back(Op);
4254 
4255  // Add the element to Bytes.
4256  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4257  for (unsigned I = 0; I < BytesPerElement; ++I)
4258  Bytes.push_back(Base + I);
4259 
4260  return true;
4261 }
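
// For example, adding element 1 of a v2i64 operand to a shuffle whose result
// has i32 elements gives FromBytesPerElement = 8 and BytesPerElement = 4, so
// Byte = 8 + (8 - 4) = 12: the least significant four bytes of that
// doubleword on this big-endian target.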
4262 
4263 // Return SDNodes for the completed shuffle.
4264 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4265  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4266 
4267  if (Ops.size() == 0)
4268  return DAG.getUNDEF(VT);
4269 
4270  // Make sure that there are at least two shuffle operands.
4271  if (Ops.size() == 1)
4272  Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4273 
4274  // Create a tree of shuffles, deferring root node until after the loop.
4275  // Try to redistribute the undefined elements of non-root nodes so that
4276  // the non-root shuffles match something like a pack or merge, then adjust
4277  // the parent node's permute vector to compensate for the new order.
4278  // Among other things, this copes with vectors like <2 x i16> that were
4279  // padded with undefined elements during type legalization.
4280  //
4281  // In the best case this redistribution will lead to the whole tree
4282  // using packs and merges. It should rarely be a loss in other cases.
4283  unsigned Stride = 1;
4284  for (; Stride * 2 < Ops.size(); Stride *= 2) {
4285  for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4286  SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
4287 
4288  // Create a mask for just these two operands.
4289  SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
4290  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4291  unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
4292  unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
4293  if (OpNo == I)
4294  NewBytes[J] = Byte;
4295  else if (OpNo == I + Stride)
4296  NewBytes[J] = SystemZ::VectorBytes + Byte;
4297  else
4298  NewBytes[J] = -1;
4299  }
4300  // See if it would be better to reorganize NewBytes to avoid using VPERM.
4301  SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
4302  if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
4303  Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
4304  // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4305  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4306  if (NewBytes[J] >= 0) {
4307  assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
4308  "Invalid double permute");
4309  Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
4310  } else
4311  assert(NewBytesMap[J] < 0 && "Invalid double permute");
4312  }
4313  } else {
4314  // Just use NewBytes on the operands.
4315  Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
4316  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
4317  if (NewBytes[J] >= 0)
4318  Bytes[J] = I * SystemZ::VectorBytes + J;
4319  }
4320  }
4321  }
4322 
4323  // Now we just have 2 inputs. Put the second operand in Ops[1].
4324  if (Stride > 1) {
4325  Ops[1] = Ops[Stride];
4326  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4327  if (Bytes[I] >= int(SystemZ::VectorBytes))
4328  Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
4329  }
4330 
4331  // Look for an instruction that can do the permute without resorting
4332  // to VPERM.
4333  unsigned OpNo0, OpNo1;
4334  SDValue Op;
4335  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
4336  Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
4337  else
4338  Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
4339  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4340 }
4341 
4342 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4343 static bool isScalarToVector(SDValue Op) {
4344  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
4345  if (!Op.getOperand(I).isUndef())
4346  return false;
4347  return true;
4348 }
4349 
4350 // Return a vector of type VT that contains Value in the first element.
4351 // The other elements don't matter.
4352 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4353  SDValue Value) {
4354  // If we have a constant, replicate it to all elements and let the
4355  // BUILD_VECTOR lowering take care of it.
4356  if (Value.getOpcode() == ISD::Constant ||
4357  Value.getOpcode() == ISD::ConstantFP) {
4358  SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
4359  return DAG.getBuildVector(VT, DL, Ops);
4360  }
4361  if (Value.isUndef())
4362  return DAG.getUNDEF(VT);
4363  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4364 }
4365 
4366 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
4367 // element 1. Used for cases in which replication is cheap.
4368 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4369  SDValue Op0, SDValue Op1) {
4370  if (Op0.isUndef()) {
4371  if (Op1.isUndef())
4372  return DAG.getUNDEF(VT);
4373  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
4374  }
4375  if (Op1.isUndef())
4376  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
4377  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
4378  buildScalarToVector(DAG, DL, VT, Op0),
4379  buildScalarToVector(DAG, DL, VT, Op1));
4380 }
4381 
4382 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4383 // vector for them.
4384 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
4385  SDValue Op1) {
4386  if (Op0.isUndef() && Op1.isUndef())
4387  return DAG.getUNDEF(MVT::v2i64);
4388  // If one of the two inputs is undefined then replicate the other one,
4389  // in order to avoid using another register unnecessarily.
4390  if (Op0.isUndef())
4391  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4392  else if (Op1.isUndef())
4393  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4394  else {
4395  Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4396  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4397  }
4398  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
4399 }
4400 
4401 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4402 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4403 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
4404 // would benefit from this representation and return it if so.
4405 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
4406  BuildVectorSDNode *BVN) {
4407  EVT VT = BVN->getValueType(0);
4408  unsigned NumElements = VT.getVectorNumElements();
4409 
4410  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4411  // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
4412  // need a BUILD_VECTOR, add an additional placeholder operand for that
4413  // BUILD_VECTOR and store its operands in ResidueOps.
4414  GeneralShuffle GS(VT);
4415  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
4416  bool FoundOne = false;
4417  for (unsigned I = 0; I < NumElements; ++I) {
4418  SDValue Op = BVN->getOperand(I);
4419  if (Op.getOpcode() == ISD::TRUNCATE)
4420  Op = Op.getOperand(0);
4421  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4422  Op.getOperand(1).getOpcode() == ISD::Constant) {
4423  unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
4424  if (!GS.add(Op.getOperand(0), Elem))
4425  return SDValue();
4426  FoundOne = true;
4427  } else if (Op.isUndef()) {
4428  GS.addUndef();
4429  } else {
4430  if (!GS.add(SDValue(), ResidueOps.size()))
4431  return SDValue();
4432  ResidueOps.push_back(BVN->getOperand(I));
4433  }
4434  }
4435 
4436  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
4437  if (!FoundOne)
4438  return SDValue();
4439 
4440  // Create the BUILD_VECTOR for the remaining elements, if any.
4441  if (!ResidueOps.empty()) {
4442  while (ResidueOps.size() < NumElements)
4443  ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
4444  for (auto &Op : GS.Ops) {
4445  if (!Op.getNode()) {
4446  Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
4447  break;
4448  }
4449  }
4450  }
4451  return GS.getNode(DAG, SDLoc(BVN));
4452 }
4453 
4454 // Combine GPR scalar values Elems into a vector of type VT.
4455 static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4456  SmallVectorImpl<SDValue> &Elems) {
4457  // See whether there is a single replicated value.
4458  SDValue Single;
4459  unsigned int NumElements = Elems.size();
4460  unsigned int Count = 0;
4461  for (auto Elem : Elems) {
4462  if (!Elem.isUndef()) {
4463  if (!Single.getNode())
4464  Single = Elem;
4465  else if (Elem != Single) {
4466  Single = SDValue();
4467  break;
4468  }
4469  Count += 1;
4470  }
4471  }
4472  // There are three cases here:
4473  //
4474  // - if the only defined element is a loaded one, the best sequence
4475  // is a replicating load.
4476  //
4477  // - otherwise, if the only defined element is an i64 value, we will
4478  // end up with the same VLVGP sequence regardless of whether we short-cut
4479  // for replication or fall through to the later code.
4480  //
4481  // - otherwise, if the only defined element is an i32 or smaller value,
4482  // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
4483  // This is only a win if the single defined element is used more than once.
4484  // In other cases we're better off using a single VLVGx.
4485  if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
4486  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
4487 
4488  // If all elements are loads, use VLREP/VLEs (below).
4489  bool AllLoads = true;
4490  for (auto Elem : Elems)
4491  if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
4492  AllLoads = false;
4493  break;
4494  }
4495 
4496  // The best way of building a v2i64 from two i64s is to use VLVGP.
4497  if (VT == MVT::v2i64 && !AllLoads)
4498  return joinDwords(DAG, DL, Elems[0], Elems[1]);
4499 
4500  // Use a 64-bit merge high to combine two doubles.
4501  if (VT == MVT::v2f64 && !AllLoads)
4502  return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4503 
4504  // Build v4f32 values directly from the FPRs:
4505  //
4506  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
4507  //        V              V          VMRHF
4508  //      <ABxx>         <CDxx>
4509  //             V                    VMRHG
4510  //           <ABCD>
4511  if (VT == MVT::v4f32 && !AllLoads) {
4512  SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4513  SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
4514  // Avoid unnecessary undefs by reusing the other operand.
4515  if (Op01.isUndef())
4516  Op01 = Op23;
4517  else if (Op23.isUndef())
4518  Op23 = Op01;
4519  // Merging identical replications is a no-op.
4520  if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
4521  return Op01;
4522  Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
4523  Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
4524  SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
4525  DL, MVT::v2i64, Op01, Op23);
4526  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4527  }
4528 
4529  // Collect the constant terms.
4530  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
4531  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
4532 
4533  unsigned NumConstants = 0;
4534  for (unsigned I = 0; I < NumElements; ++I) {
4535  SDValue Elem = Elems[I];
4536  if (Elem.getOpcode() == ISD::Constant ||
4537  Elem.getOpcode() == ISD::ConstantFP) {
4538  NumConstants += 1;
4539  Constants[I] = Elem;
4540  Done[I] = true;
4541  }
4542  }
4543  // If there was at least one constant, fill in the other elements of
4544  // Constants with undefs to get a full vector constant and use that
4545  // as the starting point.
4546  SDValue Result;
4547  SDValue ReplicatedVal;
4548  if (NumConstants > 0) {
4549  for (unsigned I = 0; I < NumElements; ++I)
4550  if (!Constants[I].getNode())
4551  Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
4552  Result = DAG.getBuildVector(VT, DL, Constants);
4553  } else {
4554  // Otherwise try to use VLREP or VLVGP to start the sequence in order to
4555  // avoid a false dependency on any previous contents of the vector
4556  // register.
4557 
4558  // Use a VLREP if at least one element is a load. Make sure to replicate
4559  // the load with the most elements having its value.
4560  std::map<const SDNode*, unsigned> UseCounts;
4561  SDNode *LoadMaxUses = nullptr;
4562  for (unsigned I = 0; I < NumElements; ++I)
4563  if (Elems[I].getOpcode() == ISD::LOAD &&
4564  cast<LoadSDNode>(Elems[I])->isUnindexed()) {
4565  SDNode *Ld = Elems[I].getNode();
4566  UseCounts[Ld]++;
4567  if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
4568  LoadMaxUses = Ld;
4569  }
4570  if (LoadMaxUses != nullptr) {
4571  ReplicatedVal = SDValue(LoadMaxUses, 0);
4572  Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
4573  } else {
4574  // Try to use VLVGP.
4575  unsigned I1 = NumElements / 2 - 1;
4576  unsigned I2 = NumElements - 1;
4577  bool Def1 = !Elems[I1].isUndef();
4578  bool Def2 = !Elems[I2].isUndef();
4579  if (Def1 || Def2) {
4580  SDValue Elem1 = Elems[Def1 ? I1 : I2];
4581  SDValue Elem2 = Elems[Def2 ? I2 : I1];
4582  Result = DAG.getNode(ISD::BITCAST, DL, VT,
4583  joinDwords(DAG, DL, Elem1, Elem2));
4584  Done[I1] = true;
4585  Done[I2] = true;
4586  } else
4587  Result = DAG.getUNDEF(VT);
4588  }
4589  }
4590 
4591  // Use VLVGx to insert the other elements.
4592  for (unsigned I = 0; I < NumElements; ++I)
4593  if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
4594  Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
4595  DAG.getConstant(I, DL, MVT::i32));
4596  return Result;
4597 }
4598 
4599 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
4600  SelectionDAG &DAG) const {
4601  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
4602  SDLoc DL(Op);
4603  EVT VT = Op.getValueType();
4604 
4605  if (BVN->isConstant()) {
4606  if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
4607  return Op;
4608 
4609  // Fall back to loading it from memory.
4610  return SDValue();
4611  }
4612 
4613  // See if we should use shuffles to construct the vector from other vectors.
4614  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
4615  return Res;
4616 
4617  // Detect SCALAR_TO_VECTOR conversions.
4618  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
4619  return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
4620 
4621  // Otherwise use buildVector to build the vector up from GPRs.
4622  unsigned NumElements = Op.getNumOperands();
4623  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
4624  for (unsigned I = 0; I < NumElements; ++I)
4625  Ops[I] = Op.getOperand(I);
4626  return buildVector(DAG, DL, VT, Ops);
4627 }
4628 
4629 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
4630  SelectionDAG &DAG) const {
4631  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
4632  SDLoc DL(Op);
4633  EVT VT = Op.getValueType();
4634  unsigned NumElements = VT.getVectorNumElements();
4635 
4636  if (VSN->isSplat()) {
4637  SDValue Op0 = Op.getOperand(0);
4638  unsigned Index = VSN->getSplatIndex();
4639  assert(Index < VT.getVectorNumElements() &&
4640  "Splat index should be defined and in first operand");
4641  // See whether the value we're splatting is directly available as a scalar.
4642  if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4643  Op0.getOpcode() == ISD::BUILD_VECTOR)
4644  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
4645  // Otherwise keep it as a vector-to-vector operation.
4646  return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
4647  DAG.getConstant(Index, DL, MVT::i32));
4648  }
4649 
4650  GeneralShuffle GS(VT);
4651  for (unsigned I = 0; I < NumElements; ++