1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
17 #include "SystemZTargetMachine.h"
22 #include "llvm/IR/Intrinsics.h"
23 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/Support/KnownBits.h"
26 #include <cctype>
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "systemz-lower"
31 
32 namespace {
33 // Represents information about a comparison.
34 struct Comparison {
35  Comparison(SDValue Op0In, SDValue Op1In)
36  : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
37 
38  // The operands to the comparison.
39  SDValue Op0, Op1;
40 
41  // The opcode that should be used to compare Op0 and Op1.
42  unsigned Opcode;
43 
44  // A SystemZICMP value. Only used for integer comparisons.
45  unsigned ICmpType;
46 
47  // The mask of CC values that Opcode can produce.
48  unsigned CCValid;
49 
50  // The mask of CC values for which the original condition is true.
51  unsigned CCMask;
52 };
53 } // end anonymous namespace
54 
55 // Classify VT as either 32 or 64 bit.
56 static bool is32Bit(EVT VT) {
57  switch (VT.getSimpleVT().SimpleTy) {
58  case MVT::i32:
59  return true;
60  case MVT::i64:
61  return false;
62  default:
63  llvm_unreachable("Unsupported type");
64  }
65 }
66 
67 // Return a version of MachineOperand that can be safely used before the
68 // final use.
70  if (Op.isReg())
71  Op.setIsKill(false);
72  return Op;
73 }
74 
76  const SystemZSubtarget &STI)
77  : TargetLowering(TM), Subtarget(STI) {
78  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
79 
80  // Set up the register classes.
81  if (Subtarget.hasHighWord())
82  addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
83  else
84  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
85  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
86  if (Subtarget.hasVector()) {
87  addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
88  addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
89  } else {
90  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
91  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
92  }
93  if (Subtarget.hasVectorEnhancements1())
94  addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
95  else
96  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
97 
98  if (Subtarget.hasVector()) {
99  addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
100  addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
101  addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
102  addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
103  addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
104  addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
105  }
106 
107  // Compute derived properties from the register classes
109 
110  // Set up special registers.
112 
113  // TODO: It may be better to default to latency-oriented scheduling, however
114  // LLVM's current latency-oriented scheduler can't handle physreg definitions
115  // such as SystemZ has with CC, so set this to the register-pressure
116  // scheduler, because it can.
118 
121 
122  // Instructions are strings of 2-byte aligned 2-byte values.
124  // For performance reasons we prefer 16-byte alignment.
126 
127  // Handle operations that are handled in a similar way for all types.
128  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
130  ++I) {
131  MVT VT = MVT::SimpleValueType(I);
132  if (isTypeLegal(VT)) {
133  // Lower SET_CC into an IPM-based sequence.
135 
136  // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
138 
139  // Lower SELECT_CC and BR_CC into separate comparisons and branches.
142  }
143  }
144 
145  // Expand jump table branches as address arithmetic followed by an
146  // indirect jump.
148 
149  // Expand BRCOND into a BR_CC (see above).
151 
152  // Handle integer types.
153  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
155  ++I) {
156  MVT VT = MVT::SimpleValueType(I);
157  if (isTypeLegal(VT)) {
158  // Expand individual DIV and REMs into DIVREMs.
165 
166  // Support addition/subtraction with overflow.
169 
170  // Support addition/subtraction with carry.
173 
174  // Support carry in as value rather than glue.
177 
178  // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
179  // stores, putting a serialization instruction after the stores.
182 
183  // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
184  // available, or if the operand is constant.
186 
187  // Use POPCNT on z196 and above.
188  if (Subtarget.hasPopulationCount())
190  else
192 
193  // No special instructions for these.
196 
197  // Use *MUL_LOHI where possible instead of MULH*.
202 
203  // Only z196 and above have native support for conversions to unsigned.
204  // On z10, promoting to i64 doesn't generate an inexact condition for
205  // values that are outside the i32 range but in the i64 range, so use
206  // the default expansion.
207  if (!Subtarget.hasFPExtension())
209 
210  // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
211  // default to Expand, so need to be modified to Legal where appropriate.
213  if (Subtarget.hasFPExtension())
215  }
216  }
217 
218  // Type legalization will convert 8- and 16-bit atomic operations into
219  // forms that operate on i32s (but still keeping the original memory VT).
220  // Lower them into full i32 operations.
232 
233  // Even though i128 is not a legal type, we still need to custom lower
234  // the atomic operations in order to exploit SystemZ instructions.
237 
238  // We can use the CC result of compare-and-swap to implement
239  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
243 
245 
246  // Traps are legal, as we will convert them to "j .+2".
248 
249  // z10 has instructions for signed but not unsigned FP conversion.
250  // Handle unsigned 32-bit types as signed 64-bit types.
251  if (!Subtarget.hasFPExtension()) {
254  }
255 
256  // We have native support for a 64-bit CTLZ, via FLOGR.
260 
261  // On z15 we have native support for a 64-bit CTPOP.
262  if (Subtarget.hasMiscellaneousExtensions3()) {
265  }
266 
267  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
269 
270  // FIXME: Can we support these natively?
274 
275  // We have native instructions for i8, i16 and i32 extensions, but not i1.
277  for (MVT VT : MVT::integer_valuetypes()) {
281  }
282 
283  // Handle the various types of symbolic address.
289 
290  // We need to handle dynamic allocations specially because of the
291  // 160-byte area at the bottom of the stack.
294 
295  // Use custom expanders so that we can force the function to use
296  // a frame pointer.
299 
300  // Handle prefetches with PFD or PFDRL.
302 
303  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
304  // Assume by default that all vector operations need to be expanded.
305  for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
306  if (getOperationAction(Opcode, VT) == Legal)
307  setOperationAction(Opcode, VT, Expand);
308 
309  // Likewise all truncating stores and extending loads.
310  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
311  setTruncStoreAction(VT, InnerVT, Expand);
312  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
313  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
314  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
315  }
316 
317  if (isTypeLegal(VT)) {
318  // These operations are legal for anything that can be stored in a
319  // vector register, even if there is no native support for the format
320  // as such. In particular, we can do these for v4f32 even though there
321  // are no specific instructions for that format.
327 
328  // Likewise, except that we need to replace the nodes with something
329  // more specific.
332  }
333  }
334 
335  // Handle integer vector types.
337  if (isTypeLegal(VT)) {
338  // These operations have direct equivalents.
343  if (VT != MVT::v2i64)
348  if (Subtarget.hasVectorEnhancements1())
350  else
354 
355  // Convert a GPR scalar to a vector by inserting it into element 0.
357 
358  // Use a series of unpacks for extensions.
361 
362  // Detect shifts by a scalar amount and convert them into
363  // V*_BY_SCALAR.
367 
368  // At present ROTL isn't matched by DAGCombiner. ROTR should be
369  // converted into ROTL.
372 
373  // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
374  // and inverting the result as necessary.
376  }
377  }
378 
379  if (Subtarget.hasVector()) {
380  // There should be no need to check for float types other than v2f64
381  // since <2 x f32> isn't a legal type.
390 
395  }
396 
397  if (Subtarget.hasVectorEnhancements2()) {
406 
411  }
412 
413  // Handle floating-point types.
414  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
416  ++I) {
417  MVT VT = MVT::SimpleValueType(I);
418  if (isTypeLegal(VT)) {
419  // We can use FI for FRINT.
421 
422  // We can use the extended form of FI for other rounding operations.
423  if (Subtarget.hasFPExtension()) {
429  }
430 
431  // No special instructions for these.
437 
438  // Handle constrained floating-point operations.
448  if (Subtarget.hasFPExtension()) {
454  }
455  }
456  }
457 
458  // Handle floating-point vector types.
459  if (Subtarget.hasVector()) {
460  // Scalar-to-vector conversion is just a subreg.
463 
464  // Some insertions and extractions can be done directly but others
465  // need to go via integers.
470 
471  // These operations have direct equivalents.
486 
487  // Handle constrained floating-point operations.
500  }
501 
502  // The vector enhancements facility 1 has instructions for these.
503  if (Subtarget.hasVectorEnhancements1()) {
518 
523 
528 
533 
538 
543 
544  // Handle constrained floating-point operations.
557  for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
558  MVT::v4f32, MVT::v2f64 }) {
561  }
562  }
563 
564  // We have fused multiply-addition for f32 and f64 but not f128.
567  if (Subtarget.hasVectorEnhancements1())
569  else
571 
572  // We don't have a copysign instruction on vector registers.
573  if (Subtarget.hasVectorEnhancements1())
575 
576  // Needed so that we don't try to implement f128 constant loads using
577  // a load-and-extend of an f80 constant (in cases where the constant
578  // would fit in an f80).
579  for (MVT VT : MVT::fp_valuetypes())
581 
582  // We don't have extending load instructions on vector registers.
583  if (Subtarget.hasVectorEnhancements1()) {
586  }
587 
588  // Floating-point truncation and stores need to be done separately.
592 
593  // We have 64-bit FPR<->GPR moves, but need special handling for
594  // 32-bit forms.
595  if (!Subtarget.hasVector()) {
598  }
599 
600  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
601  // structure, but VAEND is a no-op.
605 
606  // Codes for which we want to perform some z-specific combinations.
621 
622  // Handle intrinsics.
625 
626  // We want to use MVC in preference to even a single load/store pair.
627  MaxStoresPerMemcpy = 0;
629 
630  // The main memset sequence is a byte store followed by an MVC.
631  // Two STC or MV..I stores win over that, but the kind of fused stores
632  // generated by target-independent code don't when the byte value is
633  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
634  // than "STC;MVC". Handle the choice in target-specific code instead.
635  MaxStoresPerMemset = 0;
637 }
638 
640  LLVMContext &, EVT VT) const {
641  if (!VT.isVector())
642  return MVT::i32;
644 }
645 
647  VT = VT.getScalarType();
648 
649  if (!VT.isSimple())
650  return false;
651 
652  switch (VT.getSimpleVT().SimpleTy) {
653  case MVT::f32:
654  case MVT::f64:
655  return true;
656  case MVT::f128:
657  return Subtarget.hasVectorEnhancements1();
658  default:
659  break;
660  }
661 
662  return false;
663 }
664 
665 // Return true if the constant can be generated with a vector instruction,
666 // such as VGM, VGMB or VREPI.
668  const SystemZSubtarget &Subtarget) {
669  const SystemZInstrInfo *TII =
670  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
671  if (!Subtarget.hasVector() ||
672  (isFP128 && !Subtarget.hasVectorEnhancements1()))
673  return false;
674 
675  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
676  // preferred way of creating all-zero and all-one vectors so give it
677  // priority over other methods below.
678  unsigned Mask = 0;
679  unsigned I = 0;
680  for (; I < SystemZ::VectorBytes; ++I) {
681  uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
682  if (Byte == 0xff)
683  Mask |= 1ULL << I;
684  else if (Byte != 0)
685  break;
686  }
687  if (I == SystemZ::VectorBytes) {
688  Opcode = SystemZISD::BYTE_MASK;
689  OpVals.push_back(Mask);
690  VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
691  return true;
692  }
693 
694  if (SplatBitSize > 64)
695  return false;
696 
697  auto tryValue = [&](uint64_t Value) -> bool {
698  // Try VECTOR REPLICATE IMMEDIATE
699  int64_t SignedValue = SignExtend64(Value, SplatBitSize);
700  if (isInt<16>(SignedValue)) {
701  OpVals.push_back(((unsigned) SignedValue));
702  Opcode = SystemZISD::REPLICATE;
703  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
704  SystemZ::VectorBits / SplatBitSize);
705  return true;
706  }
707  // Try VECTOR GENERATE MASK
708  unsigned Start, End;
709  if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
710  // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
711  // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
712  // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
713  OpVals.push_back(Start - (64 - SplatBitSize));
714  OpVals.push_back(End - (64 - SplatBitSize));
715  Opcode = SystemZISD::ROTATE_MASK;
716  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
717  SystemZ::VectorBits / SplatBitSize);
718  return true;
719  }
720  return false;
721  };
722 
723  // First try assuming that any undefined bits above the highest set bit
724  // and below the lowest set bit are 1s. This increases the likelihood of
725  // being able to use a sign-extended element value in VECTOR REPLICATE
726  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
727  uint64_t SplatBitsZ = SplatBits.getZExtValue();
728  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
729  uint64_t Lower =
730  (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
731  uint64_t Upper =
732  (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
733  if (tryValue(SplatBitsZ | Upper | Lower))
734  return true;
735 
736  // Now try assuming that any undefined bits between the first and
737  // last defined set bits are set. This increases the chances of
738  // using a non-wraparound mask.
739  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
740  return tryValue(SplatBitsZ | Middle);
741 }
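// Illustration only: a minimal standalone sketch of the VECTOR GENERATE BYTE
// MASK check above, using two uint64_t halves in place of the 128-bit APInt
// (hypothetical helper, not part of the original file). Every vector byte must
// be 0x00 or 0xff, and the result has one mask bit per all-ones byte.
static bool computeByteMaskSketch(uint64_t Lo, uint64_t Hi, unsigned &Mask) {
  Mask = 0;
  for (unsigned I = 0; I < 16; ++I) {
    uint64_t Byte = (I < 8 ? Lo >> (I * 8) : Hi >> ((I - 8) * 8)) & 0xff;
    if (Byte == 0xff)
      Mask |= 1u << I;              // this byte becomes all ones
    else if (Byte != 0)
      return false;                 // mixed byte: VGBM cannot encode it
  }
  return true;                      // e.g. Lo == 0xff, Hi == 0 -> Mask == 0x1
}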
742 
744  IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
745  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
746 
747  // Find the smallest splat.
748  SplatBits = FPImm.bitcastToAPInt();
749  unsigned Width = SplatBits.getBitWidth();
750  while (Width > 8) {
751  unsigned HalfSize = Width / 2;
752  APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
753  APInt LowValue = SplatBits.trunc(HalfSize);
754 
755  // If the two halves do not match, stop here.
756  if (HighValue != LowValue || 8 > HalfSize)
757  break;
758 
759  SplatBits = HighValue;
760  Width = HalfSize;
761  }
762  SplatUndef = 0;
763  SplatBitSize = Width;
764 }
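// Illustration only: the same halving loop as above on a plain 64-bit pattern
// (hypothetical helper, not part of the original file). The width is repeatedly
// halved while both halves match, never going below 8 bits.
static uint64_t smallestSplatSketch(uint64_t Bits, unsigned &Width) {
  Width = 64;
  while (Width > 8) {
    unsigned Half = Width / 2;
    uint64_t High = Bits >> Half;
    uint64_t Low = Bits & ((uint64_t(1) << Half) - 1);
    if (High != Low)
      break;
    Bits = Low;
    Width = Half;
  }
  return Bits;   // e.g. 0x0101010101010101 -> Bits == 0x01, Width == 8
}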
765 
767  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
768  bool HasAnyUndefs;
769 
770  // Get IntBits by finding the 128 bit splat.
771  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
772  true);
773 
774  // Get SplatBits by finding the 8 bit or greater splat.
775  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
776  true);
777 }
778 
780  bool ForCodeSize) const {
781  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
782  if (Imm.isZero() || Imm.isNegZero())
783  return true;
784 
785  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
786 }
787 
789  // We can use CGFI or CLGFI.
790  return isInt<32>(Imm) || isUInt<32>(Imm);
791 }
792 
794  // We can use ALGFI or SLGFI.
795  return isUInt<32>(Imm) || isUInt<32>(-Imm);
796 }
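// Illustration only: under the two checks above, comparing against -1 (CGFI)
// or 0xffffffff (CLGFI) is legal, while adding 0x100000000 is not, because
// neither that value nor its negation fits an unsigned 32-bit field.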
797 
799  EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
800  // Unaligned accesses should never be slower than the expanded version.
801  // We check specifically for aligned accesses in the few cases where
802  // they are required.
803  if (Fast)
804  *Fast = true;
805  return true;
806 }
807 
808 // Information about the addressing mode for a memory access.
810  // True if a long displacement is supported.
812 
813  // True if use of index register is supported.
814  bool IndexReg;
815 
816  AddressingMode(bool LongDispl, bool IdxReg) :
817  LongDisplacement(LongDispl), IndexReg(IdxReg) {}
818 };
819 
820 // Return the desired addressing mode for a Load which has only one use (in
821 // the same block) which is a Store.
822 static AddressingMode getLoadStoreAddrMode(bool HasVector,
823  Type *Ty) {
824  // With vector support a Load->Store combination may be combined to either
825  // an MVC or vector operations and it seems to work best to allow the
826  // vector addressing mode.
827  if (HasVector)
828  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
829 
830  // Otherwise only the MVC case is special.
831  bool MVC = Ty->isIntegerTy(8);
832  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
833 }
834 
835 // Return the addressing mode which seems most desirable given an LLVM
836 // Instruction pointer.
837 static AddressingMode
839  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
840  switch (II->getIntrinsicID()) {
841  default: break;
842  case Intrinsic::memset:
843  case Intrinsic::memmove:
844  case Intrinsic::memcpy:
845  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
846  }
847  }
848 
849  if (isa<LoadInst>(I) && I->hasOneUse()) {
850  auto *SingleUser = cast<Instruction>(*I->user_begin());
851  if (SingleUser->getParent() == I->getParent()) {
852  if (isa<ICmpInst>(SingleUser)) {
853  if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
854  if (C->getBitWidth() <= 64 &&
855  (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
856  // Comparison of memory with 16 bit signed / unsigned immediate
857  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
858  } else if (isa<StoreInst>(SingleUser))
859  // Load->Store
860  return getLoadStoreAddrMode(HasVector, I->getType());
861  }
862  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
863  if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
864  if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
865  // Load->Store
866  return getLoadStoreAddrMode(HasVector, LoadI->getType());
867  }
868 
869  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
870 
871  // * Use LDE instead of LE/LEY for z13 to avoid partial register
872  // dependencies (LDE only supports small offsets).
873  // * Utilize the vector registers to hold floating point
874  // values (vector load / store instructions only support small
875  // offsets).
876 
877  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
878  I->getOperand(0)->getType());
879  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
880  bool IsVectorAccess = MemAccessTy->isVectorTy();
881 
882  // A store of an extracted vector element will be combined into a VSTE type
883  // instruction.
884  if (!IsVectorAccess && isa<StoreInst>(I)) {
885  Value *DataOp = I->getOperand(0);
886  if (isa<ExtractElementInst>(DataOp))
887  IsVectorAccess = true;
888  }
889 
890  // A load which gets inserted into a vector element will be combined into a
891  // VLE type instruction.
892  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
893  User *LoadUser = *I->user_begin();
894  if (isa<InsertElementInst>(LoadUser))
895  IsVectorAccess = true;
896  }
897 
898  if (IsFPAccess || IsVectorAccess)
899  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
900  }
901 
902  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
903 }
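// Illustration only: an i8 load whose single user is a store in the same block
// is expected to become an MVC, so without vector support it reports
// (LongDispl=false, IdxReg=false); a scalar f64 load on a vector-capable
// subtarget reports (LongDispl=false, IdxReg=true) because it will likely be
// selected as a vector-style load with only a short displacement.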
904 
906  const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
907  // Punt on globals for now, although they can be used in limited
908  // RELATIVE LONG cases.
909  if (AM.BaseGV)
910  return false;
911 
912  // Require a 20-bit signed offset.
913  if (!isInt<20>(AM.BaseOffs))
914  return false;
915 
916  AddressingMode SupportedAM(true, true);
917  if (I != nullptr)
918  SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
919 
920  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
921  return false;
922 
923  if (!SupportedAM.IndexReg)
924  // No indexing allowed.
925  return AM.Scale == 0;
926  else
927  // Indexing is OK but no scale factor can be applied.
928  return AM.Scale == 0 || AM.Scale == 1;
929 }
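// Illustration only: a standalone sketch (hypothetical names, not part of the
// original file) of the displacement and scale rules applied above. The long
// form allows a signed 20-bit displacement, the short form only an unsigned
// 12-bit one, and an index register may be used but never scaled.
static bool addrModeSketch(int64_t Disp, int64_t Scale,
                           bool LongDispl, bool IdxReg) {
  if (Disp < -(1LL << 19) || Disp >= (1LL << 19))
    return false;                          // must fit a signed 20-bit field
  if (!LongDispl && (Disp < 0 || Disp >= (1 << 12)))
    return false;                          // short form: unsigned 12-bit only
  if (!IdxReg)
    return Scale == 0;                     // no index register at all
  return Scale == 0 || Scale == 1;         // index allowed, but never scaled
}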
930 
932  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
933  return false;
934  unsigned FromBits = FromType->getPrimitiveSizeInBits();
935  unsigned ToBits = ToType->getPrimitiveSizeInBits();
936  return FromBits > ToBits;
937 }
938 
940  if (!FromVT.isInteger() || !ToVT.isInteger())
941  return false;
942  unsigned FromBits = FromVT.getSizeInBits();
943  unsigned ToBits = ToVT.getSizeInBits();
944  return FromBits > ToBits;
945 }
946 
947 //===----------------------------------------------------------------------===//
948 // Inline asm support
949 //===----------------------------------------------------------------------===//
950 
953  if (Constraint.size() == 1) {
954  switch (Constraint[0]) {
955  case 'a': // Address register
956  case 'd': // Data register (equivalent to 'r')
957  case 'f': // Floating-point register
958  case 'h': // High-part register
959  case 'r': // General-purpose register
960  case 'v': // Vector register
961  return C_RegisterClass;
962 
963  case 'Q': // Memory with base and unsigned 12-bit displacement
964  case 'R': // Likewise, plus an index
965  case 'S': // Memory with base and signed 20-bit displacement
966  case 'T': // Likewise, plus an index
967  case 'm': // Equivalent to 'T'.
968  return C_Memory;
969 
970  case 'I': // Unsigned 8-bit constant
971  case 'J': // Unsigned 12-bit constant
972  case 'K': // Signed 16-bit constant
973  case 'L': // Signed 20-bit displacement (on all targets we support)
974  case 'M': // 0x7fffffff
975  return C_Immediate;
976 
977  default:
978  break;
979  }
980  }
981  return TargetLowering::getConstraintType(Constraint);
982 }
983 
986  const char *constraint) const {
987  ConstraintWeight weight = CW_Invalid;
988  Value *CallOperandVal = info.CallOperandVal;
989  // If we don't have a value, we can't do a match,
990  // but allow it at the lowest weight.
991  if (!CallOperandVal)
992  return CW_Default;
993  Type *type = CallOperandVal->getType();
994  // Look at the constraint type.
995  switch (*constraint) {
996  default:
997  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
998  break;
999 
1000  case 'a': // Address register
1001  case 'd': // Data register (equivalent to 'r')
1002  case 'h': // High-part register
1003  case 'r': // General-purpose register
1004  if (CallOperandVal->getType()->isIntegerTy())
1005  weight = CW_Register;
1006  break;
1007 
1008  case 'f': // Floating-point register
1009  if (type->isFloatingPointTy())
1010  weight = CW_Register;
1011  break;
1012 
1013  case 'v': // Vector register
1014  if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1015  Subtarget.hasVector())
1016  weight = CW_Register;
1017  break;
1018 
1019  case 'I': // Unsigned 8-bit constant
1020  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1021  if (isUInt<8>(C->getZExtValue()))
1022  weight = CW_Constant;
1023  break;
1024 
1025  case 'J': // Unsigned 12-bit constant
1026  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1027  if (isUInt<12>(C->getZExtValue()))
1028  weight = CW_Constant;
1029  break;
1030 
1031  case 'K': // Signed 16-bit constant
1032  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1033  if (isInt<16>(C->getSExtValue()))
1034  weight = CW_Constant;
1035  break;
1036 
1037  case 'L': // Signed 20-bit displacement (on all targets we support)
1038  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1039  if (isInt<20>(C->getSExtValue()))
1040  weight = CW_Constant;
1041  break;
1042 
1043  case 'M': // 0x7fffffff
1044  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1045  if (C->getZExtValue() == 0x7fffffff)
1046  weight = CW_Constant;
1047  break;
1048  }
1049  return weight;
1050 }
1051 
1052 // Parse a "{tNNN}" register constraint for which the register type "t"
1053 // has already been verified. MC is the class associated with "t" and
1054 // Map maps 0-based register numbers to LLVM register numbers.
1055 static std::pair<unsigned, const TargetRegisterClass *>
1057  const unsigned *Map, unsigned Size) {
1058  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1059  if (isdigit(Constraint[2])) {
1060  unsigned Index;
1061  bool Failed =
1062  Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1063  if (!Failed && Index < Size && Map[Index])
1064  return std::make_pair(Map[Index], RC);
1065  }
1066  return std::make_pair(0U, nullptr);
1067 }
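// Illustration only: a standalone sketch of the "{tNNN}" parsing done above,
// using std::string in place of StringRef (hypothetical helper, not part of
// the original file). The digits between the type letter and the closing '}'
// are read as a decimal index into a table of LLVM register numbers.
static unsigned parseRegNumberSketch(const std::string &Constraint,
                                     const unsigned *Map, unsigned Size) {
  // Expect something like "{r5}"; the type letter Constraint[1] has already
  // been checked by the caller.
  if (Constraint.size() < 4 || Constraint.back() != '}' ||
      !isdigit(static_cast<unsigned char>(Constraint[2])))
    return 0;
  unsigned Index = std::stoul(Constraint.substr(2, Constraint.size() - 3));
  return (Index < Size && Map[Index] != 0) ? Map[Index] : 0;
}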
1068 
1069 std::pair<unsigned, const TargetRegisterClass *>
1071  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1072  if (Constraint.size() == 1) {
1073  // GCC Constraint Letters
1074  switch (Constraint[0]) {
1075  default: break;
1076  case 'd': // Data register (equivalent to 'r')
1077  case 'r': // General-purpose register
1078  if (VT == MVT::i64)
1079  return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1080  else if (VT == MVT::i128)
1081  return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1082  return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1083 
1084  case 'a': // Address register
1085  if (VT == MVT::i64)
1086  return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1087  else if (VT == MVT::i128)
1088  return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1089  return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1090 
1091  case 'h': // High-part register (an LLVM extension)
1092  return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1093 
1094  case 'f': // Floating-point register
1095  if (VT == MVT::f64)
1096  return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1097  else if (VT == MVT::f128)
1098  return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1099  return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1100 
1101  case 'v': // Vector register
1102  if (Subtarget.hasVector()) {
1103  if (VT == MVT::f32)
1104  return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1105  if (VT == MVT::f64)
1106  return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1107  return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1108  }
1109  break;
1110  }
1111  }
1112  if (Constraint.size() > 0 && Constraint[0] == '{') {
1113  // We need to override the default register parsing for GPRs and FPRs
1114  // because the interpretation depends on VT. The internal names of
1115  // the registers are also different from the external names
1116  // (F0D and F0S instead of F0, etc.).
1117  if (Constraint[1] == 'r') {
1118  if (VT == MVT::i32)
1119  return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1120  SystemZMC::GR32Regs, 16);
1121  if (VT == MVT::i128)
1122  return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1123  SystemZMC::GR128Regs, 16);
1124  return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1125  SystemZMC::GR64Regs, 16);
1126  }
1127  if (Constraint[1] == 'f') {
1128  if (VT == MVT::f32)
1129  return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1130  SystemZMC::FP32Regs, 16);
1131  if (VT == MVT::f128)
1132  return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1133  SystemZMC::FP128Regs, 16);
1134  return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1135  SystemZMC::FP64Regs, 16);
1136  }
1137  if (Constraint[1] == 'v') {
1138  if (VT == MVT::f32)
1139  return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1140  SystemZMC::VR32Regs, 32);
1141  if (VT == MVT::f64)
1142  return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1143  SystemZMC::VR64Regs, 32);
1144  return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1145  SystemZMC::VR128Regs, 32);
1146  }
1147  }
1148  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1149 }
1150 
1152 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1153  std::vector<SDValue> &Ops,
1154  SelectionDAG &DAG) const {
1155  // Only support length 1 constraints for now.
1156  if (Constraint.length() == 1) {
1157  switch (Constraint[0]) {
1158  case 'I': // Unsigned 8-bit constant
1159  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1160  if (isUInt<8>(C->getZExtValue()))
1161  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1162  Op.getValueType()));
1163  return;
1164 
1165  case 'J': // Unsigned 12-bit constant
1166  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1167  if (isUInt<12>(C->getZExtValue()))
1168  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1169  Op.getValueType()));
1170  return;
1171 
1172  case 'K': // Signed 16-bit constant
1173  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1174  if (isInt<16>(C->getSExtValue()))
1175  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1176  Op.getValueType()));
1177  return;
1178 
1179  case 'L': // Signed 20-bit displacement (on all targets we support)
1180  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1181  if (isInt<20>(C->getSExtValue()))
1182  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1183  Op.getValueType()));
1184  return;
1185 
1186  case 'M': // 0x7fffffff
1187  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1188  if (C->getZExtValue() == 0x7fffffff)
1189  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1190  Op.getValueType()));
1191  return;
1192  }
1193  }
1194  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1195 }
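// Illustration only: for an operand with constraint "K", a constant such as
// 1000 satisfies isInt<16> and is pushed as a target constant above, while a
// value like 100000 is not, so Ops stays empty and the operand is later
// rejected by the common inline-asm handling.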
1196 
1197 //===----------------------------------------------------------------------===//
1198 // Calling conventions
1199 //===----------------------------------------------------------------------===//
1200 
1201 #include "SystemZGenCallingConv.inc"
1202 
1204  CallingConv::ID) const {
1205  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1206  SystemZ::R14D, 0 };
1207  return ScratchRegs;
1208 }
1209 
1211  Type *ToType) const {
1212  return isTruncateFree(FromType, ToType);
1213 }
1214 
1216  return CI->isTailCall();
1217 }
1218 
1219 // We do not yet support 128-bit single-element vector types. If the user
1220 // attempts to use such types as function argument or return type, prefer
1221 // to error out instead of emitting code violating the ABI.
1222 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1223  if (ArgVT.isVector() && !VT.isVector())
1224  report_fatal_error("Unsupported vector argument or return type");
1225 }
1226 
1228  for (unsigned i = 0; i < Ins.size(); ++i)
1229  VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1230 }
1231 
1233  for (unsigned i = 0; i < Outs.size(); ++i)
1234  VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1235 }
1236 
1237 // Value is a value that has been passed to us in the location described by VA
1238 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1239 // any loads onto Chain.
1241  CCValAssign &VA, SDValue Chain,
1242  SDValue Value) {
1243  // If the argument has been promoted from a smaller type, insert an
1244  // assertion to capture this.
1245  if (VA.getLocInfo() == CCValAssign::SExt)
1246  Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1247  DAG.getValueType(VA.getValVT()));
1248  else if (VA.getLocInfo() == CCValAssign::ZExt)
1249  Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1250  DAG.getValueType(VA.getValVT()));
1251 
1252  if (VA.isExtInLoc())
1253  Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1254  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1255  // If this is a short vector argument loaded from the stack,
1256  // extend from i64 to full vector size and then bitcast.
1257  assert(VA.getLocVT() == MVT::i64);
1258  assert(VA.getValVT().isVector());
1259  Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1260  Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1261  } else
1262  assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1263  return Value;
1264 }
1265 
1266 // Value is a value of type VA.getValVT() that we need to copy into
1267 // the location described by VA. Return a copy of Value converted to
1268 // VA.getValVT(). The caller is responsible for handling indirect values.
1270  CCValAssign &VA, SDValue Value) {
1271  switch (VA.getLocInfo()) {
1272  case CCValAssign::SExt:
1273  return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1274  case CCValAssign::ZExt:
1275  return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1276  case CCValAssign::AExt:
1277  return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1278  case CCValAssign::BCvt:
1279  // If this is a short vector argument to be stored to the stack,
1280  // bitcast to v2i64 and then extract first element.
1281  assert(VA.getLocVT() == MVT::i64);
1282  assert(VA.getValVT().isVector());
1283  Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
1284  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1285  DAG.getConstant(0, DL, MVT::i32));
1286  case CCValAssign::Full:
1287  return Value;
1288  default:
1289  llvm_unreachable("Unhandled getLocInfo()");
1290  }
1291 }
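// Illustration only: for a short vector argument passed in an i64 location
// (the BCvt case), the outgoing path above bitcasts the value to v2i64 and
// extracts element 0, while the incoming path in convertLocVTToValVT rebuilds
// a v2i64 from {value, undef} before bitcasting back to the original vector
// type.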
1292 
1294  SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1295  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1296  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1297  MachineFunction &MF = DAG.getMachineFunction();
1298  MachineFrameInfo &MFI = MF.getFrameInfo();
1300  SystemZMachineFunctionInfo *FuncInfo =
1302  auto *TFL =
1303  static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
1304  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1305 
1306  // Detect unsupported vector argument types.
1307  if (Subtarget.hasVector())
1308  VerifyVectorTypes(Ins);
1309 
1310  // Assign locations to all of the incoming arguments.
1312  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1313  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1314 
1315  unsigned NumFixedGPRs = 0;
1316  unsigned NumFixedFPRs = 0;
1317  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1318  SDValue ArgValue;
1319  CCValAssign &VA = ArgLocs[I];
1320  EVT LocVT = VA.getLocVT();
1321  if (VA.isRegLoc()) {
1322  // Arguments passed in registers
1323  const TargetRegisterClass *RC;
1324  switch (LocVT.getSimpleVT().SimpleTy) {
1325  default:
1326  // Integers smaller than i64 should be promoted to i64.
1327  llvm_unreachable("Unexpected argument type");
1328  case MVT::i32:
1329  NumFixedGPRs += 1;
1330  RC = &SystemZ::GR32BitRegClass;
1331  break;
1332  case MVT::i64:
1333  NumFixedGPRs += 1;
1334  RC = &SystemZ::GR64BitRegClass;
1335  break;
1336  case MVT::f32:
1337  NumFixedFPRs += 1;
1338  RC = &SystemZ::FP32BitRegClass;
1339  break;
1340  case MVT::f64:
1341  NumFixedFPRs += 1;
1342  RC = &SystemZ::FP64BitRegClass;
1343  break;
1344  case MVT::v16i8:
1345  case MVT::v8i16:
1346  case MVT::v4i32:
1347  case MVT::v2i64:
1348  case MVT::v4f32:
1349  case MVT::v2f64:
1350  RC = &SystemZ::VR128BitRegClass;
1351  break;
1352  }
1353 
1354  Register VReg = MRI.createVirtualRegister(RC);
1355  MRI.addLiveIn(VA.getLocReg(), VReg);
1356  ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1357  } else {
1358  assert(VA.isMemLoc() && "Argument not register or memory");
1359 
1360  // Create the frame index object for this incoming parameter.
1361  int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1362  VA.getLocMemOffset(), true);
1363 
1364  // Create the SelectionDAG nodes corresponding to a load
1365  // from this parameter. Unpromoted ints and floats are
1366  // passed as right-justified 8-byte values.
1367  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1368  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1369  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1370  DAG.getIntPtrConstant(4, DL));
1371  ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1373  }
1374 
1375  // Convert the value of the argument register into the value that's
1376  // being passed.
1377  if (VA.getLocInfo() == CCValAssign::Indirect) {
1378  InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1379  MachinePointerInfo()));
1380  // If the original argument was split (e.g. i128), we need
1381  // to load all parts of it here (using the same address).
1382  unsigned ArgIndex = Ins[I].OrigArgIndex;
1383  assert (Ins[I].PartOffset == 0);
1384  while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1385  CCValAssign &PartVA = ArgLocs[I + 1];
1386  unsigned PartOffset = Ins[I + 1].PartOffset;
1387  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1388  DAG.getIntPtrConstant(PartOffset, DL));
1389  InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1390  MachinePointerInfo()));
1391  ++I;
1392  }
1393  } else
1394  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1395  }
1396 
1397  if (IsVarArg) {
1398  // Save the number of non-varargs registers for later use by va_start, etc.
1399  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1400  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1401 
1402  // Likewise the address (in the form of a frame index) of where the
1403  // first stack vararg would be. The 1-byte size here is arbitrary.
1404  int64_t StackSize = CCInfo.getNextStackOffset();
1405  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1406 
1407  // ...and a similar frame index for the caller-allocated save area
1408  // that will be used to store the incoming registers.
1409  int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
1410  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1411  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1412 
1413  // Store the FPR varargs in the reserved frame slots. (We store the
1414  // GPRs as part of the prologue.)
1415  if (NumFixedFPRs < SystemZ::NumArgFPRs) {
1416  SDValue MemOps[SystemZ::NumArgFPRs];
1417  for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
1418  unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
1419  int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
1420  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1421  unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
1422  &SystemZ::FP64BitRegClass);
1423  SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1424  MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1426  }
1427  // Join the stores, which are independent of one another.
1428  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1429  makeArrayRef(&MemOps[NumFixedFPRs],
1430  SystemZ::NumArgFPRs-NumFixedFPRs));
1431  }
1432  }
1433 
1434  return Chain;
1435 }
1436 
1437 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1440  // Punt if there are any indirect or stack arguments, or if the call
1441  // needs the callee-saved argument register R6, or if the call uses
1442  // the callee-saved register arguments SwiftSelf and SwiftError.
1443  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1444  CCValAssign &VA = ArgLocs[I];
1445  if (VA.getLocInfo() == CCValAssign::Indirect)
1446  return false;
1447  if (!VA.isRegLoc())
1448  return false;
1449  Register Reg = VA.getLocReg();
1450  if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1451  return false;
1452  if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1453  return false;
1454  }
1455  return true;
1456 }
1457 
1458 SDValue
1460  SmallVectorImpl<SDValue> &InVals) const {
1461  SelectionDAG &DAG = CLI.DAG;
1462  SDLoc &DL = CLI.DL;
1464  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1466  SDValue Chain = CLI.Chain;
1467  SDValue Callee = CLI.Callee;
1468  bool &IsTailCall = CLI.IsTailCall;
1469  CallingConv::ID CallConv = CLI.CallConv;
1470  bool IsVarArg = CLI.IsVarArg;
1471  MachineFunction &MF = DAG.getMachineFunction();
1472  EVT PtrVT = getPointerTy(MF.getDataLayout());
1473 
1474  // Detect unsupported vector argument and return types.
1475  if (Subtarget.hasVector()) {
1476  VerifyVectorTypes(Outs);
1477  VerifyVectorTypes(Ins);
1478  }
1479 
1480  // Analyze the operands of the call, assigning locations to each operand.
1482  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1483  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1484 
1485  // We don't support GuaranteedTailCallOpt, only automatically-detected
1486  // sibling calls.
1487  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1488  IsTailCall = false;
1489 
1490  // Get a count of how many bytes are to be pushed on the stack.
1491  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1492 
1493  // Mark the start of the call.
1494  if (!IsTailCall)
1495  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1496 
1497  // Copy argument values to their designated locations.
1499  SmallVector<SDValue, 8> MemOpChains;
1500  SDValue StackPtr;
1501  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1502  CCValAssign &VA = ArgLocs[I];
1503  SDValue ArgValue = OutVals[I];
1504 
1505  if (VA.getLocInfo() == CCValAssign::Indirect) {
1506  // Store the argument in a stack slot and pass its address.
1507  SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
1508  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1509  MemOpChains.push_back(
1510  DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1512  // If the original argument was split (e.g. i128), we need
1513  // to store all parts of it here (and pass just one address).
1514  unsigned ArgIndex = Outs[I].OrigArgIndex;
1515  assert (Outs[I].PartOffset == 0);
1516  while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1517  SDValue PartValue = OutVals[I + 1];
1518  unsigned PartOffset = Outs[I + 1].PartOffset;
1519  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1520  DAG.getIntPtrConstant(PartOffset, DL));
1521  MemOpChains.push_back(
1522  DAG.getStore(Chain, DL, PartValue, Address,
1524  ++I;
1525  }
1526  ArgValue = SpillSlot;
1527  } else
1528  ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1529 
1530  if (VA.isRegLoc())
1531  // Queue up the argument copies and emit them at the end.
1532  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1533  else {
1534  assert(VA.isMemLoc() && "Argument not register or memory");
1535 
1536  // Work out the address of the stack slot. Unpromoted ints and
1537  // floats are passed as right-justified 8-byte values.
1538  if (!StackPtr.getNode())
1539  StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
1541  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1542  Offset += 4;
1543  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1544  DAG.getIntPtrConstant(Offset, DL));
1545 
1546  // Emit the store.
1547  MemOpChains.push_back(
1548  DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1549  }
1550  }
1551 
1552  // Join the stores, which are independent of one another.
1553  if (!MemOpChains.empty())
1554  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1555 
1556  // Accept direct calls by converting symbolic call addresses to the
1557  // associated Target* opcodes. Force %r1 to be used for indirect
1558  // tail calls.
1559  SDValue Glue;
1560  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1561  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1562  Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1563  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1564  Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1565  Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1566  } else if (IsTailCall) {
1567  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1568  Glue = Chain.getValue(1);
1569  Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1570  }
1571 
1572  // Build a sequence of copy-to-reg nodes, chained and glued together.
1573  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1574  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1575  RegsToPass[I].second, Glue);
1576  Glue = Chain.getValue(1);
1577  }
1578 
1579  // The first call operand is the chain and the second is the target address.
1581  Ops.push_back(Chain);
1582  Ops.push_back(Callee);
1583 
1584  // Add argument registers to the end of the list so that they are
1585  // known live into the call.
1586  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1587  Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1588  RegsToPass[I].second.getValueType()));
1589 
1590  // Add a register mask operand representing the call-preserved registers.
1591  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1592  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1593  assert(Mask && "Missing call preserved mask for calling convention");
1594  Ops.push_back(DAG.getRegisterMask(Mask));
1595 
1596  // Glue the call to the argument copies, if any.
1597  if (Glue.getNode())
1598  Ops.push_back(Glue);
1599 
1600  // Emit the call.
1601  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1602  if (IsTailCall)
1603  return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1604  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1605  Glue = Chain.getValue(1);
1606 
1607  // Mark the end of the call, which is glued to the call itself.
1608  Chain = DAG.getCALLSEQ_END(Chain,
1609  DAG.getConstant(NumBytes, DL, PtrVT, true),
1610  DAG.getConstant(0, DL, PtrVT, true),
1611  Glue, DL);
1612  Glue = Chain.getValue(1);
1613 
1614  // Assign locations to each value returned by this call.
1616  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1617  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1618 
1619  // Copy all of the result registers out of their specified physreg.
1620  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1621  CCValAssign &VA = RetLocs[I];
1622 
1623  // Copy the value out, gluing the copy to the end of the call sequence.
1624  SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1625  VA.getLocVT(), Glue);
1626  Chain = RetValue.getValue(1);
1627  Glue = RetValue.getValue(2);
1628 
1629  // Convert the value of the return register into the value that's
1630  // being returned.
1631  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1632  }
1633 
1634  return Chain;
1635 }
1636 
1639  MachineFunction &MF, bool isVarArg,
1640  const SmallVectorImpl<ISD::OutputArg> &Outs,
1641  LLVMContext &Context) const {
1642  // Detect unsupported vector return types.
1643  if (Subtarget.hasVector())
1644  VerifyVectorTypes(Outs);
1645 
1646  // Special case that we cannot easily detect in RetCC_SystemZ since
1647  // i128 is not a legal type.
1648  for (auto &Out : Outs)
1649  if (Out.ArgVT == MVT::i128)
1650  return false;
1651 
1653  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1654  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1655 }
1656 
1657 SDValue
1659  bool IsVarArg,
1660  const SmallVectorImpl<ISD::OutputArg> &Outs,
1661  const SmallVectorImpl<SDValue> &OutVals,
1662  const SDLoc &DL, SelectionDAG &DAG) const {
1663  MachineFunction &MF = DAG.getMachineFunction();
1664 
1665  // Detect unsupported vector return types.
1666  if (Subtarget.hasVector())
1667  VerifyVectorTypes(Outs);
1668 
1669  // Assign locations to each returned value.
1671  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1672  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1673 
1674  // Quick exit for void returns
1675  if (RetLocs.empty())
1676  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1677 
1678  // Copy the result values into the output registers.
1679  SDValue Glue;
1680  SmallVector<SDValue, 4> RetOps;
1681  RetOps.push_back(Chain);
1682  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1683  CCValAssign &VA = RetLocs[I];
1684  SDValue RetValue = OutVals[I];
1685 
1686  // Make the return register live on exit.
1687  assert(VA.isRegLoc() && "Can only return in registers!");
1688 
1689  // Promote the value as required.
1690  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1691 
1692  // Chain and glue the copies together.
1693  Register Reg = VA.getLocReg();
1694  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1695  Glue = Chain.getValue(1);
1696  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1697  }
1698 
1699  // Update chain and glue.
1700  RetOps[0] = Chain;
1701  if (Glue.getNode())
1702  RetOps.push_back(Glue);
1703 
1704  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1705 }
1706 
1707 // Return true if Op is an intrinsic node with chain that returns the CC value
1708 // as its only (other) argument. Provide the associated SystemZISD opcode and
1709 // the mask of valid CC values if so.
1710 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1711  unsigned &CCValid) {
1712  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1713  switch (Id) {
1714  case Intrinsic::s390_tbegin:
1715  Opcode = SystemZISD::TBEGIN;
1716  CCValid = SystemZ::CCMASK_TBEGIN;
1717  return true;
1718 
1719  case Intrinsic::s390_tbegin_nofloat:
1720  Opcode = SystemZISD::TBEGIN_NOFLOAT;
1721  CCValid = SystemZ::CCMASK_TBEGIN;
1722  return true;
1723 
1724  case Intrinsic::s390_tend:
1725  Opcode = SystemZISD::TEND;
1726  CCValid = SystemZ::CCMASK_TEND;
1727  return true;
1728 
1729  default:
1730  return false;
1731  }
1732 }
1733 
1734 // Return true if Op is an intrinsic node without chain that returns the
1735 // CC value as its final argument. Provide the associated SystemZISD
1736 // opcode and the mask of valid CC values if so.
1737 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1738  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1739  switch (Id) {
1740  case Intrinsic::s390_vpkshs:
1741  case Intrinsic::s390_vpksfs:
1742  case Intrinsic::s390_vpksgs:
1743  Opcode = SystemZISD::PACKS_CC;
1744  CCValid = SystemZ::CCMASK_VCMP;
1745  return true;
1746 
1747  case Intrinsic::s390_vpklshs:
1748  case Intrinsic::s390_vpklsfs:
1749  case Intrinsic::s390_vpklsgs:
1750  Opcode = SystemZISD::PACKLS_CC;
1751  CCValid = SystemZ::CCMASK_VCMP;
1752  return true;
1753 
1754  case Intrinsic::s390_vceqbs:
1755  case Intrinsic::s390_vceqhs:
1756  case Intrinsic::s390_vceqfs:
1757  case Intrinsic::s390_vceqgs:
1758  Opcode = SystemZISD::VICMPES;
1759  CCValid = SystemZ::CCMASK_VCMP;
1760  return true;
1761 
1762  case Intrinsic::s390_vchbs:
1763  case Intrinsic::s390_vchhs:
1764  case Intrinsic::s390_vchfs:
1765  case Intrinsic::s390_vchgs:
1766  Opcode = SystemZISD::VICMPHS;
1767  CCValid = SystemZ::CCMASK_VCMP;
1768  return true;
1769 
1770  case Intrinsic::s390_vchlbs:
1771  case Intrinsic::s390_vchlhs:
1772  case Intrinsic::s390_vchlfs:
1773  case Intrinsic::s390_vchlgs:
1774  Opcode = SystemZISD::VICMPHLS;
1775  CCValid = SystemZ::CCMASK_VCMP;
1776  return true;
1777 
1778  case Intrinsic::s390_vtm:
1779  Opcode = SystemZISD::VTM;
1780  CCValid = SystemZ::CCMASK_VCMP;
1781  return true;
1782 
1783  case Intrinsic::s390_vfaebs:
1784  case Intrinsic::s390_vfaehs:
1785  case Intrinsic::s390_vfaefs:
1786  Opcode = SystemZISD::VFAE_CC;
1787  CCValid = SystemZ::CCMASK_ANY;
1788  return true;
1789 
1790  case Intrinsic::s390_vfaezbs:
1791  case Intrinsic::s390_vfaezhs:
1792  case Intrinsic::s390_vfaezfs:
1793  Opcode = SystemZISD::VFAEZ_CC;
1794  CCValid = SystemZ::CCMASK_ANY;
1795  return true;
1796 
1797  case Intrinsic::s390_vfeebs:
1798  case Intrinsic::s390_vfeehs:
1799  case Intrinsic::s390_vfeefs:
1800  Opcode = SystemZISD::VFEE_CC;
1801  CCValid = SystemZ::CCMASK_ANY;
1802  return true;
1803 
1804  case Intrinsic::s390_vfeezbs:
1805  case Intrinsic::s390_vfeezhs:
1806  case Intrinsic::s390_vfeezfs:
1807  Opcode = SystemZISD::VFEEZ_CC;
1808  CCValid = SystemZ::CCMASK_ANY;
1809  return true;
1810 
1811  case Intrinsic::s390_vfenebs:
1812  case Intrinsic::s390_vfenehs:
1813  case Intrinsic::s390_vfenefs:
1814  Opcode = SystemZISD::VFENE_CC;
1815  CCValid = SystemZ::CCMASK_ANY;
1816  return true;
1817 
1818  case Intrinsic::s390_vfenezbs:
1819  case Intrinsic::s390_vfenezhs:
1820  case Intrinsic::s390_vfenezfs:
1821  Opcode = SystemZISD::VFENEZ_CC;
1822  CCValid = SystemZ::CCMASK_ANY;
1823  return true;
1824 
1825  case Intrinsic::s390_vistrbs:
1826  case Intrinsic::s390_vistrhs:
1827  case Intrinsic::s390_vistrfs:
1828  Opcode = SystemZISD::VISTR_CC;
1830  return true;
1831 
1832  case Intrinsic::s390_vstrcbs:
1833  case Intrinsic::s390_vstrchs:
1834  case Intrinsic::s390_vstrcfs:
1835  Opcode = SystemZISD::VSTRC_CC;
1836  CCValid = SystemZ::CCMASK_ANY;
1837  return true;
1838 
1839  case Intrinsic::s390_vstrczbs:
1840  case Intrinsic::s390_vstrczhs:
1841  case Intrinsic::s390_vstrczfs:
1842  Opcode = SystemZISD::VSTRCZ_CC;
1843  CCValid = SystemZ::CCMASK_ANY;
1844  return true;
1845 
1846  case Intrinsic::s390_vstrsb:
1847  case Intrinsic::s390_vstrsh:
1848  case Intrinsic::s390_vstrsf:
1849  Opcode = SystemZISD::VSTRS_CC;
1850  CCValid = SystemZ::CCMASK_ANY;
1851  return true;
1852 
1853  case Intrinsic::s390_vstrszb:
1854  case Intrinsic::s390_vstrszh:
1855  case Intrinsic::s390_vstrszf:
1856  Opcode = SystemZISD::VSTRSZ_CC;
1857  CCValid = SystemZ::CCMASK_ANY;
1858  return true;
1859 
1860  case Intrinsic::s390_vfcedbs:
1861  case Intrinsic::s390_vfcesbs:
1862  Opcode = SystemZISD::VFCMPES;
1863  CCValid = SystemZ::CCMASK_VCMP;
1864  return true;
1865 
1866  case Intrinsic::s390_vfchdbs:
1867  case Intrinsic::s390_vfchsbs:
1868  Opcode = SystemZISD::VFCMPHS;
1869  CCValid = SystemZ::CCMASK_VCMP;
1870  return true;
1871 
1872  case Intrinsic::s390_vfchedbs:
1873  case Intrinsic::s390_vfchesbs:
1874  Opcode = SystemZISD::VFCMPHES;
1875  CCValid = SystemZ::CCMASK_VCMP;
1876  return true;
1877 
1878  case Intrinsic::s390_vftcidb:
1879  case Intrinsic::s390_vftcisb:
1880  Opcode = SystemZISD::VFTCI;
1881  CCValid = SystemZ::CCMASK_VCMP;
1882  return true;
1883 
1884  case Intrinsic::s390_tdc:
1885  Opcode = SystemZISD::TDC;
1886  CCValid = SystemZ::CCMASK_TDC;
1887  return true;
1888 
1889  default:
1890  return false;
1891  }
1892 }
1893 
1894 // Emit an intrinsic with chain and an explicit CC register result.
1896  unsigned Opcode) {
1897  // Copy all operands except the intrinsic ID.
1898  unsigned NumOps = Op.getNumOperands();
1900  Ops.reserve(NumOps - 1);
1901  Ops.push_back(Op.getOperand(0));
1902  for (unsigned I = 2; I < NumOps; ++I)
1903  Ops.push_back(Op.getOperand(I));
1904 
1905  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
1906  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
1907  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
1908  SDValue OldChain = SDValue(Op.getNode(), 1);
1909  SDValue NewChain = SDValue(Intr.getNode(), 1);
1910  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
1911  return Intr.getNode();
1912 }
1913 
1914 // Emit an intrinsic with an explicit CC register result.
1916  unsigned Opcode) {
1917  // Copy all operands except the intrinsic ID.
1918  unsigned NumOps = Op.getNumOperands();
1920  Ops.reserve(NumOps - 1);
1921  for (unsigned I = 1; I < NumOps; ++I)
1922  Ops.push_back(Op.getOperand(I));
1923 
1924  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
1925  return Intr.getNode();
1926 }
1927 
1928 // CC is a comparison that will be implemented using an integer or
1929 // floating-point comparison. Return the condition code mask for
1930 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
1931 // unsigned comparisons and clear for signed ones. In the floating-point
1932 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
1933 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
1934 #define CONV(X) \
1935  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
1936  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
1937  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
1938 
1939  switch (CC) {
1940  default:
1941  llvm_unreachable("Invalid integer condition!");
1942 
1943  CONV(EQ);
1944  CONV(NE);
1945  CONV(GT);
1946  CONV(GE);
1947  CONV(LT);
1948  CONV(LE);
1949 
1950  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
1951  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
1952  }
1953 #undef CONV
1954 }
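// As an illustration of the CONV expansion above: ISD::SETGT and ISD::SETOGT
// both map to SystemZ::CCMASK_CMP_GT, while ISD::SETUGT maps to
// SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_GT, so for floating point an
// unordered "greater than" also branches when the operands compare unordered.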
1955 
1956 // If C can be converted to a comparison against zero, adjust the operands
1957 // as necessary.
1958 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
1959  if (C.ICmpType == SystemZICMP::UnsignedOnly)
1960  return;
1961 
1962  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
1963  if (!ConstOp1)
1964  return;
1965 
1966  int64_t Value = ConstOp1->getSExtValue();
1967  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
1968  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
1969  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
1970  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
1971  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
1972  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
1973  }
1974 }
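// For example, a signed "x > -1" arrives here with Value == -1 and
// CCMask == SystemZ::CCMASK_CMP_GT; XOR-ing in CCMASK_CMP_EQ yields
// CCMASK_CMP_GE and Op1 becomes 0, i.e. the equivalent "x >= 0", which is a
// comparison against zero and therefore cheaper to materialize.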
1975 
1976 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
1977 // adjust the operands as necessary.
1978 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
1979  Comparison &C) {
1980  // For us to make any changes, it must be a comparison between a single-use
1981  // load and a constant.
1982  if (!C.Op0.hasOneUse() ||
1983  C.Op0.getOpcode() != ISD::LOAD ||
1984  C.Op1.getOpcode() != ISD::Constant)
1985  return;
1986 
1987  // We must have an 8- or 16-bit load.
1988  auto *Load = cast<LoadSDNode>(C.Op0);
1989  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
1990  if (NumBits != 8 && NumBits != 16)
1991  return;
1992 
1993  // The load must be an extending one and the constant must be within the
1994  // range of the unextended value.
1995  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
1996  uint64_t Value = ConstOp1->getZExtValue();
1997  uint64_t Mask = (1 << NumBits) - 1;
1998  if (Load->getExtensionType() == ISD::SEXTLOAD) {
1999  // Make sure that ConstOp1 is in range of C.Op0.
2000  int64_t SignedValue = ConstOp1->getSExtValue();
2001  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2002  return;
2003  if (C.ICmpType != SystemZICMP::SignedOnly) {
2004  // Unsigned comparison between two sign-extended values is equivalent
2005  // to unsigned comparison between two zero-extended values.
2006  Value &= Mask;
2007  } else if (NumBits == 8) {
2008  // Try to treat the comparison as unsigned, so that we can use CLI.
2009  // Adjust CCMask and Value as necessary.
2010  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2011  // Test whether the high bit of the byte is set.
2012  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2013  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2014  // Test whether the high bit of the byte is clear.
2015  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2016  else
2017  // No instruction exists for this combination.
2018  return;
2019  C.ICmpType = SystemZICMP::UnsignedOnly;
2020  }
2021  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2022  if (Value > Mask)
2023  return;
2024  // If the constant is in range, we can use any comparison.
2025  C.ICmpType = SystemZICMP::Any;
2026  } else
2027  return;
2028 
2029  // Make sure that the first operand is an i32 of the right extension type.
2030  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2031  ISD::SEXTLOAD :
2032  ISD::ZEXTLOAD);
2033  if (C.Op0.getValueType() != MVT::i32 ||
2034  Load->getExtensionType() != ExtType) {
2035  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2036  Load->getBasePtr(), Load->getPointerInfo(),
2037  Load->getMemoryVT(), Load->getAlignment(),
2038  Load->getMemOperand()->getFlags());
2039  // Update the chain uses.
2040  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2041  }
2042 
2043  // Make sure that the second operand is an i32 with the right value.
2044  if (C.Op1.getValueType() != MVT::i32 ||
2045  Value != ConstOp1->getZExtValue())
2046  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2047 }
2048 
2049 // Return true if Op is either an unextended load, or a load suitable
2050 // for integer register-memory comparisons of type ICmpType.
2051 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2052  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2053  if (Load) {
2054  // There are no instructions to compare a register with a memory byte.
2055  if (Load->getMemoryVT() == MVT::i8)
2056  return false;
2057  // Otherwise decide on extension type.
2058  switch (Load->getExtensionType()) {
2059  case ISD::NON_EXTLOAD:
2060  return true;
2061  case ISD::SEXTLOAD:
2062  return ICmpType != SystemZICMP::UnsignedOnly;
2063  case ISD::ZEXTLOAD:
2064  return ICmpType != SystemZICMP::SignedOnly;
2065  default:
2066  break;
2067  }
2068  }
2069  return false;
2070 }
2071 
2072 // Return true if it is better to swap the operands of C.
2073 static bool shouldSwapCmpOperands(const Comparison &C) {
2074  // Leave f128 comparisons alone, since they have no memory forms.
2075  if (C.Op0.getValueType() == MVT::f128)
2076  return false;
2077 
2078  // Always keep a floating-point constant second, since comparisons with
2079  // zero can use LOAD TEST and comparisons with other constants make a
2080  // natural memory operand.
2081  if (isa<ConstantFPSDNode>(C.Op1))
2082  return false;
2083 
2084  // Never swap comparisons with zero since there are many ways to optimize
2085  // those later.
2086  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2087  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2088  return false;
2089 
2090  // Also keep natural memory operands second if the loaded value is
2091  // only used here. Several comparisons have memory forms.
2092  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2093  return false;
2094 
2095  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2096  // In that case we generally prefer the memory to be second.
2097  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2098  // The only exceptions are when the second operand is a constant and
2099  // we can use things like CHHSI.
2100  if (!ConstOp1)
2101  return true;
2102  // The unsigned memory-immediate instructions can handle 16-bit
2103  // unsigned integers.
2104  if (C.ICmpType != SystemZICMP::SignedOnly &&
2105  isUInt<16>(ConstOp1->getZExtValue()))
2106  return false;
2107  // The signed memory-immediate instructions can handle 16-bit
2108  // signed integers.
2109  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2110  isInt<16>(ConstOp1->getSExtValue()))
2111  return false;
2112  return true;
2113  }
2114 
2115  // Try to promote the use of CGFR and CLGFR.
2116  unsigned Opcode0 = C.Op0.getOpcode();
2117  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2118  return true;
2119  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2120  return true;
2121  if (C.ICmpType != SystemZICMP::SignedOnly &&
2122  Opcode0 == ISD::AND &&
2123  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2124  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2125  return true;
2126 
2127  return false;
2128 }
2129 
2130 // Return a version of comparison CC mask CCMask in which the LT and GT
2131 // actions are swapped.
2132 static unsigned reverseCCMask(unsigned CCMask) {
2133  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
2134  (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
2135  (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
2136  (CCMask & SystemZ::CCMASK_CMP_UO));
2137 }
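// For example, reverseCCMask(SystemZ::CCMASK_CMP_LE), i.e. EQ | LT, yields
// EQ | GT == SystemZ::CCMASK_CMP_GE: "a <= b" becomes "b >= a" once the
// operands are swapped, while the EQ and UO bits pass through unchanged.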
2138 
2139 // Check whether C tests for equality between X and Y and whether X - Y
2140 // or Y - X is also computed. In that case it's better to compare the
2141 // result of the subtraction against zero.
2142 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2143  Comparison &C) {
2144  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2145  C.CCMask == SystemZ::CCMASK_CMP_NE) {
2146  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2147  SDNode *N = *I;
2148  if (N->getOpcode() == ISD::SUB &&
2149  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2150  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2151  C.Op0 = SDValue(N, 0);
2152  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2153  return;
2154  }
2155  }
2156  }
2157 }
2158 
2159 // Check whether C compares a floating-point value with zero and if that
2160 // floating-point value is also negated. In this case we can use the
2161 // negation to set CC, so avoiding separate LOAD AND TEST and
2162 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2163 static void adjustForFNeg(Comparison &C) {
2164  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2165  if (C1 && C1->isZero()) {
2166  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2167  SDNode *N = *I;
2168  if (N->getOpcode() == ISD::FNEG) {
2169  C.Op0 = SDValue(N, 0);
2170  C.CCMask = reverseCCMask(C.CCMask);
2171  return;
2172  }
2173  }
2174  }
2175 }
2176 
2177 // Check whether C compares (shl X, 32) with 0 and whether X is
2178 // also sign-extended. In that case it is better to test the result
2179 // of the sign extension using LTGFR.
2180 //
2181 // This case is important because InstCombine transforms a comparison
2182 // with (sext (trunc X)) into a comparison with (shl X, 32).
2183 static void adjustForLTGFR(Comparison &C) {
2184  // Check for a comparison between (shl X, 32) and 0.
2185  if (C.Op0.getOpcode() == ISD::SHL &&
2186  C.Op0.getValueType() == MVT::i64 &&
2187  C.Op1.getOpcode() == ISD::Constant &&
2188  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2189  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2190  if (C1 && C1->getZExtValue() == 32) {
2191  SDValue ShlOp0 = C.Op0.getOperand(0);
2192  // See whether X has any SIGN_EXTEND_INREG uses.
2193  for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
2194  SDNode *N = *I;
2195  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2196  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2197  C.Op0 = SDValue(N, 0);
2198  return;
2199  }
2200  }
2201  }
2202  }
2203 }
2204 
2205 // If C compares the truncation of an extending load, try to compare
2206 // the untruncated value instead. This exposes more opportunities to
2207 // reuse CC.
2208 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2209  Comparison &C) {
2210  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2211  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2212  C.Op1.getOpcode() == ISD::Constant &&
2213  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2214  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2215  if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
2216  unsigned Type = L->getExtensionType();
2217  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2218  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2219  C.Op0 = C.Op0.getOperand(0);
2220  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2221  }
2222  }
2223  }
2224 }
2225 
2226 // Return true if shift operation N has an in-range constant shift value.
2227 // Store it in ShiftVal if so.
2228 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2229  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2230  if (!Shift)
2231  return false;
2232 
2233  uint64_t Amount = Shift->getZExtValue();
2234  if (Amount >= N.getValueSizeInBits())
2235  return false;
2236 
2237  ShiftVal = Amount;
2238  return true;
2239 }
2240 
2241 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2242 // instruction and whether the CC value is descriptive enough to handle
2243 // a comparison of type Opcode between the AND result and CmpVal.
2244 // CCMask says which comparison result is being tested and BitSize is
2245 // the number of bits in the operands. If TEST UNDER MASK can be used,
2246 // return the corresponding CC mask, otherwise return 0.
2247 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2248  uint64_t Mask, uint64_t CmpVal,
2249  unsigned ICmpType) {
2250  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2251 
2252  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2253  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2254  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2255  return 0;
2256 
2257  // Work out the masks for the lowest and highest bits.
2258  unsigned HighShift = 63 - countLeadingZeros(Mask);
2259  uint64_t High = uint64_t(1) << HighShift;
2260  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2261 
2262  // Signed ordered comparisons are effectively unsigned if the sign
2263  // bit is dropped.
2264  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2265 
2266  // Check for equality comparisons with 0, or the equivalent.
2267  if (CmpVal == 0) {
2268  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2269  return SystemZ::CCMASK_TM_ALL_0;
2270  if (CCMask == SystemZ::CCMASK_CMP_NE)
2271  return SystemZ::CCMASK_TM_SOME_1;
2272  }
2273  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2274  if (CCMask == SystemZ::CCMASK_CMP_LT)
2275  return SystemZ::CCMASK_TM_ALL_0;
2276  if (CCMask == SystemZ::CCMASK_CMP_GE)
2277  return SystemZ::CCMASK_TM_SOME_1;
2278  }
2279  if (EffectivelyUnsigned && CmpVal < Low) {
2280  if (CCMask == SystemZ::CCMASK_CMP_LE)
2281  return SystemZ::CCMASK_TM_ALL_0;
2282  if (CCMask == SystemZ::CCMASK_CMP_GT)
2283  return SystemZ::CCMASK_TM_SOME_1;
2284  }
2285 
2286  // Check for equality comparisons with the mask, or the equivalent.
2287  if (CmpVal == Mask) {
2288  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2289  return SystemZ::CCMASK_TM_ALL_1;
2290  if (CCMask == SystemZ::CCMASK_CMP_NE)
2291  return SystemZ::CCMASK_TM_SOME_0;
2292  }
2293  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2294  if (CCMask == SystemZ::CCMASK_CMP_GT)
2295  return SystemZ::CCMASK_TM_ALL_1;
2296  if (CCMask == SystemZ::CCMASK_CMP_LE)
2297  return SystemZ::CCMASK_TM_SOME_0;
2298  }
2299  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2300  if (CCMask == SystemZ::CCMASK_CMP_GE)
2301  return SystemZ::CCMASK_TM_ALL_1;
2302  if (CCMask == SystemZ::CCMASK_CMP_LT)
2303  return SystemZ::CCMASK_TM_SOME_0;
2304  }
2305 
2306  // Check for ordered comparisons with the top bit.
2307  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2308  if (CCMask == SystemZ::CCMASK_CMP_LE)
2309  return SystemZ::CCMASK_TM_MSB_0;
2310  if (CCMask == SystemZ::CCMASK_CMP_GT)
2311  return SystemZ::CCMASK_TM_MSB_1;
2312  }
2313  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2314  if (CCMask == SystemZ::CCMASK_CMP_LT)
2315  return SystemZ::CCMASK_TM_MSB_0;
2316  if (CCMask == SystemZ::CCMASK_CMP_GE)
2317  return SystemZ::CCMASK_TM_MSB_1;
2318  }
2319 
2320  // If there are just two bits, we can do equality checks for Low and High
2321  // as well.
2322  if (Mask == Low + High) {
2323  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2324  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2325  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2326  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2327  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2328  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2329  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2330  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2331  }
2332 
2333  // Looks like we've exhausted our options.
2334  return 0;
2335 }
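// Worked example: for "(x & 0x80) == 0" the mask 0x80 is a valid TMLL
// immediate with Low == High == 0x80, CmpVal is 0 and CCMask is
// CCMASK_CMP_EQ, so the function returns SystemZ::CCMASK_TM_ALL_0: a TEST
// UNDER MASK that branches when every tested bit is zero.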
2336 
2337 // See whether C can be implemented as a TEST UNDER MASK instruction.
2338 // Update the arguments with the TM version if so.
2339 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2340  Comparison &C) {
2341  // Check that we have a comparison with a constant.
2342  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2343  if (!ConstOp1)
2344  return;
2345  uint64_t CmpVal = ConstOp1->getZExtValue();
2346 
2347  // Check whether the nonconstant input is an AND with a constant mask.
2348  Comparison NewC(C);
2349  uint64_t MaskVal;
2350  ConstantSDNode *Mask = nullptr;
2351  if (C.Op0.getOpcode() == ISD::AND) {
2352  NewC.Op0 = C.Op0.getOperand(0);
2353  NewC.Op1 = C.Op0.getOperand(1);
2354  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2355  if (!Mask)
2356  return;
2357  MaskVal = Mask->getZExtValue();
2358  } else {
2359  // There is no instruction to compare with a 64-bit immediate
2360  // so use TMHH instead if possible. We need an unsigned ordered
2361  // comparison with an i64 immediate.
2362  if (NewC.Op0.getValueType() != MVT::i64 ||
2363  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2364  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2365  NewC.ICmpType == SystemZICMP::SignedOnly)
2366  return;
2367  // Convert LE and GT comparisons into LT and GE.
2368  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2369  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2370  if (CmpVal == uint64_t(-1))
2371  return;
2372  CmpVal += 1;
2373  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2374  }
2375  // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2376  // be masked off without changing the result.
2377  MaskVal = -(CmpVal & -CmpVal);
2378  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2379  }
2380  if (!MaskVal)
2381  return;
2382 
2383  // Check whether the combination of mask, comparison value and comparison
2384  // type are suitable.
2385  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2386  unsigned NewCCMask, ShiftVal;
2387  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2388  NewC.Op0.getOpcode() == ISD::SHL &&
2389  isSimpleShift(NewC.Op0, ShiftVal) &&
2390  (MaskVal >> ShiftVal != 0) &&
2391  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2392  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2393  MaskVal >> ShiftVal,
2394  CmpVal >> ShiftVal,
2395  SystemZICMP::Any))) {
2396  NewC.Op0 = NewC.Op0.getOperand(0);
2397  MaskVal >>= ShiftVal;
2398  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2399  NewC.Op0.getOpcode() == ISD::SRL &&
2400  isSimpleShift(NewC.Op0, ShiftVal) &&
2401  (MaskVal << ShiftVal != 0) &&
2402  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2403  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2404  MaskVal << ShiftVal,
2405  CmpVal << ShiftVal,
2406  SystemZICMP::UnsignedOnly))) {
2407  NewC.Op0 = NewC.Op0.getOperand(0);
2408  MaskVal <<= ShiftVal;
2409  } else {
2410  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2411  NewC.ICmpType);
2412  if (!NewCCMask)
2413  return;
2414  }
2415 
2416  // Go ahead and make the change.
2417  C.Opcode = SystemZISD::TM;
2418  C.Op0 = NewC.Op0;
2419  if (Mask && Mask->getZExtValue() == MaskVal)
2420  C.Op1 = SDValue(Mask, 0);
2421  else
2422  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2423  C.CCValid = SystemZ::CCMASK_TM;
2424  C.CCMask = NewCCMask;
2425 }
2426 
2427 // See whether the comparison argument contains a redundant AND
2428 // and remove it if so. This sometimes happens due to the generic
2429 // BRCOND expansion.
2430 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2431  Comparison &C) {
2432  if (C.Op0.getOpcode() != ISD::AND)
2433  return;
2434  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2435  if (!Mask)
2436  return;
2437  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2438  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2439  return;
2440 
2441  C.Op0 = C.Op0.getOperand(0);
2442 }
2443 
2444 // Return a Comparison that tests the condition-code result of intrinsic
2445 // node Call against constant integer CC using comparison code Cond.
2446 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2447 // and CCValid is the set of possible condition-code results.
2448 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2449  SDValue Call, unsigned CCValid, uint64_t CC,
2450  ISD::CondCode Cond) {
2451  Comparison C(Call, SDValue());
2452  C.Opcode = Opcode;
2453  C.CCValid = CCValid;
2454  if (Cond == ISD::SETEQ)
2455  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2456  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2457  else if (Cond == ISD::SETNE)
2458  // ...and the inverse of that.
2459  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2460  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2461  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2462  // always true for CC>3.
2463  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2464  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2465  // ...and the inverse of that.
2466  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2467  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2468  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2469  // always true for CC>3.
2470  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2471  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2472  // ...and the inverse of that.
2473  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2474  else
2475  llvm_unreachable("Unexpected integer comparison type");
2476  C.CCMask &= CCValid;
2477  return C;
2478 }
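// The 4-bit CC mask is indexed from the most significant bit, so bit (3 - CC)
// stands for "the intrinsic produced condition code CC". For example, with
// CC == 1 and Cond == ISD::SETEQ the mask is 1 << 2 == 0b0100, and with
// CC == 2 and Cond == ISD::SETULT it is ~0U << 2, i.e. the bits for CC == 0
// and CC == 1, before being restricted to CCValid.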
2479 
2480 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2481 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2482  ISD::CondCode Cond, const SDLoc &DL) {
2483  if (CmpOp1.getOpcode() == ISD::Constant) {
2484  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2485  unsigned Opcode, CCValid;
2486  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2487  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2488  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2489  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2490  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2491  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2492  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2493  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2494  }
2495  Comparison C(CmpOp0, CmpOp1);
2496  C.CCMask = CCMaskForCondCode(Cond);
2497  if (C.Op0.getValueType().isFloatingPoint()) {
2498  C.CCValid = SystemZ::CCMASK_FCMP;
2499  C.Opcode = SystemZISD::FCMP;
2500  adjustForFNeg(C);
2501  } else {
2502  C.CCValid = SystemZ::CCMASK_ICMP;
2503  C.Opcode = SystemZISD::ICMP;
2504  // Choose the type of comparison. Equality and inequality tests can
2505  // use either signed or unsigned comparisons. The choice also doesn't
2506  // matter if both sign bits are known to be clear. In those cases we
2507  // want to give the main isel code the freedom to choose whichever
2508  // form fits best.
2509  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2510  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2511  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2512  C.ICmpType = SystemZICMP::Any;
2513  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2514  C.ICmpType = SystemZICMP::UnsignedOnly;
2515  else
2516  C.ICmpType = SystemZICMP::SignedOnly;
2517  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2518  adjustForRedundantAnd(DAG, DL, C);
2519  adjustZeroCmp(DAG, DL, C);
2520  adjustSubwordCmp(DAG, DL, C);
2521  adjustForSubtraction(DAG, DL, C);
2522  adjustForLTGFR(C);
2523  adjustICmpTruncate(DAG, DL, C);
2524  }
2525 
2526  if (shouldSwapCmpOperands(C)) {
2527  std::swap(C.Op0, C.Op1);
2528  C.CCMask = reverseCCMask(C.CCMask);
2529  }
2530 
2531  adjustForTestUnderMask(DAG, DL, C);
2532  return C;
2533 }
2534 
2535 // Emit the comparison instruction described by C.
2536 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2537  if (!C.Op1.getNode()) {
2538  SDNode *Node;
2539  switch (C.Op0.getOpcode()) {
2540  case ISD::INTRINSIC_W_CHAIN:
2541  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2542  return SDValue(Node, 0);
2543  case ISD::INTRINSIC_WO_CHAIN:
2544  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2545  return SDValue(Node, Node->getNumValues() - 1);
2546  default:
2547  llvm_unreachable("Invalid comparison operands");
2548  }
2549  }
2550  if (C.Opcode == SystemZISD::ICMP)
2551  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2552  DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2553  if (C.Opcode == SystemZISD::TM) {
2554  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2555  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2556  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2557  DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2558  }
2559  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2560 }
2561 
2562 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2563 // 64 bits. Extend is the extension type to use. Store the high part
2564 // in Hi and the low part in Lo.
2565 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2566  SDValue Op0, SDValue Op1, SDValue &Hi,
2567  SDValue &Lo) {
2568  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2569  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2570  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2571  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2572  DAG.getConstant(32, DL, MVT::i64));
2573  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2574  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2575 }
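// For example, with Extend == ISD::ZERO_EXTEND, Op0 == 0x80000000 and
// Op1 == 2 the 64-bit product is 0x100000000, so Hi becomes 1 and Lo becomes
// 0, which is exactly the result pair that the 32-bit *MUL_LOHI operation is
// defined to produce.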
2576 
2577 // Lower a binary operation that produces two VT results, one in each
2578 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2579 // and Opcode performs the GR128 operation. Store the even register result
2580 // in Even and the odd register result in Odd.
2581 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2582  unsigned Opcode, SDValue Op0, SDValue Op1,
2583  SDValue &Even, SDValue &Odd) {
2584  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2585  bool Is32Bit = is32Bit(VT);
2586  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2587  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2588 }
2589 
2590 // Return an i32 value that is 1 if the CC value produced by CCReg is
2591 // in the mask CCMask and 0 otherwise. CC is known to have a value
2592 // in CCValid, so other values can be ignored.
2593 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2594  unsigned CCValid, unsigned CCMask) {
2595  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2596  DAG.getConstant(0, DL, MVT::i32),
2597  DAG.getTargetConstant(CCValid, DL, MVT::i32),
2598  DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2599  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2600 }
2601 
2602 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2603 // be done directly. IsFP is true if CC is for a floating-point rather than
2604 // integer comparison.
2605 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
2606  switch (CC) {
2607  case ISD::SETOEQ:
2608  case ISD::SETEQ:
2609  return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
2610 
2611  case ISD::SETOGE:
2612  case ISD::SETGE:
2613  return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
2614 
2615  case ISD::SETOGT:
2616  case ISD::SETGT:
2617  return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
2618 
2619  case ISD::SETUGT:
2620  return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
2621 
2622  default:
2623  return 0;
2624  }
2625 }
2626 
2627 // Return the SystemZISD vector comparison operation for CC or its inverse,
2628 // or 0 if neither can be done directly. Indicate in Invert whether the
2629 // result is for the inverse of CC. IsFP is true if CC is for a
2630 // floating-point rather than integer comparison.
2631 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
2632  bool &Invert) {
2633  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2634  Invert = false;
2635  return Opcode;
2636  }
2637 
2638  CC = ISD::getSetCCInverse(CC, !IsFP);
2639  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2640  Invert = true;
2641  return Opcode;
2642  }
2643 
2644  return 0;
2645 }
2646 
2647 // Return a v2f64 that contains the extended form of elements Start and Start+1
2648 // of v4f32 value Op.
2649 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2650  SDValue Op) {
2651  int Mask[] = { Start, -1, Start + 1, -1 };
2652  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2653  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2654 }
2655 
2656 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2657 // producing a result of type VT.
2658 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2659  const SDLoc &DL, EVT VT,
2660  SDValue CmpOp0,
2661  SDValue CmpOp1) const {
2662  // There is no hardware support for v4f32 (unless we have the vector
2663  // enhancements facility 1), so extend the vector into two v2f64s
2664  // and compare those.
2665  if (CmpOp0.getValueType() == MVT::v4f32 &&
2666  !Subtarget.hasVectorEnhancements1()) {
2667  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
2668  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
2669  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
2670  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
2671  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2672  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2673  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2674  }
2675  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2676 }
2677 
2678 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2679 // an integer mask of type VT.
2680 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2681  const SDLoc &DL, EVT VT,
2682  ISD::CondCode CC,
2683  SDValue CmpOp0,
2684  SDValue CmpOp1) const {
2685  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2686  bool Invert = false;
2687  SDValue Cmp;
2688  switch (CC) {
2689  // Handle tests for order using (or (ogt y x) (oge x y)).
2690  case ISD::SETUO:
2691  Invert = true;
2692  LLVM_FALLTHROUGH;
2693  case ISD::SETO: {
2694  assert(IsFP && "Unexpected integer comparison");
2695  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2696  SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
2697  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2698  break;
2699  }
2700 
2701  // Handle <> tests using (or (ogt y x) (ogt x y)).
2702  case ISD::SETUEQ:
2703  Invert = true;
2704  LLVM_FALLTHROUGH;
2705  case ISD::SETONE: {
2706  assert(IsFP && "Unexpected integer comparison");
2707  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2708  SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
2709  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2710  break;
2711  }
2712 
2713  // Otherwise a single comparison is enough. It doesn't really
2714  // matter whether we try the inversion or the swap first, since
2715  // there are no cases where both work.
2716  default:
2717  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2718  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
2719  else {
2720  CC = ISD::getSetCCSwappedOperands(CC);
2721  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2722  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
2723  else
2724  llvm_unreachable("Unhandled comparison");
2725  }
2726  break;
2727  }
2728  if (Invert) {
2729  SDValue Mask =
2730  DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
2731  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2732  }
2733  return Cmp;
2734 }
2735 
2736 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2737  SelectionDAG &DAG) const {
2738  SDValue CmpOp0 = Op.getOperand(0);
2739  SDValue CmpOp1 = Op.getOperand(1);
2740  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2741  SDLoc DL(Op);
2742  EVT VT = Op.getValueType();
2743  if (VT.isVector())
2744  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2745 
2746  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2747  SDValue CCReg = emitCmp(DAG, DL, C);
2748  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2749 }
2750 
2751 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2752  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2753  SDValue CmpOp0 = Op.getOperand(2);
2754  SDValue CmpOp1 = Op.getOperand(3);
2755  SDValue Dest = Op.getOperand(4);
2756  SDLoc DL(Op);
2757 
2758  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2759  SDValue CCReg = emitCmp(DAG, DL, C);
2760  return DAG.getNode(
2761  SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
2762  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
2763  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
2764 }
2765 
2766 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2767 // allowing Pos and Neg to be wider than CmpOp.
2768 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
2769  return (Neg.getOpcode() == ISD::SUB &&
2770  Neg.getOperand(0).getOpcode() == ISD::Constant &&
2771  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
2772  Neg.getOperand(1) == Pos &&
2773  (Pos == CmpOp ||
2774  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
2775  Pos.getOperand(0) == CmpOp)));
2776 }
2777 
2778 // Return the absolute or negative absolute of Op; IsNegative decides which.
2779 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
2780  bool IsNegative) {
2781  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
2782  if (IsNegative)
2783  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
2784  DAG.getConstant(0, DL, Op.getValueType()), Op);
2785  return Op;
2786 }
2787 
2788 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
2789  SelectionDAG &DAG) const {
2790  SDValue CmpOp0 = Op.getOperand(0);
2791  SDValue CmpOp1 = Op.getOperand(1);
2792  SDValue TrueOp = Op.getOperand(2);
2793  SDValue FalseOp = Op.getOperand(3);
2794  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2795  SDLoc DL(Op);
2796 
2797  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2798 
2799  // Check for absolute and negative-absolute selections, including those
2800  // where the comparison value is sign-extended (for LPGFR and LNGFR).
2801  // This check supplements the one in DAGCombiner.
2802  if (C.Opcode == SystemZISD::ICMP &&
2803  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
2804  C.CCMask != SystemZ::CCMASK_CMP_NE &&
2805  C.Op1.getOpcode() == ISD::Constant &&
2806  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2807  if (isAbsolute(C.Op0, TrueOp, FalseOp))
2808  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
2809  if (isAbsolute(C.Op0, FalseOp, TrueOp))
2810  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
2811  }
2812 
2813  SDValue CCReg = emitCmp(DAG, DL, C);
2814  SDValue Ops[] = {TrueOp, FalseOp,
2815  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
2816  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
2817 
2818  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
2819 }
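// For example, "select (setlt x, 0), (sub 0, x), x" takes the second
// isAbsolute branch above: FalseOp is x itself and TrueOp is its negation,
// and since CCMASK_CMP_LT & CCMASK_CMP_GT == 0 the whole select collapses to
// a single IABS of x (LOAD POSITIVE), with no separate compare or select.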
2820 
2821 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
2822  SelectionDAG &DAG) const {
2823  SDLoc DL(Node);
2824  const GlobalValue *GV = Node->getGlobal();
2825  int64_t Offset = Node->getOffset();
2826  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2827  CodeModel::Model CM = DAG.getTarget().getCodeModel();
2828 
2829  SDValue Result;
2830  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
2831  // Assign anchors at 1<<12 byte boundaries.
2832  uint64_t Anchor = Offset & ~uint64_t(0xfff);
2833  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
2834  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2835 
2836  // The offset can be folded into the address if it is aligned to a halfword.
2837  Offset -= Anchor;
2838  if (Offset != 0 && (Offset & 1) == 0) {
2839  SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
2840  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
2841  Offset = 0;
2842  }
2843  } else {
2844  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
2845  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2846  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2847  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2848  }
2849 
2850  // If there was a non-zero offset that we didn't fold, create an explicit
2851  // addition for it.
2852  if (Offset != 0)
2853  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
2854  DAG.getConstant(Offset, DL, PtrVT));
2855 
2856  return Result;
2857 }
2858 
2859 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
2860  SelectionDAG &DAG,
2861  unsigned Opcode,
2862  SDValue GOTOffset) const {
2863  SDLoc DL(Node);
2864  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2865  SDValue Chain = DAG.getEntryNode();
2866  SDValue Glue;
2867 
2868  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
2869  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2870  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
2871  Glue = Chain.getValue(1);
2872  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
2873  Glue = Chain.getValue(1);
2874 
2875  // The first call operand is the chain and the second is the TLS symbol.
2876  SmallVector<SDValue, 8> Ops;
2877  Ops.push_back(Chain);
2878  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
2879  Node->getValueType(0),
2880  0, 0));
2881 
2882  // Add argument registers to the end of the list so that they are
2883  // known live into the call.
2884  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
2885  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
2886 
2887  // Add a register mask operand representing the call-preserved registers.
2888  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2889  const uint32_t *Mask =
2890  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
2891  assert(Mask && "Missing call preserved mask for calling convention");
2892  Ops.push_back(DAG.getRegisterMask(Mask));
2893 
2894  // Glue the call to the argument copies.
2895  Ops.push_back(Glue);
2896 
2897  // Emit the call.
2898  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2899  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
2900  Glue = Chain.getValue(1);
2901 
2902  // Copy the return value from %r2.
2903  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
2904 }
2905 
2906 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
2907  SelectionDAG &DAG) const {
2908  SDValue Chain = DAG.getEntryNode();
2909  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2910 
2911  // The high part of the thread pointer is in access register 0.
2912  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
2913  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
2914 
2915  // The low part of the thread pointer is in access register 1.
2916  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
2917  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
2918 
2919  // Merge them into a single 64-bit address.
2920  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
2921  DAG.getConstant(32, DL, PtrVT));
2922  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
2923 }
2924 
2925 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
2926  SelectionDAG &DAG) const {
2927  if (DAG.getTarget().useEmulatedTLS())
2928  return LowerToTLSEmulatedModel(Node, DAG);
2929  SDLoc DL(Node);
2930  const GlobalValue *GV = Node->getGlobal();
2931  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2932  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
2933 
2934  SDValue TP = lowerThreadPointer(DL, DAG);
2935 
2936  // Get the offset of GA from the thread pointer, based on the TLS model.
2937  SDValue Offset;
2938  switch (model) {
2939  case TLSModel::GeneralDynamic: {
2940  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
2941  SystemZConstantPoolValue *CPV =
2942  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
2943 
2944  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2945  Offset = DAG.getLoad(
2946  PtrVT, DL, DAG.getEntryNode(), Offset,
2947  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2948 
2949  // Call __tls_get_offset to retrieve the offset.
2950  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
2951  break;
2952  }
2953 
2954  case TLSModel::LocalDynamic: {
2955  // Load the GOT offset of the module ID.
2956  SystemZConstantPoolValue *CPV =
2957  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
2958 
2959  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2960  Offset = DAG.getLoad(
2961  PtrVT, DL, DAG.getEntryNode(), Offset,
2962  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2963 
2964  // Call __tls_get_offset to retrieve the module base offset.
2965  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
2966 
2967  // Note: The SystemZLDCleanupPass will remove redundant computations
2968  // of the module base offset. Count total number of local-dynamic
2969  // accesses to trigger execution of that pass.
2970  SystemZMachineFunctionInfo* MFI =
2971  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
2972  MFI->incNumLocalDynamicTLSAccesses();
2973 
2974  // Add the per-symbol offset.
2975  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
2976 
2977  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
2978  DTPOffset = DAG.getLoad(
2979  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
2980  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2981 
2982  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
2983  break;
2984  }
2985 
2986  case TLSModel::InitialExec: {
2987  // Load the offset from the GOT.
2988  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2989  SystemZII::MO_INDNTPOFF);
2990  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
2991  Offset =
2992  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
2993  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2994  break;
2995  }
2996 
2997  case TLSModel::LocalExec: {
2998  // Force the offset into the constant pool and load it from there.
2999  SystemZConstantPoolValue *CPV =
3000  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3001 
3002  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
3003  Offset = DAG.getLoad(
3004  PtrVT, DL, DAG.getEntryNode(), Offset,
3005  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3006  break;
3007  }
3008  }
3009 
3010  // Add the base and offset together.
3011  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3012 }
3013 
3014 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3015  SelectionDAG &DAG) const {
3016  SDLoc DL(Node);
3017  const BlockAddress *BA = Node->getBlockAddress();
3018  int64_t Offset = Node->getOffset();
3019  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3020 
3021  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3022  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3023  return Result;
3024 }
3025 
3026 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3027  SelectionDAG &DAG) const {
3028  SDLoc DL(JT);
3029  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3030  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3031 
3032  // Use LARL to load the address of the table.
3033  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3034 }
3035 
3036 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3037  SelectionDAG &DAG) const {
3038  SDLoc DL(CP);
3039  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3040 
3041  SDValue Result;
3042  if (CP->isMachineConstantPoolEntry())
3043  Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
3044  CP->getAlignment());
3045  else
3046  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
3047  CP->getAlignment(), CP->getOffset());
3048 
3049  // Use LARL to load the address of the constant pool entry.
3050  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3051 }
3052 
3053 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3054  SelectionDAG &DAG) const {
3055  MachineFunction &MF = DAG.getMachineFunction();
3056  MachineFrameInfo &MFI = MF.getFrameInfo();
3057  MFI.setFrameAddressIsTaken(true);
3058 
3059  SDLoc DL(Op);
3060  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3061  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3062 
3063  // If the back chain frame index has not been allocated yet, do so.
3064  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
3065  int BackChainIdx = FI->getFramePointerSaveIndex();
3066  if (!BackChainIdx) {
3067  // By definition, the frame address is the address of the back chain.
3068  BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
3069  FI->setFramePointerSaveIndex(BackChainIdx);
3070  }
3071  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3072 
3073  // FIXME The frontend should detect this case.
3074  if (Depth > 0) {
3075  report_fatal_error("Unsupported stack frame traversal count");
3076  }
3077 
3078  return BackChain;
3079 }
3080 
3081 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3082  SelectionDAG &DAG) const {
3083  MachineFunction &MF = DAG.getMachineFunction();
3084  MachineFrameInfo &MFI = MF.getFrameInfo();
3085  MFI.setReturnAddressIsTaken(true);
3086 
3087  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3088  return SDValue();
3089 
3090  SDLoc DL(Op);
3091  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3092  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3093 
3094  // FIXME The frontend should detect this case.
3095  if (Depth > 0) {
3096  report_fatal_error("Unsupported stack frame traversal count");
3097  }
3098 
3099  // Return R14D, which has the return address. Mark it an implicit live-in.
3100  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3101  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3102 }
3103 
3104 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3105  SelectionDAG &DAG) const {
3106  SDLoc DL(Op);
3107  SDValue In = Op.getOperand(0);
3108  EVT InVT = In.getValueType();
3109  EVT ResVT = Op.getValueType();
3110 
3111  // Convert loads directly. This is normally done by DAGCombiner,
3112  // but we need this case for bitcasts that are created during lowering
3113  // and which are then lowered themselves.
3114  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3115  if (ISD::isNormalLoad(LoadN)) {
3116  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3117  LoadN->getBasePtr(), LoadN->getMemOperand());
3118  // Update the chain uses.
3119  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3120  return NewLoad;
3121  }
3122 
3123  if (InVT == MVT::i32 && ResVT == MVT::f32) {
3124  SDValue In64;
3125  if (Subtarget.hasHighWord()) {
3126  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3127  MVT::i64);
3128  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3129  MVT::i64, SDValue(U64, 0), In);
3130  } else {
3131  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3132  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3133  DAG.getConstant(32, DL, MVT::i64));
3134  }
3135  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3136  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3137  DL, MVT::f32, Out64);
3138  }
3139  if (InVT == MVT::f32 && ResVT == MVT::i32) {
3140  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3141  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3142  MVT::f64, SDValue(U64, 0), In);
3143  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3144  if (Subtarget.hasHighWord())
3145  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3146  MVT::i32, Out64);
3147  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3148  DAG.getConstant(32, DL, MVT::i64));
3149  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3150  }
3151  llvm_unreachable("Unexpected bitcast combination");
3152 }
3153 
3154 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3155  SelectionDAG &DAG) const {
3156  MachineFunction &MF = DAG.getMachineFunction();
3157  SystemZMachineFunctionInfo *FuncInfo =
3158  MF.getInfo<SystemZMachineFunctionInfo>();
3159  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3160 
3161  SDValue Chain = Op.getOperand(0);
3162  SDValue Addr = Op.getOperand(1);
3163  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3164  SDLoc DL(Op);
3165 
3166  // The initial values of each field.
3167  const unsigned NumFields = 4;
3168  SDValue Fields[NumFields] = {
3169  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3170  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3171  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3172  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3173  };
3174 
3175  // Store each field into its respective slot.
3176  SDValue MemOps[NumFields];
3177  unsigned Offset = 0;
3178  for (unsigned I = 0; I < NumFields; ++I) {
3179  SDValue FieldAddr = Addr;
3180  if (Offset != 0)
3181  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3182  DAG.getIntPtrConstant(Offset, DL));
3183  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3184  MachinePointerInfo(SV, Offset));
3185  Offset += 8;
3186  }
3187  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3188 }
3189 
3190 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3191  SelectionDAG &DAG) const {
3192  SDValue Chain = Op.getOperand(0);
3193  SDValue DstPtr = Op.getOperand(1);
3194  SDValue SrcPtr = Op.getOperand(2);
3195  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3196  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3197  SDLoc DL(Op);
3198 
3199  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
3200  /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
3201  /*isTailCall*/false,
3202  MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
3203 }
3204 
3205 SDValue SystemZTargetLowering::
3206 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
3207  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3208  MachineFunction &MF = DAG.getMachineFunction();
3209  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3210  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3211 
3212  SDValue Chain = Op.getOperand(0);
3213  SDValue Size = Op.getOperand(1);
3214  SDValue Align = Op.getOperand(2);
3215  SDLoc DL(Op);
3216 
3217  // If the user has set the "no-realign-stack" function attribute, ignore
3218  // alloca alignments.
3219  uint64_t AlignVal = (RealignOpt ?
3220  dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3221 
3222  uint64_t StackAlign = TFI->getStackAlignment();
3223  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3224  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3225 
3226  unsigned SPReg = getStackPointerRegisterToSaveRestore();
3227  SDValue NeededSpace = Size;
3228 
3229  // Get a reference to the stack pointer.
3230  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3231 
3232  // If we need a backchain, save it now.
3233  SDValue Backchain;
3234  if (StoreBackchain)
3235  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3236 
3237  // Add extra space for alignment if needed.
3238  if (ExtraAlignSpace)
3239  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3240  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3241 
3242  // Get the new stack pointer value.
3243  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3244 
3245  // Copy the new stack pointer back.
3246  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3247 
3248  // The allocated data lives above the 160 bytes allocated for the standard
3249  // frame, plus any outgoing stack arguments. We don't know how much that
3250  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3251  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3252  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3253 
3254  // Dynamically realign if needed.
3255  if (RequiredAlign > StackAlign) {
3256  Result =
3257  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3258  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3259  Result =
3260  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3261  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3262  }
3263 
3264  if (StoreBackchain)
3265  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3266 
3267  SDValue Ops[2] = { Result, Chain };
3268  return DAG.getMergeValues(Ops, DL);
3269 }
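// For example, with an 8-byte stack alignment and an alloca requesting
// 32-byte alignment, ExtraAlignSpace is 24: those bytes are added to the
// requested size, and the ADJDYNALLOC-adjusted result is rounded up via
// "(Result + 24) & ~31" so the returned pointer is 32-byte aligned wherever
// the new stack pointer happens to land.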
3270 
3271 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3272  SDValue Op, SelectionDAG &DAG) const {
3273  SDLoc DL(Op);
3274 
3275  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3276 }
3277 
3278 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3279  SelectionDAG &DAG) const {
3280  EVT VT = Op.getValueType();
3281  SDLoc DL(Op);
3282  SDValue Ops[2];
3283  if (is32Bit(VT))
3284  // Just do a normal 64-bit multiplication and extract the results.
3285  // We define this so that it can be used for constant division.
3286  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3287  Op.getOperand(1), Ops[1], Ops[0]);
3288  else if (Subtarget.hasMiscellaneousExtensions2())
3289  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3290  // the high result in the even register. ISD::SMUL_LOHI is defined to
3291  // return the low half first, so the results are in reverse order.
3292  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3293  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3294  else {
3295  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3296  //
3297  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3298  //
3299  // but using the fact that the upper halves are either all zeros
3300  // or all ones:
3301  //
3302  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3303  //
3304  // and grouping the right terms together since they are quicker than the
3305  // multiplication:
3306  //
3307  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3308  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3309  SDValue LL = Op.getOperand(0);
3310  SDValue RL = Op.getOperand(1);
3311  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3312  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3313  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3314  // the high result in the even register. ISD::SMUL_LOHI is defined to
3315  // return the low half first, so the results are in reverse order.
3316  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3317  LL, RL, Ops[1], Ops[0]);
3318  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3319  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3320  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3321  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3322  }
3323  return DAG.getMergeValues(Ops, DL);
3324 }
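// Why the AND-based rewrite above is valid: LH == LL >> 63 is either 0 or all
// ones, so as a 128-bit term (LH * RL) << 64 equals -(RL << 64) when LL is
// negative and 0 otherwise, which is exactly -((LH & RL) << 64); the same
// argument applies to (LL & RH), giving the grouped subtraction in the code.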
3325 
3326 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3327  SelectionDAG &DAG) const {
3328  EVT VT = Op.getValueType();
3329  SDLoc DL(Op);
3330  SDValue Ops[2];
3331  if (is32Bit(VT))
3332  // Just do a normal 64-bit multiplication and extract the results.
3333  // We define this so that it can be used for constant division.
3334  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3335  Op.getOperand(1), Ops[1], Ops[0]);
3336  else
3337  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3338  // the high result in the even register. ISD::UMUL_LOHI is defined to
3339  // return the low half first, so the results are in reverse order.
3340  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3341  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3342  return DAG.getMergeValues(Ops, DL);
3343 }
3344 
3345 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3346  SelectionDAG &DAG) const {
3347  SDValue Op0 = Op.getOperand(0);
3348  SDValue Op1 = Op.getOperand(1);
3349  EVT VT = Op.getValueType();
3350  SDLoc DL(Op);
3351 
3352  // We use DSGF for 32-bit division. This means the first operand must
3353  // always be 64-bit, and the second operand should be 32-bit whenever
3354  // that is possible, to improve performance.
3355  if (is32Bit(VT))
3356  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3357  else if (DAG.ComputeNumSignBits(Op1) > 32)
3358  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3359 
3360  // DSG(F) returns the remainder in the even register and the
3361  // quotient in the odd register.
3362  SDValue Ops[2];
3363  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3364  return DAG.getMergeValues(Ops, DL);
3365 }
3366 
3367 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3368  SelectionDAG &DAG) const {
3369  EVT VT = Op.getValueType();
3370  SDLoc DL(Op);
3371 
3372  // DL(G) returns the remainder in the even register and the
3373  // quotient in the odd register.
3374  SDValue Ops[2];
3375  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3376  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3377  return DAG.getMergeValues(Ops, DL);
3378 }
3379 
3380 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3381  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3382 
3383  // Get the known-zero masks for each operand.
3384  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3385  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3386  DAG.computeKnownBits(Ops[1])};
3387 
3388  // See if the upper 32 bits of one operand and the lower 32 bits of the
3389  // other are known zero. They are the low and high operands respectively.
3390  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3391  Known[1].Zero.getZExtValue() };
3392  unsigned High, Low;
3393  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3394  High = 1, Low = 0;
3395  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3396  High = 0, Low = 1;
3397  else
3398  return Op;
3399 
3400  SDValue LowOp = Ops[Low];
3401  SDValue HighOp = Ops[High];
3402 
3403  // If the high part is a constant, we're better off using IILH.
3404  if (HighOp.getOpcode() == ISD::Constant)
3405  return Op;
3406 
3407  // If the low part is a constant that is outside the range of LHI,
3408  // then we're better off using IILF.
3409  if (LowOp.getOpcode() == ISD::Constant) {
3410  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3411  if (!isInt<16>(Value))
3412  return Op;
3413  }
3414 
3415  // Check whether the high part is an AND that doesn't change the
3416  // high 32 bits and just masks out low bits. We can skip it if so.
3417  if (HighOp.getOpcode() == ISD::AND &&
3418  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3419  SDValue HighOp0 = HighOp.getOperand(0);
3420  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3421  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3422  HighOp = HighOp0;
3423  }
3424 
3425  // Take advantage of the fact that all GR32 operations only change the
3426  // low 32 bits by truncating Low to an i32 and inserting it directly
3427  // using a subreg. The interesting cases are those where the truncation
3428  // can be folded.
3429  SDLoc DL(Op);
3430  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3431  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3432  MVT::i64, HighOp, Low32);
3433 }
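// Editorial note (illustrative, not part of the upstream file): a typical
// case the routine above catches is
//   (or (and i64 %x, 0xffffffff00000000), (zext i32 %y to i64))
// where %x supplies the high 32 bits and %y the low 32 bits, so the OR can
// be emitted as an insertion of %y into the low 32-bit subregister
// (subreg_l32) of %x.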
3434 
3435 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3436 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3437  SelectionDAG &DAG) const {
3438  SDNode *N = Op.getNode();
3439  SDValue LHS = N->getOperand(0);
3440  SDValue RHS = N->getOperand(1);
3441  SDLoc DL(N);
3442  unsigned BaseOp = 0;
3443  unsigned CCValid = 0;
3444  unsigned CCMask = 0;
3445 
3446  switch (Op.getOpcode()) {
3447  default: llvm_unreachable("Unknown instruction!");
3448  case ISD::SADDO:
3449  BaseOp = SystemZISD::SADDO;
3450  CCValid = SystemZ::CCMASK_ARITH;
3451  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3452  break;
3453  case ISD::SSUBO:
3454  BaseOp = SystemZISD::SSUBO;
3455  CCValid = SystemZ::CCMASK_ARITH;
3456  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3457  break;
3458  case ISD::UADDO:
3459  BaseOp = SystemZISD::UADDO;
3460  CCValid = SystemZ::CCMASK_LOGICAL;
3461  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3462  break;
3463  case ISD::USUBO:
3464  BaseOp = SystemZISD::USUBO;
3465  CCValid = SystemZ::CCMASK_LOGICAL;
3466  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3467  break;
3468  }
3469 
3470  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3471  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3472 
3473  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3474  if (N->getValueType(1) == MVT::i1)
3475  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3476 
3477  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3478 }
3479 
3480 static bool isAddCarryChain(SDValue Carry) {
3481  while (Carry.getOpcode() == ISD::ADDCARRY)
3482  Carry = Carry.getOperand(2);
3483  return Carry.getOpcode() == ISD::UADDO;
3484 }
3485 
3486 static bool isSubBorrowChain(SDValue Carry) {
3487  while (Carry.getOpcode() == ISD::SUBCARRY)
3488  Carry = Carry.getOperand(2);
3489  return Carry.getOpcode() == ISD::USUBO;
3490 }
3491 
3492 // Lower ADDCARRY/SUBCARRY nodes.
3493 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3494  SelectionDAG &DAG) const {
3495 
3496  SDNode *N = Op.getNode();
3497  MVT VT = N->getSimpleValueType(0);
3498 
3499  // Let legalize expand this if it isn't a legal type yet.
3500  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3501  return SDValue();
3502 
3503  SDValue LHS = N->getOperand(0);
3504  SDValue RHS = N->getOperand(1);
3505  SDValue Carry = Op.getOperand(2);
3506  SDLoc DL(N);
3507  unsigned BaseOp = 0;
3508  unsigned CCValid = 0;
3509  unsigned CCMask = 0;
3510 
3511  switch (Op.getOpcode()) {
3512  default: llvm_unreachable("Unknown instruction!");
3513  case ISD::ADDCARRY:
3514  if (!isAddCarryChain(Carry))
3515  return SDValue();
3516 
3517  BaseOp = SystemZISD::ADDCARRY;
3518  CCValid = SystemZ::CCMASK_LOGICAL;
3519  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3520  break;
3521  case ISD::SUBCARRY:
3522  if (!isSubBorrowChain(Carry))
3523  return SDValue();
3524 
3525  BaseOp = SystemZISD::SUBCARRY;
3526  CCValid = SystemZ::CCMASK_LOGICAL;
3527  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3528  break;
3529  }
3530 
3531  // Set the condition code from the carry flag.
3532  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3533  DAG.getConstant(CCValid, DL, MVT::i32),
3534  DAG.getConstant(CCMask, DL, MVT::i32));
3535 
3536  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3537  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3538 
3539  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3540  if (N->getValueType(1) == MVT::i1)
3541  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3542 
3543  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3544 }
3545 
3546 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3547  SelectionDAG &DAG) const {
3548  EVT VT = Op.getValueType();
3549  SDLoc DL(Op);
3550  Op = Op.getOperand(0);
3551 
3552  // Handle vector types via VPOPCT.
3553  if (VT.isVector()) {
3554  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3555  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3556  switch (VT.getScalarSizeInBits()) {
3557  case 8:
3558  break;
3559  case 16: {
3560  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3561  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3562  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3563  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3564  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3565  break;
3566  }
3567  case 32: {
3568  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3569  DAG.getConstant(0, DL, MVT::i32));
3570  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3571  break;
3572  }
3573  case 64: {
3574  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3575  DAG.getConstant(0, DL, MVT::i32));
3576  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3577  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3578  break;
3579  }
3580  default:
3581  llvm_unreachable("Unexpected type");
3582  }
3583  return Op;
3584  }
3585 
3586  // Get the known-zero mask for the operand.
3587  KnownBits Known = DAG.computeKnownBits(Op);
3588  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
3589  if (NumSignificantBits == 0)
3590  return DAG.getConstant(0, DL, VT);
3591 
3592  // Skip known-zero high parts of the operand.
3593  int64_t OrigBitSize = VT.getSizeInBits();
3594  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3595  BitSize = std::min(BitSize, OrigBitSize);
3596 
3597  // The POPCNT instruction counts the number of bits in each byte.
3598  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3599  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3600  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3601 
3602  // Add up per-byte counts in a binary tree. All bits of Op at
3603  // position larger than BitSize remain zero throughout.
3604  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3605  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3606  if (BitSize != OrigBitSize)
3607  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3608  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3609  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3610  }
3611 
3612  // Extract overall result from high byte.
3613  if (BitSize > 8)
3614  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3615  DAG.getConstant(BitSize - 8, DL, VT));
3616 
3617  return Op;
3618 }
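// Editorial sketch (not part of the upstream file): the same byte-tree
// reduction on a plain uint64_t. __builtin_popcountll (a GCC/Clang builtin)
// is used only to form the per-byte counts that the POPCNT instruction
// produces; the two loops mirror the DAG nodes built above.
static unsigned popcountByteTreeSketch(uint64_t X) {
  uint64_t Op = 0;
  for (int I = 0; I < 8; ++I) // per-byte population counts
    Op |= (uint64_t)__builtin_popcountll((X >> (8 * I)) & 0xff) << (8 * I);
  for (int Shift = 32; Shift >= 8; Shift /= 2) // binary-tree accumulation
    Op += Op << Shift;
  return (unsigned)(Op >> 56); // overall count ends up in the high byte
}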
3619 
3620 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3621  SelectionDAG &DAG) const {
3622  SDLoc DL(Op);
3623  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3624  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3625  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3626  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3627 
3628  // The only fence that needs an instruction is a sequentially-consistent
3629  // cross-thread fence.
3630  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3631  FenceSSID == SyncScope::System) {
3632  return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3633  Op.getOperand(0)),
3634  0);
3635  }
3636 
3637  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3638  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3639 }
3640 
3641 // Op is an atomic load. Lower it into a normal volatile load.
3642 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3643  SelectionDAG &DAG) const {
3644  auto *Node = cast<AtomicSDNode>(Op.getNode());
3645  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3646  Node->getChain(), Node->getBasePtr(),
3647  Node->getMemoryVT(), Node->getMemOperand());
3648 }
3649 
3650 // Op is an atomic store. Lower it into a normal volatile store.
3651 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3652  SelectionDAG &DAG) const {
3653  auto *Node = cast<AtomicSDNode>(Op.getNode());
3654  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3655  Node->getBasePtr(), Node->getMemoryVT(),
3656  Node->getMemOperand());
3657  // We have to enforce sequential consistency by performing a
3658  // serialization operation after the store.
3659  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
3660  Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3661  MVT::Other, Chain), 0);
3662  return Chain;
3663 }
3664 
3665 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the first
3666 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
3667 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3668  SelectionDAG &DAG,
3669  unsigned Opcode) const {
3670  auto *Node = cast<AtomicSDNode>(Op.getNode());
3671 
3672  // 32-bit operations need no code outside the main loop.
3673  EVT NarrowVT = Node->getMemoryVT();
3674  EVT WideVT = MVT::i32;
3675  if (NarrowVT == WideVT)
3676  return Op;
3677 
3678  int64_t BitSize = NarrowVT.getSizeInBits();
3679  SDValue ChainIn = Node->getChain();
3680  SDValue Addr = Node->getBasePtr();
3681  SDValue Src2 = Node->getVal();
3682  MachineMemOperand *MMO = Node->getMemOperand();
3683  SDLoc DL(Node);
3684  EVT PtrVT = Addr.getValueType();
3685 
3686  // Convert atomic subtracts of constants into additions.
3687  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3688  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3689  Opcode = SystemZISD::ATOMIC_LOADW_ADD;
3690  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3691  }
3692 
3693  // Get the address of the containing word.
3694  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3695  DAG.getConstant(-4, DL, PtrVT));
3696 
3697  // Get the number of bits that the word must be rotated left in order
3698  // to bring the field to the top bits of a GR32.
3699  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3700  DAG.getConstant(3, DL, PtrVT));
3701  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3702 
3703  // Get the complementing shift amount, for rotating a field in the top
3704  // bits back to its proper position.
3705  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3706  DAG.getConstant(0, DL, WideVT), BitShift);
3707 
3708  // Extend the source operand to 32 bits and prepare it for the inner loop.
3709  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3710  // operations require the source to be shifted in advance. (This shift
3711  // can be folded if the source is constant.) For AND and NAND, the lower
3712  // bits must be set, while for other opcodes they should be left clear.
3713  if (Opcode != SystemZISD::ATOMIC_SWAPW)
3714  Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3715  DAG.getConstant(32 - BitSize, DL, WideVT));
3716  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3717  Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3718  Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3719  DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3720 
3721  // Construct the ATOMIC_LOADW_* node.
3722  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3723  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
3724  DAG.getConstant(BitSize, DL, WideVT) };
3725  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
3726  NarrowVT, MMO);
3727 
3728  // Rotate the result of the final CS so that the field is in the lower
3729  // bits of a GR32, then truncate it.
3730  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
3731  DAG.getConstant(BitSize, DL, WideVT));
3732  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
3733 
3734  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
3735  return DAG.getMergeValues(RetOps, DL);
3736 }
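// Editorial note (illustrative, not part of the upstream file): for a
// hypothetical atomic i8 located at byte offset 3 within its aligned word,
// the sequence above computes
//   AlignedAddr = Addr & -4           (address of the containing word)
//   BitShift    = 8 * 3 = 24          (rotating the loaded word left by 24
//                                      brings this big-endian least
//                                      significant byte to the top of a GR32)
//   NegBitShift = -24, i.e. 8 mod 32  (rotates the field back afterwards)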
3737 
3738 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
3739 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3740 // operations into additions.
3741 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
3742  SelectionDAG &DAG) const {
3743  auto *Node = cast<AtomicSDNode>(Op.getNode());
3744  EVT MemVT = Node->getMemoryVT();
3745  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
3746  // A full-width operation.
3747  assert(Op.getValueType() == MemVT && "Mismatched VTs");
3748  SDValue Src2 = Node->getVal();
3749  SDValue NegSrc2;
3750  SDLoc DL(Src2);
3751 
3752  if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
3753  // Use an addition if the operand is constant and either LAA(G) is
3754  // available or the negative value is in the range of A(G)FHI.
3755  int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
3756  if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
3757  NegSrc2 = DAG.getConstant(Value, DL, MemVT);
3758  } else if (Subtarget.hasInterlockedAccess1())
3759  // Use LAA(G) if available.
3760  NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
3761  Src2);
3762 
3763  if (NegSrc2.getNode())
3764  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
3765  Node->getChain(), Node->getBasePtr(), NegSrc2,
3766  Node->getMemOperand());
3767 
3768  // Use the node as-is.
3769  return Op;
3770  }
3771 
3772  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
3773 }
3774 
3775 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
3776 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
3777  SelectionDAG &DAG) const {
3778  auto *Node = cast<AtomicSDNode>(Op.getNode());
3779  SDValue ChainIn = Node->getOperand(0);
3780  SDValue Addr = Node->getOperand(1);
3781  SDValue CmpVal = Node->getOperand(2);
3782  SDValue SwapVal = Node->getOperand(3);
3783  MachineMemOperand *MMO = Node->getMemOperand();
3784  SDLoc DL(Node);
3785 
3786  // We have native support for 32-bit and 64-bit compare and swap, but we
3787  // still need to expand extracting the "success" result from the CC.
3788  EVT NarrowVT = Node->getMemoryVT();
3789  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
3790  if (NarrowVT == WideVT) {
3791  SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3792  SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
3793  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
3794  DL, Tys, Ops, NarrowVT, MMO);
3795  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3796  SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
3797 
3798  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3799  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3800  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3801  return SDValue();
3802  }
3803 
3804  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
3805  // via a fullword ATOMIC_CMP_SWAPW operation.
3806  int64_t BitSize = NarrowVT.getSizeInBits();
3807  EVT PtrVT = Addr.getValueType();
3808 
3809  // Get the address of the containing word.
3810  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3811  DAG.getConstant(-4, DL, PtrVT));
3812 
3813  // Get the number of bits that the word must be rotated left in order
3814  // to bring the field to the top bits of a GR32.
3815  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3816  DAG.getConstant(3, DL, PtrVT));
3817  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3818 
3819  // Get the complementing shift amount, for rotating a field in the top
3820  // bits back to its proper position.
3821  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3822  DAG.getConstant(0, DL, WideVT), BitShift);
3823 
3824  // Construct the ATOMIC_CMP_SWAPW node.
3825  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3826  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
3827  NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
3828  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
3829  VTList, Ops, NarrowVT, MMO);
3830  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3831  SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
3832 
3833  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3834  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3835  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3836  return SDValue();
3837 }
3838 
3839 MachineMemOperand::Flags
3840 SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
3841  // Because of how we convert atomic_load and atomic_store to normal loads and
3842  // stores in the DAG, we need to ensure that the MMOs are marked volatile
3843  // since DAGCombine hasn't been updated to account for atomic, but non
3844  // volatile loads. (See D57601)
3845  if (auto *SI = dyn_cast<StoreInst>(&I))
3846  if (SI->isAtomic())
3847  return MachineMemOperand::MOVolatile;
3848  if (auto *LI = dyn_cast<LoadInst>(&I))
3849  if (LI->isAtomic())
3850  return MachineMemOperand::MOVolatile;
3851  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
3852  if (AI->isAtomic())
3853  return MachineMemOperand::MOVolatile;
3854  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
3855  if (AI->isAtomic())
3856  return MachineMemOperand::MOVolatile;
3857  return MachineMemOperand::MONone;
3858 }
3859 
3860 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
3861  SelectionDAG &DAG) const {
3862  MachineFunction &MF = DAG.getMachineFunction();
3863  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3864  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
3865  SystemZ::R15D, Op.getValueType());
3866 }
3867 
3868 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
3869  SelectionDAG &DAG) const {
3870  MachineFunction &MF = DAG.getMachineFunction();
3871  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3872  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3873 
3874  SDValue Chain = Op.getOperand(0);
3875  SDValue NewSP = Op.getOperand(1);
3876  SDValue Backchain;
3877  SDLoc DL(Op);
3878 
3879  if (StoreBackchain) {
3880  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
3881  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3882  }
3883 
3884  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
3885 
3886  if (StoreBackchain)
3887  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3888 
3889  return Chain;
3890 }
3891 
3892 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
3893  SelectionDAG &DAG) const {
3894  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3895  if (!IsData)
3896  // Just preserve the chain.
3897  return Op.getOperand(0);
3898 
3899  SDLoc DL(Op);
3900  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3901  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
3902  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
3903  SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
3904  Op.getOperand(1)};
3905  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
3906  Node->getVTList(), Ops,
3907  Node->getMemoryVT(), Node->getMemOperand());
3908 }
3909 
3910 // Convert condition code in CCReg to an i32 value.
3911 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
3912  SDLoc DL(CCReg);
3913  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
3914  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
3915  DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
3916 }
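// Editorial note (illustrative, not part of the upstream file): IPM places
// the condition code in bits 28-29 of the 32-bit result (counting from the
// least significant bit), so the logical shift right by SystemZ::IPM_CC above
// leaves CC as a plain integer in the range 0-3.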
3917 
3918 SDValue
3919 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3920  SelectionDAG &DAG) const {
3921  unsigned Opcode, CCValid;
3922  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
3923  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
3924  SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
3925  SDValue CC = getCCResult(DAG, SDValue(Node, 0));
3926  DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
3927  return SDValue();
3928  }
3929 
3930  return SDValue();
3931 }
3932 
3933 SDValue
3934 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3935  SelectionDAG &DAG) const {
3936  unsigned Opcode, CCValid;
3937  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
3938  SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
3939  if (Op->getNumValues() == 1)
3940  return getCCResult(DAG, SDValue(Node, 0));
3941  assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
3942  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
3943  SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
3944  }
3945 
3946  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3947  switch (Id) {
3948  case Intrinsic::thread_pointer:
3949  return lowerThreadPointer(SDLoc(Op), DAG);
3950 
3951  case Intrinsic::s390_vpdi:
3952  return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
3953  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3954 
3955  case Intrinsic::s390_vperm:
3956  return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
3957  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3958 
3959  case Intrinsic::s390_vuphb:
3960  case Intrinsic::s390_vuphh:
3961  case Intrinsic::s390_vuphf:
3962  return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
3963  Op.getOperand(1));
3964 
3965  case Intrinsic::s390_vuplhb:
3966  case Intrinsic::s390_vuplhh:
3967  case Intrinsic::s390_vuplhf:
3968  return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
3969  Op.getOperand(1));
3970 
3971  case Intrinsic::s390_vuplb:
3972  case Intrinsic::s390_vuplhw:
3973  case Intrinsic::s390_vuplf:
3974  return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
3975  Op.getOperand(1));
3976 
3977  case Intrinsic::s390_vupllb:
3978  case Intrinsic::s390_vupllh:
3979  case Intrinsic::s390_vupllf:
3980  return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
3981  Op.getOperand(1));
3982 
3983  case Intrinsic::s390_vsumb:
3984  case Intrinsic::s390_vsumh:
3985  case Intrinsic::s390_vsumgh:
3986  case Intrinsic::s390_vsumgf:
3987  case Intrinsic::s390_vsumqf:
3988  case Intrinsic::s390_vsumqg:
3989  return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
3990  Op.getOperand(1), Op.getOperand(2));
3991  }
3992 
3993  return SDValue();
3994 }
3995 
3996 namespace {
3997 // Says that SystemZISD operation Opcode can be used to perform the equivalent
3998 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
3999 // Operand is the constant third operand, otherwise it is the number of
4000 // bytes in each element of the result.
4001 struct Permute {
4002  unsigned Opcode;
4003  unsigned Operand;
4004  unsigned char Bytes[SystemZ::VectorBytes];
4005 };
4006 }
4007 
4008 static const Permute PermuteForms[] = {
4009  // VMRHG
4010  { SystemZISD::MERGE_HIGH, 8,
4011  { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4012  // VMRHF
4013  { SystemZISD::MERGE_HIGH, 4,
4014  { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4015  // VMRHH
4016  { SystemZISD::MERGE_HIGH, 2,
4017  { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4018  // VMRHB
4019  { SystemZISD::MERGE_HIGH, 1,
4020  { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4021  // VMRLG
4022  { SystemZISD::MERGE_LOW, 8,
4023  { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4024  // VMRLF
4025  { SystemZISD::MERGE_LOW, 4,
4026  { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4027  // VMRLH
4028  { SystemZISD::MERGE_LOW, 2,
4029  { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4030  // VMRLB
4031  { SystemZISD::MERGE_LOW, 1,
4032  { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4033  // VPKG
4034  { SystemZISD::PACK, 4,
4035  { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4036  // VPKF
4037  { SystemZISD::PACK, 2,
4038  { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4039  // VPKH
4040  { SystemZISD::PACK, 1,
4041  { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4042  // VPDI V1, V2, 4 (low half of V1, high half of V2)
4043  { SystemZISD::PERMUTE_DWORDS, 4,
4044  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4045  // VPDI V1, V2, 1 (high half of V1, low half of V2)
4046  { SystemZISD::PERMUTE_DWORDS, 1,
4047  { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4048 };
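// Editorial note (illustrative, not part of the upstream file): each entry
// lists, for every byte of the result, which byte of the 32-byte
// concatenation of the two inputs it comes from (0-15 = first operand,
// 16-31 = second operand). The VMRHG entry, for instance, takes bytes 0-7
// of each operand, i.e. it merges the high doublewords of its inputs.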
4049 
4050 // Called after matching a vector shuffle against a particular pattern.
4051 // Both the original shuffle and the pattern have two vector operands.
4052 // OpNos[0] is the operand of the original shuffle that should be used for
4053 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4054 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4055 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4056 // for operands 0 and 1 of the pattern.
4057 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4058  if (OpNos[0] < 0) {
4059  if (OpNos[1] < 0)
4060  return false;
4061  OpNo0 = OpNo1 = OpNos[1];
4062  } else if (OpNos[1] < 0) {
4063  OpNo0 = OpNo1 = OpNos[0];
4064  } else {
4065  OpNo0 = OpNos[0];
4066  OpNo1 = OpNos[1];
4067  }
4068  return true;
4069 }
4070 
4071 // Bytes is a VPERM-like permute vector, except that -1 is used for
4072 // undefined bytes. Return true if the VPERM can be implemented using P.
4073 // When returning true set OpNo0 to the VPERM operand that should be
4074 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4075 //
4076 // For example, if swapping the VPERM operands allows P to match, OpNo0
4077 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4078 // operand, but rewriting it to use two duplicated operands allows it to
4079 // match P, then OpNo0 and OpNo1 will be the same.
4080 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4081  unsigned &OpNo0, unsigned &OpNo1) {
4082  int OpNos[] = { -1, -1 };
4083  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4084  int Elt = Bytes[I];
4085  if (Elt >= 0) {
4086  // Make sure that the two permute vectors use the same suboperand
4087  // byte number. Only the operand numbers (the high bits) are
4088  // allowed to differ.
4089  if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
4090  return false;
4091  int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
4092  int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
4093  // Make sure that the operand mappings are consistent with previous
4094  // elements.
4095  if (OpNos[ModelOpNo] == 1 - RealOpNo)
4096  return false;
4097  OpNos[ModelOpNo] = RealOpNo;
4098  }
4099  }
4100  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4101 }
4102 
4103 // As above, but search for a matching permute.
4104 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
4105  unsigned &OpNo0, unsigned &OpNo1) {
4106  for (auto &P : PermuteForms)
4107  if (matchPermute(Bytes, P, OpNo0, OpNo1))
4108  return &P;
4109  return nullptr;
4110 }
4111 
4112 // Bytes is a VPERM-like permute vector, except that -1 is used for
4113 // undefined bytes. This permute is an operand of an outer permute.
4114 // See whether redistributing the -1 bytes gives a shuffle that can be
4115 // implemented using P. If so, set Transform to a VPERM-like permute vector
4116 // that, when applied to the result of P, gives the original permute in Bytes.
4117 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4118  const Permute &P,
4119  SmallVectorImpl<int> &Transform) {
4120  unsigned To = 0;
4121  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4122  int Elt = Bytes[From];
4123  if (Elt < 0)
4124  // Byte number From of the result is undefined.
4125  Transform[From] = -1;
4126  else {
4127  while (P.Bytes[To] != Elt) {
4128  To += 1;
4129  if (To == SystemZ::VectorBytes)
4130  return false;
4131  }
4132  Transform[From] = To;
4133  }
4134  }
4135  return true;
4136 }
4137 
4138 // As above, but search for a matching permute.
4139 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4140  SmallVectorImpl<int> &Transform) {
4141  for (auto &P : PermuteForms)
4142  if (matchDoublePermute(Bytes, P, Transform))
4143  return &P;
4144  return nullptr;
4145 }
4146 
4147 // Convert the mask of the given shuffle op into a byte-level mask,
4148 // as if it had type vNi8.
4149 static bool getVPermMask(SDValue ShuffleOp,
4150  SmallVectorImpl<int> &Bytes) {
4151  EVT VT = ShuffleOp.getValueType();
4152  unsigned NumElements = VT.getVectorNumElements();
4153  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4154 
4155  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4156  Bytes.resize(NumElements * BytesPerElement, -1);
4157  for (unsigned I = 0; I < NumElements; ++I) {
4158  int Index = VSN->getMaskElt(I);
4159  if (Index >= 0)
4160  for (unsigned J = 0; J < BytesPerElement; ++J)
4161  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4162  }
4163  return true;
4164  }
4165  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4166  isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4167  unsigned Index = ShuffleOp.getConstantOperandVal(1);
4168  Bytes.resize(NumElements * BytesPerElement, -1);
4169  for (unsigned I = 0; I < NumElements; ++I)
4170  for (unsigned J = 0; J < BytesPerElement; ++J)
4171  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4172  return true;
4173  }
4174  return false;
4175 }
4176 
4177 // Bytes is a VPERM-like permute vector, except that -1 is used for
4178 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
4179 // the result come from a contiguous sequence of bytes from one input.
4180 // Set Base to the selector for the first byte if so.
4181 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4182  unsigned BytesPerElement, int &Base) {
4183  Base = -1;
4184  for (unsigned I = 0; I < BytesPerElement; ++I) {
4185  if (Bytes[Start + I] >= 0) {
4186  unsigned Elem = Bytes[Start + I];
4187  if (Base < 0) {
4188  Base = Elem - I;
4189  // Make sure the bytes would come from one input operand.
4190  if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4191  return false;
4192  } else if (unsigned(Base) != Elem - I)
4193  return false;
4194  }
4195  }
4196  return true;
4197 }
4198 
4199 // Bytes is a VPERM-like permute vector, except that -1 is used for
4200 // undefined bytes. Return true if it can be performed using VSLDI.
4201 // When returning true, set StartIndex to the shift amount and OpNo0
4202 // and OpNo1 to the VPERM operands that should be used as the first
4203 // and second shift operand respectively.
4204 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4205  unsigned &StartIndex, unsigned &OpNo0,
4206  unsigned &OpNo1) {
4207  int OpNos[] = { -1, -1 };
4208  int Shift = -1;
4209  for (unsigned I = 0; I < 16; ++I) {
4210  int Index = Bytes[I];
4211  if (Index >= 0) {
4212  int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4213  int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4214  int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4215  if (Shift < 0)
4216  Shift = ExpectedShift;
4217  else if (Shift != ExpectedShift)
4218  return false;
4219  // Make sure that the operand mappings are consistent with previous
4220  // elements.
4221  if (OpNos[ModelOpNo] == 1 - RealOpNo)
4222  return false;
4223  OpNos[ModelOpNo] = RealOpNo;
4224  }
4225  }
4226  StartIndex = Shift;
4227  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4228 }
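// Editorial note (illustrative, not part of the upstream file): a mask such
// as { 4, 5, ..., 18, 19 } has a uniform byte shift of 4, so the routine
// above matches it with StartIndex = 4, OpNo0 = 0 and OpNo1 = 1: VSLDI
// concatenates the two operands and takes the 16 bytes starting at byte 4.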
4229 
4230 // Create a node that performs P on operands Op0 and Op1, casting the
4231 // operands to the appropriate type. The type of the result is determined by P.
4232 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4233  const Permute &P, SDValue Op0, SDValue Op1) {
4234  // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4235  // elements of a PACK are twice as wide as the outputs.
4236  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4237  P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4238  P.Operand);
4239  // Cast both operands to the appropriate type.
4240  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4241  SystemZ::VectorBytes / InBytes);
4242  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4243  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4244  SDValue Op;
4245  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4246  SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
4247  Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4248  } else if (P.Opcode == SystemZISD::PACK) {
4249  MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4250  SystemZ::VectorBytes / P.Operand);
4251  Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4252  } else {
4253  Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4254  }
4255  return Op;
4256 }
4257 
4258 // Bytes is a VPERM-like permute vector, except that -1 is used for
4259 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4260 // VSLDI or VPERM.
4261 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4262  SDValue *Ops,
4263  const SmallVectorImpl<int> &Bytes) {
4264  for (unsigned I = 0; I < 2; ++I)
4265  Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4266 
4267  // First see whether VSLDI can be used.
4268  unsigned StartIndex, OpNo0, OpNo1;
4269  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4270  return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4271  Ops[OpNo1],
4272  DAG.getTargetConstant(StartIndex, DL, MVT::i32));
4273 
4274  // Fall back on VPERM. Construct an SDNode for the permute vector.
4275  SDValue IndexNodes[SystemZ::VectorBytes];
4276  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4277  if (Bytes[I] >= 0)
4278  IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4279  else
4280  IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4281  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4282  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
4283 }
4284 
4285 namespace {
4286 // Describes a general N-operand vector shuffle.
4287 struct GeneralShuffle {
4288  GeneralShuffle(EVT vt) : VT(vt) {}
4289  void addUndef();
4290  bool add(SDValue, unsigned);
4291  SDValue getNode(SelectionDAG &, const SDLoc &);
4292 
4293  // The operands of the shuffle.
4294  SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4295 
4296  // Index I is -1 if byte I of the result is undefined. Otherwise the
4297  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4298  // Bytes[I] / SystemZ::VectorBytes.
4299  SmallVector<int, SystemZ::VectorBytes> Bytes;
4300 
4301  // The type of the shuffle result.
4302  EVT VT;
4303 };
4304 }
4305 
4306 // Add an extra undefined element to the shuffle.
4307 void GeneralShuffle::addUndef() {
4308  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4309  for (unsigned I = 0; I < BytesPerElement; ++I)
4310  Bytes.push_back(-1);
4311 }
4312 
4313 // Add an extra element to the shuffle, taking it from element Elem of Op.
4314 // A null Op indicates a vector input whose value will be calculated later;
4315 // there is at most one such input per shuffle and it always has the same
4316 // type as the result. Aborts and returns false if the source vector elements
4317 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4318 // LLVM they become implicitly extended, but this is rare and not optimized.
4319 bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4320  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4321 
4322  // The source vector can have wider elements than the result,
4323  // either through an explicit TRUNCATE or because of type legalization.
4324  // We want the least significant part.
4325  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4326  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4327 
4328  // Return false if the source elements are smaller than their destination
4329  // elements.
4330  if (FromBytesPerElement < BytesPerElement)
4331  return false;
4332 
4333  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4334  (FromBytesPerElement - BytesPerElement));
4335 
4336  // Look through things like shuffles and bitcasts.
4337  while (Op.getNode()) {
4338  if (Op.getOpcode() == ISD::BITCAST)
4339  Op = Op.getOperand(0);
4340  else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4341  // See whether the bytes we need come from a contiguous part of one
4342  // operand.
4343  SmallVector<int, SystemZ::VectorBytes> OpBytes;
4344  if (!getVPermMask(Op, OpBytes))
4345  break;
4346  int NewByte;
4347  if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4348  break;
4349  if (NewByte < 0) {
4350  addUndef();
4351  return true;
4352  }
4353  Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4354  Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4355  } else if (Op.isUndef()) {
4356  addUndef();
4357  return true;
4358  } else
4359  break;
4360  }
4361 
4362  // Make sure that the source of the extraction is in Ops.
4363  unsigned OpNo = 0;
4364  for (; OpNo < Ops.size(); ++OpNo)
4365  if (Ops[OpNo] == Op)
4366  break;
4367  if (OpNo == Ops.size())
4368  Ops.push_back(Op);
4369 
4370  // Add the element to Bytes.
4371  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4372  for (unsigned I = 0; I < BytesPerElement; ++I)
4373  Bytes.push_back(Base + I);
4374 
4375  return true;
4376 }
4377 
4378 // Return SDNodes for the completed shuffle.
4379 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4380  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4381 
4382  if (Ops.size() == 0)
4383  return DAG.getUNDEF(VT);
4384 
4385  // Make sure that there are at least two shuffle operands.
4386  if (Ops.size() == 1)
4387  Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4388 
4389  // Create a tree of shuffles, deferring root node until after the loop.
4390  // Try to redistribute the undefined elements of non-root nodes so that
4391  // the non-root shuffles match something like a pack or merge, then adjust
4392  // the parent node's permute vector to compensate for the new order.
4393  // Among other things, this copes with vectors like <2 x i16> that were
4394  // padded with undefined elements during type legalization.
4395  //
4396  // In the best case this redistribution will lead to the whole tree
4397  // using packs and merges. It should rarely be a loss in other cases.
4398  unsigned Stride = 1;
4399  for (; Stride * 2 < Ops.size(); Stride *= 2) {
4400  for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4401  SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
4402 
4403  // Create a mask for just these two operands.
4404  SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
4405  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4406  unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
4407  unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
4408  if (OpNo == I)
4409  NewBytes[J] = Byte;
4410  else if (OpNo == I + Stride)
4411  NewBytes[J] = SystemZ::VectorBytes + Byte;
4412  else
4413  NewBytes[J] = -1;
4414  }
4415  // See if it would be better to reorganize NewMask to avoid using VPERM.
4416  SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
4417  if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
4418  Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
4419  // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4420  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4421  if (NewBytes[J] >= 0) {
4422  assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
4423  "Invalid double permute");
4424  Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
4425  } else
4426  assert(NewBytesMap[J] < 0 && "Invalid double permute");
4427  }
4428  } else {
4429  // Just use NewBytes on the operands.
4430  Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
4431  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
4432  if (NewBytes[J] >= 0)
4433  Bytes[J] = I * SystemZ::VectorBytes + J;
4434  }
4435  }
4436  }
4437 
4438  // Now we just have 2 inputs. Put the second operand in Ops[1].
4439  if (Stride > 1) {
4440  Ops[1] = Ops[Stride];
4441  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4442  if (Bytes[I] >= int(SystemZ::VectorBytes))
4443  Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
4444  }
4445 
4446  // Look for an instruction that can do the permute without resorting
4447  // to VPERM.
4448  unsigned OpNo0, OpNo1;
4449  SDValue Op;
4450  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
4451  Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
4452  else
4453  Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
4454  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4455 }
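// Editorial note (illustrative, not part of the upstream file): with four
// shuffle operands the loop above runs once with Stride = 1, combining
// Ops[0]/Ops[1] into Ops[0] and Ops[2]/Ops[3] into Ops[2]; the code after
// the loop then moves Ops[2] into Ops[1], and a single root permute combines
// the two partial results.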
4456 
4457 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4458 static bool isScalarToVector(SDValue Op) {
4459  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
4460  if (!Op.getOperand(I).isUndef())
4461  return false;
4462  return true;
4463 }
4464 
4465 // Return a vector of type VT that contains Value in the first element.
4466 // The other elements don't matter.
4467 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4468  SDValue Value) {
4469  // If we have a constant, replicate it to all elements and let the
4470  // BUILD_VECTOR lowering take care of it.
4471  if (Value.getOpcode() == ISD::Constant ||
4472  Value.getOpcode() == ISD::ConstantFP) {
4473  SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
4474  return DAG.getBuildVector(VT, DL, Ops);
4475  }
4476  if (Value.isUndef())
4477  return DAG.getUNDEF(VT);
4478  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4479 }
4480 
4481 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
4482 // element 1. Used for cases in which replication is cheap.
4483 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4484  SDValue Op0, SDValue Op1) {
4485  if (Op0.isUndef()) {
4486  if (Op1.isUndef())
4487  return DAG.getUNDEF(VT);
4488  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
4489  }
4490  if (Op1.isUndef())
4491  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
4492  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
4493  buildScalarToVector(DAG, DL, VT, Op0),
4494  buildScalarToVector(DAG, DL, VT, Op1));
4495 }
4496 
4497 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4498 // vector for them.
4499 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
4500  SDValue Op1) {
4501  if (Op0.isUndef() && Op1.isUndef())
4502  return DAG.getUNDEF(MVT::v2i64);
4503  // If one of the two inputs is undefined then replicate the other one,
4504  // in order to avoid using another register unnecessarily.
4505  if (Op0.isUndef())
4506  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4507  else if (Op1.isUndef())
4508  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4509  else {
4510  Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4511  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4512  }
4513  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
4514 }
4515 
4516 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4517 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4518 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
4519 // would benefit from this representation and return it if so.
4520 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
4521  BuildVectorSDNode *BVN) {
4522  EVT VT = BVN->getValueType(0);
4523  unsigned NumElements = VT.getVectorNumElements();
4524 
4525  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4526  // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
4527  // need a BUILD_VECTOR, add an additional placeholder operand for that
4528  // BUILD_VECTOR and store its operands in ResidueOps.
4529  GeneralShuffle GS(VT);
4530  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
4531  bool FoundOne = false;
4532  for (unsigned I = 0; I < NumElements; ++I) {
4533  SDValue Op = BVN->