SystemZISelLowering.cpp (LLVM 8.0.0svn)
1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the SystemZTargetLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SystemZISelLowering.h"
15 #include "SystemZCallingConv.h"
18 #include "SystemZTargetMachine.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/Support/KnownBits.h"
27 #include <cctype>
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "systemz-lower"
32 
33 namespace {
34 // Represents information about a comparison.
35 struct Comparison {
36  Comparison(SDValue Op0In, SDValue Op1In)
37  : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
38 
39  // The operands to the comparison.
40  SDValue Op0, Op1;
41 
42  // The opcode that should be used to compare Op0 and Op1.
43  unsigned Opcode;
44 
45  // A SystemZICMP value. Only used for integer comparisons.
46  unsigned ICmpType;
47 
48  // The mask of CC values that Opcode can produce.
49  unsigned CCValid;
50 
51  // The mask of CC values for which the original condition is true.
52  unsigned CCMask;
53 };
54 } // end anonymous namespace
55 
56 // Classify VT as either 32 or 64 bit.
57 static bool is32Bit(EVT VT) {
58  switch (VT.getSimpleVT().SimpleTy) {
59  case MVT::i32:
60  return true;
61  case MVT::i64:
62  return false;
63  default:
64  llvm_unreachable("Unsupported type");
65  }
66 }
67 
68 // Return a version of MachineOperand that can be safely used before the
69 // final use.
70 static MachineOperand earlyUseOperand(MachineOperand Op) {
71  if (Op.isReg())
72  Op.setIsKill(false);
73  return Op;
74 }
75 
76 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
77  const SystemZSubtarget &STI)
78  : TargetLowering(TM), Subtarget(STI) {
79  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
80 
81  // Set up the register classes.
82  if (Subtarget.hasHighWord())
83  addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
84  else
85  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
86  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
87  if (Subtarget.hasVector()) {
88  addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
89  addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
90  } else {
91  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
92  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
93  }
94  if (Subtarget.hasVectorEnhancements1())
95  addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
96  else
97  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
98 
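 // With the vector facility (z13 and later), all legal vector types are held
 // in the 128-bit vector registers and share the single VR128 register class.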
99  if (Subtarget.hasVector()) {
100  addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
101  addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
102  addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
103  addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
104  addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
105  addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
106  }
107 
108  // Compute derived properties from the register classes
110  computeRegisterProperties(Subtarget.getRegisterInfo());
111  // Set up special registers.
113  setStackPointerRegisterToSaveRestore(SystemZ::R15D);
114  // TODO: It may be better to default to latency-oriented scheduling, however
115  // LLVM's current latency-oriented scheduler can't handle physreg definitions
116  // such as SystemZ has with CC, so set this to the register-pressure
117  // scheduler, because it can.
119  setSchedulingPreference(Sched::RegPressure);
122 
123  // Instructions are strings of 2-byte aligned 2-byte values.
125  // For performance reasons we prefer 16-byte alignment.
127 
128  // Handle operations that are handled in a similar way for all types.
129  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
131  ++I) {
132  MVT VT = MVT::SimpleValueType(I);
133  if (isTypeLegal(VT)) {
134  // Lower SET_CC into an IPM-based sequence.
136 
137  // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
139 
140  // Lower SELECT_CC and BR_CC into separate comparisons and branches.
143  }
144  }
145 
146  // Expand jump table branches as address arithmetic followed by an
147  // indirect jump.
149 
150  // Expand BRCOND into a BR_CC (see above).
152 
153  // Handle integer types.
154  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
156  ++I) {
157  MVT VT = MVT::SimpleValueType(I);
158  if (isTypeLegal(VT)) {
159  // Expand individual DIV and REMs into DIVREMs.
166 
167  // Support addition/subtraction with overflow.
170 
171  // Support addition/subtraction with carry.
174 
175  // Support carry in as value rather than glue.
178 
179  // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
180  // stores, putting a serialization instruction after the stores.
183 
184  // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
185  // available, or if the operand is constant.
187 
188  // Use POPCNT on z196 and above.
189  if (Subtarget.hasPopulationCount())
191  else
193 
194  // No special instructions for these.
197 
198  // Use *MUL_LOHI where possible instead of MULH*.
203 
204  // Only z196 and above have native support for conversions to unsigned.
205  // On z10, promoting to i64 doesn't generate an inexact condition for
206  // values that are outside the i32 range but in the i64 range, so use
207  // the default expansion.
208  if (!Subtarget.hasFPExtension())
210  }
211  }
212 
213  // Type legalization will convert 8- and 16-bit atomic operations into
214  // forms that operate on i32s (but still keeping the original memory VT).
215  // Lower them into full i32 operations.
227 
228  // Even though i128 is not a legal type, we still need to custom lower
229  // the atomic operations in order to exploit SystemZ instructions.
232 
233  // We can use the CC result of compare-and-swap to implement
234  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
238 
240 
241  // Traps are legal, as we will convert them to "j .+2".
243 
244  // z10 has instructions for signed but not unsigned FP conversion.
245  // Handle unsigned 32-bit types as signed 64-bit types.
246  if (!Subtarget.hasFPExtension()) {
249  }
250 
251  // We have native support for a 64-bit CTLZ, via FLOGR.
254 
255  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
257 
258  // FIXME: Can we support these natively?
262 
263  // We have native instructions for i8, i16 and i32 extensions, but not i1.
265  for (MVT VT : MVT::integer_valuetypes()) {
269  }
270 
271  // Handle the various types of symbolic address.
277 
278  // We need to handle dynamic allocations specially because of the
279  // 160-byte area at the bottom of the stack.
282 
283  // Use custom expanders so that we can force the function to use
284  // a frame pointer.
287 
288  // Handle prefetches with PFD or PFDRL.
290 
291  for (MVT VT : MVT::vector_valuetypes()) {
292  // Assume by default that all vector operations need to be expanded.
293  for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
294  if (getOperationAction(Opcode, VT) == Legal)
295  setOperationAction(Opcode, VT, Expand);
296 
297  // Likewise all truncating stores and extending loads.
298  for (MVT InnerVT : MVT::vector_valuetypes()) {
299  setTruncStoreAction(VT, InnerVT, Expand);
300  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
301  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
302  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
303  }
304 
305  if (isTypeLegal(VT)) {
306  // These operations are legal for anything that can be stored in a
307  // vector register, even if there is no native support for the format
308  // as such. In particular, we can do these for v4f32 even though there
309  // are no specific instructions for that format.
315 
316  // Likewise, except that we need to replace the nodes with something
317  // more specific.
320  }
321  }
322 
323  // Handle integer vector types.
324  for (MVT VT : MVT::integer_vector_valuetypes()) {
325  if (isTypeLegal(VT)) {
326  // These operations have direct equivalents.
331  if (VT != MVT::v2i64)
336  if (Subtarget.hasVectorEnhancements1())
338  else
342 
343  // Convert a GPR scalar to a vector by inserting it into element 0.
345 
346  // Use a series of unpacks for extensions.
349 
350  // Detect shifts by a scalar amount and convert them into
351  // V*_BY_SCALAR.
355 
356  // At present ROTL isn't matched by DAGCombiner. ROTR should be
357  // converted into ROTL.
360 
361  // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
362  // and inverting the result as necessary.
364  }
365  }
366 
367  if (Subtarget.hasVector()) {
368  // There should be no need to check for float types other than v2f64
369  // since <2 x f32> isn't a legal type.
378  }
379 
380  // Handle floating-point types.
381  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
383  ++I) {
384  MVT VT = MVT::SimpleValueType(I);
385  if (isTypeLegal(VT)) {
386  // We can use FI for FRINT.
388 
389  // We can use the extended form of FI for other rounding operations.
390  if (Subtarget.hasFPExtension()) {
396  }
397 
398  // No special instructions for these.
404  }
405  }
406 
407  // Handle floating-point vector types.
408  if (Subtarget.hasVector()) {
409  // Scalar-to-vector conversion is just a subreg.
412 
413  // Some insertions and extractions can be done directly but others
414  // need to go via integers.
419 
420  // These operations have direct equivalents.
435  }
436 
437  // The vector enhancements facility 1 has instructions for these.
438  if (Subtarget.hasVectorEnhancements1()) {
453 
458 
463 
468 
473 
478  }
479 
480  // We have fused multiply-addition for f32 and f64 but not f128.
483  if (Subtarget.hasVectorEnhancements1())
485  else
487 
488  // We don't have a copysign instruction on vector registers.
489  if (Subtarget.hasVectorEnhancements1())
491 
492  // Needed so that we don't try to implement f128 constant loads using
493  // a load-and-extend of an f80 constant (in cases where the constant
494  // would fit in an f80).
495  for (MVT VT : MVT::fp_valuetypes())
497 
498  // We don't have extending load instructions on vector registers.
499  if (Subtarget.hasVectorEnhancements1()) {
502  }
503 
504  // Floating-point truncation and stores need to be done separately.
508 
509  // We have 64-bit FPR<->GPR moves, but need special handling for
510  // 32-bit forms.
511  if (!Subtarget.hasVector()) {
514  }
515 
516  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
517  // structure, but VAEND is a no-op.
521 
522  // Codes for which we want to perform some z-specific combinations.
535 
536  // Handle intrinsics.
539 
540  // We want to use MVC in preference to even a single load/store pair.
541  MaxStoresPerMemcpy = 0;
543 
544  // The main memset sequence is a byte store followed by an MVC.
545  // Two STC or MV..I stores win over that, but the kind of fused stores
546  // generated by target-independent code don't when the byte value is
547  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
548  // than "STC;MVC". Handle the choice in target-specific code instead.
549  MaxStoresPerMemset = 0;
551 }
552 
553 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
554  LLVMContext &, EVT VT) const {
555  if (!VT.isVector())
556  return MVT::i32;
557  return VT.changeVectorElementTypeToInteger();
558 }
559 
560 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
561  VT = VT.getScalarType();
562 
563  if (!VT.isSimple())
564  return false;
565 
566  switch (VT.getSimpleVT().SimpleTy) {
567  case MVT::f32:
568  case MVT::f64:
569  return true;
570  case MVT::f128:
571  return Subtarget.hasVectorEnhancements1();
572  default:
573  break;
574  }
575 
576  return false;
577 }
578 
579 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
580  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
581  return Imm.isZero() || Imm.isNegZero();
582 }
583 
584 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
585  // We can use CGFI or CLGFI.
586  return isInt<32>(Imm) || isUInt<32>(Imm);
587 }
588 
589 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
590  // We can use ALGFI or SLGFI.
591  return isUInt<32>(Imm) || isUInt<32>(-Imm);
592 }
593 
594 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
595  unsigned,
596  unsigned,
597  bool *Fast) const {
598  // Unaligned accesses should never be slower than the expanded version.
599  // We check specifically for aligned accesses in the few cases where
600  // they are required.
601  if (Fast)
602  *Fast = true;
603  return true;
604 }
605 
606 // Information about the addressing mode for a memory access.
607 struct AddressingMode {
608  // True if a long displacement is supported.
609  bool LongDisplacement;
610 
611  // True if use of index register is supported.
612  bool IndexReg;
613 
614  AddressingMode(bool LongDispl, bool IdxReg) :
615  LongDisplacement(LongDispl), IndexReg(IdxReg) {}
616 };
617 
618 // Return the desired addressing mode for a Load which has only one use (in
619 // the same block) which is a Store.
620 static AddressingMode getLoadStoreAddrMode(bool HasVector,
621  Type *Ty) {
622  // With vector support a Load->Store combination may be combined to either
623  // an MVC or vector operations and it seems to work best to allow the
624  // vector addressing mode.
625  if (HasVector)
626  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
627 
628  // Otherwise only the MVC case is special.
629  bool MVC = Ty->isIntegerTy(8);
630  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
631 }
632 
633 // Return the addressing mode which seems most desirable given an LLVM
634 // Instruction pointer.
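 // The result is later checked against the displacement and index actually
 // used by the address in isLegalAddressingMode().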
635 static AddressingMode
636 supportedAddressingMode(Instruction *I, bool HasVector) {
637  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
638  switch (II->getIntrinsicID()) {
639  default: break;
640  case Intrinsic::memset:
641  case Intrinsic::memmove:
642  case Intrinsic::memcpy:
643  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
644  }
645  }
646 
647  if (isa<LoadInst>(I) && I->hasOneUse()) {
648  auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
649  if (SingleUser->getParent() == I->getParent()) {
650  if (isa<ICmpInst>(SingleUser)) {
651  if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
652  if (C->getBitWidth() <= 64 &&
653  (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
654  // Comparison of memory with 16 bit signed / unsigned immediate
655  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
656  } else if (isa<StoreInst>(SingleUser))
657  // Load->Store
658  return getLoadStoreAddrMode(HasVector, I->getType());
659  }
660  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
661  if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
662  if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
663  // Load->Store
664  return getLoadStoreAddrMode(HasVector, LoadI->getType());
665  }
666 
667  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
668 
669  // * Use LDE instead of LE/LEY for z13 to avoid partial register
670  // dependencies (LDE only supports small offsets).
671  // * Utilize the vector registers to hold floating point
672  // values (vector load / store instructions only support small
673  // offsets).
674 
675  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
676  I->getOperand(0)->getType());
677  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
678  bool IsVectorAccess = MemAccessTy->isVectorTy();
679 
680  // A store of an extracted vector element will be combined into a VSTE type
681  // instruction.
682  if (!IsVectorAccess && isa<StoreInst>(I)) {
683  Value *DataOp = I->getOperand(0);
684  if (isa<ExtractElementInst>(DataOp))
685  IsVectorAccess = true;
686  }
687 
688  // A load which gets inserted into a vector element will be combined into a
689  // VLE type instruction.
690  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
691  User *LoadUser = *I->user_begin();
692  if (isa<InsertElementInst>(LoadUser))
693  IsVectorAccess = true;
694  }
695 
696  if (IsFPAccess || IsVectorAccess)
697  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
698  }
699 
700  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
701 }
702 
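// A legal SystemZ address has the form D(X,B): a base register, an optional
// single (unscaled) index register, and a displacement that is either a
// 12-bit unsigned or a 20-bit signed immediate, depending on the instruction.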
703 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
704  const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
705  // Punt on globals for now, although they can be used in limited
706  // RELATIVE LONG cases.
707  if (AM.BaseGV)
708  return false;
709 
710  // Require a 20-bit signed offset.
711  if (!isInt<20>(AM.BaseOffs))
712  return false;
713 
714  AddressingMode SupportedAM(true, true);
715  if (I != nullptr)
716  SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
717 
718  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
719  return false;
720 
721  if (!SupportedAM.IndexReg)
722  // No indexing allowed.
723  return AM.Scale == 0;
724  else
725  // Indexing is OK but no scale factor can be applied.
726  return AM.Scale == 0 || AM.Scale == 1;
727 }
728 
729 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
730  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
731  return false;
732  unsigned FromBits = FromType->getPrimitiveSizeInBits();
733  unsigned ToBits = ToType->getPrimitiveSizeInBits();
734  return FromBits > ToBits;
735 }
736 
737 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
738  if (!FromVT.isInteger() || !ToVT.isInteger())
739  return false;
740  unsigned FromBits = FromVT.getSizeInBits();
741  unsigned ToBits = ToVT.getSizeInBits();
742  return FromBits > ToBits;
743 }
744 
745 //===----------------------------------------------------------------------===//
746 // Inline asm support
747 //===----------------------------------------------------------------------===//
748 
749 TargetLowering::ConstraintType
750 SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
751  if (Constraint.size() == 1) {
752  switch (Constraint[0]) {
753  case 'a': // Address register
754  case 'd': // Data register (equivalent to 'r')
755  case 'f': // Floating-point register
756  case 'h': // High-part register
757  case 'r': // General-purpose register
758  case 'v': // Vector register
759  return C_RegisterClass;
760 
761  case 'Q': // Memory with base and unsigned 12-bit displacement
762  case 'R': // Likewise, plus an index
763  case 'S': // Memory with base and signed 20-bit displacement
764  case 'T': // Likewise, plus an index
765  case 'm': // Equivalent to 'T'.
766  return C_Memory;
767 
768  case 'I': // Unsigned 8-bit constant
769  case 'J': // Unsigned 12-bit constant
770  case 'K': // Signed 16-bit constant
771  case 'L': // Signed 20-bit displacement (on all targets we support)
772  case 'M': // 0x7fffffff
773  return C_Other;
774 
775  default:
776  break;
777  }
778  }
779  return TargetLowering::getConstraintType(Constraint);
780 }
781 
782 TargetLowering::ConstraintWeight SystemZTargetLowering::
783 getSingleConstraintMatchWeight(AsmOperandInfo &info,
784  const char *constraint) const {
785  ConstraintWeight weight = CW_Invalid;
786  Value *CallOperandVal = info.CallOperandVal;
787  // If we don't have a value, we can't do a match,
788  // but allow it at the lowest weight.
789  if (!CallOperandVal)
790  return CW_Default;
791  Type *type = CallOperandVal->getType();
792  // Look at the constraint type.
793  switch (*constraint) {
794  default:
795  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
796  break;
797 
798  case 'a': // Address register
799  case 'd': // Data register (equivalent to 'r')
800  case 'h': // High-part register
801  case 'r': // General-purpose register
802  if (CallOperandVal->getType()->isIntegerTy())
803  weight = CW_Register;
804  break;
805 
806  case 'f': // Floating-point register
807  if (type->isFloatingPointTy())
808  weight = CW_Register;
809  break;
810 
811  case 'v': // Vector register
812  if ((type->isVectorTy() || type->isFloatingPointTy()) &&
813  Subtarget.hasVector())
814  weight = CW_Register;
815  break;
816 
817  case 'I': // Unsigned 8-bit constant
818  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
819  if (isUInt<8>(C->getZExtValue()))
820  weight = CW_Constant;
821  break;
822 
823  case 'J': // Unsigned 12-bit constant
824  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
825  if (isUInt<12>(C->getZExtValue()))
826  weight = CW_Constant;
827  break;
828 
829  case 'K': // Signed 16-bit constant
830  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
831  if (isInt<16>(C->getSExtValue()))
832  weight = CW_Constant;
833  break;
834 
835  case 'L': // Signed 20-bit displacement (on all targets we support)
836  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
837  if (isInt<20>(C->getSExtValue()))
838  weight = CW_Constant;
839  break;
840 
841  case 'M': // 0x7fffffff
842  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
843  if (C->getZExtValue() == 0x7fffffff)
844  weight = CW_Constant;
845  break;
846  }
847  return weight;
848 }
849 
850 // Parse a "{tNNN}" register constraint for which the register type "t"
851 // has already been verified. MC is the class associated with "t" and
852 // Map maps 0-based register numbers to LLVM register numbers.
853 static std::pair<unsigned, const TargetRegisterClass *>
854 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
855  const unsigned *Map, unsigned Size) {
856  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
857  if (isdigit(Constraint[2])) {
858  unsigned Index;
859  bool Failed =
860  Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
861  if (!Failed && Index < Size && Map[Index])
862  return std::make_pair(Map[Index], RC);
863  }
864  return std::make_pair(0U, nullptr);
865 }
866 
867 std::pair<unsigned, const TargetRegisterClass *>
868 SystemZTargetLowering::getRegForInlineAsmConstraint(
869  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
870  if (Constraint.size() == 1) {
871  // GCC Constraint Letters
872  switch (Constraint[0]) {
873  default: break;
874  case 'd': // Data register (equivalent to 'r')
875  case 'r': // General-purpose register
876  if (VT == MVT::i64)
877  return std::make_pair(0U, &SystemZ::GR64BitRegClass);
878  else if (VT == MVT::i128)
879  return std::make_pair(0U, &SystemZ::GR128BitRegClass);
880  return std::make_pair(0U, &SystemZ::GR32BitRegClass);
881 
882  case 'a': // Address register
883  if (VT == MVT::i64)
884  return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
885  else if (VT == MVT::i128)
886  return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
887  return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
888 
889  case 'h': // High-part register (an LLVM extension)
890  return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
891 
892  case 'f': // Floating-point register
893  if (VT == MVT::f64)
894  return std::make_pair(0U, &SystemZ::FP64BitRegClass);
895  else if (VT == MVT::f128)
896  return std::make_pair(0U, &SystemZ::FP128BitRegClass);
897  return std::make_pair(0U, &SystemZ::FP32BitRegClass);
898 
899  case 'v': // Vector register
900  if (Subtarget.hasVector()) {
901  if (VT == MVT::f32)
902  return std::make_pair(0U, &SystemZ::VR32BitRegClass);
903  if (VT == MVT::f64)
904  return std::make_pair(0U, &SystemZ::VR64BitRegClass);
905  return std::make_pair(0U, &SystemZ::VR128BitRegClass);
906  }
907  break;
908  }
909  }
910  if (Constraint.size() > 0 && Constraint[0] == '{') {
911  // We need to override the default register parsing for GPRs and FPRs
912  // because the interpretation depends on VT. The internal names of
913  // the registers are also different from the external names
914  // (F0D and F0S instead of F0, etc.).
915  if (Constraint[1] == 'r') {
916  if (VT == MVT::i32)
917  return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
918  SystemZMC::GR32Regs, 16);
919  if (VT == MVT::i128)
920  return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
921  SystemZMC::GR128Regs, 16);
922  return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
923  SystemZMC::GR64Regs, 16);
924  }
925  if (Constraint[1] == 'f') {
926  if (VT == MVT::f32)
927  return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
928  SystemZMC::FP32Regs, 16);
929  if (VT == MVT::f128)
930  return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
931  SystemZMC::FP128Regs, 16);
932  return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
933  SystemZMC::FP64Regs, 16);
934  }
935  if (Constraint[1] == 'v') {
936  if (VT == MVT::f32)
937  return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
938  SystemZMC::VR32Regs, 32);
939  if (VT == MVT::f64)
940  return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
941  SystemZMC::VR64Regs, 32);
942  return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
943  SystemZMC::VR128Regs, 32);
944  }
945  }
946  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
947 }
948 
949 void SystemZTargetLowering::
950 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
951  std::vector<SDValue> &Ops,
952  SelectionDAG &DAG) const {
953  // Only support length 1 constraints for now.
954  if (Constraint.length() == 1) {
955  switch (Constraint[0]) {
956  case 'I': // Unsigned 8-bit constant
957  if (auto *C = dyn_cast<ConstantSDNode>(Op))
958  if (isUInt<8>(C->getZExtValue()))
959  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
960  Op.getValueType()));
961  return;
962 
963  case 'J': // Unsigned 12-bit constant
964  if (auto *C = dyn_cast<ConstantSDNode>(Op))
965  if (isUInt<12>(C->getZExtValue()))
966  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
967  Op.getValueType()));
968  return;
969 
970  case 'K': // Signed 16-bit constant
971  if (auto *C = dyn_cast<ConstantSDNode>(Op))
972  if (isInt<16>(C->getSExtValue()))
973  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
974  Op.getValueType()));
975  return;
976 
977  case 'L': // Signed 20-bit displacement (on all targets we support)
978  if (auto *C = dyn_cast<ConstantSDNode>(Op))
979  if (isInt<20>(C->getSExtValue()))
980  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
981  Op.getValueType()));
982  return;
983 
984  case 'M': // 0x7fffffff
985  if (auto *C = dyn_cast<ConstantSDNode>(Op))
986  if (C->getZExtValue() == 0x7fffffff)
987  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
988  Op.getValueType()));
989  return;
990  }
991  }
992  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
993 }
994 
995 //===----------------------------------------------------------------------===//
996 // Calling conventions
997 //===----------------------------------------------------------------------===//
998 
999 #include "SystemZGenCallingConv.inc"
1000 
1001 const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1002  CallingConv::ID) const {
1003  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1004  SystemZ::R14D, 0 };
1005  return ScratchRegs;
1006 }
1007 
1008 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1009  Type *ToType) const {
1010  return isTruncateFree(FromType, ToType);
1011 }
1012 
1013 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1014  return CI->isTailCall();
1015 }
1016 
1017 // We do not yet support 128-bit single-element vector types. If the user
1018 // attempts to use such types as function argument or return type, prefer
1019 // to error out instead of emitting code violating the ABI.
1020 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1021  if (ArgVT.isVector() && !VT.isVector())
1022  report_fatal_error("Unsupported vector argument or return type");
1023 }
1024 
1025 static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1026  for (unsigned i = 0; i < Ins.size(); ++i)
1027  VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1028 }
1029 
1030 static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1031  for (unsigned i = 0; i < Outs.size(); ++i)
1032  VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1033 }
1034 
1035 // Value is a value that has been passed to us in the location described by VA
1036 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1037 // any loads onto Chain.
1038 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1039  CCValAssign &VA, SDValue Chain,
1040  SDValue Value) {
1041  // If the argument has been promoted from a smaller type, insert an
1042  // assertion to capture this.
1043  if (VA.getLocInfo() == CCValAssign::SExt)
1044  Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1045  DAG.getValueType(VA.getValVT()));
1046  else if (VA.getLocInfo() == CCValAssign::ZExt)
1047  Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1048  DAG.getValueType(VA.getValVT()));
1049 
1050  if (VA.isExtInLoc())
1051  Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1052  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1053  // If this is a short vector argument loaded from the stack,
1054  // extend from i64 to full vector size and then bitcast.
1055  assert(VA.getLocVT() == MVT::i64);
1056  assert(VA.getValVT().isVector());
1057  Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1058  Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1059  } else
1060  assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1061  return Value;
1062 }
1063 
1064 // Value is a value of type VA.getValVT() that we need to copy into
1065 // the location described by VA. Return a copy of Value converted to
1066 // VA.getValVT(). The caller is responsible for handling indirect values.
1067 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1068  CCValAssign &VA, SDValue Value) {
1069  switch (VA.getLocInfo()) {
1070  case CCValAssign::SExt:
1071  return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1072  case CCValAssign::ZExt:
1073  return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1074  case CCValAssign::AExt:
1075  return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1076  case CCValAssign::BCvt:
1077  // If this is a short vector argument to be stored to the stack,
1078  // bitcast to v2i64 and then extract first element.
1079  assert(VA.getLocVT() == MVT::i64);
1080  assert(VA.getValVT().isVector());
1081  Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
1082  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1083  DAG.getConstant(0, DL, MVT::i32));
1084  case CCValAssign::Full:
1085  return Value;
1086  default:
1087  llvm_unreachable("Unhandled getLocInfo()");
1088  }
1089 }
1090 
1091 SDValue SystemZTargetLowering::LowerFormalArguments(
1092  SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1093  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1094  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1095  MachineFunction &MF = DAG.getMachineFunction();
1096  MachineFrameInfo &MFI = MF.getFrameInfo();
1097  MachineRegisterInfo &MRI = MF.getRegInfo();
1098  SystemZMachineFunctionInfo *FuncInfo =
1099  MF.getInfo<SystemZMachineFunctionInfo>();
1100  auto *TFL =
1101  static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
1102  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1103 
1104  // Detect unsupported vector argument types.
1105  if (Subtarget.hasVector())
1106  VerifyVectorTypes(Ins);
1107 
1108  // Assign locations to all of the incoming arguments.
1109  SmallVector<CCValAssign, 16> ArgLocs;
1110  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1111  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1112 
1113  unsigned NumFixedGPRs = 0;
1114  unsigned NumFixedFPRs = 0;
1115  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1116  SDValue ArgValue;
1117  CCValAssign &VA = ArgLocs[I];
1118  EVT LocVT = VA.getLocVT();
1119  if (VA.isRegLoc()) {
1120  // Arguments passed in registers
1121  const TargetRegisterClass *RC;
1122  switch (LocVT.getSimpleVT().SimpleTy) {
1123  default:
1124  // Integers smaller than i64 should be promoted to i64.
1125  llvm_unreachable("Unexpected argument type");
1126  case MVT::i32:
1127  NumFixedGPRs += 1;
1128  RC = &SystemZ::GR32BitRegClass;
1129  break;
1130  case MVT::i64:
1131  NumFixedGPRs += 1;
1132  RC = &SystemZ::GR64BitRegClass;
1133  break;
1134  case MVT::f32:
1135  NumFixedFPRs += 1;
1136  RC = &SystemZ::FP32BitRegClass;
1137  break;
1138  case MVT::f64:
1139  NumFixedFPRs += 1;
1140  RC = &SystemZ::FP64BitRegClass;
1141  break;
1142  case MVT::v16i8:
1143  case MVT::v8i16:
1144  case MVT::v4i32:
1145  case MVT::v2i64:
1146  case MVT::v4f32:
1147  case MVT::v2f64:
1148  RC = &SystemZ::VR128BitRegClass;
1149  break;
1150  }
1151 
1152  unsigned VReg = MRI.createVirtualRegister(RC);
1153  MRI.addLiveIn(VA.getLocReg(), VReg);
1154  ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1155  } else {
1156  assert(VA.isMemLoc() && "Argument not register or memory");
1157 
1158  // Create the frame index object for this incoming parameter.
1159  int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1160  VA.getLocMemOffset(), true);
1161 
1162  // Create the SelectionDAG nodes corresponding to a load
1163  // from this parameter. Unpromoted ints and floats are
1164  // passed as right-justified 8-byte values.
1165  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1166  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1167  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1168  DAG.getIntPtrConstant(4, DL));
1169  ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1170  MachinePointerInfo::getFixedStack(MF, FI));
1171  }
1172 
1173  // Convert the value of the argument register into the value that's
1174  // being passed.
1175  if (VA.getLocInfo() == CCValAssign::Indirect) {
1176  InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1177  MachinePointerInfo()));
1178  // If the original argument was split (e.g. i128), we need
1179  // to load all parts of it here (using the same address).
1180  unsigned ArgIndex = Ins[I].OrigArgIndex;
1181  assert (Ins[I].PartOffset == 0);
1182  while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1183  CCValAssign &PartVA = ArgLocs[I + 1];
1184  unsigned PartOffset = Ins[I + 1].PartOffset;
1185  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1186  DAG.getIntPtrConstant(PartOffset, DL));
1187  InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1188  MachinePointerInfo()));
1189  ++I;
1190  }
1191  } else
1192  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1193  }
1194 
1195  if (IsVarArg) {
1196  // Save the number of non-varargs registers for later use by va_start, etc.
1197  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1198  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1199 
1200  // Likewise the address (in the form of a frame index) of where the
1201  // first stack vararg would be. The 1-byte size here is arbitrary.
1202  int64_t StackSize = CCInfo.getNextStackOffset();
1203  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1204 
1205  // ...and a similar frame index for the caller-allocated save area
1206  // that will be used to store the incoming registers.
1207  int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
1208  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1209  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1210 
1211  // Store the FPR varargs in the reserved frame slots. (We store the
1212  // GPRs as part of the prologue.)
1213  if (NumFixedFPRs < SystemZ::NumArgFPRs) {
1214  SDValue MemOps[SystemZ::NumArgFPRs];
1215  for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
1216  unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
1217  int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
1218  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1219  unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
1220  &SystemZ::FP64BitRegClass);
1221  SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1222  MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1223  MachinePointerInfo::getFixedStack(MF, FI));
1224  }
1225  // Join the stores, which are independent of one another.
1226  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1227  makeArrayRef(&MemOps[NumFixedFPRs],
1228  SystemZ::NumArgFPRs-NumFixedFPRs));
1229  }
1230  }
1231 
1232  return Chain;
1233 }
1234 
1235 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1236  SmallVectorImpl<CCValAssign> &ArgLocs,
1237  SmallVectorImpl<ISD::OutputArg> &Outs) {
1238  // Punt if there are any indirect or stack arguments, or if the call
1239  // needs the callee-saved argument register R6, or if the call uses
1240  // the callee-saved register arguments SwiftSelf and SwiftError.
1241  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1242  CCValAssign &VA = ArgLocs[I];
1243  if (VA.getLocInfo() == CCValAssign::Indirect)
1244  return false;
1245  if (!VA.isRegLoc())
1246  return false;
1247  unsigned Reg = VA.getLocReg();
1248  if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1249  return false;
1250  if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1251  return false;
1252  }
1253  return true;
1254 }
1255 
1256 SDValue
1257 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1258  SmallVectorImpl<SDValue> &InVals) const {
1259  SelectionDAG &DAG = CLI.DAG;
1260  SDLoc &DL = CLI.DL;
1261  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1262  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1263  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1264  SDValue Chain = CLI.Chain;
1265  SDValue Callee = CLI.Callee;
1266  bool &IsTailCall = CLI.IsTailCall;
1267  CallingConv::ID CallConv = CLI.CallConv;
1268  bool IsVarArg = CLI.IsVarArg;
1269  MachineFunction &MF = DAG.getMachineFunction();
1270  EVT PtrVT = getPointerTy(MF.getDataLayout());
1271 
1272  // Detect unsupported vector argument and return types.
1273  if (Subtarget.hasVector()) {
1274  VerifyVectorTypes(Outs);
1275  VerifyVectorTypes(Ins);
1276  }
1277 
1278  // Analyze the operands of the call, assigning locations to each operand.
1279  SmallVector<CCValAssign, 16> ArgLocs;
1280  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1281  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1282 
1283  // We don't support GuaranteedTailCallOpt, only automatically-detected
1284  // sibling calls.
1285  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1286  IsTailCall = false;
1287 
1288  // Get a count of how many bytes are to be pushed on the stack.
1289  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1290 
1291  // Mark the start of the call.
1292  if (!IsTailCall)
1293  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1294 
1295  // Copy argument values to their designated locations.
1296  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1297  SmallVector<SDValue, 8> MemOpChains;
1298  SDValue StackPtr;
1299  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1300  CCValAssign &VA = ArgLocs[I];
1301  SDValue ArgValue = OutVals[I];
1302 
1303  if (VA.getLocInfo() == CCValAssign::Indirect) {
1304  // Store the argument in a stack slot and pass its address.
1305  SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
1306  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1307  MemOpChains.push_back(
1308  DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1309  MachinePointerInfo::getFixedStack(MF, FI)));
1310  // If the original argument was split (e.g. i128), we need
1311  // to store all parts of it here (and pass just one address).
1312  unsigned ArgIndex = Outs[I].OrigArgIndex;
1313  assert (Outs[I].PartOffset == 0);
1314  while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1315  SDValue PartValue = OutVals[I + 1];
1316  unsigned PartOffset = Outs[I + 1].PartOffset;
1317  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1318  DAG.getIntPtrConstant(PartOffset, DL));
1319  MemOpChains.push_back(
1320  DAG.getStore(Chain, DL, PartValue, Address,
1322  ++I;
1323  }
1324  ArgValue = SpillSlot;
1325  } else
1326  ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1327 
1328  if (VA.isRegLoc())
1329  // Queue up the argument copies and emit them at the end.
1330  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1331  else {
1332  assert(VA.isMemLoc() && "Argument not register or memory");
1333 
1334  // Work out the address of the stack slot. Unpromoted ints and
1335  // floats are passed as right-justified 8-byte values.
1336  if (!StackPtr.getNode())
1337  StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
1338  unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
1339  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1340  Offset += 4;
1341  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1342  DAG.getIntPtrConstant(Offset, DL));
1343 
1344  // Emit the store.
1345  MemOpChains.push_back(
1346  DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1347  }
1348  }
1349 
1350  // Join the stores, which are independent of one another.
1351  if (!MemOpChains.empty())
1352  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1353 
1354  // Accept direct calls by converting symbolic call addresses to the
1355  // associated Target* opcodes. Force %r1 to be used for indirect
1356  // tail calls.
1357  SDValue Glue;
1358  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1359  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1360  Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1361  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1362  Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1363  Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1364  } else if (IsTailCall) {
1365  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1366  Glue = Chain.getValue(1);
1367  Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1368  }
1369 
1370  // Build a sequence of copy-to-reg nodes, chained and glued together.
1371  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1372  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1373  RegsToPass[I].second, Glue);
1374  Glue = Chain.getValue(1);
1375  }
1376 
1377  // The first call operand is the chain and the second is the target address.
1379  Ops.push_back(Chain);
1380  Ops.push_back(Callee);
1381 
1382  // Add argument registers to the end of the list so that they are
1383  // known live into the call.
1384  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1385  Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1386  RegsToPass[I].second.getValueType()));
1387 
1388  // Add a register mask operand representing the call-preserved registers.
1389  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1390  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1391  assert(Mask && "Missing call preserved mask for calling convention");
1392  Ops.push_back(DAG.getRegisterMask(Mask));
1393 
1394  // Glue the call to the argument copies, if any.
1395  if (Glue.getNode())
1396  Ops.push_back(Glue);
1397 
1398  // Emit the call.
1399  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1400  if (IsTailCall)
1401  return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1402  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1403  Glue = Chain.getValue(1);
1404 
1405  // Mark the end of the call, which is glued to the call itself.
1406  Chain = DAG.getCALLSEQ_END(Chain,
1407  DAG.getConstant(NumBytes, DL, PtrVT, true),
1408  DAG.getConstant(0, DL, PtrVT, true),
1409  Glue, DL);
1410  Glue = Chain.getValue(1);
1411 
1412  // Assign locations to each value returned by this call.
1413  SmallVector<CCValAssign, 16> RetLocs;
1414  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1415  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1416 
1417  // Copy all of the result registers out of their specified physreg.
1418  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1419  CCValAssign &VA = RetLocs[I];
1420 
1421  // Copy the value out, gluing the copy to the end of the call sequence.
1422  SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1423  VA.getLocVT(), Glue);
1424  Chain = RetValue.getValue(1);
1425  Glue = RetValue.getValue(2);
1426 
1427  // Convert the value of the return register into the value that's
1428  // being returned.
1429  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1430  }
1431 
1432  return Chain;
1433 }
1434 
1435 bool SystemZTargetLowering::
1436 CanLowerReturn(CallingConv::ID CallConv,
1437  MachineFunction &MF, bool isVarArg,
1438  const SmallVectorImpl<ISD::OutputArg> &Outs,
1439  LLVMContext &Context) const {
1440  // Detect unsupported vector return types.
1441  if (Subtarget.hasVector())
1442  VerifyVectorTypes(Outs);
1443 
1444  // Special case that we cannot easily detect in RetCC_SystemZ since
1445  // i128 is not a legal type.
1446  for (auto &Out : Outs)
1447  if (Out.ArgVT == MVT::i128)
1448  return false;
1449 
1450  SmallVector<CCValAssign, 16> RetLocs;
1451  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1452  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1453 }
1454 
1455 SDValue
1456 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1457  bool IsVarArg,
1458  const SmallVectorImpl<ISD::OutputArg> &Outs,
1459  const SmallVectorImpl<SDValue> &OutVals,
1460  const SDLoc &DL, SelectionDAG &DAG) const {
1461  MachineFunction &MF = DAG.getMachineFunction();
1462 
1463  // Detect unsupported vector return types.
1464  if (Subtarget.hasVector())
1465  VerifyVectorTypes(Outs);
1466 
1467  // Assign locations to each returned value.
1468  SmallVector<CCValAssign, 16> RetLocs;
1469  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1470  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1471 
1472  // Quick exit for void returns
1473  if (RetLocs.empty())
1474  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1475 
1476  // Copy the result values into the output registers.
1477  SDValue Glue;
1478  SmallVector<SDValue, 4> RetOps;
1479  RetOps.push_back(Chain);
1480  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1481  CCValAssign &VA = RetLocs[I];
1482  SDValue RetValue = OutVals[I];
1483 
1484  // Make the return register live on exit.
1485  assert(VA.isRegLoc() && "Can only return in registers!");
1486 
1487  // Promote the value as required.
1488  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1489 
1490  // Chain and glue the copies together.
1491  unsigned Reg = VA.getLocReg();
1492  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1493  Glue = Chain.getValue(1);
1494  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1495  }
1496 
1497  // Update chain and glue.
1498  RetOps[0] = Chain;
1499  if (Glue.getNode())
1500  RetOps.push_back(Glue);
1501 
1502  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1503 }
1504 
1505 // Return true if Op is an intrinsic node with chain that returns the CC value
1506 // as its only (other) argument. Provide the associated SystemZISD opcode and
1507 // the mask of valid CC values if so.
1508 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1509  unsigned &CCValid) {
1510  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1511  switch (Id) {
1512  case Intrinsic::s390_tbegin:
1513  Opcode = SystemZISD::TBEGIN;
1514  CCValid = SystemZ::CCMASK_TBEGIN;
1515  return true;
1516 
1517  case Intrinsic::s390_tbegin_nofloat:
1518  Opcode = SystemZISD::TBEGIN_NOFLOAT;
1519  CCValid = SystemZ::CCMASK_TBEGIN;
1520  return true;
1521 
1522  case Intrinsic::s390_tend:
1523  Opcode = SystemZISD::TEND;
1524  CCValid = SystemZ::CCMASK_TEND;
1525  return true;
1526 
1527  default:
1528  return false;
1529  }
1530 }
1531 
1532 // Return true if Op is an intrinsic node without chain that returns the
1533 // CC value as its final argument. Provide the associated SystemZISD
1534 // opcode and the mask of valid CC values if so.
1535 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1536  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1537  switch (Id) {
1538  case Intrinsic::s390_vpkshs:
1539  case Intrinsic::s390_vpksfs:
1540  case Intrinsic::s390_vpksgs:
1541  Opcode = SystemZISD::PACKS_CC;
1542  CCValid = SystemZ::CCMASK_VCMP;
1543  return true;
1544 
1545  case Intrinsic::s390_vpklshs:
1546  case Intrinsic::s390_vpklsfs:
1547  case Intrinsic::s390_vpklsgs:
1548  Opcode = SystemZISD::PACKLS_CC;
1549  CCValid = SystemZ::CCMASK_VCMP;
1550  return true;
1551 
1552  case Intrinsic::s390_vceqbs:
1553  case Intrinsic::s390_vceqhs:
1554  case Intrinsic::s390_vceqfs:
1555  case Intrinsic::s390_vceqgs:
1556  Opcode = SystemZISD::VICMPES;
1557  CCValid = SystemZ::CCMASK_VCMP;
1558  return true;
1559 
1560  case Intrinsic::s390_vchbs:
1561  case Intrinsic::s390_vchhs:
1562  case Intrinsic::s390_vchfs:
1563  case Intrinsic::s390_vchgs:
1564  Opcode = SystemZISD::VICMPHS;
1565  CCValid = SystemZ::CCMASK_VCMP;
1566  return true;
1567 
1568  case Intrinsic::s390_vchlbs:
1569  case Intrinsic::s390_vchlhs:
1570  case Intrinsic::s390_vchlfs:
1571  case Intrinsic::s390_vchlgs:
1572  Opcode = SystemZISD::VICMPHLS;
1573  CCValid = SystemZ::CCMASK_VCMP;
1574  return true;
1575 
1576  case Intrinsic::s390_vtm:
1577  Opcode = SystemZISD::VTM;
1578  CCValid = SystemZ::CCMASK_VCMP;
1579  return true;
1580 
1581  case Intrinsic::s390_vfaebs:
1582  case Intrinsic::s390_vfaehs:
1583  case Intrinsic::s390_vfaefs:
1584  Opcode = SystemZISD::VFAE_CC;
1585  CCValid = SystemZ::CCMASK_ANY;
1586  return true;
1587 
1588  case Intrinsic::s390_vfaezbs:
1589  case Intrinsic::s390_vfaezhs:
1590  case Intrinsic::s390_vfaezfs:
1591  Opcode = SystemZISD::VFAEZ_CC;
1592  CCValid = SystemZ::CCMASK_ANY;
1593  return true;
1594 
1595  case Intrinsic::s390_vfeebs:
1596  case Intrinsic::s390_vfeehs:
1597  case Intrinsic::s390_vfeefs:
1598  Opcode = SystemZISD::VFEE_CC;
1599  CCValid = SystemZ::CCMASK_ANY;
1600  return true;
1601 
1602  case Intrinsic::s390_vfeezbs:
1603  case Intrinsic::s390_vfeezhs:
1604  case Intrinsic::s390_vfeezfs:
1605  Opcode = SystemZISD::VFEEZ_CC;
1606  CCValid = SystemZ::CCMASK_ANY;
1607  return true;
1608 
1609  case Intrinsic::s390_vfenebs:
1610  case Intrinsic::s390_vfenehs:
1611  case Intrinsic::s390_vfenefs:
1612  Opcode = SystemZISD::VFENE_CC;
1613  CCValid = SystemZ::CCMASK_ANY;
1614  return true;
1615 
1616  case Intrinsic::s390_vfenezbs:
1617  case Intrinsic::s390_vfenezhs:
1618  case Intrinsic::s390_vfenezfs:
1619  Opcode = SystemZISD::VFENEZ_CC;
1620  CCValid = SystemZ::CCMASK_ANY;
1621  return true;
1622 
1623  case Intrinsic::s390_vistrbs:
1624  case Intrinsic::s390_vistrhs:
1625  case Intrinsic::s390_vistrfs:
1626  Opcode = SystemZISD::VISTR_CC;
1627  CCValid = SystemZ::CCMASK_ANY;
1628  return true;
1629 
1630  case Intrinsic::s390_vstrcbs:
1631  case Intrinsic::s390_vstrchs:
1632  case Intrinsic::s390_vstrcfs:
1633  Opcode = SystemZISD::VSTRC_CC;
1634  CCValid = SystemZ::CCMASK_ANY;
1635  return true;
1636 
1637  case Intrinsic::s390_vstrczbs:
1638  case Intrinsic::s390_vstrczhs:
1639  case Intrinsic::s390_vstrczfs:
1640  Opcode = SystemZISD::VSTRCZ_CC;
1641  CCValid = SystemZ::CCMASK_ANY;
1642  return true;
1643 
1644  case Intrinsic::s390_vfcedbs:
1645  case Intrinsic::s390_vfcesbs:
1646  Opcode = SystemZISD::VFCMPES;
1647  CCValid = SystemZ::CCMASK_VCMP;
1648  return true;
1649 
1650  case Intrinsic::s390_vfchdbs:
1651  case Intrinsic::s390_vfchsbs:
1652  Opcode = SystemZISD::VFCMPHS;
1653  CCValid = SystemZ::CCMASK_VCMP;
1654  return true;
1655 
1656  case Intrinsic::s390_vfchedbs:
1657  case Intrinsic::s390_vfchesbs:
1658  Opcode = SystemZISD::VFCMPHES;
1659  CCValid = SystemZ::CCMASK_VCMP;
1660  return true;
1661 
1662  case Intrinsic::s390_vftcidb:
1663  case Intrinsic::s390_vftcisb:
1664  Opcode = SystemZISD::VFTCI;
1665  CCValid = SystemZ::CCMASK_VCMP;
1666  return true;
1667 
1668  case Intrinsic::s390_tdc:
1669  Opcode = SystemZISD::TDC;
1670  CCValid = SystemZ::CCMASK_TDC;
1671  return true;
1672 
1673  default:
1674  return false;
1675  }
1676 }
1677 
1678 // Emit an intrinsic with chain and an explicit CC register result.
1679 static SDNode *emitIntrinsicWithCCAndChain(SDValue Op, SelectionDAG &DAG,
1680  unsigned Opcode) {
1681  // Copy all operands except the intrinsic ID.
1682  unsigned NumOps = Op.getNumOperands();
1684  Ops.reserve(NumOps - 1);
1685  Ops.push_back(Op.getOperand(0));
1686  for (unsigned I = 2; I < NumOps; ++I)
1687  Ops.push_back(Op.getOperand(I));
1688 
1689  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
1690  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
1691  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
1692  SDValue OldChain = SDValue(Op.getNode(), 1);
1693  SDValue NewChain = SDValue(Intr.getNode(), 1);
1694  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
1695  return Intr.getNode();
1696 }
1697 
1698 // Emit an intrinsic with an explicit CC register result.
1699 static SDNode *emitIntrinsicWithCC(SDValue Op, SelectionDAG &DAG,
1700  unsigned Opcode) {
1701  // Copy all operands except the intrinsic ID.
1702  unsigned NumOps = Op.getNumOperands();
1704  Ops.reserve(NumOps - 1);
1705  for (unsigned I = 1; I < NumOps; ++I)
1706  Ops.push_back(Op.getOperand(I));
1707 
1708  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
1709  return Intr.getNode();
1710 }
1711 
1712 // CC is a comparison that will be implemented using an integer or
1713 // floating-point comparison. Return the condition code mask for
1714 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
1715 // unsigned comparisons and clear for signed ones. In the floating-point
1716 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
1717 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
1718 #define CONV(X) \
1719  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
1720  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
1721  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
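 // For example, CONV(GT) maps SETGT and SETOGT to CCMASK_CMP_GT, and maps
 // SETUGT to CCMASK_CMP_UO | CCMASK_CMP_GT.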
1722 
1723  switch (CC) {
1724  default:
1725  llvm_unreachable("Invalid integer condition!");
1726 
1727  CONV(EQ);
1728  CONV(NE);
1729  CONV(GT);
1730  CONV(GE);
1731  CONV(LT);
1732  CONV(LE);
1733 
1734  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
1735  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
1736  }
1737 #undef CONV
1738 }
1739 
1740 // If C can be converted to a comparison against zero, adjust the operands
1741 // as necessary.
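// For example, "x > -1" can be tested as "x >= 0" and "x < 1" as "x <= 0";
// flipping the CCMASK_CMP_EQ bit turns GT into GE and LT into LE.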
1742 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
1743  if (C.ICmpType == SystemZICMP::UnsignedOnly)
1744  return;
1745 
1746  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
1747  if (!ConstOp1)
1748  return;
1749 
1750  int64_t Value = ConstOp1->getSExtValue();
1751  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
1752  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
1753  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
1754  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
1755  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
1756  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
1757  }
1758 }
1759 
1760 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
1761 // adjust the operands as necessary.
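// For example, a comparison of an extending i8 load with the constant 200
// can then be done directly on the memory byte with CLI (compare logical
// immediate).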
1762 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
1763  Comparison &C) {
1764  // For us to make any changes, it must be a comparison between a single-use
1765  // load and a constant.
1766  if (!C.Op0.hasOneUse() ||
1767  C.Op0.getOpcode() != ISD::LOAD ||
1768  C.Op1.getOpcode() != ISD::Constant)
1769  return;
1770 
1771  // We must have an 8- or 16-bit load.
1772  auto *Load = cast<LoadSDNode>(C.Op0);
1773  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
1774  if (NumBits != 8 && NumBits != 16)
1775  return;
1776 
1777  // The load must be an extending one and the constant must be within the
1778  // range of the unextended value.
1779  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
1780  uint64_t Value = ConstOp1->getZExtValue();
1781  uint64_t Mask = (1 << NumBits) - 1;
1782  if (Load->getExtensionType() == ISD::SEXTLOAD) {
1783  // Make sure that ConstOp1 is in range of C.Op0.
1784  int64_t SignedValue = ConstOp1->getSExtValue();
1785  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
1786  return;
1787  if (C.ICmpType != SystemZICMP::SignedOnly) {
1788  // Unsigned comparison between two sign-extended values is equivalent
1789  // to unsigned comparison between two zero-extended values.
1790  Value &= Mask;
1791  } else if (NumBits == 8) {
1792  // Try to treat the comparison as unsigned, so that we can use CLI.
1793  // Adjust CCMask and Value as necessary.
1794  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
1795  // Test whether the high bit of the byte is set.
1796  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
1797  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
1798  // Test whether the high bit of the byte is clear.
1799  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
1800  else
1801  // No instruction exists for this combination.
1802  return;
1803  C.ICmpType = SystemZICMP::UnsignedOnly;
1804  }
1805  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
1806  if (Value > Mask)
1807  return;
1808  // If the constant is in range, we can use any comparison.
1809  C.ICmpType = SystemZICMP::Any;
1810  } else
1811  return;
1812 
1813  // Make sure that the first operand is an i32 of the right extension type.
1814  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
1815  ISD::SEXTLOAD :
1816  ISD::ZEXTLOAD);
1817  if (C.Op0.getValueType() != MVT::i32 ||
1818  Load->getExtensionType() != ExtType) {
1819  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
1820  Load->getBasePtr(), Load->getPointerInfo(),
1821  Load->getMemoryVT(), Load->getAlignment(),
1822  Load->getMemOperand()->getFlags());
1823  // Update the chain uses.
1824  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
1825  }
1826 
1827  // Make sure that the second operand is an i32 with the right value.
1828  if (C.Op1.getValueType() != MVT::i32 ||
1829  Value != ConstOp1->getZExtValue())
1830  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
1831 }
1832 
1833 // Return true if Op is either an unextended load, or a load suitable
1834 // for integer register-memory comparisons of type ICmpType.
1835 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
1836  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
1837  if (Load) {
1838  // There are no instructions to compare a register with a memory byte.
1839  if (Load->getMemoryVT() == MVT::i8)
1840  return false;
1841  // Otherwise decide on extension type.
1842  switch (Load->getExtensionType()) {
1843  case ISD::NON_EXTLOAD:
1844  return true;
1845  case ISD::SEXTLOAD:
1846  return ICmpType != SystemZICMP::UnsignedOnly;
1847  case ISD::ZEXTLOAD:
1848  return ICmpType != SystemZICMP::SignedOnly;
1849  default:
1850  break;
1851  }
1852  }
1853  return false;
1854 }
1855 
1856 // Return true if it is better to swap the operands of C.
1857 static bool shouldSwapCmpOperands(const Comparison &C) {
1858  // Leave f128 comparisons alone, since they have no memory forms.
1859  if (C.Op0.getValueType() == MVT::f128)
1860  return false;
1861 
1862  // Always keep a floating-point constant second, since comparisons with
1863  // zero can use LOAD TEST and comparisons with other constants make a
1864  // natural memory operand.
1865  if (isa<ConstantFPSDNode>(C.Op1))
1866  return false;
1867 
1868  // Never swap comparisons with zero since there are many ways to optimize
1869  // those later.
1870  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
1871  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
1872  return false;
1873 
1874  // Also keep natural memory operands second if the loaded value is
1875  // only used here. Several comparisons have memory forms.
1876  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
1877  return false;
1878 
1879  // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
1880  // In that case we generally prefer the memory to be second.
1881  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
1882  // The only exceptions are when the second operand is a constant and
1883  // we can use things like CHHSI.
1884  if (!ConstOp1)
1885  return true;
1886  // The unsigned memory-immediate instructions can handle 16-bit
1887  // unsigned integers.
1888  if (C.ICmpType != SystemZICMP::SignedOnly &&
1889  isUInt<16>(ConstOp1->getZExtValue()))
1890  return false;
1891  // The signed memory-immediate instructions can handle 16-bit
1892  // signed integers.
1893  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
1894  isInt<16>(ConstOp1->getSExtValue()))
1895  return false;
1896  return true;
1897  }
1898 
1899  // Try to promote the use of CGFR and CLGFR.
1900  unsigned Opcode0 = C.Op0.getOpcode();
1901  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
1902  return true;
1903  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
1904  return true;
1905  if (C.ICmpType != SystemZICMP::SignedOnly &&
1906  Opcode0 == ISD::AND &&
1907  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
1908  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
1909  return true;
1910 
1911  return false;
1912 }
1913 
1914 // Return a version of comparison CC mask CCMask in which the LT and GT
1915 // actions are swapped.
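 // For example, CCMASK_CMP_LT | CCMASK_CMP_EQ ("less than or equal")
 // becomes CCMASK_CMP_GT | CCMASK_CMP_EQ, which is the same condition
 // once the comparison operands have been swapped.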
1916 static unsigned reverseCCMask(unsigned CCMask) {
1917  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
1918  (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
1919  (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
1920  (CCMask & SystemZ::CCMASK_CMP_UO));
1921 }
1922 
1923 // Check whether C tests for equality between X and Y and whether X - Y
1924 // or Y - X is also computed. In that case it's better to compare the
1925 // result of the subtraction against zero.
1926 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
1927  Comparison &C) {
1928  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
1929  C.CCMask == SystemZ::CCMASK_CMP_NE) {
1930  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
1931  SDNode *N = *I;
1932  if (N->getOpcode() == ISD::SUB &&
1933  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
1934  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
1935  C.Op0 = SDValue(N, 0);
1936  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
1937  return;
1938  }
1939  }
1940  }
1941 }
1942 
1943 // Check whether C compares a floating-point value with zero and if that
1944 // floating-point value is also negated. In this case we can use the
1945 // negation to set CC, so avoiding separate LOAD AND TEST and
1946 // LOAD (NEGATIVE/COMPLEMENT) instructions.
1947 static void adjustForFNeg(Comparison &C) {
1948  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
1949  if (C1 && C1->isZero()) {
1950  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
1951  SDNode *N = *I;
1952  if (N->getOpcode() == ISD::FNEG) {
1953  C.Op0 = SDValue(N, 0);
1954  C.CCMask = reverseCCMask(C.CCMask);
1955  return;
1956  }
1957  }
1958  }
1959 }
1960 
1961 // Check whether C compares (shl X, 32) with 0 and whether X is
1962 // also sign-extended. In that case it is better to test the result
1963 // of the sign extension using LTGFR.
1964 //
1965 // This case is important because InstCombine transforms a comparison
1966 // with (sext (trunc X)) into a comparison with (shl X, 32).
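 // If some other use already computes (sign_extend_inreg X, i32),
 // comparing that extended value against zero lets one instruction do the
 // sign extension and set CC, instead of keeping the shift alive.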
1967 static void adjustForLTGFR(Comparison &C) {
1968  // Check for a comparison between (shl X, 32) and 0.
1969  if (C.Op0.getOpcode() == ISD::SHL &&
1970  C.Op0.getValueType() == MVT::i64 &&
1971  C.Op1.getOpcode() == ISD::Constant &&
1972  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
1973  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
1974  if (C1 && C1->getZExtValue() == 32) {
1975  SDValue ShlOp0 = C.Op0.getOperand(0);
1976  // See whether X has any SIGN_EXTEND_INREG uses.
1977  for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
1978  SDNode *N = *I;
1979  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
1980  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
1981  C.Op0 = SDValue(N, 0);
1982  return;
1983  }
1984  }
1985  }
1986  }
1987 }
1988 
1989 // If C compares the truncation of an extending load, try to compare
1990 // the untruncated value instead. This exposes more opportunities to
1991 // reuse CC.
1992 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
1993  Comparison &C) {
1994  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
1995  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
1996  C.Op1.getOpcode() == ISD::Constant &&
1997  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
1998  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
1999  if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
2000  unsigned Type = L->getExtensionType();
2001  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2002  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2003  C.Op0 = C.Op0.getOperand(0);
2004  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2005  }
2006  }
2007  }
2008 }
2009 
2010 // Return true if shift operation N has an in-range constant shift value.
2011 // Store it in ShiftVal if so.
2012 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2013  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2014  if (!Shift)
2015  return false;
2016 
2017  uint64_t Amount = Shift->getZExtValue();
2018  if (Amount >= N.getValueSizeInBits())
2019  return false;
2020 
2021  ShiftVal = Amount;
2022  return true;
2023 }
2024 
2025 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2026 // instruction and whether the CC value is descriptive enough to handle
2027  // a comparison of type ICmpType between the AND result and CmpVal.
2028 // CCMask says which comparison result is being tested and BitSize is
2029 // the number of bits in the operands. If TEST UNDER MASK can be used,
2030 // return the corresponding CC mask, otherwise return 0.
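 // For example, an equality test of the AND result against 0 maps to
 // CCMASK_TM_ALL_0 ("all selected bits are zero"), and the corresponding
 // inequality maps to CCMASK_TM_SOME_1.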
2031 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2032  uint64_t Mask, uint64_t CmpVal,
2033  unsigned ICmpType) {
2034  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2035 
2036  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2037  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2038  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2039  return 0;
2040 
2041  // Work out the masks for the lowest and highest bits.
2042  unsigned HighShift = 63 - countLeadingZeros(Mask);
2043  uint64_t High = uint64_t(1) << HighShift;
2044  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2045 
2046  // Signed ordered comparisons are effectively unsigned if the sign
2047  // bit is dropped.
2048  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2049 
2050  // Check for equality comparisons with 0, or the equivalent.
2051  if (CmpVal == 0) {
2052  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2053  return SystemZ::CCMASK_TM_ALL_0;
2054  if (CCMask == SystemZ::CCMASK_CMP_NE)
2055  return SystemZ::CCMASK_TM_SOME_1;
2056  }
2057  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2058  if (CCMask == SystemZ::CCMASK_CMP_LT)
2059  return SystemZ::CCMASK_TM_ALL_0;
2060  if (CCMask == SystemZ::CCMASK_CMP_GE)
2061  return SystemZ::CCMASK_TM_SOME_1;
2062  }
2063  if (EffectivelyUnsigned && CmpVal < Low) {
2064  if (CCMask == SystemZ::CCMASK_CMP_LE)
2065  return SystemZ::CCMASK_TM_ALL_0;
2066  if (CCMask == SystemZ::CCMASK_CMP_GT)
2067  return SystemZ::CCMASK_TM_SOME_1;
2068  }
2069 
2070  // Check for equality comparisons with the mask, or the equivalent.
2071  if (CmpVal == Mask) {
2072  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2073  return SystemZ::CCMASK_TM_ALL_1;
2074  if (CCMask == SystemZ::CCMASK_CMP_NE)
2075  return SystemZ::CCMASK_TM_SOME_0;
2076  }
2077  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2078  if (CCMask == SystemZ::CCMASK_CMP_GT)
2079  return SystemZ::CCMASK_TM_ALL_1;
2080  if (CCMask == SystemZ::CCMASK_CMP_LE)
2081  return SystemZ::CCMASK_TM_SOME_0;
2082  }
2083  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2084  if (CCMask == SystemZ::CCMASK_CMP_GE)
2085  return SystemZ::CCMASK_TM_ALL_1;
2086  if (CCMask == SystemZ::CCMASK_CMP_LT)
2087  return SystemZ::CCMASK_TM_SOME_0;
2088  }
2089 
2090  // Check for ordered comparisons with the top bit.
2091  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2092  if (CCMask == SystemZ::CCMASK_CMP_LE)
2093  return SystemZ::CCMASK_TM_MSB_0;
2094  if (CCMask == SystemZ::CCMASK_CMP_GT)
2095  return SystemZ::CCMASK_TM_MSB_1;
2096  }
2097  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2098  if (CCMask == SystemZ::CCMASK_CMP_LT)
2099  return SystemZ::CCMASK_TM_MSB_0;
2100  if (CCMask == SystemZ::CCMASK_CMP_GE)
2101  return SystemZ::CCMASK_TM_MSB_1;
2102  }
2103 
2104  // If there are just two bits, we can do equality checks for Low and High
2105  // as well.
2106  if (Mask == Low + High) {
2107  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2108  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2109  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2110  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2111  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2112  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2113  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2114  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2115  }
2116 
2117  // Looks like we've exhausted our options.
2118  return 0;
2119 }
2120 
2121 // See whether C can be implemented as a TEST UNDER MASK instruction.
2122 // Update the arguments with the TM version if so.
2123 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2124  Comparison &C) {
2125  // Check that we have a comparison with a constant.
2126  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2127  if (!ConstOp1)
2128  return;
2129  uint64_t CmpVal = ConstOp1->getZExtValue();
2130 
2131  // Check whether the nonconstant input is an AND with a constant mask.
2132  Comparison NewC(C);
2133  uint64_t MaskVal;
2134  ConstantSDNode *Mask = nullptr;
2135  if (C.Op0.getOpcode() == ISD::AND) {
2136  NewC.Op0 = C.Op0.getOperand(0);
2137  NewC.Op1 = C.Op0.getOperand(1);
2138  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2139  if (!Mask)
2140  return;
2141  MaskVal = Mask->getZExtValue();
2142  } else {
2143  // There is no instruction to compare with a 64-bit immediate
2144  // so use TMHH instead if possible. We need an unsigned ordered
2145  // comparison with an i64 immediate.
2146  if (NewC.Op0.getValueType() != MVT::i64 ||
2147  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2148  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2149  NewC.ICmpType == SystemZICMP::SignedOnly)
2150  return;
2151  // Convert LE and GT comparisons into LT and GE.
2152  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2153  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2154  if (CmpVal == uint64_t(-1))
2155  return;
2156  CmpVal += 1;
2157  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2158  }
2159  // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2160  // be masked off without changing the result.
2161  MaskVal = -(CmpVal & -CmpVal);
2162  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2163  }
2164  if (!MaskVal)
2165  return;
2166 
2167  // Check whether the combination of mask, comparison value and comparison
2168  // type are suitable.
2169  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2170  unsigned NewCCMask, ShiftVal;
2171  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2172  NewC.Op0.getOpcode() == ISD::SHL &&
2173  isSimpleShift(NewC.Op0, ShiftVal) &&
2174  (MaskVal >> ShiftVal != 0) &&
2175  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2176  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2177  MaskVal >> ShiftVal,
2178  CmpVal >> ShiftVal,
2179  SystemZICMP::Any))) {
2180  NewC.Op0 = NewC.Op0.getOperand(0);
2181  MaskVal >>= ShiftVal;
2182  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2183  NewC.Op0.getOpcode() == ISD::SRL &&
2184  isSimpleShift(NewC.Op0, ShiftVal) &&
2185  (MaskVal << ShiftVal != 0) &&
2186  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2187  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2188  MaskVal << ShiftVal,
2189  CmpVal << ShiftVal,
2190  SystemZICMP::Any))) {
2191  NewC.Op0 = NewC.Op0.getOperand(0);
2192  MaskVal <<= ShiftVal;
2193  } else {
2194  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2195  NewC.ICmpType);
2196  if (!NewCCMask)
2197  return;
2198  }
2199 
2200  // Go ahead and make the change.
2201  C.Opcode = SystemZISD::TM;
2202  C.Op0 = NewC.Op0;
2203  if (Mask && Mask->getZExtValue() == MaskVal)
2204  C.Op1 = SDValue(Mask, 0);
2205  else
2206  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2207  C.CCValid = SystemZ::CCMASK_TM;
2208  C.CCMask = NewCCMask;
2209 }
2210 
2211 // See whether the comparison argument contains a redundant AND
2212 // and remove it if so. This sometimes happens due to the generic
2213 // BRCOND expansion.
2214 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2215  Comparison &C) {
2216  if (C.Op0.getOpcode() != ISD::AND)
2217  return;
2218  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2219  if (!Mask)
2220  return;
2221  KnownBits Known;
2222  DAG.computeKnownBits(C.Op0.getOperand(0), Known);
2223  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2224  return;
2225 
2226  C.Op0 = C.Op0.getOperand(0);
2227 }
2228 
2229 // Return a Comparison that tests the condition-code result of intrinsic
2230 // node Call against constant integer CC using comparison code Cond.
2231 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2232 // and CCValid is the set of possible condition-code results.
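 // The mask bits follow the usual SystemZ convention: bit 3 (value 8)
 // corresponds to CC 0 and bit 0 (value 1) to CC 3. For example, testing
 // for CC == 1 with SETEQ produces the mask 1 << (3 - 1) == 0b0100.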
2233 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2234  SDValue Call, unsigned CCValid, uint64_t CC,
2235  ISD::CondCode Cond) {
2236  Comparison C(Call, SDValue());
2237  C.Opcode = Opcode;
2238  C.CCValid = CCValid;
2239  if (Cond == ISD::SETEQ)
2240  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2241  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2242  else if (Cond == ISD::SETNE)
2243  // ...and the inverse of that.
2244  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2245  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2246  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2247  // always true for CC>3.
2248  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2249  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2250  // ...and the inverse of that.
2251  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2252  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2253  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2254  // always true for CC>3.
2255  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2256  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2257  // ...and the inverse of that.
2258  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2259  else
2260  llvm_unreachable("Unexpected integer comparison type");
2261  C.CCMask &= CCValid;
2262  return C;
2263 }
2264 
2265  // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2266 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2267  ISD::CondCode Cond, const SDLoc &DL) {
2268  if (CmpOp1.getOpcode() == ISD::Constant) {
2269  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2270  unsigned Opcode, CCValid;
2271  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2272  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2273  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2274  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2275  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2276  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2277  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2278  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2279  }
2280  Comparison C(CmpOp0, CmpOp1);
2281  C.CCMask = CCMaskForCondCode(Cond);
2282  if (C.Op0.getValueType().isFloatingPoint()) {
2283  C.CCValid = SystemZ::CCMASK_FCMP;
2284  C.Opcode = SystemZISD::FCMP;
2285  adjustForFNeg(C);
2286  } else {
2287  C.CCValid = SystemZ::CCMASK_ICMP;
2288  C.Opcode = SystemZISD::ICMP;
2289  // Choose the type of comparison. Equality and inequality tests can
2290  // use either signed or unsigned comparisons. The choice also doesn't
2291  // matter if both sign bits are known to be clear. In those cases we
2292  // want to give the main isel code the freedom to choose whichever
2293  // form fits best.
2294  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2295  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2296  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2297  C.ICmpType = SystemZICMP::Any;
2298  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2299  C.ICmpType = SystemZICMP::UnsignedOnly;
2300  else
2301  C.ICmpType = SystemZICMP::SignedOnly;
2302  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2303  adjustForRedundantAnd(DAG, DL, C);
2304  adjustZeroCmp(DAG, DL, C);
2305  adjustSubwordCmp(DAG, DL, C);
2306  adjustForSubtraction(DAG, DL, C);
2307  adjustForLTGFR(C);
2308  adjustICmpTruncate(DAG, DL, C);
2309  }
2310 
2311  if (shouldSwapCmpOperands(C)) {
2312  std::swap(C.Op0, C.Op1);
2313  C.CCMask = reverseCCMask(C.CCMask);
2314  }
2315 
2316  adjustForTestUnderMask(DAG, DL, C);
2317  return C;
2318 }
2319 
2320 // Emit the comparison instruction described by C.
2321 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2322  if (!C.Op1.getNode()) {
2323  SDNode *Node;
2324  switch (C.Op0.getOpcode()) {
2325  case ISD::INTRINSIC_W_CHAIN:
2326  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2327  return SDValue(Node, 0);
2328  case ISD::INTRINSIC_WO_CHAIN:
2329  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2330  return SDValue(Node, Node->getNumValues() - 1);
2331  default:
2332  llvm_unreachable("Invalid comparison operands");
2333  }
2334  }
2335  if (C.Opcode == SystemZISD::ICMP)
2336  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2337  DAG.getConstant(C.ICmpType, DL, MVT::i32));
2338  if (C.Opcode == SystemZISD::TM) {
2339  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2340  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2341  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2342  DAG.getConstant(RegisterOnly, DL, MVT::i32));
2343  }
2344  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2345 }
2346 
2347 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2348 // 64 bits. Extend is the extension type to use. Store the high part
2349 // in Hi and the low part in Lo.
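 // For example, a 32-bit unsigned MUL_LOHI zero-extends both operands,
 // forms the full 64-bit product, and splits it: Hi is the product shifted
 // right by 32 and truncated, Lo is the plain truncation of the product.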
2350 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2351  SDValue Op0, SDValue Op1, SDValue &Hi,
2352  SDValue &Lo) {
2353  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2354  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2355  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2356  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2357  DAG.getConstant(32, DL, MVT::i64));
2358  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2359  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2360 }
2361 
2362 // Lower a binary operation that produces two VT results, one in each
2363 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2364 // and Opcode performs the GR128 operation. Store the even register result
2365 // in Even and the odd register result in Odd.
2366 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2367  unsigned Opcode, SDValue Op0, SDValue Op1,
2368  SDValue &Even, SDValue &Odd) {
2369  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2370  bool Is32Bit = is32Bit(VT);
2371  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2372  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2373 }
2374 
2375 // Return an i32 value that is 1 if the CC value produced by CCReg is
2376 // in the mask CCMask and 0 otherwise. CC is known to have a value
2377 // in CCValid, so other values can be ignored.
2378 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2379  unsigned CCValid, unsigned CCMask) {
2380  SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
2381  DAG.getConstant(0, DL, MVT::i32),
2382  DAG.getConstant(CCValid, DL, MVT::i32),
2383  DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
2384  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2385 }
2386 
2387  // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2388 // be done directly. IsFP is true if CC is for a floating-point rather than
2389 // integer comparison.
2390 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
2391  switch (CC) {
2392  case ISD::SETOEQ:
2393  case ISD::SETEQ:
2394  return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
2395 
2396  case ISD::SETOGE:
2397  case ISD::SETGE:
2398  return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
2399 
2400  case ISD::SETOGT:
2401  case ISD::SETGT:
2402  return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
2403 
2404  case ISD::SETUGT:
2405  return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
2406 
2407  default:
2408  return 0;
2409  }
2410 }
2411 
2412 // Return the SystemZISD vector comparison operation for CC or its inverse,
2413 // or 0 if neither can be done directly. Indicate in Invert whether the
2414 // result is for the inverse of CC. IsFP is true if CC is for a
2415 // floating-point rather than integer comparison.
2416 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
2417  bool &Invert) {
2418  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2419  Invert = false;
2420  return Opcode;
2421  }
2422 
2423  CC = ISD::getSetCCInverse(CC, !IsFP);
2424  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2425  Invert = true;
2426  return Opcode;
2427  }
2428 
2429  return 0;
2430 }
2431 
2432 // Return a v2f64 that contains the extended form of elements Start and Start+1
2433 // of v4f32 value Op.
2434 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2435  SDValue Op) {
2436  int Mask[] = { Start, -1, Start + 1, -1 };
2437  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2438  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2439 }
2440 
2441 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2442 // producing a result of type VT.
2443 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2444  const SDLoc &DL, EVT VT,
2445  SDValue CmpOp0,
2446  SDValue CmpOp1) const {
2447  // There is no hardware support for v4f32 (unless we have the vector
2448  // enhancements facility 1), so extend the vector into two v2f64s
2449  // and compare those.
2450  if (CmpOp0.getValueType() == MVT::v4f32 &&
2451  !Subtarget.hasVectorEnhancements1()) {
2452  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
2453  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
2454  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
2455  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
2456  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2457  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2458  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2459  }
2460  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2461 }
2462 
2463 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2464 // an integer mask of type VT.
2465 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2466  const SDLoc &DL, EVT VT,
2467  ISD::CondCode CC,
2468  SDValue CmpOp0,
2469  SDValue CmpOp1) const {
2470  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2471  bool Invert = false;
2472  SDValue Cmp;
2473  switch (CC) {
2474  // Handle tests for order using (or (ogt y x) (oge x y)).
2475  case ISD::SETUO:
2476  Invert = true;
2477  LLVM_FALLTHROUGH;
2478  case ISD::SETO: {
2479  assert(IsFP && "Unexpected integer comparison");
2480  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2481  SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
2482  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2483  break;
2484  }
2485 
2486  // Handle <> tests using (or (ogt y x) (ogt x y)).
2487  case ISD::SETUEQ:
2488  Invert = true;
2489  LLVM_FALLTHROUGH;
2490  case ISD::SETONE: {
2491  assert(IsFP && "Unexpected integer comparison");
2492  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2493  SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
2494  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2495  break;
2496  }
2497 
2498  // Otherwise a single comparison is enough. It doesn't really
2499  // matter whether we try the inversion or the swap first, since
2500  // there are no cases where both work.
2501  default:
2502  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2503  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
2504  else {
2505  CC = ISD::getSetCCSwappedOperands(CC);
2506  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2507  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
2508  else
2509  llvm_unreachable("Unhandled comparison");
2510  }
2511  break;
2512  }
2513  if (Invert) {
2514  SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
2515  DAG.getConstant(65535, DL, MVT::i32));
2516  Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
2517  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2518  }
2519  return Cmp;
2520 }
2521 
2522 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2523  SelectionDAG &DAG) const {
2524  SDValue CmpOp0 = Op.getOperand(0);
2525  SDValue CmpOp1 = Op.getOperand(1);
2526  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2527  SDLoc DL(Op);
2528  EVT VT = Op.getValueType();
2529  if (VT.isVector())
2530  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2531 
2532  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2533  SDValue CCReg = emitCmp(DAG, DL, C);
2534  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2535 }
2536 
2537 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2538  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2539  SDValue CmpOp0 = Op.getOperand(2);
2540  SDValue CmpOp1 = Op.getOperand(3);
2541  SDValue Dest = Op.getOperand(4);
2542  SDLoc DL(Op);
2543 
2544  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2545  SDValue CCReg = emitCmp(DAG, DL, C);
2546  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
2547  Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
2548  DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
2549 }
2550 
2551 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2552 // allowing Pos and Neg to be wider than CmpOp.
2553 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
2554  return (Neg.getOpcode() == ISD::SUB &&
2555  Neg.getOperand(0).getOpcode() == ISD::Constant &&
2556  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
2557  Neg.getOperand(1) == Pos &&
2558  (Pos == CmpOp ||
2559  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
2560  Pos.getOperand(0) == CmpOp)));
2561 }
2562 
2563 // Return the absolute or negative absolute of Op; IsNegative decides which.
2564 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
2565  bool IsNegative) {
2566  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
2567  if (IsNegative)
2568  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
2569  DAG.getConstant(0, DL, Op.getValueType()), Op);
2570  return Op;
2571 }
2572 
2573 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
2574  SelectionDAG &DAG) const {
2575  SDValue CmpOp0 = Op.getOperand(0);
2576  SDValue CmpOp1 = Op.getOperand(1);
2577  SDValue TrueOp = Op.getOperand(2);
2578  SDValue FalseOp = Op.getOperand(3);
2579  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2580  SDLoc DL(Op);
2581 
2582  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2583 
2584  // Check for absolute and negative-absolute selections, including those
2585  // where the comparison value is sign-extended (for LPGFR and LNGFR).
2586  // This check supplements the one in DAGCombiner.
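 // For example, (select (setcc x, 0, setgt), x, (sub 0, x)) is recognized
 // as an absolute value, and the same pattern with setlt as a negated
 // absolute value.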
2587  if (C.Opcode == SystemZISD::ICMP &&
2588  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
2589  C.CCMask != SystemZ::CCMASK_CMP_NE &&
2590  C.Op1.getOpcode() == ISD::Constant &&
2591  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2592  if (isAbsolute(C.Op0, TrueOp, FalseOp))
2593  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
2594  if (isAbsolute(C.Op0, FalseOp, TrueOp))
2595  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
2596  }
2597 
2598  SDValue CCReg = emitCmp(DAG, DL, C);
2599  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
2600  DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};
2601 
2602  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
2603 }
2604 
2605 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
2606  SelectionDAG &DAG) const {
2607  SDLoc DL(Node);
2608  const GlobalValue *GV = Node->getGlobal();
2609  int64_t Offset = Node->getOffset();
2610  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2611  CodeModel::Model CM = DAG.getTarget().getCodeModel();
2612 
2613  SDValue Result;
2614  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
2615  // Assign anchors at 1<<12 byte boundaries.
2616  uint64_t Anchor = Offset & ~uint64_t(0xfff);
2617  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
2618  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2619 
2620  // The offset can be folded into the address if it is aligned to a halfword.
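 // (PC-relative operands of LARL-style instructions are halfword-scaled,
 // so an odd byte offset cannot be folded and is added separately below.)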
2621  Offset -= Anchor;
2622  if (Offset != 0 && (Offset & 1) == 0) {
2623  SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
2624  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
2625  Offset = 0;
2626  }
2627  } else {
2628  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
2629  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2630  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2631  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2632  }
2633 
2634  // If there was a non-zero offset that we didn't fold, create an explicit
2635  // addition for it.
2636  if (Offset != 0)
2637  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
2638  DAG.getConstant(Offset, DL, PtrVT));
2639 
2640  return Result;
2641 }
2642 
2643 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
2644  SelectionDAG &DAG,
2645  unsigned Opcode,
2646  SDValue GOTOffset) const {
2647  SDLoc DL(Node);
2648  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2649  SDValue Chain = DAG.getEntryNode();
2650  SDValue Glue;
2651 
2652  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
2653  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2654  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
2655  Glue = Chain.getValue(1);
2656  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
2657  Glue = Chain.getValue(1);
2658 
2659  // The first call operand is the chain and the second is the TLS symbol.
2660  SmallVector<SDValue, 8> Ops;
2661  Ops.push_back(Chain);
2662  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
2663  Node->getValueType(0),
2664  0, 0));
2665 
2666  // Add argument registers to the end of the list so that they are
2667  // known live into the call.
2668  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
2669  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
2670 
2671  // Add a register mask operand representing the call-preserved registers.
2672  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2673  const uint32_t *Mask =
2674  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
2675  assert(Mask && "Missing call preserved mask for calling convention");
2676  Ops.push_back(DAG.getRegisterMask(Mask));
2677 
2678  // Glue the call to the argument copies.
2679  Ops.push_back(Glue);
2680 
2681  // Emit the call.
2682  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2683  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
2684  Glue = Chain.getValue(1);
2685 
2686  // Copy the return value from %r2.
2687  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
2688 }
2689 
2690 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
2691  SelectionDAG &DAG) const {
2692  SDValue Chain = DAG.getEntryNode();
2693  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2694 
2695  // The high part of the thread pointer is in access register 0.
2696  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
2697  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
2698 
2699  // The low part of the thread pointer is in access register 1.
2700  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
2701  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
2702 
2703  // Merge them into a single 64-bit address.
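 // That is, TP = (i64(%a0) << 32) | zext(%a1).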
2704  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
2705  DAG.getConstant(32, DL, PtrVT));
2706  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
2707 }
2708 
2709 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
2710  SelectionDAG &DAG) const {
2711  if (DAG.getTarget().useEmulatedTLS())
2712  return LowerToTLSEmulatedModel(Node, DAG);
2713  SDLoc DL(Node);
2714  const GlobalValue *GV = Node->getGlobal();
2715  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2716  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
2717 
2718  SDValue TP = lowerThreadPointer(DL, DAG);
2719 
2720  // Get the offset of GA from the thread pointer, based on the TLS model.
2721  SDValue Offset;
2722  switch (model) {
2723  case TLSModel::GeneralDynamic: {
2724  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
2725  SystemZConstantPoolValue *CPV =
2726  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
2727 
2728  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2729  Offset = DAG.getLoad(
2730  PtrVT, DL, DAG.getEntryNode(), Offset,
2731  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2732 
2733  // Call __tls_get_offset to retrieve the offset.
2734  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
2735  break;
2736  }
2737 
2738  case TLSModel::LocalDynamic: {
2739  // Load the GOT offset of the module ID.
2740  SystemZConstantPoolValue *CPV =
2741  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
2742 
2743  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2744  Offset = DAG.getLoad(
2745  PtrVT, DL, DAG.getEntryNode(), Offset,
2746  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2747 
2748  // Call __tls_get_offset to retrieve the module base offset.
2749  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
2750 
2751  // Note: The SystemZLDCleanupPass will remove redundant computations
2752  // of the module base offset. Count total number of local-dynamic
2753  // accesses to trigger execution of that pass.
2754  SystemZMachineFunctionInfo* MFI =
2755  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
2756  MFI->incNumLocalDynamicTLSAccesses();
2757 
2758  // Add the per-symbol offset.
2759  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
2760 
2761  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
2762  DTPOffset = DAG.getLoad(
2763  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
2764  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2765 
2766  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
2767  break;
2768  }
2769 
2770  case TLSModel::InitialExec: {
2771  // Load the offset from the GOT.
2772  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2773  SystemZII::MO_INDNTPOFF);
2774  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
2775  Offset =
2776  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
2777  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2778  break;
2779  }
2780 
2781  case TLSModel::LocalExec: {
2782  // Force the offset into the constant pool and load it from there.
2783  SystemZConstantPoolValue *CPV =
2784  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
2785 
2786  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2787  Offset = DAG.getLoad(
2788  PtrVT, DL, DAG.getEntryNode(), Offset,
2789  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2790  break;
2791  }
2792  }
2793 
2794  // Add the base and offset together.
2795  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
2796 }
2797 
2798 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
2799  SelectionDAG &DAG) const {
2800  SDLoc DL(Node);
2801  const BlockAddress *BA = Node->getBlockAddress();
2802  int64_t Offset = Node->getOffset();
2803  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2804 
2805  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
2806  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2807  return Result;
2808 }
2809 
2810 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
2811  SelectionDAG &DAG) const {
2812  SDLoc DL(JT);
2813  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2814  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2815 
2816  // Use LARL to load the address of the table.
2817  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2818 }
2819 
2820 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
2821  SelectionDAG &DAG) const {
2822  SDLoc DL(CP);
2823  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2824 
2825  SDValue Result;
2826  if (CP->isMachineConstantPoolEntry())
2827  Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2828  CP->getAlignment());
2829  else
2830  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2831  CP->getAlignment(), CP->getOffset());
2832 
2833  // Use LARL to load the address of the constant pool entry.
2834  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2835 }
2836 
2837 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
2838  SelectionDAG &DAG) const {
2839  MachineFunction &MF = DAG.getMachineFunction();
2840  MachineFrameInfo &MFI = MF.getFrameInfo();
2841  MFI.setFrameAddressIsTaken(true);
2842 
2843  SDLoc DL(Op);
2844  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2845  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2846 
2847  // If the back chain frame index has not been allocated yet, do so.
2848  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
2849  int BackChainIdx = FI->getFramePointerSaveIndex();
2850  if (!BackChainIdx) {
2851  // By definition, the frame address is the address of the back chain.
2852  BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
2853  FI->setFramePointerSaveIndex(BackChainIdx);
2854  }
2855  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
2856 
2857  // FIXME The frontend should detect this case.
2858  if (Depth > 0) {
2859  report_fatal_error("Unsupported stack frame traversal count");
2860  }
2861 
2862  return BackChain;
2863 }
2864 
2865 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
2866  SelectionDAG &DAG) const {
2867  MachineFunction &MF = DAG.getMachineFunction();
2868  MachineFrameInfo &MFI = MF.getFrameInfo();
2869  MFI.setReturnAddressIsTaken(true);
2870 
2871  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
2872  return SDValue();
2873 
2874  SDLoc DL(Op);
2875  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2876  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2877 
2878  // FIXME The frontend should detect this case.
2879  if (Depth > 0) {
2880  report_fatal_error("Unsupported stack frame traversal count");
2881  }
2882 
2883  // Return R14D, which has the return address. Mark it an implicit live-in.
2884  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
2885  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
2886 }
2887 
2888 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
2889  SelectionDAG &DAG) const {
2890  SDLoc DL(Op);
2891  SDValue In = Op.getOperand(0);
2892  EVT InVT = In.getValueType();
2893  EVT ResVT = Op.getValueType();
2894 
2895  // Convert loads directly. This is normally done by DAGCombiner,
2896  // but we need this case for bitcasts that are created during lowering
2897  // and which are then lowered themselves.
2898  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
2899  if (ISD::isNormalLoad(LoadN)) {
2900  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
2901  LoadN->getBasePtr(), LoadN->getMemOperand());
2902  // Update the chain uses.
2903  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
2904  return NewLoad;
2905  }
2906 
2907  if (InVT == MVT::i32 && ResVT == MVT::f32) {
2908  SDValue In64;
2909  if (Subtarget.hasHighWord()) {
2910  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
2911  MVT::i64);
2912  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
2913  MVT::i64, SDValue(U64, 0), In);
2914  } else {
2915  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
2916  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
2917  DAG.getConstant(32, DL, MVT::i64));
2918  }
2919  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
2920  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
2921  DL, MVT::f32, Out64);
2922  }
2923  if (InVT == MVT::f32 && ResVT == MVT::i32) {
2924  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
2925  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
2926  MVT::f64, SDValue(U64, 0), In);
2927  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
2928  if (Subtarget.hasHighWord())
2929  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
2930  MVT::i32, Out64);
2931  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
2932  DAG.getConstant(32, DL, MVT::i64));
2933  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
2934  }
2935  llvm_unreachable("Unexpected bitcast combination");
2936 }
2937 
2938 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
2939  SelectionDAG &DAG) const {
2940  MachineFunction &MF = DAG.getMachineFunction();
2941  SystemZMachineFunctionInfo *FuncInfo =
2942  MF.getInfo<SystemZMachineFunctionInfo>();
2943  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2944 
2945  SDValue Chain = Op.getOperand(0);
2946  SDValue Addr = Op.getOperand(1);
2947  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2948  SDLoc DL(Op);
2949 
2950  // The initial values of each field.
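 // (These are the four members of the s390x va_list structure, presumably
 // __gpr, __fpr, __overflow_arg_area and __reg_save_area, stored 8 bytes
 // apart below.)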
2951  const unsigned NumFields = 4;
2952  SDValue Fields[NumFields] = {
2953  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
2954  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
2955  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
2956  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
2957  };
2958 
2959  // Store each field into its respective slot.
2960  SDValue MemOps[NumFields];
2961  unsigned Offset = 0;
2962  for (unsigned I = 0; I < NumFields; ++I) {
2963  SDValue FieldAddr = Addr;
2964  if (Offset != 0)
2965  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
2966  DAG.getIntPtrConstant(Offset, DL));
2967  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
2968  MachinePointerInfo(SV, Offset));
2969  Offset += 8;
2970  }
2971  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
2972 }
2973 
2974 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
2975  SelectionDAG &DAG) const {
2976  SDValue Chain = Op.getOperand(0);
2977  SDValue DstPtr = Op.getOperand(1);
2978  SDValue SrcPtr = Op.getOperand(2);
2979  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
2980  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
2981  SDLoc DL(Op);
2982 
2983  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
2984  /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
2985  /*isTailCall*/false,
2986  MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
2987 }
2988 
2989 SDValue SystemZTargetLowering::
2990 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
2991  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
2992  MachineFunction &MF = DAG.getMachineFunction();
2993  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
2994  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
2995 
2996  SDValue Chain = Op.getOperand(0);
2997  SDValue Size = Op.getOperand(1);
2998  SDValue Align = Op.getOperand(2);
2999  SDLoc DL(Op);
3000 
3001  // If the user has set the "no-realign-stack" function attribute, ignore
3002  // alloca alignments.
3003  uint64_t AlignVal = (RealignOpt ?
3004  dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3005 
3006  uint64_t StackAlign = TFI->getStackAlignment();
3007  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3008  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3009 
3010  unsigned SPReg = getStackPointerRegisterToSaveRestore();
3011  SDValue NeededSpace = Size;
3012 
3013  // Get a reference to the stack pointer.
3014  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3015 
3016  // If we need a backchain, save it now.
3017  SDValue Backchain;
3018  if (StoreBackchain)
3019  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3020 
3021  // Add extra space for alignment if needed.
3022  if (ExtraAlignSpace)
3023  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3024  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3025 
3026  // Get the new stack pointer value.
3027  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3028 
3029  // Copy the new stack pointer back.
3030  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3031 
3032  // The allocated data lives above the 160 bytes allocated for the standard
3033  // frame, plus any outgoing stack arguments. We don't know how much that
3034  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
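 // (ADJDYNALLOC is resolved to the actual offset once the final frame
 // layout is known, so Result ends up as NewSP plus the size of the
 // register save area and any outgoing argument slots.)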
3035  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3036  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3037 
3038  // Dynamically realign if needed.
3039  if (RequiredAlign > StackAlign) {
3040  Result =
3041  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3042  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3043  Result =
3044  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3045  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3046  }
3047 
3048  if (StoreBackchain)
3049  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3050 
3051  SDValue Ops[2] = { Result, Chain };
3052  return DAG.getMergeValues(Ops, DL);
3053 }
3054 
3055 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3056  SDValue Op, SelectionDAG &DAG) const {
3057  SDLoc DL(Op);
3058 
3059  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3060 }
3061 
3062 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3063  SelectionDAG &DAG) const {
3064  EVT VT = Op.getValueType();
3065  SDLoc DL(Op);
3066  SDValue Ops[2];
3067  if (is32Bit(VT))
3068  // Just do a normal 64-bit multiplication and extract the results.
3069  // We define this so that it can be used for constant division.
3070  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3071  Op.getOperand(1), Ops[1], Ops[0]);
3072  else if (Subtarget.hasMiscellaneousExtensions2())
3073  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3074  // the high result in the even register. ISD::SMUL_LOHI is defined to
3075  // return the low half first, so the results are in reverse order.
3076  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3077  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3078  else {
3079  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3080  //
3081  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3082  //
3083  // but using the fact that the upper halves are either all zeros
3084  // or all ones:
3085  //
3086  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3087  //
3088  // and grouping the right terms together since they are quicker than the
3089  // multiplication:
3090  //
3091  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
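 // The middle step uses the fact that lh and rh are either 0 or -1 (they
 // are ll and rl shifted right arithmetically by 63), so modulo 2^64
 // (lh * rl) == -(lh & rl) and (ll * rh) == -(ll & rh).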
3092  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3093  SDValue LL = Op.getOperand(0);
3094  SDValue RL = Op.getOperand(1);
3095  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3096  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3097  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3098  // the high result in the even register. ISD::SMUL_LOHI is defined to
3099  // return the low half first, so the results are in reverse order.
3100  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3101  LL, RL, Ops[1], Ops[0]);
3102  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3103  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3104  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3105  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3106  }
3107  return DAG.getMergeValues(Ops, DL);
3108 }
3109 
3110 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3111  SelectionDAG &DAG) const {
3112  EVT VT = Op.getValueType();
3113  SDLoc DL(Op);
3114  SDValue Ops[2];
3115  if (is32Bit(VT))
3116  // Just do a normal 64-bit multiplication and extract the results.
3117  // We define this so that it can be used for constant division.
3118  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3119  Op.getOperand(1), Ops[1], Ops[0]);
3120  else
3121  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3122  // the high result in the even register. ISD::UMUL_LOHI is defined to
3123  // return the low half first, so the results are in reverse order.
3124  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3125  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3126  return DAG.getMergeValues(Ops, DL);
3127 }
3128 
3129 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3130  SelectionDAG &DAG) const {
3131  SDValue Op0 = Op.getOperand(0);
3132  SDValue Op1 = Op.getOperand(1);
3133  EVT VT = Op.getValueType();
3134  SDLoc DL(Op);
3135 
3136  // We use DSGF for 32-bit division. This means the first operand must
3137  // always be 64-bit, and the second operand should be 32-bit whenever
3138  // that is possible, to improve performance.
3139  if (is32Bit(VT))
3140  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3141  else if (DAG.ComputeNumSignBits(Op1) > 32)
3142  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3143 
3144  // DSG(F) returns the remainder in the even register and the
3145  // quotient in the odd register.
3146  SDValue Ops[2];
3147  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3148  return DAG.getMergeValues(Ops, DL);
3149 }
3150 
3151 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3152  SelectionDAG &DAG) const {
3153  EVT VT = Op.getValueType();
3154  SDLoc DL(Op);
3155 
3156  // DL(G) returns the remainder in the even register and the
3157  // quotient in the odd register.
3158  SDValue Ops[2];
3159  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3160  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3161  return DAG.getMergeValues(Ops, DL);
3162 }
3163 
3164 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3165  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3166 
3167  // Get the known-zero masks for each operand.
3168  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
3169  KnownBits Known[2];
3170  DAG.computeKnownBits(Ops[0], Known[0]);
3171  DAG.computeKnownBits(Ops[1], Known[1]);
3172 
3173  // See if the upper 32 bits of one operand and the lower 32 bits of the
3174  // other are known zero. They are the low and high operands respectively.
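 // When that holds, the OR just combines two independent 32-bit halves,
 // so it can be done by inserting the low 32 bits of LowOp into the low
 // subregister of HighOp (see getTargetInsertSubreg below).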
3175  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3176  Known[1].Zero.getZExtValue() };
3177  unsigned High, Low;
3178  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3179  High = 1, Low = 0;
3180  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3181  High = 0, Low = 1;
3182  else
3183  return Op;
3184 
3185  SDValue LowOp = Ops[Low];
3186  SDValue HighOp = Ops[High];
3187 
3188  // If the high part is a constant, we're better off using IILH.
3189  if (HighOp.getOpcode() == ISD::Constant)
3190  return Op;
3191 
3192  // If the low part is a constant that is outside the range of LHI,
3193  // then we're better off using IILF.
3194  if (LowOp.getOpcode() == ISD::Constant) {
3195  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3196  if (!isInt<16>(Value))
3197  return Op;
3198  }
3199 
3200  // Check whether the high part is an AND that doesn't change the
3201  // high 32 bits and just masks out low bits. We can skip it if so.
3202  if (HighOp.getOpcode() == ISD::AND &&
3203  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3204  SDValue HighOp0 = HighOp.getOperand(0);
3205  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3206  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3207  HighOp = HighOp0;
3208  }
3209 
3210  // Take advantage of the fact that all GR32 operations only change the
3211  // low 32 bits by truncating Low to an i32 and inserting it directly
3212  // using a subreg. The interesting cases are those where the truncation
3213  // can be folded.
3214  SDLoc DL(Op);
3215  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3216  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3217  MVT::i64, HighOp, Low32);
3218 }
3219 
3220 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3221 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3222  SelectionDAG &DAG) const {
3223  SDNode *N = Op.getNode();
3224  SDValue LHS = N->getOperand(0);
3225  SDValue RHS = N->getOperand(1);
3226  SDLoc DL(N);
3227  unsigned BaseOp = 0;
3228  unsigned CCValid = 0;
3229  unsigned CCMask = 0;
3230 
3231  switch (Op.getOpcode()) {
3232  default: llvm_unreachable("Unknown instruction!");
3233  case ISD::SADDO:
3234  BaseOp = SystemZISD::SADDO;
3235  CCValid = SystemZ::CCMASK_ARITH;
3236  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3237  break;
3238  case ISD::SSUBO:
3239  BaseOp = SystemZISD::SSUBO;
3240  CCValid = SystemZ::CCMASK_ARITH;
3241  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3242  break;
3243  case ISD::UADDO:
3244  BaseOp = SystemZISD::UADDO;
3245  CCValid = SystemZ::CCMASK_LOGICAL;
3246  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3247  break;
3248  case ISD::USUBO:
3249  BaseOp = SystemZISD::USUBO;
3250  CCValid = SystemZ::CCMASK_LOGICAL;
3251  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3252  break;
3253  }
3254 
3255  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3256  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3257 
3258  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3259  if (N->getValueType(1) == MVT::i1)
3260  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3261 
3262  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3263 }
3264 
3265 // Lower ADDCARRY/SUBCARRY nodes.
3266 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3267  SelectionDAG &DAG) const {
3268 
3269  SDNode *N = Op.getNode();
3270  MVT VT = N->getSimpleValueType(0);
3271 
3272  // Let legalize expand this if it isn't a legal type yet.
3273  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3274  return SDValue();
3275 
3276  SDValue LHS = N->getOperand(0);
3277  SDValue RHS = N->getOperand(1);
3278  SDValue Carry = Op.getOperand(2);
3279  SDLoc DL(N);
3280  unsigned BaseOp = 0;
3281  unsigned CCValid = 0;
3282  unsigned CCMask = 0;
3283 
3284  switch (Op.getOpcode()) {
3285  default: llvm_unreachable("Unknown instruction!");
3286  case ISD::ADDCARRY:
3287  BaseOp = SystemZISD::ADDCARRY;
3288  CCValid = SystemZ::CCMASK_LOGICAL;
3289  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3290  break;
3291  case ISD::SUBCARRY:
3292  BaseOp = SystemZISD::SUBCARRY;
3293  CCValid = SystemZ::CCMASK_LOGICAL;
3294  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3295  break;
3296  }
3297 
3298  // Set the condition code from the carry flag.
3299  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3300  DAG.getConstant(CCValid, DL, MVT::i32),
3301  DAG.getConstant(CCMask, DL, MVT::i32));
3302 
3303  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3304  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3305 
3306  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3307  if (N->getValueType(1) == MVT::i1)
3308  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3309 
3310  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3311 }
3312 
3313 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3314  SelectionDAG &DAG) const {
3315  EVT VT = Op.getValueType();
3316  SDLoc DL(Op);
3317  Op = Op.getOperand(0);
3318 
3319  // Handle vector types via VPOPCT.
3320  if (VT.isVector()) {
3321  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3322  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3323  switch (VT.getScalarSizeInBits()) {
3324  case 8:
3325  break;
3326  case 16: {
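      // Add the two per-byte counts within each halfword, then shift the
      // combined count down into the low byte of the halfword.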
3327  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3328  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3329  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3330  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3331  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3332  break;
3333  }
3334  case 32: {
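      // VSUM with a zero second operand adds the four per-byte counts
      // within each word.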
3335  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
3336  DAG.getConstant(0, DL, MVT::i32));
3337  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3338  break;
3339  }
3340  case 64: {
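      // Sum the per-byte counts into words, then sum the words into
      // doublewords.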
3341  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
3342  DAG.getConstant(0, DL, MVT::i32));
3343  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3344  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3345  break;
3346  }
3347  default:
3348  llvm_unreachable("Unexpected type");
3349  }
3350  return Op;
3351  }
3352 
3353  // Get the known-zero mask for the operand.
3354  KnownBits Known;
3355  DAG.computeKnownBits(Op, Known);
3356  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
3357  if (NumSignificantBits == 0)
3358  return DAG.getConstant(0, DL, VT);
3359 
3360  // Skip known-zero high parts of the operand.
3361  int64_t OrigBitSize = VT.getSizeInBits();
3362  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3363  BitSize = std::min(BitSize, OrigBitSize);
3364 
3365  // The POPCNT instruction counts the number of set bits in each byte.
3366  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3367  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3368  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3369 
3370  // Add up per-byte counts in a binary tree. All bits of Op at
3371  // position larger than BitSize remain zero throughout.
3372  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3373  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3374  if (BitSize != OrigBitSize)
3375  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3376  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3377  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3378  }
3379 
3380  // Extract overall result from high byte.
3381  if (BitSize > 8)
3382  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3383  DAG.getConstant(BitSize - 8, DL, VT));
3384 
3385  return Op;
3386 }
3387 
3388 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3389  SelectionDAG &DAG) const {
3390  SDLoc DL(Op);
3391  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3392  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3393  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3394  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3395 
3396  // The only fence that needs an instruction is a sequentially-consistent
3397  // cross-thread fence.
3398  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3399  FenceSSID == SyncScope::System) {
3400  return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3401  Op.getOperand(0)),
3402  0);
3403  }
3404 
3405  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3406  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3407 }
3408 
3409 // Op is an atomic load. Lower it into a normal volatile load.
3410 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3411  SelectionDAG &DAG) const {
3412  auto *Node = cast<AtomicSDNode>(Op.getNode());
3413  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3414  Node->getChain(), Node->getBasePtr(),
3415  Node->getMemoryVT(), Node->getMemOperand());
3416 }
3417 
3418 // Op is an atomic store. Lower it into a normal volatile store.
3419 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3420  SelectionDAG &DAG) const {
3421  auto *Node = cast<AtomicSDNode>(Op.getNode());
3422  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3423  Node->getBasePtr(), Node->getMemoryVT(),
3424  Node->getMemOperand());
3425  // We have to enforce sequential consistency by performing a
3426  // serialization operation after the store.
3427  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
3428  Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3429  MVT::Other, Chain), 0);
3430  return Chain;
3431 }
3432 
3433 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
3434 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
3435 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3436  SelectionDAG &DAG,
3437  unsigned Opcode) const {
3438  auto *Node = cast<AtomicSDNode>(Op.getNode());
3439 
3440  // 32-bit operations need no code outside the main loop.
3441  EVT NarrowVT = Node->getMemoryVT();
3442  EVT WideVT = MVT::i32;
3443  if (NarrowVT == WideVT)
3444  return Op;
3445 
3446  int64_t BitSize = NarrowVT.getSizeInBits();
3447  SDValue ChainIn = Node->getChain();
3448  SDValue Addr = Node->getBasePtr();
3449  SDValue Src2 = Node->getVal();
3450  MachineMemOperand *MMO = Node->getMemOperand();
3451  SDLoc DL(Node);
3452  EVT PtrVT = Addr.getValueType();
3453 
3454  // Convert atomic subtracts of constants into additions.
3455  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3456  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3457  Opcode = SystemZISD::ATOMIC_LOADW_ADD;
3458  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3459  }
3460 
3461  // Get the address of the containing word.
3462  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3463  DAG.getConstant(-4, DL, PtrVT));
3464 
3465  // Get the number of bits that the word must be rotated left in order
3466  // to bring the field to the top bits of a GR32.
3467  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3468  DAG.getConstant(3, DL, PtrVT));
3469  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3470 
3471  // Get the complementing shift amount, for rotating a field in the top
3472  // bits back to its proper position.
3473  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3474  DAG.getConstant(0, DL, WideVT), BitShift);
3475 
3476  // Extend the source operand to 32 bits and prepare it for the inner loop.
3477  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3478  // operations require the source to be shifted in advance. (This shift
3479  // can be folded if the source is constant.) For AND and NAND, the lower
3480  // bits must be set, while for other opcodes they should be left clear.
3481  if (Opcode != SystemZISD::ATOMIC_SWAPW)
3482  Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3483  DAG.getConstant(32 - BitSize, DL, WideVT));
3484  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3485  Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3486  Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3487  DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3488 
3489  // Construct the ATOMIC_LOADW_* node.
3490  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3491  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
3492  DAG.getConstant(BitSize, DL, WideVT) };
3493  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
3494  NarrowVT, MMO);
3495 
3496  // Rotate the result of the final CS so that the field is in the lower
3497  // bits of a GR32, then truncate it.
3498  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
3499  DAG.getConstant(BitSize, DL, WideVT));
3500  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
3501 
3502  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
3503  return DAG.getMergeValues(RetOps, DL);
3504 }
3505 
3506 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
3507 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3508 // operations into additions.
3509 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
3510  SelectionDAG &DAG) const {
3511  auto *Node = cast<AtomicSDNode>(Op.getNode());
3512  EVT MemVT = Node->getMemoryVT();
3513  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
3514  // A full-width operation.
3515  assert(Op.getValueType() == MemVT && "Mismatched VTs");
3516  SDValue Src2 = Node->getVal();
3517  SDValue NegSrc2;
3518  SDLoc DL(Src2);
3519 
3520  if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
3521  // Use an addition if the operand is constant and either LAA(G) is
3522  // available or the negative value is in the range of A(G)FHI.
3523  int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
3524  if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
3525  NegSrc2 = DAG.getConstant(Value, DL, MemVT);
3526  } else if (Subtarget.hasInterlockedAccess1())
3527  // Use LAA(G) if available.
3528  NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
3529  Src2);
3530 
3531  if (NegSrc2.getNode())
3532  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
3533  Node->getChain(), Node->getBasePtr(), NegSrc2,
3534  Node->getMemOperand());
3535 
3536  // Use the node as-is.
3537  return Op;
3538  }
3539 
3540  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
3541 }
3542 
3543 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
3544 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
3545  SelectionDAG &DAG) const {
3546  auto *Node = cast<AtomicSDNode>(Op.getNode());
3547  SDValue ChainIn = Node->getOperand(0);
3548  SDValue Addr = Node->getOperand(1);
3549  SDValue CmpVal = Node->getOperand(2);
3550  SDValue SwapVal = Node->getOperand(3);
3551  MachineMemOperand *MMO = Node->getMemOperand();
3552  SDLoc DL(Node);
3553 
3554  // We have native support for 32-bit and 64-bit compare and swap, but we
3555  // still need to expand extracting the "success" result from the CC.
3556  EVT NarrowVT = Node->getMemoryVT();
3557  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
3558  if (NarrowVT == WideVT) {
3559  SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3560  SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
3561  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
3562  DL, Tys, Ops, NarrowVT, MMO);
3563  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3564  SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
3565 
3566  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3567  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3568  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3569  return SDValue();
3570  }
3571 
3572  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
3573  // via a fullword ATOMIC_CMP_SWAPW operation.
3574  int64_t BitSize = NarrowVT.getSizeInBits();
3575  EVT PtrVT = Addr.getValueType();
3576 
3577  // Get the address of the containing word.
3578  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3579  DAG.getConstant(-4, DL, PtrVT));
3580 
3581  // Get the number of bits that the word must be rotated left in order
3582  // to bring the field to the top bits of a GR32.
3583  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3584  DAG.getConstant(3, DL, PtrVT));
3585  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3586 
3587  // Get the complementing shift amount, for rotating a field in the top
3588  // bits back to its proper position.
3589  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3590  DAG.getConstant(0, DL, WideVT), BitShift);
3591 
3592  // Construct the ATOMIC_CMP_SWAPW node.
3593  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3594  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
3595  NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
3596  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
3597  VTList, Ops, NarrowVT, MMO);
3598  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3599  SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
3600 
3601  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3602  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3603  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3604  return SDValue();
3605 }
3606 
3607 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
3608  SelectionDAG &DAG) const {
3609  MachineFunction &MF = DAG.getMachineFunction();
3610  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3611  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
3612  SystemZ::R15D, Op.getValueType());
3613 }
3614 
3615 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
3616  SelectionDAG &DAG) const {
3617  MachineFunction &MF = DAG.getMachineFunction();
3618  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3619  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3620 
3621  SDValue Chain = Op.getOperand(0);
3622  SDValue NewSP = Op.getOperand(1);
3623  SDValue Backchain;
3624  SDLoc DL(Op);
3625 
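  // If the backchain is in use, load the backchain word stored at the old
  // stack pointer so it can be stored again at the new stack pointer.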
3626  if (StoreBackchain) {
3627  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
3628  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3629  }
3630 
3631  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
3632 
3633  if (StoreBackchain)
3634  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3635 
3636  return Chain;
3637 }
3638 
3639 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
3640  SelectionDAG &DAG) const {
3641  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3642  if (!IsData)
3643  // Just preserve the chain.
3644  return Op.getOperand(0);
3645 
3646  SDLoc DL(Op);
3647  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3648  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
3649  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
3650  SDValue Ops[] = {
3651  Op.getOperand(0),
3652  DAG.getConstant(Code, DL, MVT::i32),
3653  Op.getOperand(1)
3654  };
3655  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
3656  Node->getVTList(), Ops,
3657  Node->getMemoryVT(), Node->getMemOperand());
3658 }
3659 
3660 // Convert condition code in CCReg to an i32 value.
3661 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
3662  SDLoc DL(CCReg);
3663  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
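  // IPM inserts the CC into bits 29:28 of the result; shifting right by
  // SystemZ::IPM_CC leaves the CC value in the range 0-3.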
3664  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
3665  DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
3666 }
3667 
3668 SDValue
3669 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3670  SelectionDAG &DAG) const {
3671  unsigned Opcode, CCValid;
3672  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
3673  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
3674  SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
3675  SDValue CC = getCCResult(DAG, SDValue(Node, 0));
3676  DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
3677  return SDValue();
3678  }
3679 
3680  return SDValue();
3681 }
3682 
3683 SDValue
3684 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3685  SelectionDAG &DAG) const {
3686  unsigned Opcode, CCValid;
3687  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
3688  SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
3689  if (Op->getNumValues() == 1)
3690  return getCCResult(DAG, SDValue(Node, 0));
3691  assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
3692  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
3693  SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
3694  }
3695 
3696  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3697  switch (Id) {
3698  case Intrinsic::thread_pointer:
3699  return lowerThreadPointer(SDLoc(Op), DAG);
3700 
3701  case Intrinsic::s390_vpdi:
3702  return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
3703  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3704 
3705  case Intrinsic::s390_vperm:
3706  return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
3707  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3708 
3709  case Intrinsic::s390_vuphb:
3710  case Intrinsic::s390_vuphh:
3711  case Intrinsic::s390_vuphf:
3712  return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
3713  Op.getOperand(1));
3714 
3715  case Intrinsic::s390_vuplhb:
3716  case Intrinsic::s390_vuplhh:
3717  case Intrinsic::s390_vuplhf:
3718  return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
3719  Op.getOperand(1));
3720 
3721  case Intrinsic::s390_vuplb:
3722  case Intrinsic::s390_vuplhw:
3723  case Intrinsic::s390_vuplf:
3724  return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
3725  Op.getOperand(1));
3726 
3727  case Intrinsic::s390_vupllb:
3728  case Intrinsic::s390_vupllh:
3729  case Intrinsic::s390_vupllf:
3730  return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
3731  Op.getOperand(1));
3732 
3733  case Intrinsic::s390_vsumb:
3734  case Intrinsic::s390_vsumh:
3735  case Intrinsic::s390_vsumgh:
3736  case Intrinsic::s390_vsumgf:
3737  case Intrinsic::s390_vsumqf:
3738  case Intrinsic::s390_vsumqg:
3739  return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
3740  Op.getOperand(1), Op.getOperand(2));
3741  }
3742 
3743  return SDValue();
3744 }
3745 
3746 namespace {
3747 // Says that SystemZISD operation Opcode can be used to perform the equivalent
3748 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
3749 // Operand is the constant third operand, otherwise it is the number of
3750 // bytes in each element of the result.
3751 struct Permute {
3752  unsigned Opcode;
3753  unsigned Operand;
3754  unsigned char Bytes[SystemZ::VectorBytes];
3755 };
3756 }
3757 
3758 static const Permute PermuteForms[] = {
3759  // VMRHG
3760  { SystemZISD::MERGE_HIGH, 8,
3761  { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
3762  // VMRHF
3763  { SystemZISD::MERGE_HIGH, 4,
3764  { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
3765  // VMRHH
3766  { SystemZISD::MERGE_HIGH, 2,
3767  { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
3768  // VMRHB
3769  { SystemZISD::MERGE_HIGH, 1,
3770  { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
3771  // VMRLG
3772  { SystemZISD::MERGE_LOW, 8,
3773  { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
3774  // VMRLF
3775  { SystemZISD::MERGE_LOW, 4,
3776  { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
3777  // VMRLH
3778  { SystemZISD::MERGE_LOW, 2,
3779  { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
3780  // VMRLB
3781  { SystemZISD::MERGE_LOW, 1,
3782  { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
3783  // VPKG
3784  { SystemZISD::PACK, 4,
3785  { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
3786  // VPKF
3787  { SystemZISD::PACK, 2,
3788  { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
3789  // VPKH
3790  { SystemZISD::PACK, 1,
3791  { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
3792  // VPDI V1, V2, 4 (low half of V1, high half of V2)
3793  { SystemZISD::PERMUTE_DWORDS, 4,
3794  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
3795  // VPDI V1, V2, 1 (high half of V1, low half of V2)
3796  { SystemZISD::PERMUTE_DWORDS, 1,
3797  { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
3798 };
3799 
3800 // Called after matching a vector shuffle against a particular pattern.
3801 // Both the original shuffle and the pattern have two vector operands.
3802 // OpNos[0] is the operand of the original shuffle that should be used for
3803 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
3804 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
3805 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
3806 // for operands 0 and 1 of the pattern.
3807 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
3808  if (OpNos[0] < 0) {
3809  if (OpNos[1] < 0)
3810  return false;
3811  OpNo0 = OpNo1 = OpNos[1];
3812  } else if (OpNos[1] < 0) {
3813  OpNo0 = OpNo1 = OpNos[0];
3814  } else {
3815  OpNo0 = OpNos[0];
3816  OpNo1 = OpNos[1];
3817  }
3818  return true;
3819 }
3820 
3821 // Bytes is a VPERM-like permute vector, except that -1 is used for
3822 // undefined bytes. Return true if the VPERM can be implemented using P.
3823 // When returning true set OpNo0 to the VPERM operand that should be
3824 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
3825 //
3826 // For example, if swapping the VPERM operands allows P to match, OpNo0
3827 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
3828 // operand, but rewriting it to use two duplicated operands allows it to
3829 // match P, then OpNo0 and OpNo1 will be the same.
3830 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
3831  unsigned &OpNo0, unsigned &OpNo1) {
3832  int OpNos[] = { -1, -1 };
3833  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
3834  int Elt = Bytes[I];
3835  if (Elt >= 0) {
3836  // Make sure that the two permute vectors use the same suboperand
3837  // byte number. Only the operand numbers (the high bits) are
3838  // allowed to differ.
3839  if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
3840  return false;
3841  int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
3842  int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
3843  // Make sure that the operand mappings are consistent with previous
3844  // elements.
3845  if (OpNos[ModelOpNo] == 1 - RealOpNo)
3846  return false;
3847  OpNos[ModelOpNo] = RealOpNo;
3848  }
3849  }
3850  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3851 }
3852 
3853 // As above, but search for a matching permute.
3854 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
3855  unsigned &OpNo0, unsigned &OpNo1) {
3856  for (auto &P : PermuteForms)
3857  if (matchPermute(Bytes, P, OpNo0, OpNo1))
3858  return &P;
3859  return nullptr;
3860 }
3861 
3862 // Bytes is a VPERM-like permute vector, except that -1 is used for
3863 // undefined bytes. This permute is an operand of an outer permute.
3864 // See whether redistributing the -1 bytes gives a shuffle that can be
3865 // implemented using P. If so, set Transform to a VPERM-like permute vector
3866 // that, when applied to the result of P, gives the original permute in Bytes.
3867 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3868  const Permute &P,
3869  SmallVectorImpl<int> &Transform) {
3870  unsigned To = 0;
3871  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
3872  int Elt = Bytes[From];
3873  if (Elt < 0)
3874  // Byte number From of the result is undefined.
3875  Transform[From] = -1;
3876  else {
3877  while (P.Bytes[To] != Elt) {
3878  To += 1;
3879  if (To == SystemZ::VectorBytes)
3880  return false;
3881  }
3882  Transform[From] = To;
3883  }
3884  }
3885  return true;
3886 }
3887 
3888 // As above, but search for a matching permute.
3889 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3890  SmallVectorImpl<int> &Transform) {
3891  for (auto &P : PermuteForms)
3892  if (matchDoublePermute(Bytes, P, Transform))
3893  return &P;
3894  return nullptr;
3895 }
3896 
3897 // Convert the mask of the given shuffle op into a byte-level mask,
3898 // as if it had type vNi8.
3899 static bool getVPermMask(SDValue ShuffleOp,
3900  SmallVectorImpl<int> &Bytes) {
3901  EVT VT = ShuffleOp.getValueType();
3902  unsigned NumElements = VT.getVectorNumElements();
3903  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3904 
3905  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
3906  Bytes.resize(NumElements * BytesPerElement, -1);
3907  for (unsigned I = 0; I < NumElements; ++I) {
3908  int Index = VSN->getMaskElt(I);
3909  if (Index >= 0)
3910  for (unsigned J = 0; J < BytesPerElement; ++J)
3911  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
3912  }
3913  return true;
3914  }
3915  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
3916  isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
3917  unsigned Index = ShuffleOp.getConstantOperandVal(1);
3918  Bytes.resize(NumElements * BytesPerElement, -1);
3919  for (unsigned I = 0; I < NumElements; ++I)
3920  for (unsigned J = 0; J < BytesPerElement; ++J)
3921  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
3922  return true;
3923  }
3924  return false;
3925 }
3926 
3927 // Bytes is a VPERM-like permute vector, except that -1 is used for
3928 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
3929 // the result come from a contiguous sequence of bytes from one input.
3930 // Set Base to the selector for the first byte if so.
3931 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
3932  unsigned BytesPerElement, int &Base) {
3933  Base = -1;
3934  for (unsigned I = 0; I < BytesPerElement; ++I) {
3935  if (Bytes[Start + I] >= 0) {
3936  unsigned Elem = Bytes[Start + I];
3937  if (Base < 0) {
3938  Base = Elem - I;
3939  // Make sure the bytes would come from one input operand.
3940  if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
3941  return false;
3942  } else if (unsigned(Base) != Elem - I)
3943  return false;
3944  }
3945  }
3946  return true;
3947 }
3948 
3949 // Bytes is a VPERM-like permute vector, except that -1 is used for
3950 // undefined bytes. Return true if it can be performed using VSLDI.
3951 // When returning true, set StartIndex to the shift amount and OpNo0
3952 // and OpNo1 to the VPERM operands that should be used as the first
3953 // and second shift operand respectively.
3954 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
3955  unsigned &StartIndex, unsigned &OpNo0,
3956  unsigned &OpNo1) {
3957  int OpNos[] = { -1, -1 };
3958  int Shift = -1;
3959  for (unsigned I = 0; I < 16; ++I) {
3960  int Index = Bytes[I];
3961  if (Index >= 0) {
3962  int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
3963  int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
3964  int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
3965  if (Shift < 0)
3966  Shift = ExpectedShift;
3967  else if (Shift != ExpectedShift)
3968  return false;
3969  // Make sure that the operand mappings are consistent with previous
3970  // elements.
3971  if (OpNos[ModelOpNo] == 1 - RealOpNo)
3972  return false;
3973  OpNos[ModelOpNo] = RealOpNo;
3974  }
3975  }
3976  StartIndex = Shift;
3977  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3978 }
3979 
3980 // Create a node that performs P on operands Op0 and Op1, casting the
3981 // operands to the appropriate type. The type of the result is determined by P.
3982 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
3983  const Permute &P, SDValue Op0, SDValue Op1) {
3984  // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
3985  // elements of a PACK are twice as wide as the outputs.
3986  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
3987  P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
3988  P.Operand);
3989  // Cast both operands to the appropriate type.
3990  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
3991  SystemZ::VectorBytes / InBytes);
3992  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
3993  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
3994  SDValue Op;
3995  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
3996  SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
3997  Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
3998  } else if (P.Opcode == SystemZISD::PACK) {
3999  MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4000  SystemZ::VectorBytes / P.Operand);
4001  Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4002  } else {
4003  Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4004  }
4005  return Op;
4006 }
4007 
4008 // Bytes is a VPERM-like permute vector, except that -1 is used for
4009 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4010 // VSLDI or VPERM.
4011 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4012  SDValue *Ops,
4013  const SmallVectorImpl<int> &Bytes) {
4014  for (unsigned I = 0; I < 2; ++I)
4015  Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4016 
4017  // First see whether VSLDI can be used.
4018  unsigned StartIndex, OpNo0, OpNo1;
4019  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4020  return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4021  Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
4022 
4023  // Fall back on VPERM. Construct an SDNode for the permute vector.
4024  SDValue IndexNodes[SystemZ::VectorBytes];
4025  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4026  if (Bytes[I] >= 0)
4027  IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4028  else
4029  IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4030  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4031  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
4032 }
4033 
4034 namespace {
4035 // Describes a general N-operand vector shuffle.
4036 struct GeneralShuffle {
4037  GeneralShuffle(EVT vt) : VT(vt) {}
4038  void addUndef();
4039  bool add(SDValue, unsigned);
4040  SDValue getNode(SelectionDAG &, const SDLoc &);
4041 
4042  // The operands of the shuffle.
4043  SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4044 
4045  // Index I is -1 if byte I of the result is undefined. Otherwise the
4046  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4047  // Bytes[I] / SystemZ::VectorBytes.
4048  SmallVector<int, SystemZ::VectorBytes> Bytes;
4049 
4050  // The type of the shuffle result.
4051  EVT VT;
4052 };
4053 }
4054 
4055 // Add an extra undefined element to the shuffle.
4056 void GeneralShuffle::addUndef() {
4057  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4058  for (unsigned I = 0; I < BytesPerElement; ++I)
4059  Bytes.push_back(-1);
4060 }
4061 
4062 // Add an extra element to the shuffle, taking it from element Elem of Op.
4063 // A null Op indicates a vector input whose value will be calculated later;
4064 // there is at most one such input per shuffle and it always has the same
4065 // type as the result. Aborts and returns false if the source vector elements
4066 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4067 // LLVM they become implicitly extended, but this is rare and not optimized.
4068 bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4069  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4070 
4071  // The source vector can have wider elements than the result,
4072  // either through an explicit TRUNCATE or because of type legalization.
4073  // We want the least significant part.
4074  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4075  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4076 
4077  // Return false if the source elements are smaller than their destination
4078  // elements.
4079  if (FromBytesPerElement < BytesPerElement)
4080  return false;
4081 
4082  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4083  (FromBytesPerElement - BytesPerElement));
4084 
4085  // Look through things like shuffles and bitcasts.
4086  while (Op.getNode()) {
4087  if (Op.getOpcode() == ISD::BITCAST)
4088  Op = Op.getOperand(0);
4089  else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4090  // See whether the bytes we need come from a contiguous part of one
4091  // operand.
4092  SmallVector<int, SystemZ::VectorBytes> OpBytes;
4093  if (!getVPermMask(Op, OpBytes))
4094  break;
4095  int NewByte;
4096  if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4097  break;
4098  if (NewByte < 0) {
4099  addUndef();
4100  return true;
4101  }
4102  Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4103  Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4104  } else if (Op.isUndef()) {
4105  addUndef();
4106  return true;
4107  } else
4108  break;
4109  }
4110 
4111  // Make sure that the source of the extraction is in Ops.
4112  unsigned OpNo = 0;
4113  for (; OpNo < Ops.size(); ++OpNo)
4114  if (Ops[OpNo] == Op)
4115  break;
4116  if (OpNo == Ops.size())
4117  Ops.push_back(Op);
4118 
4119  // Add the element to Bytes.
4120  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4121  for (unsigned I = 0; I < BytesPerElement; ++I)
4122  Bytes.push_back(Base + I);
4123 
4124  return true;
4125 }
4126 
4127 // Return SDNodes for the completed shuffle.
4128 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4129  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4130 
4131  if (Ops.size() == 0)
4132  return DAG.getUNDEF(VT);
4133 
4134  // Make sure that there are at least two shuffle operands.
4135  if (Ops.size() == 1)
4136  Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4137 
4138  // Create a tree of shuffles, deferring root node until after the loop.
4139  // Try to redistribute the undefined elements of non-root nodes so that
4140  // the non-root shuffles match something like a pack or merge, then adjust
4141  // the parent node's permute vector to compensate for the new order.
4142  // Among other things, this copes with vectors like <2 x i16> that were
4143  // padded with undefined elements during type legalization.
4144  //
4145  // In the best case this redistribution will lead to the whole tree
4146  // using packs and merges. It should rarely be a loss in other cases.
4147  unsigned Stride = 1;
4148  for (; Stride * 2 < Ops.size(); Stride *= 2) {
4149  for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4150  SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
4151 
4152  // Create a mask for just these two operands.
4153  SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
4154  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4155  unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
4156  unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
4157  if (OpNo == I)
4158  NewBytes[J] = Byte;
4159  else if (OpNo == I + Stride)
4160  NewBytes[J] = SystemZ::VectorBytes + Byte;
4161  else
4162  NewBytes[J] = -1;
4163  }
4164  // See if it would be better to reorganize NewMask to avoid using VPERM.
4165  SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
4166  if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
4167  Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
4168  // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4169  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4170  if (NewBytes[J] >= 0) {
4171  assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
4172  "Invalid double permute");
4173  Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
4174  } else
4175  assert(NewBytesMap[J] < 0 && "Invalid double permute");
4176  }
4177  } else {
4178  // Just use NewBytes on the operands.
4179  Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
4180  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
4181  if (NewBytes[J] >= 0)
4182  Bytes[J] = I * SystemZ::VectorBytes + J;
4183  }
4184  }
4185  }
4186 
4187  // Now we just have 2 inputs. Put the second operand in Ops[1].
4188  if (Stride > 1) {
4189  Ops[1] = Ops[Stride];
4190  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4191  if (Bytes[I] >= int(SystemZ::VectorBytes))
4192  Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
4193  }
4194 
4195  // Look for an instruction that can do the permute without resorting
4196  // to VPERM.
4197  unsigned OpNo0, OpNo1;
4198  SDValue Op;
4199  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
4200  Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
4201  else
4202  Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
4203  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4204 }
4205 
4206 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4207 static bool isScalarToVector(SDValue Op) {
4208  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
4209  if (!Op.getOperand(I).isUndef())
4210  return false;
4211  return true;
4212 }
4213 
4214 // Return a vector of type VT that contains Value in the first element.
4215 // The other elements don't matter.
4216 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4217  SDValue Value) {
4218  // If we have a constant, replicate it to all elements and let the
4219  // BUILD_VECTOR lowering take care of it.
4220  if (Value.getOpcode() == ISD::Constant ||
4221  Value.getOpcode() == ISD::ConstantFP) {
4222  SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
4223  return DAG.getBuildVector(VT, DL, Ops);
4224  }
4225  if (Value.isUndef())
4226  return DAG.getUNDEF(VT);
4227  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4228 }
4229 
4230 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
4231 // element 1. Used for cases in which replication is cheap.
4232 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4233  SDValue Op0, SDValue Op1) {
4234  if (Op0.isUndef()) {
4235  if (Op1.isUndef())
4236  return DAG.getUNDEF(VT);
4237  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
4238  }
4239  if (Op1.isUndef())
4240  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
4241  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
4242  buildScalarToVector(DAG, DL, VT, Op0),
4243  buildScalarToVector(DAG, DL, VT, Op1));
4244 }
4245 
4246 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4247 // vector for them.
4248 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
4249  SDValue Op1) {
4250  if (Op0.isUndef() && Op1.isUndef())
4251  return DAG.getUNDEF(MVT::v2i64);
4252  // If one of the two inputs is undefined then replicate the other one,
4253  // in order to avoid using another register unnecessarily.
4254  if (Op0.isUndef())
4255  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4256  else if (Op1.isUndef())
4257  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4258  else {
4259  Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4260  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4261  }
4262  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
4263 }
4264 
4265 // Try to represent constant BUILD_VECTOR node BVN using a
4266 // SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
4267 // on success.
4268 static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
4269  EVT ElemVT = BVN->getValueType(0).getVectorElementType();
4270  unsigned BytesPerElement = ElemVT.getStoreSize();
4271  for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
4272  SDValue Op = BVN->getOperand(I);
4273  if (!Op.isUndef()) {
4274  uint64_t Value;
4275  if (Op.getOpcode() == ISD::Constant)
4276  Value = cast<ConstantSDNode>(Op)->getZExtValue();
4277  else if (Op.getOpcode() == ISD::ConstantFP)
4278  Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
4279  .getZExtValue());
4280  else
4281  return false;
4282  for (unsigned J = 0; J < BytesPerElement; ++J) {
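      // Each byte of the constant must be either all ones or all zeros;
      // byte 0 of the vector maps to the most significant mask bit.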
4283  uint64_t Byte = (Value >> (J * 8)) & 0xff;
4284  if (Byte == 0xff)
4285  Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
4286  else if (Byte != 0)
4287  return false;
4288  }
4289  }
4290  }
4291  return true;
4292 }
4293 
4294 // Try to load a vector constant in which BitsPerElement-bit value Value
4295 // is replicated to fill the vector. VT is the type of the resulting
4296 // constant, which may have elements of a different size from BitsPerElement.
4297 // Return the SDValue of the constant on success, otherwise return
4298 // an empty value.
4299 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
4300  const SystemZInstrInfo *TII,
4301  const SDLoc &DL, EVT VT, uint64_t Value,
4302  unsigned BitsPerElement) {
4303  // Signed 16-bit values can be replicated using VREPI.
4304  // Mark the constants as opaque or DAGCombiner will convert back to
4305  // BUILD_VECTOR.
4306  int64_t SignedValue = SignExtend64(Value, BitsPerElement);
4307  if (isInt<16>(SignedValue)) {
4308  MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
4309  SystemZ::VectorBits / BitsPerElement);
4310  SDValue Op = DAG.getNode(
4311  SystemZISD::REPLICATE, DL, VecVT,
4312  DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
4313  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4314  }
4315  // See whether rotating the constant left some N places gives a value that
4316  // is one less than a power of 2 (i.e. all zeros followed by all ones).
4317  // If so we can use VGM.
4318  unsigned Start, End;
4319  if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
4320  // isRxSBGMask returns the bit numbers for a full 64-bit value,
4321  // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
4322  // bit numbers for an BitsPerElement value, so that 0 denotes
4323  // 1 << (BitsPerElement-1).
4324  Start -= 64 - BitsPerElement;
4325  End -= 64 - BitsPerElement;
4326  MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
4327  SystemZ::VectorBits / BitsPerElement);
4328  SDValue Op = DAG.getNode(
4329  SystemZISD::ROTATE_MASK, DL, VecVT,
4330  DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
4331  DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
4332  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4333  }
4334  return SDValue();
4335 }
4336 
4337 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4338 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4339 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
4340 // would benefit from this representation and return it if so.
4341 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
4342  BuildVectorSDNode *BVN) {
4343  EVT VT = BVN->getValueType(0);
4344  unsigned NumElements = VT.getVectorNumElements();
4345 
4346  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4347  // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
4348  // need a BUILD_VECTOR, add an additional placeholder operand for that
4349  // BUILD_VECTOR and store its operands in ResidueOps.
4350  GeneralShuffle GS(VT);
4351  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
4352  bool FoundOne = false;
4353  for (unsigned I = 0; I < NumElements; ++I) {
4354  SDValue Op = BVN->getOperand(I);
4355  if (Op.getOpcode() == ISD::TRUNCATE)
4356  Op = Op.getOperand(0);
4357  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4358  Op.getOperand(1).getOpcode() == ISD::Constant) {
4359  unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
4360  if (!GS.add(Op.getOperand(0), Elem))
4361  return SDValue();
4362  FoundOne = true;
4363  } else if (Op.isUndef()) {
4364  GS.addUndef();
4365  } else {
4366  if (!GS.add(SDValue(), ResidueOps.size()))
4367  return SDValue();
4368  ResidueOps.push_back(BVN->getOperand(I));
4369  }
4370  }
4371 
4372  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
4373  if (!FoundOne)
4374  return SDValue();
4375 
4376  // Create the BUILD_VECTOR for the remaining elements, if any.
4377  if (!ResidueOps.empty()) {
4378  while (ResidueOps.size() < NumElements)
4379  ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
4380  for (auto &Op : GS.Ops) {
4381  if (!Op.getNode()) {
4382  Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
4383  break;
4384  }
4385  }
4386  }
4387  return GS.getNode(DAG, SDLoc(BVN));
4388 }
4389 
4390 // Combine GPR scalar values Elems into a vector of type VT.
4391 static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4392  SmallVectorImpl<SDValue> &Elems) {
4393  // See whether there is a single replicated value.
4394  SDValue Single;
4395  unsigned int NumElements = Elems.size();
4396  unsigned int Count = 0;
4397  for (auto Elem : Elems) {
4398  if (!Elem.isUndef()) {
4399  if (!Single.getNode())
4400  Single = Elem;
4401  else if (Elem != Single) {
4402  Single = SDValue();
4403  break;
4404  }
4405  Count += 1;
4406  }
4407  }
4408  // There are three cases here:
4409  //
4410  // - if the only defined element is a loaded one, the best sequence
4411  // is a replicating load.
4412  //
4413  // - otherwise, if the only defined element is an i64 value, we will
4414  // end up with the same VLVGP sequence regardless of whether we short-cut
4415  // for replication or fall through to the later code.
4416  //
4417  // - otherwise, if the only defined element is an i32 or smaller value,
4418  // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
4419  // This is only a win if the single defined element is used more than once.
4420  // In other cases we're better off using a single VLVGx.
4421  if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
4422  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
4423 
4424  // If all elements are loads, use VLREP/VLEs (below).
4425  bool AllLoads = true;
4426  for (auto Elem : Elems)
4427  if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
4428  AllLoads = false;
4429  break;
4430  }
4431 
4432  // The best way of building a v2i64 from two i64s is to use VLVGP.
4433  if (VT == MVT::v2i64 && !AllLoads)
4434  return joinDwords(DAG, DL, Elems[0], Elems[1]);
4435 
4436  // Use a 64-bit merge high to combine two doubles.
4437  if (VT == MVT::v2f64 && !AllLoads)
4438  return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4439 
4440  // Build v4f32 values directly from the FPRs:
4441  //
4442  // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
4443  // V V VMRHF
4444  // <ABxx> <CDxx>
4445  // V VMRHG
4446  // <ABCD>
4447  if (VT == MVT::v4f32 && !AllLoads) {
4448  SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4449  SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
4450  // Avoid unnecessary undefs by reusing the other operand.
4451  if (Op01.isUndef())
4452  Op01 = Op23;
4453  else if (Op23.isUndef())
4454  Op23 = Op01;
4455  // Merging identical replications is a no-op.
4456  if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
4457  return Op01;
4458  Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
4459  Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
4460  SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
4461  DL, MVT::v2i64, Op01, Op23);
4462  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4463  }
4464 
4465  // Collect the constant terms.
4467  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
4468 
4469  unsigned NumConstants = 0;
4470  for (unsigned I = 0; I < NumElements; ++I) {
4471  SDValue Elem = Elems[I];
4472  if (Elem.getOpcode() == ISD::Constant ||
4473  Elem.getOpcode() == ISD::ConstantFP) {
4474  NumConstants += 1;
4475  Constants[I] = Elem;
4476  Done[I] = true;
4477  }
4478  }
4479  // If there was at least one constant, fill in the other elements of
4480  // Constants with undefs to get a full vector constant and use that
4481  // as the starting point.
4482  SDValue Result;
4483  SDValue ReplicatedVal;
4484  if (NumConstants > 0) {
4485  for (unsigned I = 0; I < NumElements; ++I)
4486  if (!Constants[I].getNode())
4487  Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
4488  Result = DAG.getBuildVector(VT, DL, Constants);
4489  } else {
4490  // Otherwise try to use VLREP or VLVGP to start the sequence in order to
4491  // avoid a false dependency on any previous contents of the vector
4492  // register.
4493 
4494  // Use a VLREP if at least one element is a load. Make sure to replicate
4495  // the load with the most elements having its value.
4496  std::map<const SDNode*, unsigned> UseCounts;
4497  SDNode *LoadMaxUses = nullptr;
4498  for (unsigned I = 0; I < NumElements; ++I)
4499  if (Elems[I].getOpcode() == ISD::LOAD &&
4500  cast<LoadSDNode>(Elems[I])->isUnindexed()) {
4501  SDNode *Ld = Elems[I].getNode();
4502  UseCounts[Ld]++;
4503  if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
4504  LoadMaxUses = Ld;
4505  }
4506  if (LoadMaxUses != nullptr) {
4507  ReplicatedVal = SDValue(LoadMaxUses, 0);
4508  Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
4509  } else {
4510  // Try to use VLVGP.
4511  unsigned I1 = NumElements / 2 - 1;
4512  unsigned I2 = NumElements - 1;
4513  bool Def1 = !Elems[I1].isUndef();
4514  bool Def2 = !Elems[I2].isUndef();
4515  if (Def1 || Def2) {
4516  SDValue Elem1 = Elems[Def1 ? I1 : I2];
4517  SDValue Elem2 = Elems[Def2 ? I2 : I1];
4518  Result = DAG.getNode(ISD::BITCAST, DL, VT,
4519  joinDwords(DAG, DL, Elem1, Elem2));
4520  Done[I1] = true;
4521  Done[I2] = true;
4522  } else
4523  Result = DAG.getUNDEF(VT);
4524  }
4525  }
4526 
4527  // Use VLVGx to insert the other elements.
4528  for (unsigned I = 0; I < NumElements; ++I)
4529  if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
4530  Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
4531  DAG.getConstant(I, DL, MVT::i32));
4532  return Result;
4533 }
4534 
4535 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
4536  SelectionDAG &DAG) const {
4537  const SystemZInstrInfo *TII =
4538  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
4539  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
4540  SDLoc DL(Op);
4541  EVT VT = Op.getValueType();
4542 
4543  if (BVN->isConstant()) {
4544  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
4545  // preferred way of creating all-zero and all-one vectors so give it
4546  // priority over other methods below.
4547  uint64_t Mask = 0;
4548  if (tryBuildVectorByteMask(BVN, Mask)) {
4549  SDValue Op = DAG.getNode(
4550  SystemZISD::BYTE_MASK, DL, MVT::v16i8,
4551  DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
4552  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4553  }
4554 
4555  // Try using some form of replication.
4556  APInt SplatBits, SplatUndef;
4557  unsigned SplatBitSize;
4558  bool HasAnyUndefs;
4559  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4560  8, true) &&
4561  SplatBitSize <= 64) {
4562  // First try assuming that any undefined bits above the highest set bit
4563  // and below the lowest set bit are 1s. This increases the likelihood of
4564  // being able to use a sign-extended element value in VECTOR REPLICATE
4565  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
4566  uint64_t SplatBitsZ = SplatBits.getZExtValue();
4567  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
4568  uint64_t Lower = (SplatUndefZ
4569  & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
4570  uint64_t Upper = (SplatUndefZ
4571  & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
4572  uint64_t Value = SplatBitsZ | Upper | Lower;
4573  SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
4574  SplatBitSize);
4575  if (Op.getNode())
4576  return Op;
4577 
4578  // Now try assuming that any undefined bits between the first and
4579  // last defined set bits are set. This increases the chances of
4580  // using a non-wraparound mask.
4581  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
4582  Value = SplatBitsZ | Middle;
4583  Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
4584  if (Op.getNode())
4585  return Op;
4586  }
4587 
4588  // Fall back to loading it from memory.
4589  return SDValue();
4590  }
4591 
4592  // See if we should use shuffles to construct the vector from other vectors.
4593  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
4594  return Res;
4595 
4596  // Detect SCALAR_TO_VECTOR conversions.
4597  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
4598  return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
4599 
4600  // Otherwise use buildVector to build the vector up from GPRs.
4601  unsigned NumElements = Op.getNumOperands();
4602  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
4603  for (unsigned I = 0; I < NumElements; ++I)
4604  Ops[I] = Op.getOperand(I);
4605  return buildVector(DAG, DL, VT, Ops);
4606 }
4607 
4608 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
4609  SelectionDAG &DAG) const {
4610  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
4611  SDLoc DL(Op);
4612  EVT VT = Op.getValueType();
4613  unsigned NumElements = VT.getVectorNumElements();
4614 
4615