//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Intrinsics.h"
#include <cctype>
28 
29 #define DEBUG_TYPE "systemz-lower"
30 
31 namespace {
32 // Represents a sequence for extracting a 0/1 value from an IPM result:
33 // (((X ^ XORValue) + AddValue) >> Bit)
34 struct IPMConversion {
35  IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
36  : XORValue(xorValue), AddValue(addValue), Bit(bit) {}
37 
38  int64_t XORValue;
39  int64_t AddValue;
40  unsigned Bit;
41 };
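
// The simplest instances select an existing bit of the IPM result directly:
// with XORValue == 0 and AddValue == 0, Bit == SystemZ::IPM_CC extracts the
// low CC bit (distinguishing CC 1/3 from CC 0/2) and Bit == IPM_CC + 1 the
// high CC bit; getIPMConversion below builds these and the add/xor variants.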

// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In)
    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
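
// For an integer equality test, for instance, the fields would end up as
// Opcode == SystemZISD::ICMP, CCValid == SystemZ::CCMASK_ICMP and
// CCMask == SystemZ::CCMASK_CMP_EQ (an illustrative combination; the
// adjust* helpers later in this file rewrite Op0, Op1 and CCMask in place).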
} // end anonymous namespace

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Legal);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
  }

  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32, Legal);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  setOperationAction(ISD::FMA, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::ROTL);

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return false;
  default:
    break;
  }

  return false;
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  return Imm.isZero() || Imm.isNegZero();
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                           unsigned,
                                                           unsigned,
                                                           bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                  const AddrMode &AM, Type *Ty,
                                                  unsigned AS) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  // Indexing is OK but no scale factor can be applied.
  return AM.Scale == 0 || AM.Scale == 1;
}
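
// For example, "Reg + 4000" and "Reg1 + Reg2" are accepted, while
// "Reg + 600000" (needs more than 20 signed bits) and "Reg1 + 4*Reg2"
// (scaled index) are not; this mirrors the D(X,B) and DL(X,B) address
// forms that SystemZ memory instructions provide.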

bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I,
                                                      int64_t Offset) const {
  // This only applies to z13.
  if (!Subtarget.hasVector())
    return true;

  // * Use LDE instead of LE/LEY to avoid partial register
  //   dependencies (LDE only supports small offsets).
  // * Utilize the vector registers to hold floating point
  //   values (vector load / store instructions only support small
  //   offsets).

  assert (isa<LoadInst>(I) || isa<StoreInst>(I));
  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                       I->getOperand(0)->getType());
  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
  bool IsVectorAccess = MemAccessTy->isVectorTy();

  // A store of an extracted vector element will be combined into a VSTE type
  // instruction.
  if (!IsVectorAccess && isa<StoreInst>(I)) {
    Value *DataOp = I->getOperand(0);
    if (isa<ExtractElementInst>(DataOp))
      IsVectorAccess = true;
  }

  // A load which gets inserted into a vector element will be combined into a
  // VLE type instruction.
  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
    User *LoadUser = *I->user_begin();
    if (isa<InsertElementInst>(LoadUser))
      IsVectorAccess = true;
  }

  if (!isUInt<12>(Offset) && (IsFPAccess || IsVectorAccess))
    return false;

  return true;
}

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < 16 && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
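
// For example, the constraint "{r5}" with a 64-bit value and the GR64
// register map resolves to SystemZ::R5D, while an out-of-range number
// such as "{r17}" falls through to the (0U, nullptr) failure result.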

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  return CI->isTailCall();
}

// We do not yet support 128-bit single-element vector types.  If the user
// attempts to use such types as function argument or return type, prefer
// to error out instead of emitting code violating the ABI.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (ArgVT.isVector() && !VT.isVector())
    report_fatal_error("Unsupported vector argument or return type");
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
  for (unsigned i = 0; i < Ins.size(); ++i)
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  for (unsigned i = 0; i < Outs.size(); ++i)
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

SDValue SystemZTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
                                     VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      // If the original argument was split (e.g. i128), we need
      // to load all parts of it here (using the same address).
      unsigned ArgIndex = Ins[I].OrigArgIndex;
      assert (Ins[I].PartOffset == 0);
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[I + 1];
        unsigned PartOffset = Ins[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++I;
      }
    } else
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(MF, FI));
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
    }
  }

  return Chain;
}

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs,
                              SmallVectorImpl<ISD::OutputArg> &Outs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the callee-saved argument register R6, or if the call uses
  // the callee-saved register arguments SwiftSelf and SwiftError.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    unsigned Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
      return false;
  }
  return true;
}

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, DL, PtrVT, true),
                                 DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[I].OrigArgIndex;
      assert (Outs[I].PartOffset == 0);
      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[I + 1];
        unsigned PartOffset = Outs[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++I;
      }
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall)
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 8> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}

bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
               MachineFunction &MF, bool isVarArg,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
               LLVMContext &Context) const {
  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Special case that we cannot easily detect in RetCC_SystemZ since
  // i128 is not a legal type.
  for (auto &Out : Outs)
    if (Out.ArgVT == MVT::i128)
      return false;

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    unsigned Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}

SDValue SystemZTargetLowering::prepareVolatileOrAtomicLoad(
    SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const {
  return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}

// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}

// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfcedbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
    Opcode = SystemZISD::VFTCI;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_tdc:
    Opcode = SystemZISD::TDC;
    CCValid = SystemZ::CCMASK_TDC;
    return true;

  default:
    return false;
  }
}

// Emit an intrinsic with chain with a glued value instead of its CC result.
static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
                                             unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  Ops.push_back(Op.getOperand(0));
  for (unsigned I = 2; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
  SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
  SDValue OldChain = SDValue(Op.getNode(), 1);
  SDValue NewChain = SDValue(Intr.getNode(), 0);
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
  return Intr;
}

// Emit an intrinsic with a glued value instead of its CC result.
static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
                                     unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  for (unsigned I = 1; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  if (Op->getNumValues() == 1)
    return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
  assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
  SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
  return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
}

// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}
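
// For example, ISD::SETUGT maps to CCMASK_CMP_UO | CCMASK_CMP_GT: for an
// integer comparison the UO bit just records that the test is unsigned,
// while for floating point it additionally accepts the "unordered" CC.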

// Return a sequence for getting a 1 from an IPM result when CC has a
// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
// The handling of CC values outside CCValid doesn't matter.
static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
  // Deal with cases where the result can be taken directly from a bit
  // of the IPM result.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC);
  if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC + 1);

  // Deal with cases where we can add a value to force the sign bit
  // to contain the right value.  Putting the bit in 31 means we can
  // use SRL rather than RISBG(L), and also makes it easier to get a
  // 0/-1 value, so it has priority over the other tests below.
  //
  // These sequences rely on the fact that the upper two bits of the
  // IPM result are zero.
  uint64_t TopBit = uint64_t(1) << 31;
  if (CCMask == (CCValid & SystemZ::CCMASK_0))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
    return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2)))
    return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_3))
    return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);

  // Next try inverting the value and testing a bit.  0/1 could be
  // handled this way too, but we dealt with that case above.
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
    return IPMConversion(-1, 0, SystemZ::IPM_CC);

  // Handle cases where adding a value forces a non-sign bit to contain
  // the right value.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
    return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);

  // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All these can be
  // done by inverting the low CC bit and applying one of the
  // sign-based extractions above.
  if (CCMask == (CCValid & SystemZ::CCMASK_1))
    return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_2))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (1 << SystemZ::IPM_CC), 31);

  llvm_unreachable("Unexpected CC combination");
}
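
// As a worked example, CCMask == SystemZ::CCMASK_0 (CC 0 only) hits the
// first sign-bit case above: IPM leaves the CC in bits 29-28 with bits
// 31-30 zero, so X - (1 << SystemZ::IPM_CC) borrows into bit 31 exactly
// when CC is 0, and ">> 31" then extracts the desired 0/1 value.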

// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}
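
// For example, a signed "X > -1" is rewritten here as "X >= 0": XORing
// CCMASK_CMP_GT with CCMASK_CMP_EQ yields CCMASK_CMP_GE, and comparing
// against zero opens the way for LOAD AND TEST later on.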

// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
                             Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // If the constant is in range, we can use any comparison.
    C.ICmpType = SystemZICMP::Any;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType)
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
                           Load->getBasePtr(), Load->getPointerInfo(),
                           Load->getMemoryVT(), Load->getAlignment(),
                           Load->getMemOperand()->getFlags());

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}
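
// For example, an equality test between a zero-extending i8 load and the
// constant 200 is within range of the unextended byte (Mask == 255), so it
// can later be selected as a single CLI against the loaded memory.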

// Return true if Op is either an unextended load, or a load suitable
// for integer register-memory comparisons of type ICmpType.
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
  if (Load) {
    // There are no instructions to compare a register with a memory byte.
    if (Load->getMemoryVT() == MVT::i8)
      return false;
    // Otherwise decide on extension type.
    switch (Load->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      return true;
    case ISD::SEXTLOAD:
      return ICmpType != SystemZICMP::UnsignedOnly;
    case ISD::ZEXTLOAD:
      return ICmpType != SystemZICMP::SignedOnly;
    default:
      break;
    }
  }
  return false;
}

// Return true if it is better to swap the operands of C.
static bool shouldSwapCmpOperands(const Comparison &C) {
  // Leave f128 comparisons alone, since they have no memory forms.
  if (C.Op0.getValueType() == MVT::f128)
    return false;

  // Always keep a floating-point constant second, since comparisons with
  // zero can use LOAD TEST and comparisons with other constants make a
  // natural memory operand.
  if (isa<ConstantFPSDNode>(C.Op1))
    return false;

  // Never swap comparisons with zero since there are many ways to optimize
  // those later.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
    return false;

  // Also keep natural memory operands second if the loaded value is
  // only used here.  Several comparisons have memory forms.
  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
    return false;

  // Look for cases where Op0 is a single-use load and Op1 isn't.
  // In that case we generally prefer the memory to be second.
  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
    // The only exceptions are when the second operand is a constant and
    // we can use things like CHHSI.
    if (!ConstOp1)
      return true;
    // The unsigned memory-immediate instructions can handle 16-bit
    // unsigned integers.
    if (C.ICmpType != SystemZICMP::SignedOnly &&
        isUInt<16>(ConstOp1->getZExtValue()))
      return false;
    // The signed memory-immediate instructions can handle 16-bit
    // signed integers.
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
        isInt<16>(ConstOp1->getSExtValue()))
      return false;
    return true;
  }

  // Try to promote the use of CGFR and CLGFR.
  unsigned Opcode0 = C.Op0.getOpcode();
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly &&
      Opcode0 == ISD::AND &&
      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
    return true;

  return false;
}
1795 
1796 // Return a version of comparison CC mask CCMask in which the LT and GT
1797 // actions are swapped.
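// For example, a mask of (CCMASK_CMP_LT | CCMASK_CMP_UO) becomes
// (CCMASK_CMP_GT | CCMASK_CMP_UO); the EQ and UO bits pass through unchanged.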
1798 static unsigned reverseCCMask(unsigned CCMask) {
1799  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
1800  (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
1801  (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
1802  (CCMask & SystemZ::CCMASK_CMP_UO));
1803 }
1804 
1805 // Check whether C tests for equality between X and Y and whether X - Y
1806 // or Y - X is also computed. In that case it's better to compare the
1807 // result of the subtraction against zero.
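// For example, if both "a == b" and "a - b" occur, comparing "a - b" with
// zero lets the subtraction itself set CC, saving a separate compare.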
1808 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
1809  Comparison &C) {
1810  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
1811  C.CCMask == SystemZ::CCMASK_CMP_NE) {
1812  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
1813  SDNode *N = *I;
1814  if (N->getOpcode() == ISD::SUB &&
1815  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
1816  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
1817  C.Op0 = SDValue(N, 0);
1818  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
1819  return;
1820  }
1821  }
1822  }
1823 }
1824 
1825 // Check whether C compares a floating-point value with zero and if that
1826 // floating-point value is also negated. In this case we can use the
1827 // negation to set CC, so avoiding separate LOAD AND TEST and
1828 // LOAD (NEGATIVE/COMPLEMENT) instructions.
1829 static void adjustForFNeg(Comparison &C) {
1830  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
1831  if (C1 && C1->isZero()) {
1832  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
1833  SDNode *N = *I;
1834  if (N->getOpcode() == ISD::FNEG) {
1835  C.Op0 = SDValue(N, 0);
1836  C.CCMask = reverseCCMask(C.CCMask);
1837  return;
1838  }
1839  }
1840  }
1841 }
1842 
1843 // Check whether C compares (shl X, 32) with 0 and whether X is
1844 // also sign-extended. In that case it is better to test the result
1845 // of the sign extension using LTGFR.
1846 //
1847 // This case is important because InstCombine transforms a comparison
1848 // with (sext (trunc X)) into a comparison with (shl X, 32).
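//
// (shl X, 32) is zero exactly when the low 32 bits of X are zero, and its
// sign bit is bit 31 of X, so comparing it with 0 gives the same CC as
// testing the 32->64-bit sign extension of X, which LTGFR does directly.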
1849 static void adjustForLTGFR(Comparison &C) {
1850  // Check for a comparison between (shl X, 32) and 0.
1851  if (C.Op0.getOpcode() == ISD::SHL &&
1852  C.Op0.getValueType() == MVT::i64 &&
1853  C.Op1.getOpcode() == ISD::Constant &&
1854  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
1855  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
1856  if (C1 && C1->getZExtValue() == 32) {
1857  SDValue ShlOp0 = C.Op0.getOperand(0);
1858  // See whether X has any SIGN_EXTEND_INREG uses.
1859  for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
1860  SDNode *N = *I;
1861  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
1862  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
1863  C.Op0 = SDValue(N, 0);
1864  return;
1865  }
1866  }
1867  }
1868  }
1869 }
1870 
1871 // If C compares the truncation of an extending load, try to compare
1872 // the untruncated value instead. This exposes more opportunities to
1873 // reuse CC.
1874 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
1875  Comparison &C) {
1876  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
1877  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
1878  C.Op1.getOpcode() == ISD::Constant &&
1879  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
1880  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
1881  if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
1882  unsigned Type = L->getExtensionType();
1883  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
1884  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
1885  C.Op0 = C.Op0.getOperand(0);
1886  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
1887  }
1888  }
1889  }
1890 }
1891 
1892 // Return true if shift operation N has an in-range constant shift value.
1893 // Store it in ShiftVal if so.
1894 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
1895  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
1896  if (!Shift)
1897  return false;
1898 
1899  uint64_t Amount = Shift->getZExtValue();
1900  if (Amount >= N.getValueSizeInBits())
1901  return false;
1902 
1903  ShiftVal = Amount;
1904  return true;
1905 }
1906 
1907 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
1908 // instruction and whether the CC value is descriptive enough to handle
1909 // a comparison of type Opcode between the AND result and CmpVal.
1910 // CCMask says which comparison result is being tested and BitSize is
1911 // the number of bits in the operands. If TEST UNDER MASK can be used,
1912 // return the corresponding CC mask, otherwise return 0.
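// For example, with Mask == 0x8000, CmpVal == 0 and CCMask == CCMASK_CMP_NE,
// the single tested bit must be 1, so the result is CCMASK_TM_SOME_1.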
1913 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
1914  uint64_t Mask, uint64_t CmpVal,
1915  unsigned ICmpType) {
1916  assert(Mask != 0 && "ANDs with zero should have been removed by now");
1917 
1918  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
1919  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
1920  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
1921  return 0;
1922 
1923  // Work out the masks for the lowest and highest bits.
1924  unsigned HighShift = 63 - countLeadingZeros(Mask);
1925  uint64_t High = uint64_t(1) << HighShift;
1926  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
1927 
1928  // Signed ordered comparisons are effectively unsigned if the sign
1929  // bit is dropped.
1930  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
1931 
1932  // Check for equality comparisons with 0, or the equivalent.
1933  if (CmpVal == 0) {
1934  if (CCMask == SystemZ::CCMASK_CMP_EQ)
1935  return SystemZ::CCMASK_TM_ALL_0;
1936  if (CCMask == SystemZ::CCMASK_CMP_NE)
1937  return SystemZ::CCMASK_TM_SOME_1;
1938  }
1939  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
1940  if (CCMask == SystemZ::CCMASK_CMP_LT)
1941  return SystemZ::CCMASK_TM_ALL_0;
1942  if (CCMask == SystemZ::CCMASK_CMP_GE)
1943  return SystemZ::CCMASK_TM_SOME_1;
1944  }
1945  if (EffectivelyUnsigned && CmpVal < Low) {
1946  if (CCMask == SystemZ::CCMASK_CMP_LE)
1947  return SystemZ::CCMASK_TM_ALL_0;
1948  if (CCMask == SystemZ::CCMASK_CMP_GT)
1949  return SystemZ::CCMASK_TM_SOME_1;
1950  }
1951 
1952  // Check for equality comparisons with the mask, or the equivalent.
1953  if (CmpVal == Mask) {
1954  if (CCMask == SystemZ::CCMASK_CMP_EQ)
1955  return SystemZ::CCMASK_TM_ALL_1;
1956  if (CCMask == SystemZ::CCMASK_CMP_NE)
1957  return SystemZ::CCMASK_TM_SOME_0;
1958  }
1959  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
1960  if (CCMask == SystemZ::CCMASK_CMP_GT)
1961  return SystemZ::CCMASK_TM_ALL_1;
1962  if (CCMask == SystemZ::CCMASK_CMP_LE)
1963  return SystemZ::CCMASK_TM_SOME_0;
1964  }
1965  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
1966  if (CCMask == SystemZ::CCMASK_CMP_GE)
1967  return SystemZ::CCMASK_TM_ALL_1;
1968  if (CCMask == SystemZ::CCMASK_CMP_LT)
1969  return SystemZ::CCMASK_TM_SOME_0;
1970  }
1971 
1972  // Check for ordered comparisons with the top bit.
1973  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
1974  if (CCMask == SystemZ::CCMASK_CMP_LE)
1975  return SystemZ::CCMASK_TM_MSB_0;
1976  if (CCMask == SystemZ::CCMASK_CMP_GT)
1977  return SystemZ::CCMASK_TM_MSB_1;
1978  }
1979  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
1980  if (CCMask == SystemZ::CCMASK_CMP_LT)
1981  return SystemZ::CCMASK_TM_MSB_0;
1982  if (CCMask == SystemZ::CCMASK_CMP_GE)
1983  return SystemZ::CCMASK_TM_MSB_1;
1984  }
1985 
1986  // If there are just two bits, we can do equality checks for Low and High
1987  // as well.
1988  if (Mask == Low + High) {
1989  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
1990  return SystemZ::CCMASK_TM_MIXED_MSB_0;
1991  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
1992  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
1993  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
1994  return SystemZ::CCMASK_TM_MIXED_MSB_1;
1995  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
1996  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
1997  }
1998 
1999  // Looks like we've exhausted our options.
2000  return 0;
2001 }
2002 
2003 // See whether C can be implemented as a TEST UNDER MASK instruction.
2004 // Update the arguments with the TM version if so.
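// For example, "(x & 1) == 0" becomes a TM of x against mask 1 with
// CC mask CCMASK_TM_ALL_0, which typically becomes a TMLL instruction.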
2005 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2006  Comparison &C) {
2007  // Check that we have a comparison with a constant.
2008  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2009  if (!ConstOp1)
2010  return;
2011  uint64_t CmpVal = ConstOp1->getZExtValue();
2012 
2013  // Check whether the nonconstant input is an AND with a constant mask.
2014  Comparison NewC(C);
2015  uint64_t MaskVal;
2016  ConstantSDNode *Mask = nullptr;
2017  if (C.Op0.getOpcode() == ISD::AND) {
2018  NewC.Op0 = C.Op0.getOperand(0);
2019  NewC.Op1 = C.Op0.getOperand(1);
2020  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2021  if (!Mask)
2022  return;
2023  MaskVal = Mask->getZExtValue();
2024  } else {
2025  // There is no instruction to compare with a 64-bit immediate
2026  // so use TMHH instead if possible. We need an unsigned ordered
2027  // comparison with an i64 immediate.
2028  if (NewC.Op0.getValueType() != MVT::i64 ||
2029  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2030  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2031  NewC.ICmpType == SystemZICMP::SignedOnly)
2032  return;
2033  // Convert LE and GT comparisons into LT and GE.
2034  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2035  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2036  if (CmpVal == uint64_t(-1))
2037  return;
2038  CmpVal += 1;
2039  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2040  }
2041  // If the low N bits of Op1 are zero then the low N bits of Op0 can
2042  // be masked off without changing the result.
2043  MaskVal = -(CmpVal & -CmpVal);
2044  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2045  }
2046  if (!MaskVal)
2047  return;
2048 
2049  // Check whether the combination of mask, comparison value and comparison
2050  // type are suitable.
2051  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2052  unsigned NewCCMask, ShiftVal;
2053  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2054  NewC.Op0.getOpcode() == ISD::SHL &&
2055  isSimpleShift(NewC.Op0, ShiftVal) &&
2056  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2057  MaskVal >> ShiftVal,
2058  CmpVal >> ShiftVal,
2059  SystemZICMP::Any))) {
2060  NewC.Op0 = NewC.Op0.getOperand(0);
2061  MaskVal >>= ShiftVal;
2062  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2063  NewC.Op0.getOpcode() == ISD::SRL &&
2064  isSimpleShift(NewC.Op0, ShiftVal) &&
2065  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2066  MaskVal << ShiftVal,
2067  CmpVal << ShiftVal,
2068  SystemZICMP::UnsignedOnly))) {
2069  NewC.Op0 = NewC.Op0.getOperand(0);
2070  MaskVal <<= ShiftVal;
2071  } else {
2072  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2073  NewC.ICmpType);
2074  if (!NewCCMask)
2075  return;
2076  }
2077 
2078  // Go ahead and make the change.
2079  C.Opcode = SystemZISD::TM;
2080  C.Op0 = NewC.Op0;
2081  if (Mask && Mask->getZExtValue() == MaskVal)
2082  C.Op1 = SDValue(Mask, 0);
2083  else
2084  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2085  C.CCValid = SystemZ::CCMASK_TM;
2086  C.CCMask = NewCCMask;
2087 }
2088 
2089 // Return a Comparison that tests the condition-code result of intrinsic
2090 // node Call against constant integer CC using comparison code Cond.
2091 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2092 // and CCValid is the set of possible condition-code results.
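// In the 4-bit CC mask, bit 3 corresponds to CC 0 and bit 0 to CC 3, so for
// example Cond == SETEQ with CC == 1 yields a CCMask of 1 << 2.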
2093 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2094  SDValue Call, unsigned CCValid, uint64_t CC,
2095  ISD::CondCode Cond) {
2096  Comparison C(Call, SDValue());
2097  C.Opcode = Opcode;
2098  C.CCValid = CCValid;
2099  if (Cond == ISD::SETEQ)
2100  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2101  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2102  else if (Cond == ISD::SETNE)
2103  // ...and the inverse of that.
2104  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2105  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2106  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2107  // always true for CC>3.
2108  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2109  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2110  // ...and the inverse of that.
2111  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2112  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2113  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2114  // always true for CC>3.
2115  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2116  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2117  // ...and the inverse of that.
2118  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2119  else
2120  llvm_unreachable("Unexpected integer comparison type");
2121  C.CCMask &= CCValid;
2122  return C;
2123 }
2124 
2125 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2126 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2127  ISD::CondCode Cond, const SDLoc &DL) {
2128  if (CmpOp1.getOpcode() == ISD::Constant) {
2129  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2130  unsigned Opcode, CCValid;
2131  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2132  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2133  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2134  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2135  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2136  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2137  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2138  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2139  }
2140  Comparison C(CmpOp0, CmpOp1);
2141  C.CCMask = CCMaskForCondCode(Cond);
2142  if (C.Op0.getValueType().isFloatingPoint()) {
2143  C.CCValid = SystemZ::CCMASK_FCMP;
2144  C.Opcode = SystemZISD::FCMP;
2145  adjustForFNeg(C);
2146  } else {
2147  C.CCValid = SystemZ::CCMASK_ICMP;
2148  C.Opcode = SystemZISD::ICMP;
2149  // Choose the type of comparison. Equality and inequality tests can
2150  // use either signed or unsigned comparisons. The choice also doesn't
2151  // matter if both sign bits are known to be clear. In those cases we
2152  // want to give the main isel code the freedom to choose whichever
2153  // form fits best.
2154  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2155  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2156  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2157  C.ICmpType = SystemZICMP::Any;
2158  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2159  C.ICmpType = SystemZICMP::UnsignedOnly;
2160  else
2161  C.ICmpType = SystemZICMP::SignedOnly;
2162  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2163  adjustZeroCmp(DAG, DL, C);
2164  adjustSubwordCmp(DAG, DL, C);
2165  adjustForSubtraction(DAG, DL, C);
2166  adjustForLTGFR(C);
2167  adjustICmpTruncate(DAG, DL, C);
2168  }
2169 
2170  if (shouldSwapCmpOperands(C)) {
2171  std::swap(C.Op0, C.Op1);
2172  C.CCMask = reverseCCMask(C.CCMask);
2173  }
2174 
2175  adjustForTestUnderMask(DAG, DL, C);
2176  return C;
2177 }
2178 
2179 // Emit the comparison instruction described by C.
2180 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2181  if (!C.Op1.getNode()) {
2182  SDValue Op;
2183  switch (C.Op0.getOpcode()) {
2184  case ISD::INTRINSIC_W_CHAIN:
2185  Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
2186  break;
2187  case ISD::INTRINSIC_WO_CHAIN:
2188  Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
2189  break;
2190  default:
2191  llvm_unreachable("Invalid comparison operands");
2192  }
2193  return SDValue(Op.getNode(), Op->getNumValues() - 1);
2194  }
2195  if (C.Opcode == SystemZISD::ICMP)
2196  return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
2197  DAG.getConstant(C.ICmpType, DL, MVT::i32));
2198  if (C.Opcode == SystemZISD::TM) {
2199  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2200  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2201  return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
2202  DAG.getConstant(RegisterOnly, DL, MVT::i32));
2203  }
2204  return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
2205 }
2206 
2207 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2208 // 64 bits. Extend is the extension type to use. Store the high part
2209 // in Hi and the low part in Lo.
2210 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2211  SDValue Op0, SDValue Op1, SDValue &Hi,
2212  SDValue &Lo) {
2213  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2214  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2215  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2216  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2217  DAG.getConstant(32, DL, MVT::i64));
2218  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2219  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2220 }
2221 
2222 // Lower a binary operation that produces two VT results, one in each
2223 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2224 // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
2225 // on the extended Op0 and (unextended) Op1. Store the even register result
2226 // in Even and the odd register result in Odd.
2227 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2228  unsigned Extend, unsigned Opcode, SDValue Op0,
2229  SDValue Op1, SDValue &Even, SDValue &Odd) {
2230  SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
2231  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
2232  SDValue(In128, 0), Op1);
2233  bool Is32Bit = is32Bit(VT);
2234  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2235  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2236 }
2237 
2238 // Return an i32 value that is 1 if the CC value produced by Glue is
2239 // in the mask CCMask and 0 otherwise. CC is known to have a value
2240 // in CCValid, so other values can be ignored.
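// IPM places the condition code in bits 28 and 29 of its result register;
// getIPMConversion chooses XORValue, AddValue and Bit so that the wanted
// 0/1 value ends up in bit Bit of the adjusted result.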
2241 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue,
2242  unsigned CCValid, unsigned CCMask) {
2243  IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
2244  SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
2245 
2246  if (Conversion.XORValue)
2247  Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
2248  DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
2249 
2250  if (Conversion.AddValue)
2251  Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
2252  DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
2253 
2254  // The SHR/AND sequence should get optimized to an RISBG.
2255  Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
2256  DAG.getConstant(Conversion.Bit, DL, MVT::i32));
2257  if (Conversion.Bit != 31)
2258  Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
2259  DAG.getConstant(1, DL, MVT::i32));
2260  return Result;
2261 }
2262 
2263 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2264 // be done directly. IsFP is true if CC is for a floating-point rather than
2265 // integer comparison.
2266 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
2267  switch (CC) {
2268  case ISD::SETOEQ:
2269  case ISD::SETEQ:
2270  return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
2271 
2272  case ISD::SETOGE:
2273  case ISD::SETGE:
2274  return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
2275 
2276  case ISD::SETOGT:
2277  case ISD::SETGT:
2278  return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
2279 
2280  case ISD::SETUGT:
2281  return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
2282 
2283  default:
2284  return 0;
2285  }
2286 }
2287 
2288 // Return the SystemZISD vector comparison operation for CC or its inverse,
2289 // or 0 if neither can be done directly. Indicate in Invert whether the
2290 // result is for the inverse of CC. IsFP is true if CC is for a
2291 // floating-point rather than integer comparison.
2292 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
2293  bool &Invert) {
2294  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2295  Invert = false;
2296  return Opcode;
2297  }
2298 
2299  CC = ISD::getSetCCInverse(CC, !IsFP);
2300  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2301  Invert = true;
2302  return Opcode;
2303  }
2304 
2305  return 0;
2306 }
2307 
2308 // Return a v2f64 that contains the extended form of elements Start and Start+1
2309 // of v4f32 value Op.
2310 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2311  SDValue Op) {
2312  int Mask[] = { Start, -1, Start + 1, -1 };
2313  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2314  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2315 }
2316 
2317 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2318 // producing a result of type VT.
2319 static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL,
2320  EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
2321  // There is no hardware support for v4f32, so extend the vector into
2322  // two v2f64s and compare those.
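 // Each half comparison produces a v2i64 mask; PACK narrows the two masks
 // back into the expected result type.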
2323  if (CmpOp0.getValueType() == MVT::v4f32) {
2324  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
2325  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
2326  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
2327  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
2328  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2329  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2330  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2331  }
2332  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2333 }
2334 
2335 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2336 // an integer mask of type VT.
2337 static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2338  ISD::CondCode CC, SDValue CmpOp0,
2339  SDValue CmpOp1) {
2340  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2341  bool Invert = false;
2342  SDValue Cmp;
2343  switch (CC) {
2344  // Handle tests for order using (or (ogt y x) (oge x y)).
2345  case ISD::SETUO:
2346  Invert = true;
2347  case ISD::SETO: {
2348  assert(IsFP && "Unexpected integer comparison");
2349  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2350  SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
2351  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2352  break;
2353  }
2354 
2355  // Handle <> tests using (or (ogt y x) (ogt x y)).
2356  case ISD::SETUEQ:
2357  Invert = true;
2358  case ISD::SETONE: {
2359  assert(IsFP && "Unexpected integer comparison");
2360  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2361  SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
2362  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2363  break;
2364  }
2365 
2366  // Otherwise a single comparison is enough. It doesn't really
2367  // matter whether we try the inversion or the swap first, since
2368  // there are no cases where both work.
2369  default:
2370  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2371  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
2372  else {
2373  CC = ISD::getSetCCSwappedOperands(CC);
2374  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2375  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
2376  else
2377  llvm_unreachable("Unhandled comparison");
2378  }
2379  break;
2380  }
2381  if (Invert) {
2382  SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
2383  DAG.getConstant(65535, DL, MVT::i32));
2384  Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
2385  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2386  }
2387  return Cmp;
2388 }
2389 
2390 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2391  SelectionDAG &DAG) const {
2392  SDValue CmpOp0 = Op.getOperand(0);
2393  SDValue CmpOp1 = Op.getOperand(1);
2394  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2395  SDLoc DL(Op);
2396  EVT VT = Op.getValueType();
2397  if (VT.isVector())
2398  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2399 
2400  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2401  SDValue Glue = emitCmp(DAG, DL, C);
2402  return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
2403 }
2404 
2405 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2406  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2407  SDValue CmpOp0 = Op.getOperand(2);
2408  SDValue CmpOp1 = Op.getOperand(3);
2409  SDValue Dest = Op.getOperand(4);
2410  SDLoc DL(Op);
2411 
2412  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2413  SDValue Glue = emitCmp(DAG, DL, C);
2414  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
2415  Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
2416  DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
2417 }
2418 
2419 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2420 // allowing Pos and Neg to be wider than CmpOp.
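// For example, in select_cc(x, 0, (sub 0, x), x, setlt), Pos is the false
// operand x and Neg is the true operand; the whole select folds to abs(x).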
2421 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
2422  return (Neg.getOpcode() == ISD::SUB &&
2423  Neg.getOperand(0).getOpcode() == ISD::Constant &&
2424  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
2425  Neg.getOperand(1) == Pos &&
2426  (Pos == CmpOp ||
2427  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
2428  Pos.getOperand(0) == CmpOp)));
2429 }
2430 
2431 // Return the absolute or negative absolute of Op; IsNegative decides which.
2432 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
2433  bool IsNegative) {
2434  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
2435  if (IsNegative)
2436  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
2437  DAG.getConstant(0, DL, Op.getValueType()), Op);
2438  return Op;
2439 }
2440 
2441 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
2442  SelectionDAG &DAG) const {
2443  SDValue CmpOp0 = Op.getOperand(0);
2444  SDValue CmpOp1 = Op.getOperand(1);
2445  SDValue TrueOp = Op.getOperand(2);
2446  SDValue FalseOp = Op.getOperand(3);
2447  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2448  SDLoc DL(Op);
2449 
2450  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2451 
2452  // Check for absolute and negative-absolute selections, including those
2453  // where the comparison value is sign-extended (for LPGFR and LNGFR).
2454  // This check supplements the one in DAGCombiner.
2455  if (C.Opcode == SystemZISD::ICMP &&
2456  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
2457  C.CCMask != SystemZ::CCMASK_CMP_NE &&
2458  C.Op1.getOpcode() == ISD::Constant &&
2459  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2460  if (isAbsolute(C.Op0, TrueOp, FalseOp))
2461  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
2462  if (isAbsolute(C.Op0, FalseOp, TrueOp))
2463  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
2464  }
2465 
2466  SDValue Glue = emitCmp(DAG, DL, C);
2467 
2468  // Special case for handling -1/0 results. The shifts we use here
2469  // should get optimized with the IPM conversion sequence.
2470  auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
2471  auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
2472  if (TrueC && FalseC) {
2473  int64_t TrueVal = TrueC->getSExtValue();
2474  int64_t FalseVal = FalseC->getSExtValue();
2475  if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
2476  // Invert the condition if we want -1 on false.
2477  if (TrueVal == 0)
2478  C.CCMask ^= C.CCValid;
2479  SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
2480  EVT VT = Op.getValueType();
2481  // Extend the result to VT. Upper bits are ignored.
2482  if (!is32Bit(VT))
2483  Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
2484  // Sign-extend from the low bit.
2485  SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
2486  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
2487  return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
2488  }
2489  }
2490 
2491  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
2492  DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
2493 
2494  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
2495  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
2496 }
2497 
2498 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
2499  SelectionDAG &DAG) const {
2500  SDLoc DL(Node);
2501  const GlobalValue *GV = Node->getGlobal();
2502  int64_t Offset = Node->getOffset();
2503  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2504  CodeModel::Model CM = DAG.getTarget().getCodeModel();
2505 
2506  SDValue Result;
2507  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
2508  // Assign anchors at 1<<12 byte boundaries.
2509  uint64_t Anchor = Offset & ~uint64_t(0xfff);
2510  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
2511  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2512 
2513  // The offset can be folded into the address if it is aligned to a halfword.
2514  Offset -= Anchor;
2515  if (Offset != 0 && (Offset & 1) == 0) {
2516  SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
2517  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
2518  Offset = 0;
2519  }
2520  } else {
2521  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
2522  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2523  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2524  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2525  }
2526 
2527  // If there was a non-zero offset that we didn't fold, create an explicit
2528  // addition for it.
2529  if (Offset != 0)
2530  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
2531  DAG.getConstant(Offset, DL, PtrVT));
2532 
2533  return Result;
2534 }
2535 
2536 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
2537  SelectionDAG &DAG,
2538  unsigned Opcode,
2539  SDValue GOTOffset) const {
2540  SDLoc DL(Node);
2541  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2542  SDValue Chain = DAG.getEntryNode();
2543  SDValue Glue;
2544 
2545  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
2546  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2547  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
2548  Glue = Chain.getValue(1);
2549  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
2550  Glue = Chain.getValue(1);
2551 
2552  // The first call operand is the chain and the second is the TLS symbol.
2553  SmallVector<SDValue, 8> Ops;
2554  Ops.push_back(Chain);
2555  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
2556  Node->getValueType(0),
2557  0, 0));
2558 
2559  // Add argument registers to the end of the list so that they are
2560  // known live into the call.
2561  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
2562  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
2563 
2564  // Add a register mask operand representing the call-preserved registers.
2565  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2566  const uint32_t *Mask =
2567  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
2568  assert(Mask && "Missing call preserved mask for calling convention");
2569  Ops.push_back(DAG.getRegisterMask(Mask));
2570 
2571  // Glue the call to the argument copies.
2572  Ops.push_back(Glue);
2573 
2574  // Emit the call.
2575  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2576  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
2577  Glue = Chain.getValue(1);
2578 
2579  // Copy the return value from %r2.
2580  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
2581 }
2582 
2583 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
2584  SelectionDAG &DAG) const {
2585  SDValue Chain = DAG.getEntryNode();
2586  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2587 
2588  // The high part of the thread pointer is in access register 0.
2589  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
2590  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
2591 
2592  // The low part of the thread pointer is in access register 1.
2593  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
2594  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
2595 
2596  // Merge them into a single 64-bit address.
2597  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
2598  DAG.getConstant(32, DL, PtrVT));
2599  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
2600 }
2601 
2602 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
2603  SelectionDAG &DAG) const {
2604  if (DAG.getTarget().Options.EmulatedTLS)
2605  return LowerToTLSEmulatedModel(Node, DAG);
2606  SDLoc DL(Node);
2607  const GlobalValue *GV = Node->getGlobal();
2608  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2609  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
2610 
2611  SDValue TP = lowerThreadPointer(DL, DAG);
2612 
2613  // Get the offset of GA from the thread pointer, based on the TLS model.
2614  SDValue Offset;
2615  switch (model) {
2616  case TLSModel::GeneralDynamic: {
2617  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
2618  SystemZConstantPoolValue *CPV =
2619  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
2620 
2621  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2622  Offset = DAG.getLoad(
2623  PtrVT, DL, DAG.getEntryNode(), Offset,
2624  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2625 
2626  // Call __tls_get_offset to retrieve the offset.
2627  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
2628  break;
2629  }
2630 
2631  case TLSModel::LocalDynamic: {
2632  // Load the GOT offset of the module ID.
2633  SystemZConstantPoolValue *CPV =
2634  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
2635 
2636  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2637  Offset = DAG.getLoad(
2638  PtrVT, DL, DAG.getEntryNode(), Offset,
2639  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2640 
2641  // Call __tls_get_offset to retrieve the module base offset.
2642  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
2643 
2644  // Note: The SystemZLDCleanupPass will remove redundant computations
2645  // of the module base offset. Count total number of local-dynamic
2646  // accesses to trigger execution of that pass.
2647  SystemZMachineFunctionInfo* MFI =
2648  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
2649  MFI->incNumLocalDynamicTLSAccesses();
2650 
2651  // Add the per-symbol offset.
2652  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
2653 
2654  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
2655  DTPOffset = DAG.getLoad(
2656  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
2657  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2658 
2659  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
2660  break;
2661  }
2662 
2663  case TLSModel::InitialExec: {
2664  // Load the offset from the GOT.
2665  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2666  SystemZII::MO_INDNTPOFF);
2667  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
2668  Offset =
2669  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
2670  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2671  break;
2672  }
2673 
2674  case TLSModel::LocalExec: {
2675  // Force the offset into the constant pool and load it from there.
2676  SystemZConstantPoolValue *CPV =
2677  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
2678 
2679  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2680  Offset = DAG.getLoad(
2681  PtrVT, DL, DAG.getEntryNode(), Offset,
2682  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2683  break;
2684  }
2685  }
2686 
2687  // Add the base and offset together.
2688  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
2689 }
2690 
2691 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
2692  SelectionDAG &DAG) const {
2693  SDLoc DL(Node);
2694  const BlockAddress *BA = Node->getBlockAddress();
2695  int64_t Offset = Node->getOffset();
2696  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2697 
2698  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
2699  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2700  return Result;
2701 }
2702 
2703 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
2704  SelectionDAG &DAG) const {
2705  SDLoc DL(JT);
2706  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2707  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2708 
2709  // Use LARL to load the address of the table.
2710  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2711 }
2712 
2713 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
2714  SelectionDAG &DAG) const {
2715  SDLoc DL(CP);
2716  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2717 
2718  SDValue Result;
2719  if (CP->isMachineConstantPoolEntry())
2720  Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2721  CP->getAlignment());
2722  else
2723  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2724  CP->getAlignment(), CP->getOffset());
2725 
2726  // Use LARL to load the address of the constant pool entry.
2727  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2728 }
2729 
2730 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
2731  SelectionDAG &DAG) const {
2732  MachineFunction &MF = DAG.getMachineFunction();
2733  MachineFrameInfo &MFI = MF.getFrameInfo();
2734  MFI.setFrameAddressIsTaken(true);
2735 
2736  SDLoc DL(Op);
2737  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2738  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2739 
2740  // If the back chain frame index has not been allocated yet, do so.
2741  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
2742  int BackChainIdx = FI->getFramePointerSaveIndex();
2743  if (!BackChainIdx) {
2744  // By definition, the frame address is the address of the back chain.
2745  BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
2746  FI->setFramePointerSaveIndex(BackChainIdx);
2747  }
2748  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
2749 
2750  // FIXME The frontend should detect this case.
2751  if (Depth > 0) {
2752  report_fatal_error("Unsupported stack frame traversal count");
2753  }
2754 
2755  return BackChain;
2756 }
2757 
2758 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
2759  SelectionDAG &DAG) const {
2760  MachineFunction &MF = DAG.getMachineFunction();
2761  MachineFrameInfo &MFI = MF.getFrameInfo();
2762  MFI.setReturnAddressIsTaken(true);
2763 
2764  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
2765  return SDValue();
2766 
2767  SDLoc DL(Op);
2768  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2769  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2770 
2771  // FIXME The frontend should detect this case.
2772  if (Depth > 0) {
2773  report_fatal_error("Unsupported stack frame traversal count");
2774  }
2775 
2776  // Return R14D, which has the return address. Mark it an implicit live-in.
2777  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
2778  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
2779 }
2780 
2781 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
2782  SelectionDAG &DAG) const {
2783  SDLoc DL(Op);
2784  SDValue In = Op.getOperand(0);
2785  EVT InVT = In.getValueType();
2786  EVT ResVT = Op.getValueType();
2787 
2788  // Convert loads directly. This is normally done by DAGCombiner,
2789  // but we need this case for bitcasts that are created during lowering
2790  // and which are then lowered themselves.
2791  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
2792  return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
2793  LoadN->getMemOperand());
2794 
2795  if (InVT == MVT::i32 && ResVT == MVT::f32) {
2796  SDValue In64;
2797  if (Subtarget.hasHighWord()) {
2798  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
2799  MVT::i64);
2800  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
2801  MVT::i64, SDValue(U64, 0), In);
2802  } else {
2803  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
2804  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
2805  DAG.getConstant(32, DL, MVT::i64));
2806  }
2807  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
2808  return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
2809  DL, MVT::f32, Out64);
2810  }
2811  if (InVT == MVT::f32 && ResVT == MVT::i32) {
2812  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
2813  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
2814  MVT::f64, SDValue(U64, 0), In);
2815  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
2816  if (Subtarget.hasHighWord())
2817  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
2818  MVT::i32, Out64);
2819  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
2820  DAG.getConstant(32, DL, MVT::i64));
2821  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
2822  }
2823  llvm_unreachable("Unexpected bitcast combination");
2824 }
2825 
2826 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
2827  SelectionDAG &DAG) const {
2828  MachineFunction &MF = DAG.getMachineFunction();
2829  SystemZMachineFunctionInfo *FuncInfo =
2830  MF.getInfo<SystemZMachineFunctionInfo>();
2831  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2832 
2833  SDValue Chain = Op.getOperand(0);
2834  SDValue Addr = Op.getOperand(1);
2835  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2836  SDLoc DL(Op);
2837 
2838  // The initial values of each field.
2839  const unsigned NumFields = 4;
2840  SDValue Fields[NumFields] = {
2841  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
2842  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
2843  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
2844  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
2845  };
2846 
2847  // Store each field into its respective slot.
2848  SDValue MemOps[NumFields];
2849  unsigned Offset = 0;
2850  for (unsigned I = 0; I < NumFields; ++I) {
2851  SDValue FieldAddr = Addr;
2852  if (Offset != 0)
2853  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
2854  DAG.getIntPtrConstant(Offset, DL));
2855  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
2856  MachinePointerInfo(SV, Offset));
2857  Offset += 8;
2858  }
2859  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
2860 }
2861 
2862 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
2863  SelectionDAG &DAG) const {
2864  SDValue Chain = Op.getOperand(0);
2865  SDValue DstPtr = Op.getOperand(1);
2866  SDValue SrcPtr = Op.getOperand(2);
2867  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
2868  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
2869  SDLoc DL(Op);
2870 
2871  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
2872  /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
2873  /*isTailCall*/false,
2874  MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
2875 }
2876 
2877 SDValue SystemZTargetLowering::
2878 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
2879  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
2880  MachineFunction &MF = DAG.getMachineFunction();
2881  bool RealignOpt = !MF.getFunction()->hasFnAttribute("no-realign-stack");
2882  bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
2883 
2884  SDValue Chain = Op.getOperand(0);
2885  SDValue Size = Op.getOperand(1);
2886  SDValue Align = Op.getOperand(2);
2887  SDLoc DL(Op);
2888 
2889  // If the user has set the "no-realign-stack" function attribute, ignore
2890  // alloca alignments.
2891  uint64_t AlignVal = (RealignOpt ?
2892  dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
2893 
2894  uint64_t StackAlign = TFI->getStackAlignment();
2895  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
2896  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
2897 
2898  unsigned SPReg = getStackPointerRegisterToSaveRestore();
2899  SDValue NeededSpace = Size;
2900 
2901  // Get a reference to the stack pointer.
2902  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
2903 
2904  // If we need a backchain, save it now.
2905  SDValue Backchain;
2906  if (StoreBackchain)
2907  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
2908 
2909  // Add extra space for alignment if needed.
2910  if (ExtraAlignSpace)
2911  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
2912  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
2913 
2914  // Get the new stack pointer value.
2915  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
2916 
2917  // Copy the new stack pointer back.
2918  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
2919 
2920  // The allocated data lives above the 160 bytes allocated for the standard
2921  // frame, plus any outgoing stack arguments. We don't know how much that
2922  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
2923  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
2924  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
2925 
2926  // Dynamically realign if needed.
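 // For example, with an 8-byte StackAlign and a 16-byte alloca alignment,
 // ExtraAlignSpace is 8: adding it and then masking with ~15 rounds the
 // result up to a 16-byte boundary within the extra space.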
2927  if (RequiredAlign > StackAlign) {
2928  Result =
2929  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
2930  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
2931  Result =
2932  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
2933  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
2934  }
2935 
2936  if (StoreBackchain)
2937  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
2938 
2939  SDValue Ops[2] = { Result, Chain };
2940  return DAG.getMergeValues(Ops, DL);
2941 }
2942 
2943 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
2944  SDValue Op, SelectionDAG &DAG) const {
2945  SDLoc DL(Op);
2946 
2947  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
2948 }
2949 
2950 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
2951  SelectionDAG &DAG) const {
2952  EVT VT = Op.getValueType();
2953  SDLoc DL(Op);
2954  SDValue Ops[2];
2955  if (is32Bit(VT))
2956  // Just do a normal 64-bit multiplication and extract the results.
2957  // We define this so that it can be used for constant division.
2958  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
2959  Op.getOperand(1), Ops[1], Ops[0]);
2960  else {
2961  // Do a full 128-bit multiplication based on UMUL_LOHI64:
2962  //
2963  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
2964  //
2965  // but using the fact that the upper halves are either all zeros
2966  // or all ones:
2967  //
2968  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
2969  //
2970  // and grouping the right terms together since they are quicker than the
2971  // multiplication:
2972  //
2973  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
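 //
 // (Since lh and rh are each 0 or -1, lh * rl == -(lh & rl) and
 // ll * rh == -(ll & rh), which justifies the rewrite above.)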
2974  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
2975  SDValue LL = Op.getOperand(0);
2976  SDValue RL = Op.getOperand(1);
2977  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
2978  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
2979  // UMUL_LOHI64 returns the low result in the odd register and the high
2980  // result in the even register. SMUL_LOHI is defined to return the
2981  // low half first, so the results are in reverse order.
2982  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
2983  LL, RL, Ops[1], Ops[0]);
2984  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
2985  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
2986  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
2987  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
2988  }
2989  return DAG.getMergeValues(Ops, DL);
2990 }
2991 
2992 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
2993  SelectionDAG &DAG) const {
2994  EVT VT = Op.getValueType();
2995  SDLoc DL(Op);
2996  SDValue Ops[2];
2997  if (is32Bit(VT))
2998  // Just do a normal 64-bit multiplication and extract the results.
2999  // We define this so that it can be used for constant division.
3000  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3001  Op.getOperand(1), Ops[1], Ops[0]);
3002  else
3003  // UMUL_LOHI64 returns the low result in the odd register and the high
3004  // result in the even register. UMUL_LOHI is defined to return the
3005  // low half first, so the results are in reverse order.
3006  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
3007  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3008  return DAG.getMergeValues(Ops, DL);
3009 }
3010 
3011 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3012  SelectionDAG &DAG) const {
3013  SDValue Op0 = Op.getOperand(0);
3014  SDValue Op1 = Op.getOperand(1);
3015  EVT VT = Op.getValueType();
3016  SDLoc DL(Op);
3017  unsigned Opcode;
3018 
3019  // We use DSGF for 32-bit division.
3020  if (is32Bit(VT)) {
3021  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3022  Opcode = SystemZISD::SDIVREM32;
3023  } else if (DAG.ComputeNumSignBits(Op1) > 32) {
3024  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3025  Opcode = SystemZISD::SDIVREM32;
3026  } else
3027  Opcode = SystemZISD::SDIVREM64;
3028 
3029  // DSG(F) takes a 64-bit dividend, so the even register in the GR128
3030  // input is "don't care". The instruction returns the remainder in
3031  // the even register and the quotient in the odd register.
3032  SDValue Ops[2];
3033  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
3034  Op0, Op1, Ops[1], Ops[0]);
3035  return DAG.getMergeValues(Ops, DL);
3036 }
3037 
3038 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3039  SelectionDAG &DAG) const {
3040  EVT VT = Op.getValueType();
3041  SDLoc DL(Op);
3042 
3043  // DL(G) uses a double-width dividend, so we need to clear the even
3044  // register in the GR128 input. The instruction returns the remainder
3045  // in the even register and the quotient in the odd register.
3046  SDValue Ops[2];
3047  if (is32Bit(VT))
3048  lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
3049  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3050  else
3051  lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
3052  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3053  return DAG.getMergeValues(Ops, DL);
3054 }
3055 
3056 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3057  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3058 
3059  // Get the known-zero masks for each operand.
3060  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
3061  APInt KnownZero[2], KnownOne[2];
3062  DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]);
3063  DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]);
3064 
3065  // See if the upper 32 bits of one operand and the lower 32 bits of the
3066  // other are known zero. They are the low and high operands respectively.
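 // For example, in (or (and x, 0xffffffff00000000), (and y, 0xffffffff))
 // the AND with y supplies the low word and the AND with x the high word.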
3067  uint64_t Masks[] = { KnownZero[0].getZExtValue(),
3068  KnownZero[1].getZExtValue() };
3069  unsigned High, Low;
3070  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3071  High = 1, Low = 0;
3072  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3073  High = 0, Low = 1;
3074  else
3075  return Op;
3076 
3077  SDValue LowOp = Ops[Low];
3078  SDValue HighOp = Ops[High];
3079 
3080  // If the high part is a constant, we're better off using IILH.
3081  if (HighOp.getOpcode() == ISD::Constant)
3082  return Op;
3083 
3084  // If the low part is a constant that is outside the range of LHI,
3085  // then we're better off using IILF.
3086  if (LowOp.getOpcode() == ISD::Constant) {
3087  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3088  if (!isInt<16>(Value))
3089  return Op;
3090  }
3091 
3092  // Check whether the high part is an AND that doesn't change the
3093  // high 32 bits and just masks out low bits. We can skip it if so.
3094  if (HighOp.getOpcode() == ISD::AND &&
3095  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3096  SDValue HighOp0 = HighOp.getOperand(0);
3097  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3098  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3099  HighOp = HighOp0;
3100  }
3101 
3102  // Take advantage of the fact that all GR32 operations only change the
3103  // low 32 bits by truncating Low to an i32 and inserting it directly
3104  // using a subreg. The interesting cases are those where the truncation
3105  // can be folded.
3106  SDLoc DL(Op);
3107  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3108  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3109  MVT::i64, HighOp, Low32);
3110 }
3111 
3112 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3113  SelectionDAG &DAG) const {
3114  EVT VT = Op.getValueType();
3115  SDLoc DL(Op);
3116  Op = Op.getOperand(0);
3117 
3118  // Handle vector types via VPOPCT.
3119  if (VT.isVector()) {
3120  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3121  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3122  switch (VT.getScalarSizeInBits()) {
3123  case 8:
3124  break;
3125  case 16: {
3126  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3127  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3128  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3129  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3130  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3131  break;
3132  }
3133  case 32: {
3134  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
3135  DAG.getConstant(0, DL, MVT::i32));
3136  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3137  break;
3138  }
3139  case 64: {
3140  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
3141  DAG.getConstant(0, DL, MVT::i32));
3142  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3143  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3144  break;
3145  }
3146  default:
3147  llvm_unreachable("Unexpected type");
3148  }
3149  return Op;
3150  }
3151 
3152  // Get the known-zero mask for the operand.
3153  APInt KnownZero, KnownOne;
3154  DAG.computeKnownBits(Op, KnownZero, KnownOne);
3155  unsigned NumSignificantBits = (~KnownZero).getActiveBits();
3156  if (NumSignificantBits == 0)
3157  return DAG.getConstant(0, DL, VT);
3158 
3159  // Skip known-zero high parts of the operand.
3160  int64_t OrigBitSize = VT.getSizeInBits();
3161  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3162  BitSize = std::min(BitSize, OrigBitSize);
3163 
3164  // The POPCNT instruction counts the number of bits in each byte.
3165  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3166  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3167  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3168 
3169  // Add up per-byte counts in a binary tree. All bits of Op at
3170  // position larger than BitSize remain zero throughout.
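 // For a 32-bit value this is two steps, Op += Op << 16 then Op += Op << 8,
 // after which the top byte holds the total population count.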
3171  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3172  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3173  if (BitSize != OrigBitSize)
3174  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3175  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3176  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3177  }
3178 
3179  // Extract overall result from high byte.
3180  if (BitSize > 8)
3181  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3182  DAG.getConstant(BitSize - 8, DL, VT));
3183 
3184  return Op;
3185 }
3186 
3187 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3188  SelectionDAG &DAG) const {
3189  SDLoc DL(Op);
3190  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3191  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3192  SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
3193  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3194 
3195  // The only fence that needs an instruction is a sequentially-consistent
3196  // cross-thread fence.
3197  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3198  FenceScope == CrossThread) {
3199  return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3200  Op.getOperand(0)),
3201  0);
3202  }
3203 
3204  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3205  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3206 }
3207 
3208 // Op is an atomic load. Lower it into a normal volatile load.
3209 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3210  SelectionDAG &DAG) const {
3211  auto *Node = cast<AtomicSDNode>(Op.getNode());
3212  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3213  Node->getChain(), Node->getBasePtr(),
3214  Node->getMemoryVT(), Node->getMemOperand());
3215 }
3216 
3217 // Op is an atomic store. Lower it into a normal volatile store followed
3218 // by a serialization.
3219 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3220  SelectionDAG &DAG) const {
3221  auto *Node = cast<AtomicSDNode>(Op.getNode());
3222  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3223  Node->getBasePtr(), Node->getMemoryVT(),
3224  Node->getMemOperand());
3225  return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
3226  Chain), 0);
3227 }
3228 
3229 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the 8- and
3230 // 16-bit forms into the fullword ATOMIC_LOADW_* operation given by Opcode.
3231 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3232  SelectionDAG &DAG,
3233  unsigned Opcode) const {
3234  auto *Node = cast<AtomicSDNode>(Op.getNode());
3235 
3236  // 32-bit operations need no code outside the main loop.
3237  EVT NarrowVT = Node->getMemoryVT();
3238  EVT WideVT = MVT::i32;
3239  if (NarrowVT == WideVT)
3240  return Op;
3241 
3242  int64_t BitSize = NarrowVT.getSizeInBits();
3243  SDValue ChainIn = Node->getChain();
3244  SDValue Addr = Node->getBasePtr();
3245  SDValue Src2 = Node->getVal();
3246  MachineMemOperand *MMO = Node->getMemOperand();
3247  SDLoc DL(Node);
3248  EVT PtrVT = Addr.getValueType();
3249 
3250  // Convert atomic subtracts of constants into additions.
3251  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3252  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3253  Opcode = SystemZISD::ATOMIC_LOADW_ADD;
3254  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3255  }
3256 
3257  // Get the address of the containing word.
3258  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3259  DAG.getConstant(-4, DL, PtrVT));
3260 
3261  // Get the number of bits that the word must be rotated left in order
3262  // to bring the field to the top bits of a GR32.
3263  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3264  DAG.getConstant(3, DL, PtrVT));
3265  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3266 
3267  // Get the complementing shift amount, for rotating a field in the top
3268  // bits back to its proper position.
3269  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3270  DAG.getConstant(0, DL, WideVT), BitShift);
3271 
3272  // Extend the source operand to 32 bits and prepare it for the inner loop.
3273  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3274  // operations require the source to be shifted in advance. (This shift
3275  // can be folded if the source is constant.) For AND and NAND, the lower
3276  // bits must be set, while for other opcodes they should be left clear.
3277  if (Opcode != SystemZISD::ATOMIC_SWAPW)
3278  Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3279  DAG.getConstant(32 - BitSize, DL, WideVT));
3280  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3281  Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3282  Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3283  DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3284 
3285  // Construct the ATOMIC_LOADW_* node.
3286  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3287  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
3288  DAG.getConstant(BitSize, DL, WideVT) };
3289  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
3290  NarrowVT, MMO);
3291 
3292  // Rotate the result of the final CS so that the field is in the lower
3293  // bits of a GR32, then truncate it.
3294  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
3295  DAG.getConstant(BitSize, DL, WideVT));
3296  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
3297 
3298  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
3299  return DAG.getMergeValues(RetOps, DL);
3300 }
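// Worked example (editor annotation, not in the original source): for an
// 8-bit ATOMIC_LOADW_ADD at address A = 0x1003, the containing big-endian
// word is at AlignedAddr = A & -4 = 0x1000 and the addressed byte occupies
// bits 24..31 from the top of that word. The shift amounts come out as:
//
//   BitShift    = (A << 3) mod 32 = 24   // rotate field to the top
//   NegBitShift = (0 - 24) mod 32 = 8    // rotate it back afterwards
//   Src2        = Src2 << (32 - 8)       // operand applied at the top
//
// and the final ROTL by BitShift + BitSize = 32 == 0 (mod 32) leaves the
// loaded field in the low bits of the GR32, ready for truncation.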
3301 
3302 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
3303 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3304 // operations into additions.
3305 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
3306  SelectionDAG &DAG) const {
3307  auto *Node = cast<AtomicSDNode>(Op.getNode());
3308  EVT MemVT = Node->getMemoryVT();
3309  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
3310  // A full-width operation.
3311  assert(Op.getValueType() == MemVT && "Mismatched VTs");
3312  SDValue Src2 = Node->getVal();
3313  SDValue NegSrc2;
3314  SDLoc DL(Src2);
3315 
3316  if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
3317  // Use an addition if the operand is constant and either LAA(G) is
3318  // available or the negative value is in the range of A(G)FHI.
3319  int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
3320  if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
3321  NegSrc2 = DAG.getConstant(Value, DL, MemVT);
3322  } else if (Subtarget.hasInterlockedAccess1())
3323  // Use LAA(G) if available.
3324  NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
3325  Src2);
3326 
3327  if (NegSrc2.getNode())
3328  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
3329  Node->getChain(), Node->getBasePtr(), NegSrc2,
3330  Node->getMemOperand());
3331 
3332  // Use the node as-is.
3333  return Op;
3334  }
3335 
3336  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
3337 }
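// For example (editor annotation): "atomicrmw sub i64 %p, 16" becomes an
// ATOMIC_LOAD_ADD of -16 here: the negated constant passes isInt<32>, and
// on targets with interlocked-access facility 1 (z196 and later) the
// addition can be done directly with LAA/LAAG.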
3338 
3339 // Op is an 8-, 16- or 32-bit ATOMIC_CMP_SWAP operation. Lower the 8- and
3340 // 16-bit forms into a fullword ATOMIC_CMP_SWAPW operation.
3341 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
3342  SelectionDAG &DAG) const {
3343  auto *Node = cast<AtomicSDNode>(Op.getNode());
3344 
3345  // We have native support for 32-bit compare and swap.
3346  EVT NarrowVT = Node->getMemoryVT();
3347  EVT WideVT = MVT::i32;
3348  if (NarrowVT == WideVT)
3349  return Op;
3350 
3351  int64_t BitSize = NarrowVT.getSizeInBits();
3352  SDValue ChainIn = Node->getOperand(0);
3353  SDValue Addr = Node->getOperand(1);
3354  SDValue CmpVal = Node->getOperand(2);
3355  SDValue SwapVal = Node->getOperand(3);
3356  MachineMemOperand *MMO = Node->getMemOperand();
3357  SDLoc DL(Node);
3358  EVT PtrVT = Addr.getValueType();
3359 
3360  // Get the address of the containing word.
3361  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3362  DAG.getConstant(-4, DL, PtrVT));
3363 
3364  // Get the number of bits that the word must be rotated left in order
3365  // to bring the field to the top bits of a GR32.
3366  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3367  DAG.getConstant(3, DL, PtrVT));
3368  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3369 
3370  // Get the complementing shift amount, for rotating a field in the top
3371  // bits back to its proper position.
3372  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3373  DAG.getConstant(0, DL, WideVT), BitShift);
3374 
3375  // Construct the ATOMIC_CMP_SWAPW node.
3376  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3377  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
3378  NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
3379  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
3380  VTList, Ops, NarrowVT, MMO);
3381  return AtomicOp;
3382 }
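// Note (editor annotation): this uses the same aligned-word and rotate
// scheme as lowerATOMIC_LOAD_OP above; the ATOMIC_CMP_SWAPW node is later
// expanded to a CS loop in which CmpVal and SwapVal are rotated into the
// high bits by BitShift and only the BitSize-wide field is compared.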
3383 
3384 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
3385  SelectionDAG &DAG) const {
3386  MachineFunction &MF = DAG.getMachineFunction();
3387  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3388  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
3389  SystemZ::R15D, Op.getValueType());
3390 }
3391 
3392 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
3393  SelectionDAG &DAG) const {
3394  MachineFunction &MF = DAG.getMachineFunction();
3395  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3396  bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
3397 
3398  SDValue Chain = Op.getOperand(0);
3399  SDValue NewSP = Op.getOperand(1);
3400  SDValue Backchain;
3401  SDLoc DL(Op);
3402 
3403  if (StoreBackchain) {
3404  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
3405  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3406  }
3407 
3408  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
3409 
3410  if (StoreBackchain)
3411  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3412 
3413  return Chain;
3414 }
3415 
3416 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
3417  SelectionDAG &DAG) const {
3418  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3419  if (!IsData)
3420  // Just preserve the chain.
3421  return Op.getOperand(0);
3422 
3423  SDLoc DL(Op);
3424  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3425  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
3426  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
3427  SDValue Ops[] = {
3428  Op.getOperand(0),
3429  DAG.getConstant(Code, DL, MVT::i32),
3430  Op.getOperand(1)
3431  };
3432  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
3433  Node->getVTList(), Ops,
3434  Node->getMemoryVT(), Node->getMemOperand());
3435 }
3436 
3437 // Return an i32 that contains the value of CC immediately after After,
3438 // whose final result value must be of type MVT::Glue.
3439 static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
3440  SDLoc DL(After);
3441  SDValue Glue = SDValue(After, After->getNumValues() - 1);
3442  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
3443  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
3444  DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
3445 }
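// Background (editor annotation): IPM inserts the condition code into what
// ends up as bits 29-28 of the low GR32, and SystemZ::IPM_CC names that
// bit position (28), so the SRL above leaves the raw CC value 0-3 in the
// two least significant bits of the returned i32.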
3446 
3447 SDValue
3448 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3449  SelectionDAG &DAG) const {
3450  unsigned Opcode, CCValid;
3451  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
3452  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
3453  SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
3454  SDValue CC = getCCResult(DAG, Glued.getNode());
3455  DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
3456  return SDValue();
3457  }
3458 
3459  return SDValue();
3460 }
3461 
3462 SDValue
3463 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3464  SelectionDAG &DAG) const {
3465  unsigned Opcode, CCValid;
3466  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
3467  SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
3468  SDValue CC = getCCResult(DAG, Glued.getNode());
3469  if (Op->getNumValues() == 1)
3470  return CC;
3471  assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
3472  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued,
3473  CC);
3474  }
3475 
3476  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3477  switch (Id) {
3478  case Intrinsic::thread_pointer:
3479  return lowerThreadPointer(SDLoc(Op), DAG);
3480 
3481  case Intrinsic::s390_vpdi:
3482  return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
3483  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3484 
3485  case Intrinsic::s390_vperm:
3486  return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
3487  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3488 
3489  case Intrinsic::s390_vuphb:
3490  case Intrinsic::s390_vuphh:
3491  case Intrinsic::s390_vuphf:
3492  return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
3493  Op.getOperand(1));
3494 
3495  case Intrinsic::s390_vuplhb:
3496  case Intrinsic::s390_vuplhh:
3497  case Intrinsic::s390_vuplhf:
3498  return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
3499  Op.getOperand(1));
3500 
3501  case Intrinsic::s390_vuplb:
3502  case Intrinsic::s390_vuplhw:
3503  case Intrinsic::s390_vuplf:
3504  return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
3505  Op.getOperand(1));
3506 
3507  case Intrinsic::s390_vupllb:
3508  case Intrinsic::s390_vupllh:
3509  case Intrinsic::s390_vupllf:
3510  return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
3511  Op.getOperand(1));
3512 
3513  case Intrinsic::s390_vsumb:
3514  case Intrinsic::s390_vsumh:
3515  case Intrinsic::s390_vsumgh:
3516  case Intrinsic::s390_vsumgf:
3517  case Intrinsic::s390_vsumqf:
3518  case Intrinsic::s390_vsumqg:
3519  return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
3520  Op.getOperand(1), Op.getOperand(2));
3521  }
3522 
3523  return SDValue();
3524 }
3525 
3526 namespace {
3527 // Says that SystemZISD operation Opcode can be used to perform the equivalent
3528 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
3529 // Operand is the constant third operand, otherwise it is the number of
3530 // bytes in each element of the result.
3531 struct Permute {
3532  unsigned Opcode;
3533  unsigned Operand;
3534  unsigned char Bytes[SystemZ::VectorBytes];
3535 };
3536 }
3537 
3538 static const Permute PermuteForms[] = {
3539  // VMRHG
3540  { SystemZISD::MERGE_HIGH, 8,
3541  { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
3542  // VMRHF
3543  { SystemZISD::MERGE_HIGH, 4,
3544  { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
3545  // VMRHH
3546  { SystemZISD::MERGE_HIGH, 2,
3547  { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
3548  // VMRHB
3549  { SystemZISD::MERGE_HIGH, 1,
3550  { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
3551  // VMRLG
3552  { SystemZISD::MERGE_LOW, 8,
3553  { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
3554  // VMRLF
3555  { SystemZISD::MERGE_LOW, 4,
3556  { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
3557  // VMRLH
3558  { SystemZISD::MERGE_LOW, 2,
3559  { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
3560  // VMRLB
3561  { SystemZISD::MERGE_LOW, 1,
3562  { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
3563  // VPKG
3564  { SystemZISD::PACK, 4,
3565  { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
3566  // VPKF
3567  { SystemZISD::PACK, 2,
3568  { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
3569  // VPKH
3570  { SystemZISD::PACK, 1,
3571  { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
3572  // VPDI V1, V2, 4 (low half of V1, high half of V2)
3573  { SystemZISD::PERMUTE_DWORDS, 4,
3574  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
3575  // VPDI V1, V2, 1 (high half of V1, low half of V2)
3576  { SystemZISD::PERMUTE_DWORDS, 1,
3577  { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
3578 };
3579 
3580 // Called after matching a vector shuffle against a particular pattern.
3581 // Both the original shuffle and the pattern have two vector operands.
3582 // OpNos[0] is the operand of the original shuffle that should be used for
3583 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
3584 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
3585 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
3586 // for operands 0 and 1 of the pattern.
3587 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
3588  if (OpNos[0] < 0) {
3589  if (OpNos[1] < 0)
3590  return false;
3591  OpNo0 = OpNo1 = OpNos[1];
3592  } else if (OpNos[1] < 0) {
3593  OpNo0 = OpNo1 = OpNos[0];
3594  } else {
3595  OpNo0 = OpNos[0];
3596  OpNo1 = OpNos[1];
3597  }
3598  return true;
3599 }
3600 
3601 // Bytes is a VPERM-like permute vector, except that -1 is used for
3602 // undefined bytes. Return true if the VPERM can be implemented using P.
3603 // When returning true set OpNo0 to the VPERM operand that should be
3604 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
3605 //
3606 // For example, if swapping the VPERM operands allows P to match, OpNo0
3607 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
3608 // operand, but rewriting it to use two duplicated operands allows it to
3609 // match P, then OpNo0 and OpNo1 will be the same.
3610 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
3611  unsigned &OpNo0, unsigned &OpNo1) {
3612  int OpNos[] = { -1, -1 };
3613  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
3614  int Elt = Bytes[I];
3615  if (Elt >= 0) {
3616  // Make sure that the two permute vectors use the same suboperand
3617  // byte number. Only the operand numbers (the high bits) are
3618  // allowed to differ.
3619  if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
3620  return false;
3621  int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
3622  int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
3623  // Make sure that the operand mappings are consistent with previous
3624  // elements.
3625  if (OpNos[ModelOpNo] == 1 - RealOpNo)
3626  return false;
3627  OpNos[ModelOpNo] = RealOpNo;
3628  }
3629  }
3630  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3631 }
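// Example (editor annotation): with P as the VMRHB form above,
// Bytes = { 16, 0, 17, 1, ... } matches with the operands swapped:
// element 0 maps model operand 0 to real operand 1 and element 1 maps
// model operand 1 to real operand 0, so OpNo0 = 1 and OpNo1 = 0.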
3632 
3633 // As above, but search for a matching permute.
3634 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
3635  unsigned &OpNo0, unsigned &OpNo1) {
3636  for (auto &P : PermuteForms)
3637  if (matchPermute(Bytes, P, OpNo0, OpNo1))
3638  return &P;
3639  return nullptr;
3640 }
3641 
3642 // Bytes is a VPERM-like permute vector, except that -1 is used for
3643 // undefined bytes. This permute is an operand of an outer permute.
3644 // See whether redistributing the -1 bytes gives a shuffle that can be
3645 // implemented using P. If so, set Transform to a VPERM-like permute vector
3646 // that, when applied to the result of P, gives the original permute in Bytes.
3647 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3648  const Permute &P,
3649  SmallVectorImpl<int> &Transform) {
3650  unsigned To = 0;
3651  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
3652  int Elt = Bytes[From];
3653  if (Elt < 0)
3654  // Byte number From of the result is undefined.
3655  Transform[From] = -1;
3656  else {
3657  while (P.Bytes[To] != Elt) {
3658  To += 1;
3659  if (To == SystemZ::VectorBytes)
3660  return false;
3661  }
3662  Transform[From] = To;
3663  }
3664  }
3665  return true;
3666 }
3667 
3668 // As above, but search for a matching permute.
3669 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3670  SmallVectorImpl<int> &Transform) {
3671  for (auto &P : PermuteForms)
3672  if (matchDoublePermute(Bytes, P, Transform))
3673  return &P;
3674  return nullptr;
3675 }
3676 
3677 // Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
3678 // as if it had type vNi8.
3679 static void getVPermMask(ShuffleVectorSDNode *VSN,
3680  SmallVectorImpl<int> &Bytes) {
3681  EVT VT = VSN->getValueType(0);
3682  unsigned NumElements = VT.getVectorNumElements();
3683  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3684  Bytes.resize(NumElements * BytesPerElement, -1);
3685  for (unsigned I = 0; I < NumElements; ++I) {
3686  int Index = VSN->getMaskElt(I);
3687  if (Index >= 0)
3688  for (unsigned J = 0; J < BytesPerElement; ++J)
3689  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
3690  }
3691 }
3692 
3693 // Bytes is a VPERM-like permute vector, except that -1 is used for
3694 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
3695 // the result come from a contiguous sequence of bytes from one input.
3696 // Set Base to the selector for the first byte if so.
3697 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
3698  unsigned BytesPerElement, int &Base) {
3699  Base = -1;
3700  for (unsigned I = 0; I < BytesPerElement; ++I) {
3701  if (Bytes[Start + I] >= 0) {
3702  unsigned Elem = Bytes[Start + I];
3703  if (Base < 0) {
3704  Base = Elem - I;
3705  // Make sure the bytes would come from one input operand.
3706  if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
3707  return false;
3708  } else if (unsigned(Base) != Elem - I)
3709  return false;
3710  }
3711  }
3712  return true;
3713 }
3714 
3715 // Bytes is a VPERM-like permute vector, except that -1 is used for
3716 // undefined bytes. Return true if it can be performed using VSLDI.
3717 // When returning true, set StartIndex to the shift amount and OpNo0
3718 // and OpNo1 to the VPERM operands that should be used as the first
3719 // and second shift operand respectively.
3720 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
3721  unsigned &StartIndex, unsigned &OpNo0,
3722  unsigned &OpNo1) {
3723  int OpNos[] = { -1, -1 };
3724  int Shift = -1;
3725  for (unsigned I = 0; I < 16; ++I) {
3726  int Index = Bytes[I];
3727  if (Index >= 0) {
3728  int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
3729  int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
3730  int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
3731  if (Shift < 0)
3732  Shift = ExpectedShift;
3733  else if (Shift != ExpectedShift)
3734  return false;
3735  // Make sure that the operand mappings are consistent with previous
3736  // elements.
3737  if (OpNos[ModelOpNo] == 1 - RealOpNo)
3738  return false;
3739  OpNos[ModelOpNo] = RealOpNo;
3740  }
3741  }
3742  StartIndex = Shift;
3743  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3744 }
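// Example (editor annotation): Bytes = { 3, 4, ..., 18 } selects 16
// consecutive bytes starting at byte 3 of the concatenation Op0:Op1, so
// every element yields ExpectedShift = 3; the function returns
// StartIndex = 3 with OpNo0 = 0 and OpNo1 = 1, which is exactly VSLDB
// (SHL_DOUBLE) with an immediate of 3.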
3745 
3746 // Create a node that performs P on operands Op0 and Op1, casting the
3747 // operands to the appropriate type. The type of the result is determined by P.
3748 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
3749  const Permute &P, SDValue Op0, SDValue Op1) {
3750  // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
3751  // elements of a PACK are twice as wide as the outputs.
3752  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
3753  P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
3754  P.Operand);
3755  // Cast both operands to the appropriate type.
3756  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
3757  SystemZ::VectorBytes / InBytes);
3758  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
3759  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
3760  SDValue Op;
3761  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
3762  SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
3763  Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
3764  } else if (P.Opcode == SystemZISD::PACK) {
3765  MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
3766  SystemZ::VectorBytes / P.Operand);
3767  Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
3768  } else {
3769  Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
3770  }
3771  return Op;
3772 }
3773 
3774 // Bytes is a VPERM-like permute vector, except that -1 is used for
3775 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using
3776 // VSLDI or VPERM.
3777 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
3778  SDValue *Ops,
3779  const SmallVectorImpl<int> &Bytes) {
3780  for (unsigned I = 0; I < 2; ++I)
3781  Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
3782 
3783  // First see whether VSLDI can be used.
3784  unsigned StartIndex, OpNo0, OpNo1;
3785  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
3786  return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
3787  Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
3788 
3789  // Fall back on VPERM. Construct an SDNode for the permute vector.
3790  SDValue IndexNodes[SystemZ::VectorBytes];
3791  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
3792  if (Bytes[I] >= 0)
3793  IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
3794  else
3795  IndexNodes[I] = DAG.getUNDEF(MVT::i32);
3796  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
3797  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
3798 }
3799 
3800 namespace {
3801 // Describes a general N-operand vector shuffle.
3802 struct GeneralShuffle {
3803  GeneralShuffle(EVT vt) : VT(vt) {}
3804  void addUndef();
3805  void add(SDValue, unsigned);
3806  SDValue getNode(SelectionDAG &, const SDLoc &);
3807 
3808  // The operands of the shuffle.
3809  SmallVector<SDValue, SystemZ::VectorBytes> Ops;
3810 
3811  // Index I is -1 if byte I of the result is undefined. Otherwise the
3812  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
3813  // Bytes[I] / SystemZ::VectorBytes.
3814  SmallVector<int, SystemZ::VectorBytes> Bytes;
3815 
3816  // The type of the shuffle result.
3817  EVT VT;
3818 };
3819 }
3820 
3821 // Add an extra undefined element to the shuffle.
3822 void GeneralShuffle::addUndef() {
3823  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3824  for (unsigned I = 0; I < BytesPerElement; ++I)
3825  Bytes.push_back(-1);
3826 }
3827 
3828 // Add an extra element to the shuffle, taking it from element Elem of Op.
3829 // A null Op indicates a vector input whose value will be calculated later;
3830 // there is at most one such input per shuffle and it always has the same
3831 // type as the result.
3832 void GeneralShuffle::add(SDValue Op, unsigned Elem) {
3833  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3834 
3835  // The source vector can have wider elements than the result,
3836  // either through an explicit TRUNCATE or because of type legalization.
3837  // We want the least significant part.
3838  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
3839  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
3840  assert(FromBytesPerElement >= BytesPerElement &&
3841  "Invalid EXTRACT_VECTOR_ELT");
3842  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
3843  (FromBytesPerElement - BytesPerElement));
3844 
3845  // Look through things like shuffles and bitcasts.
3846  while (Op.getNode()) {
3847  if (Op.getOpcode() == ISD::BITCAST)
3848  Op = Op.getOperand(0);
3849  else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
3850  // See whether the bytes we need come from a contiguous part of one
3851  // operand.
3852  SmallVector<int, SystemZ::VectorBytes> OpBytes;
3853  getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
3854  int NewByte;
3855  if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
3856  break;
3857  if (NewByte < 0) {
3858  addUndef();
3859  return;
3860  }
3861  Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
3862  Byte = unsigned(NewByte) % SystemZ::VectorBytes;
3863  } else if (Op.isUndef()) {
3864  addUndef();
3865  return;
3866  } else
3867  break;
3868  }
3869 
3870  // Make sure that the source of the extraction is in Ops.
3871  unsigned OpNo = 0;
3872  for (; OpNo < Ops.size(); ++OpNo)
3873  if (Ops[OpNo] == Op)
3874  break;
3875  if (OpNo == Ops.size())
3876  Ops.push_back(Op);
3877 
3878  // Add the element to Bytes.
3879  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
3880  for (unsigned I = 0; I < BytesPerElement; ++I)
3881  Bytes.push_back(Base + I);
3882 }
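// Encoding example (editor annotation): adding element 2 of a v4i32
// operand that lands in Ops[1] appends { 24, 25, 26, 27 } to Bytes:
// Byte = (2 * 4) % 16 = 8 and Base = 1 * SystemZ::VectorBytes + 8 = 24,
// i.e. operand number in the high bits, byte offset in the low bits.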
3883 
3884 // Return SDNodes for the completed shuffle.
3885 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
3886  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
3887 
3888  if (Ops.size() == 0)
3889  return DAG.getUNDEF(VT);
3890 
3891  // Make sure that there are at least two shuffle operands.
3892  if (Ops.size() == 1)
3893  Ops.push_back(DAG.getUNDEF(MVT::v16i8));
3894 
3895  // Create a tree of shuffles, deferring root node until after the loop.
3896  // Try to redistribute the undefined elements of non-root nodes so that
3897  // the non-root shuffles match something like a pack or merge, then adjust
3898  // the parent node's permute vector to compensate for the new order.
3899  // Among other things, this copes with vectors like <2 x i16> that were
3900  // padded with undefined elements during type legalization.
3901  //
3902  // In the best case this redistribution will lead to the whole tree
3903  // using packs and merges. It should rarely be a loss in other cases.
3904  unsigned Stride = 1;
3905  for (; Stride * 2 < Ops.size(); Stride *= 2) {
3906  for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
3907  SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
3908 
3909  // Create a mask for just these two operands.
3910  SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
3911  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
3912  unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
3913  unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
3914  if (OpNo == I)
3915  NewBytes[J] = Byte;
3916  else if (OpNo == I + Stride)
3917  NewBytes[J] = SystemZ::VectorBytes + Byte;
3918  else
3919  NewBytes[J] = -1;
3920  }
3921  // See if it would be better to reorganize NewMask to avoid using VPERM.
3922  SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
3923  if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
3924  Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
3925  // Applying NewBytesMap to Ops[I] gets back to NewBytes.
3926  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
3927  if (NewBytes[J] >= 0) {
3928  assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
3929  "Invalid double permute");
3930  Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
3931  } else
3932  assert(NewBytesMap[J] < 0 && "Invalid double permute");
3933  }
3934  } else {
3935  // Just use NewBytes on the operands.
3936  Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
3937  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
3938  if (NewBytes[J] >= 0)
3939  Bytes[J] = I * SystemZ::VectorBytes + J;
3940  }
3941  }
3942  }
3943 
3944  // Now we just have 2 inputs. Put the second operand in Ops[1].
3945  if (Stride > 1) {
3946  Ops[1] = Ops[Stride];
3947  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
3948  if (Bytes[I] >= int(SystemZ::VectorBytes))
3949  Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
3950  }
3951 
3952  // Look for an instruction that can do the permute without resorting
3953  // to VPERM.
3954  unsigned OpNo0, OpNo1;
3955  SDValue Op;
3956  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
3957  Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
3958  else
3959  Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
3960  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
3961 }
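// Example (editor annotation): with four operands the loop body runs once
// at Stride = 1, combining (Ops[0], Ops[1]) into Ops[0] and
// (Ops[2], Ops[3]) into Ops[2]; the code after the loop then moves Ops[2]
// into Ops[1] and the root permute is emitted over those two survivors.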
3962 
3963 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
3964 static bool isScalarToVector(SDValue Op) {
3965  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
3966  if (!Op.getOperand(I).isUndef())
3967  return false;
3968  return true;
3969 }
3970 
3971 // Return a vector of type VT that contains Value in the first element.
3972 // The other elements don't matter.
3973 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3974  SDValue Value) {
3975  // If we have a constant, replicate it to all elements and let the
3976  // BUILD_VECTOR lowering take care of it.
3977  if (Value.getOpcode() == ISD::Constant ||
3978  Value.getOpcode() == ISD::ConstantFP) {
3979  SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
3980  return DAG.getBuildVector(VT, DL, Ops);
3981  }
3982  if (Value.isUndef())
3983  return DAG.getUNDEF(VT);
3984  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
3985 }
3986 
3987 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
3988 // element 1. Used for cases in which replication is cheap.
3989 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3990  SDValue Op0, SDValue Op1) {
3991  if (Op0.isUndef()) {
3992  if (Op1.isUndef())
3993  return DAG.getUNDEF(VT);
3994  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
3995  }
3996  if (Op1.isUndef())
3997  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
3998  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
3999  buildScalarToVector(DAG, DL, VT, Op0),
4000  buildScalarToVector(DAG, DL, VT, Op1));
4001 }
4002 
4003 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4004 // vector for them.
4005 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
4006  SDValue Op1) {
4007  if (Op0.isUndef() && Op1.isUndef())
4008  return DAG.getUNDEF(MVT::v2i64);
4009  // If one of the two inputs is undefined then replicate the other one,
4010  // in order to avoid using another register unnecessarily.
4011  if (Op0.isUndef())
4012  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4013  else if (Op1.isUndef())
4014  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4015  else {
4016  Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4017  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4018  }
4019  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
4020 }
4021 
4022 // Try to represent constant BUILD_VECTOR node BVN using a
4023 // SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
4024 // on success.
4025 static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
4026  EVT ElemVT = BVN->getValueType(0).getVectorElementType();
4027  unsigned BytesPerElement = ElemVT.getStoreSize();
4028  for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
4029  SDValue Op = BVN->getOperand(I);
4030  if (!Op.isUndef()) {
4031  uint64_t Value;
4032  if (Op.getOpcode() == ISD::Constant)
4033  Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
4034  else if (Op.getOpcode() == ISD::ConstantFP)
4035  Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
4036  .getZExtValue());
4037  else
4038  return false;
4039  for (unsigned J = 0; J < BytesPerElement; ++J) {
4040  uint64_t Byte = (Value >> (J * 8)) & 0xff;
4041  if (Byte == 0xff)
4042  Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
4043  else if (Byte != 0)
4044  return false;
4045  }
4046  }
4047  }
4048  return true;
4049 }
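// Worked example (editor annotation): for a v4i32 BUILD_VECTOR of
// { 0xff000000, 0, 0, 0x000000ff }, element 0 contributes its 0xff byte at
// mask bit (4 - 0 - 1) * 4 + 3 = 15 and element 3 contributes at bit 0,
// giving Mask = 0x8001 as the 16-bit byte mask for VGBM.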
4050 
4051 // Try to materialize a vector constant in which BitsPerElement-bit value Value
4052 // is replicated to fill the vector. VT is the type of the resulting
4053 // constant, which may have elements of a different size from BitsPerElement.
4054 // Return the SDValue of the constant on success, otherwise return
4055 // an empty value.
4056 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
4057  const SystemZInstrInfo *TII,
4058  const SDLoc &DL, EVT VT, uint64_t Value,
4059  unsigned BitsPerElement) {
4060  // Signed 16-bit values can be replicated using VREPI.
4061  int64_t SignedValue = SignExtend64(Value, BitsPerElement);
4062  if (isInt<16>(SignedValue)) {
4063  MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
4064  SystemZ::VectorBits / BitsPerElement);
4065  SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
4066  DAG.getConstant(SignedValue, DL, MVT::i32));
4067  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4068  }
4069  // See whether rotating the constant left some N places gives a value that
4070  // is one less than a power of 2 (i.e. all zeros followed by all ones).
4071  // If so we can use VGM.
4072  unsigned Start, End;
4073  if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
4074  // isRxSBGMask returns the bit numbers for a full 64-bit value,
4075  // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
4076 // bit numbers for a BitsPerElement value, so that 0 denotes
4077  // 1 << (BitsPerElement-1).
4078  Start -= 64 - BitsPerElement;
4079  End -= 64 - BitsPerElement;
4080  MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
4081  SystemZ::VectorBits / BitsPerElement);
4082  SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
4083  DAG.getConstant(Start, DL, MVT::i32),
4084  DAG.getConstant(End, DL, MVT::i32));
4085  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4086  }
4087  return SDValue();
4088 }
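// Worked example (editor annotation): with BitsPerElement = 32, the value
// 0x0000ff00 sign-extends to 65280, which misses the 16-bit VREPI range,
// but it is a contiguous run of ones, so isRxSBGMask succeeds and the
// constant is generated with VGM (ROTATE_MASK) from the run's start and
// end bit positions instead.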
4089 
4090 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4091 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4092 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
4093 // would benefit from this representation and return it if so.
4094 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
4095  BuildVectorSDNode *BVN) {
4096  EVT VT = BVN->getValueType(0);
4097  unsigned NumElements = VT.getVectorNumElements();
4098 
4099  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4100  // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
4101  // need a BUILD_VECTOR, add an additional placeholder operand for that
4102  // BUILD_VECTOR and store its operands in ResidueOps.
4103  GeneralShuffle GS(VT);
4104  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
4105  bool FoundOne = false;
4106  for (unsigned I = 0; I < NumElements; ++I) {
4107  SDValue Op = BVN->getOperand(I);
4108  if (Op.getOpcode() == ISD::TRUNCATE)
4109  Op = Op.getOperand(0);
4110  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4111  Op.getOperand(1).getOpcode() == ISD::Constant) {
4112  unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
4113  GS.add(Op.getOperand(0), Elem);
4114  FoundOne = true;
4115  } else if (Op.isUndef()) {
4116  GS.addUndef();
4117  } else {
4118  GS.add(SDValue(), ResidueOps.size());
4119  ResidueOps.push_back(BVN->getOperand(I));
4120  }
4121  }
4122 
4123  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
4124  if (!FoundOne)
4125  return SDValue();
4126 
4127  // Create the BUILD_VECTOR for the remaining elements, if any.
4128  if (!ResidueOps.empty()) {
4129  while (ResidueOps.size() < NumElements)
4130  ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
4131  for (auto &Op : GS.Ops) {
4132  if (!Op.getNode()) {
4133  Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
4134  break;
4135  }
4136  }
4137  }
4138  return GS.getNode(DAG, SDLoc(BVN));
4139 }
4140 
4141 // Combine GPR scalar values Elems into a vector of type VT.
4142 static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4143  SmallVectorImpl<SDValue> &Elems) {
4144  // See whether there is a single replicated value.
4145  SDValue Single;
4146  unsigned int NumElements = Elems.size();
4147  unsigned int Count = 0;
4148  for (auto Elem : Elems) {
4149  if (!Elem.isUndef()) {
4150  if (!Single.getNode())
4151  Single = Elem;
4152  else if (Elem != Single) {
4153  Single = SDValue();
4154  break;
4155  }
4156  Count += 1;
4157  }
4158  }
4159  // There are three cases here:
4160  //
4161  // - if the only defined element is a loaded one, the best sequence
4162  // is a replicating load.
4163  //
4164  // - otherwise, if the only defined element is an i64 value, we will
4165  // end up with the same VLVGP sequence regardless of whether we short-cut
4166  // for replication or fall through to the later code.
4167  //
4168  // - otherwise, if the only defined element is an i32 or smaller value,
4169  // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
4170  // This is only a win if the single defined element is used more than once.
4171  // In other cases we're better off using a single VLVGx.
4172  if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
4173  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
4174 
4175  // The best way of building a v2i64 from two i64s is to use VLVGP.
4176  if (VT == MVT::v2i64)
4177  return joinDwords(DAG, DL, Elems[0], Elems[1]);
4178 
4179  // Use a 64-bit merge high to combine two doubles.
4180  if (VT == MVT::v2f64)
4181  return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4182 
4183  // Build v4f32 values directly from the FPRs:
4184  //
4185  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
4186  //        V             V        VMRHF
4187  //      <ABxx>       <CDxx>
4188  //            V                  VMRHG
4189  //         <ABCD>
4190  if (VT == MVT::v4f32) {
4191  SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4192  SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
4193  // Avoid unnecessary undefs by reusing the other operand.
4194  if (Op01.isUndef())
4195  Op01 = Op23;
4196  else if (Op23.isUndef())
4197  Op23 = Op01;
4198  // Merging identical replications is a no-op.
4199  if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
4200  return Op01;
4201  Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
4202  Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
4203  SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
4204  DL, MVT::v2i64, Op01, Op23);
4205  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4206  }
4207 
4208  // Collect the constant terms.
4209  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
4210  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
4211 
4212  unsigned NumConstants = 0;
4213  for (unsigned I = 0; I < NumElements; ++I) {
4214  SDValue Elem = Elems[I];
4215  if (Elem.getOpcode() == ISD::Constant ||
4216  Elem.getOpcode() == ISD::ConstantFP) {
4217  NumConstants += 1;
4218  Constants[I] = Elem;
4219  Done[I] = true;
4220  }
4221  }
4222  // If there was at least one constant, fill in the other elements of
4223  // Constants with undefs to get a full vector constant and use that
4224  // as the starting point.
4225  SDValue Result;
4226  if (NumConstants > 0) {
4227  for (unsigned I = 0; I < NumElements; ++I)
4228  if (!Constants[I].getNode())
4229  Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
4230  Result = DAG.getBuildVector(VT, DL, Constants);
4231  } else {
4232  // Otherwise try to use VLVGP to start the sequence in order to
4233  // avoid a false dependency on any previous contents of the vector
4234  // register. This only makes sense if one of the associated elements
4235  // is defined.
4236  unsigned I1 = NumElements / 2 - 1;
4237  unsigned I2 = NumElements - 1;
4238  bool Def1 = !Elems[I1].isUndef();
4239  bool Def2 = !Elems[I2].isUndef();
4240  if (Def1 || Def2) {
4241  SDValue Elem1 = Elems[Def1 ? I1 : I2];
4242  SDValue Elem2 = Elems[Def2 ? I2 : I1];
4243  Result = DAG.getNode(ISD::BITCAST, DL, VT,
4244  joinDwords(DAG, DL, Elem1, Elem2));
4245  Done[I1] = true;
4246  Done[I2] = true;
4247  } else
4248  Result = DAG.getUNDEF(VT);
4249  }
4250 
4251  // Use VLVGx to insert the other elements.
4252  for (unsigned I = 0; I < NumElements; ++I)
4253  if (!Done[I] && !Elems[I].isUndef())
4254  Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
4255  DAG.getConstant(I, DL, MVT::i32));
4256  return Result;
4257 }
4258 
4259 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
4260  SelectionDAG &DAG) const {
4261  const SystemZInstrInfo *TII =
4262  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
4263  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
4264  SDLoc DL(Op);
4265  EVT VT = Op.getValueType();
4266 
4267  if (BVN->isConstant()) {
4268  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
4269  // preferred way of creating all-zero and all-one vectors so give it
4270  // priority over other methods below.
4271  uint64_t Mask = 0;
4272  if (tryBuildVectorByteMask(BVN, Mask)) {
4273  SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
4274  DAG.getConstant(Mask, DL, MVT::i32));
4275  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4276  }
4277 
4278  // Try using some form of replication.
4279  APInt SplatBits, SplatUndef;
4280  unsigned SplatBitSize;
4281  bool HasAnyUndefs;
4282  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4283  8, true) &&
4284  SplatBitSize <= 64) {
4285  // First try assuming that any undefined bits above the highest set bit
4286  // and below the lowest set bit are 1s. This increases the likelihood of
4287  // being able to use a sign-extended element value in VECTOR REPLICATE
4288  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
4289  uint64_t SplatBitsZ = SplatBits.getZExtValue();
4290  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
4291  uint64_t Lower = (SplatUndefZ
4292  & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
4293  uint64_t Upper = (SplatUndefZ
4294  & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
4295  uint64_t Value = SplatBitsZ | Upper | Lower;
4296  SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
4297  SplatBitSize);
4298  if (Op.getNode())
4299  return Op;
4300 
4301  // Now try assuming that any undefined bits between the first and
4302  // last defined set bits are set. This increases the chances of
4303  // using a non-wraparound mask.
4304  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
4305  Value = SplatBitsZ | Middle;
4306  Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
4307  if (Op.getNode())
4308  return Op;
4309  }
4310 
4311  // Fall back to loading it from memory.
4312  return SDValue();
4313  }
4314 
4315  // See if we should use shuffles to construct the vector from other vectors.
4316  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
4317  return Res;
4318 
4319  // Detect SCALAR_TO_VECTOR conversions.
4320  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
4321  return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
4322 
4323  // Otherwise use buildVector to build the vector up from GPRs.
4324  unsigned NumElements = Op.getNumOperands();
4325  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
4326  for (unsigned I = 0; I < NumElements; ++I)
4327  Ops[I] = Op.getOperand(I);
4328  return buildVector(DAG, DL, VT, Ops);
4329 }
4330 
4331 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
4332  SelectionDAG &DAG) const {
4333  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
4334  SDLoc DL(Op);
4335  EVT VT = Op.getValueType();
4336  unsigned NumElements = VT.getVectorNumElements();
4337 
4338  if (VSN->isSplat()) {
4339  SDValue Op0 = Op.getOperand(0);
4340  unsigned Index = VSN->getSplatIndex();
4341  assert(Index < VT.getVectorNumElements() &&
4342  "Splat index should be defined and in first operand");
4343  // See whether the value we're splatting is directly available as a scalar.
4344  if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4345  Op0.getOpcode() == ISD::BUILD_VECTOR)
4346  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
4347  // Otherwise keep it as a vector-to-vector operation.
4348  return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
4349  DAG.getConstant(Index, DL, MVT::i32));
4350  }
4351 
4352  GeneralShuffle GS(VT);
4353  for (unsigned I = 0; I < NumElements; ++I) {
4354  int Elt = VSN->getMaskElt(I);
4355  if (Elt < 0)
4356  GS.addUndef();
4357  else
4358  GS.add(Op.getOperand(unsigned(Elt) / NumElements),
4359  unsigned(Elt) % NumElements);
4360  }
4361  return GS.getNode(DAG, SDLoc(VSN));
4362 }
4363 
4364 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
4365  SelectionDAG &DAG) const {
4366  SDLoc DL(Op);
4367  // Just insert the scalar into element 0 of an undefined vector.
4368  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4369  Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
4370  Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
4371 }
4372 
4373 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
4374  SelectionDAG &DAG) const {
4375  // Handle insertions of floating-point values.
4376  SDLoc DL(Op);
4377  SDValue Op0 = Op.getOperand(0);
4378  SDValue Op1 = Op.getOperand(1);
4379  SDValue Op2 = Op.getOperand(2);
4380  EVT VT = Op.getValueType();
4381 
4382  // Insertions into constant indices of a v2f64 can be done using VPDI.
4383  // However, if the inserted value is a bitcast or a constant then it's
4384  // better to use GPRs, as below.
4385  if (VT == MVT::v2f64 &&
4386  Op1.getOpcode() != ISD::BITCAST &&
4387  Op1.getOpcode() != ISD::ConstantFP &&
4388  Op2.getOpcode() == ISD::Constant) {
4389  uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue();
4390  unsigned Mask = VT.getVectorNumElements() - 1;
4391  if (Index <= Mask)
4392  return Op;
4393  }
4394 
4395  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
4396  MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
4397  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
4398  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
4399  DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
4400  DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
4401  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4402 }
4403 
4404 SDValue
4405 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
4406  SelectionDAG &DAG) const {
4407  // Handle extractions of floating-point values.
4408  SDLoc DL(Op);
4409  SDValue Op0 = Op.getOperand(0);
4410  SDValue Op1 = Op.getOperand(1);
4411  EVT VT = Op.getValueType();
4412  EVT VecVT = Op0.getValueType();
4413 
4414  // Extractions of constant indices can be done directly.
4415  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
4416  uint64_t Index = CIndexN->getZExtValue();
4417  unsigned Mask = VecVT.getVectorNumElements() - 1;
4418  if (Index <= Mask)
4419  return Op;
4420  }
4421 
4422  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
4423  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
4424  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
4425  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
4426  DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
4427  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4428 }
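// Example (editor annotation): an f64 extraction with a variable index
// does not hit the constant fast path above, so it is rewritten as a
// v2i64/i64 extraction wrapped in bitcasts; the integer form can then be
// selected as a single VLGVG into a GPR.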
4429 
4430 SDValue
4431 SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
4432  unsigned UnpackHigh) const {
4433  SDValue PackedOp = Op.getOperand(0);
4434  EVT OutVT = Op.getValueType();
4435  EVT InVT = PackedOp.getValueType();
4436  unsigned ToBits = OutVT.getScalarSizeInBits();
4437  unsigned FromBits = InVT.getScalarSizeInBits();
4438  do {
4439  FromBits *= 2;
4440  EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
4441  SystemZ::VectorBits / FromBits);
4442  PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
4443  } while (FromBits != ToBits);
4444  return PackedOp;
4445 }
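// Example (editor annotation): sign-extending a packed v16i8 operand into
// a v4i32 result runs the loop twice, with UnpackHigh doubling the element
// width each time: v16i8 -> v8i16 (FromBits 8 -> 16) and then
// v8i16 -> v4i32 (16 -> 32), always keeping the high half of the vector.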
4446 
4447 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
4448  unsigned ByScalar) const {
4449  // Look for cases where a vector shift can use the *_BY_SCALAR form.
4450  SDValue Op0 = Op.getOperand(0);
4451  SDValue Op1 = Op.getOperand(1);
4452  SDLoc DL(Op);
4453  EVT VT = Op.getValueType();
4454  unsigned ElemBitSize = VT.getScalarSizeInBits();
4455 
4456  // See whether the shift vector is a splat represented as BUILD_VECTOR.
4457  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
4458  APInt SplatBits, SplatUndef;
4459  unsigned SplatBitSize;
4460  bool HasAnyUndefs;
4461  // Check for constant splats. Use ElemBitSize as the minimum element
4462  // width and reject splats that need wider elements.
4463  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4464  ElemBitSize, true) &&
4465  SplatBitSize == ElemBitSize) {
4466  SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
4467  DL, MVT::i32);
4468  return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4469  }
4470  // Check for variable splats.
4471  BitVector UndefElements;
4472  SDValue Splat = BVN->getSplatValue(&UndefElements);
4473  if (Splat) {
4474  // Since i32 is the smallest legal type, we either need a no-op
4475  // or a truncation.
4476  SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
4477  return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4478  }
4479  }
4480 
4481  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
4482  // and the shift amount is directly available in a GPR.
4483  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
4484  if (VSN->isSplat()) {
4485  SDValue VSNOp0 = VSN->getOperand(0);
4486  unsigned Index = VSN->getSplatIndex();
4487  assert(Index < VT.getVectorNumElements() &&
4488  "Splat index should be defined and in first operand");
4489  if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4490  VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
4491  // Since i32 is the smallest legal type, we either need a no-op
4492  // or a truncation.
4493  SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
4494  VSNOp0.getOperand(Index));
4495  return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4496  }
4497  }
4498  }
4499 
4500  // Otherwise just treat the current form as legal.
4501  return Op;
4502 }
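// Example (editor annotation): (shl <4 x i32> %x, <2, 2, 2, 2>) hits the
// constant-splat path above and becomes VSHL_BY_SCALAR(%x, 2), which can
// be selected as a single VESLF with an immediate shift rather than an
// element-wise shift by a vector of amounts.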
4503 
4504 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
4505  SelectionDAG &DAG) const {
4506  switch (Op.getOpcode()) {
4507  case ISD::FRAMEADDR:
4508  return lowerFRAMEADDR(Op, DAG);
4509  case ISD::RETURNADDR:
4510  return lowerRETURNADDR(Op, DAG);
4511  case ISD::BR_CC:
4512  return lowerBR_CC(Op, DAG);
4513  case ISD::SELECT_CC:
4514  return lowerSELECT_CC(Op, DAG);
4515  case ISD::SETCC:
4516  return lowerSETCC(Op, DAG);
4517  case ISD::GlobalAddress:
4518  return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
4519  case ISD::GlobalTLSAddress:
4520  return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
4521  case ISD::BlockAddress:
4522  return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
4523  case ISD::JumpTable:
4524  return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
4525  case ISD::ConstantPool:
4526  return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
4527  case ISD::BITCAST:
4528  return lowerBITCAST(Op, DAG);
4529  case ISD::VASTART:
4530  return lowerVASTART(Op, DAG);
4531  case ISD::VACOPY:
4532  return lowerVACOPY(Op, DAG);
4533  case ISD::DYNAMIC_STACKALLOC:
4534  return lowerDYNAMIC_STACKALLOC(Op, DAG);
4535  case ISD::GET_DYNAMIC_AREA_OFFSET:
4536  return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
4537  case ISD::SMUL_LOHI:
4538  return lowerSMUL_LOHI(Op, DAG);
4539  case ISD::UMUL_LOHI:
4540  return lowerUMUL_LOHI(Op, DAG);
4541  case ISD::SDIVREM:
4542  return lowerSDIVREM(Op, DAG);
4543  case ISD::UDIVREM:
4544  return lowerUDIVREM(Op, DAG);
4545  case ISD::OR:
4546  return lowerOR(Op, DAG);
4547  case ISD::CTPOP:
4548  return lowerCTPOP(Op, DAG);
4549  case ISD::ATOMIC_FENCE:
4550  return lowerATOMIC_FENCE(Op, DAG);
4551  case ISD::ATOMIC_SWAP:
4552  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
4553  case ISD::ATOMIC_STORE:
4554  return lowerATOMIC_STORE(Op, DAG);
4555  case ISD::ATOMIC_LOAD:
4556  return lowerATOMIC_LOAD(Op, DAG);
4557  case ISD::ATOMIC_LOAD_ADD:
4558  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
4559  case ISD::ATOMIC_LOAD_SUB:
4560  return lowerATOMIC_LOAD_SUB(Op, DAG);
4561  case ISD::ATOMIC_LOAD_AND:
4562  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
4563  case ISD::ATOMIC_LOAD_OR:
4564  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
4565  case ISD::ATOMIC_LOAD_XOR:
4566  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
4567  case ISD::ATOMIC_LOAD_NAND:
4568  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
4569  case ISD::ATOMIC_LOAD_MIN:
4570  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
4571  case ISD::ATOMIC_LOAD_MAX:
4572  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
4573  case ISD::ATOMIC_LOAD_UMIN:
4574  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
4575  case ISD::ATOMIC_LOAD_UMAX:
4576  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
4577  case ISD::ATOMIC_CMP_SWAP:
4578  return lowerATOMIC_CMP_SWAP(Op, DAG);
4579  case ISD::STACKSAVE:
4580  return lowerSTACKSAVE(Op, DAG);
4581  case ISD::STACKRESTORE:
4582  return lowerSTACKRESTORE(Op, DAG);
4583  case ISD::PREFETCH:
4584  return lowerPREFETCH(Op, DAG);
4585  case ISD::INTRINSIC_W_CHAIN:
4586  return lowerINTRINSIC_W_CHAIN(Op, DAG);
4587  case ISD::INTRINSIC_WO_CHAIN:
4588  return lowerINTRINSIC_WO_CHAIN(Op, DAG);
4589  case ISD::BUILD_VECTOR:
4590  return lowerBUILD_VECTOR(Op, DAG);
4591  case ISD::VECTOR_SHUFFLE:
4592  return lowerVECTOR_SHUFFLE(Op, DAG);
4593  case ISD::SCALAR_TO_VECTOR:
4594  return lowerSCALAR_TO_VECTOR(Op, DAG);
4595  case ISD::INSERT_VECTOR_ELT:
4596  return lowerINSERT_VECTOR_ELT(Op, DAG);
4597  case ISD::EXTRACT_VECTOR_ELT:
4598  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
4599  case ISD::SIGN_EXTEND_VECTOR_INREG:
4600  return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
4601  case ISD::ZERO_EXTEND_VECTOR_INREG:
4602  return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
4603  case ISD::SHL:
4604  return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
4605  case ISD::SRL:
4606  return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
4607  case ISD::SRA:
4608  return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
4609  default:
4610  llvm_unreachable("Unexpected node to lower");
4611  }
4612 }
4613 
4614 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
4615 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
4616  switch ((SystemZISD::NodeType)Opcode) {
4617  case SystemZISD::FIRST_NUMBER: break;
4618  OPCODE(RET_FLAG);
4619  OPCODE(CALL);
4620  OPCODE(SIBCALL);
4621  OPCODE(TLS_GDCALL);
4622  OPCODE(TLS_LDCALL);
4623  OPCODE(PCREL_WRAPPER);
4624  OPCODE(PCREL_OFFSET);
4625  OPCODE(IABS);
4626  OPCODE(ICMP);
4627  OPCODE(FCMP);
4628  OPCODE(TM);
4629  OPCODE(BR_CCMASK);
4630  OPCODE(SELECT_CCMASK);
4631  OPCODE(ADJDYNALLOC);
4632  OPCODE(POPCNT);
4633  OPCODE(UMUL_LOHI64);
4634  OPCODE(SDIVREM32);
4635  OPCODE(SDIVREM64);
4636  OPCODE(UDIVREM32);
4637  OPCODE(UDIVREM64);
4638  OPCODE(MVC);
4639  OPCODE(MVC_LOOP);
4640  OPCODE(NC);
4641  OPCODE(NC_LOOP);
4642  OPCODE(OC);
4643  OPCODE(OC_LOOP);
4644  OPCODE(XC);
4645  OPCODE(XC_LOOP);
4646  OPCODE(CLC);
4647  OPCODE(CLC_LOOP);
4648  OPCODE(STPCPY);
4649  OPCODE(STRCMP);
4650  OPCODE(SEARCH_STRING);
4651  OPCODE(IPM);
4652  OPCODE(SERIALIZE);
4653  OPCODE(MEMBARRIER);
4654  OPCODE(TBEGIN);
4655  OPCODE(TBEGIN_NOFLOAT);
4656  OPCODE(TEND);
4657  OPCODE(BYTE_MASK);
4658  OPCODE(ROTATE_MASK);
4659  OPCODE(REPLICATE);
4660  OPCODE(JOIN_DWORDS);
4661  OPCODE(SPLAT);
4662  OPCODE(MERGE_HIGH);
4663  OPCODE(MERGE_LOW);
4664  OPCODE(SHL_DOUBLE);
4665  OPCODE(PERMUTE_DWORDS);
4666  OPCODE(PERMUTE);
4667  OPCODE(PACK);
4668  OPCODE(PACKS_CC);
4669  OPCODE(PACKLS_CC);
4670  OPCODE(UNPACK_HIGH);
4671  OPCODE(UNPACKL_HIGH);
4672  OPCODE(UNPACK_LOW);
4673  OPCODE(UNPACKL_LOW);
4674  OPCODE(VSHL_BY_SCALAR);
4675  OPCODE(VSRL_BY_SCALAR);
4676  OPCODE(VSRA_BY_SCALAR);
4677  OPCODE(VSUM);
4678  OPCODE(VICMPE);
4679  OPCODE(VICMPH);
4680  OPCODE(VICMPHL);
4681  OPCODE(VICMPES);
4682  OPCODE(VICMPHS);
4683  OPCODE(VICMPHLS);
4684  OPCODE(VFCMPE);
4685  OPCODE(VFCMPH);
4686  OPCODE(VFCMPHE);
4687  OPCODE(VFCMPES);
4688  OPCODE(VFCMPHS);
4689  OPCODE(VFCMPHES);
4690  OPCODE(VFTCI);
4691  OPCODE(VEXTEND);
4692  OPCODE(VROUND);
4693  OPCODE(VTM);
4694  OPCODE(VFAE_CC);
4695  OPCODE(VFAEZ_CC);
4696  OPCODE(VFEE_CC);
4697  OPCODE(VFEEZ_CC);
4698  OPCODE(VFENE_CC);
4699  OPCODE(VFENEZ_CC);
4700  OPCODE(VISTR_CC);
4701  OPCODE(VSTRC_CC);
4702  OPCODE(VSTRCZ_CC);
4703  OPCODE(TDC);
4704  OPCODE(ATOMIC_SWAPW);
4705  OPCODE(ATOMIC_LOADW_ADD);
4706  OPCODE(ATOMIC_LOADW_SUB);
4707  OPCODE(ATOMIC_LOADW_AND);
4708  OPCODE(ATOMIC_LOADW_OR);
4709  OPCODE(ATOMIC_LOADW_XOR);
4710  OPCODE(ATOMIC_LOADW_NAND);
4711  OPCODE(ATOMIC_LOADW_MIN);
4712  OPCODE(ATOMIC_LOADW_MAX);
4713  OPCODE(ATOMIC_LOADW_UMIN);
4714  OPCODE(ATOMIC_LOADW_UMAX);
4715  OPCODE(ATOMIC_CMP_SWAPW);
4716  OPCODE(LRV);
4717  OPCODE(STRV);
4718  OPCODE(PREFETCH);
4719  }
4720  return nullptr;
4721 #undef OPCODE
4722 }
4723 
4724 // Return true if VT is a vector whose elements are a whole number of bytes
4725 // in width.
4726 static bool canTreatAsByteVector(EVT VT) {
4727  return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0;
4728 }
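// A hand-worked sketch of the predicate (types assumed for illustration,
// not taken from a test): element sizes that are whole bytes pass, others
// do not.
//
//   canTreatAsByteVector(MVT::v4i32);  // 32 % 8 == 0 -> true
//   canTreatAsByteVector(MVT::v16i8);  //  8 % 8 == 0 -> true
//   canTreatAsByteVector(MVT::v4i1);   //  1 % 8 != 0 -> false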
4729 
4730 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
4731 // producing a result of type ResVT. Op is a possibly bitcast version
4732 // of the input vector and Index is the index (based on type VecVT) that
4733 // should be extracted. Return the new extraction if a simplification
4734 // was possible or if Force is true.
4735 SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
4736  EVT VecVT, SDValue Op,
4737  unsigned Index,
4738  DAGCombinerInfo &DCI,
4739  bool Force) const {
4740  SelectionDAG &DAG = DCI.DAG;
4741 
4742  // The number of bytes being extracted.
4743  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
4744 
4745  for (;;) {
4746  unsigned Opcode = Op.getOpcode();
4747  if (Opcode == ISD::BITCAST)
4748  // Look through bitcasts.
4749  Op = Op.getOperand(0);
4750  else if (Opcode == ISD::VECTOR_SHUFFLE &&
4751  canTreatAsByteVector(Op.getValueType())) {
4752  // Get a VPERM-like permute mask and see whether the bytes covered
4753  // by the extracted element are a contiguous sequence from one
4754  // source operand.
4755  SmallVector<int, SystemZ::VectorBytes> Bytes;
4756  getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
4757  int First;
4758  if (!getShuffleInput(Bytes, Index * BytesPerElement,
4759  BytesPerElement, First))
4760  break;
4761  if (First < 0)
4762  return DAG.getUNDEF(ResVT);
4763  // Make sure the contiguous sequence starts at a multiple of the
4764  // original element size.
4765  unsigned Byte = unsigned(First) % Bytes.size();
4766  if (Byte % BytesPerElement != 0)
4767  break;
4768  // We can get the extracted value directly from an input.
4769  Index = Byte / BytesPerElement;
4770  Op = Op.getOperand(unsigned(First) / Bytes.size());
4771  Force = true;
4772  } else if (Opcode == ISD::BUILD_VECTOR &&
4773  canTreatAsByteVector(Op.getValueType())) {
4774  // We can only optimize this case if the BUILD_VECTOR elements are
4775  // at least as wide as the extracted value.
4776  EVT OpVT = Op.getValueType();
4777  unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
4778  if (OpBytesPerElement < BytesPerElement)
4779  break;
4780  // Make sure that the least-significant bit of the extracted value
4781  // is the least significant bit of an input.
4782  unsigned End = (Index + 1) * BytesPerElement;
4783  if (End % OpBytesPerElement != 0)
4784  break;
4785  // We're extracting the low part of one operand of the BUILD_VECTOR.
4786  Op = Op.getOperand(End / OpBytesPerElement - 1);
4787  if (!Op.getValueType().isInteger()) {
4788  EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
4789  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4790  DCI.AddToWorklist(Op.getNode());
4791  }
4792  EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
4793  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4794  if (VT != ResVT) {
4795  DCI.AddToWorklist(Op.getNode());
4796  Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
4797  }
4798  return Op;
4799  } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
4800  Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
4801  Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
4802  canTreatAsByteVector(Op.getValueType()) &&
4803  canTreatAsByteVector(Op.getOperand(0).getValueType())) {
4804  // Make sure that only the unextended bits are significant.
4805  EVT ExtVT = Op.getValueType();
4806  EVT OpVT = Op.getOperand(0).getValueType();
4807  unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
4808  unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
4809  unsigned Byte = Index * BytesPerElement;
4810  unsigned SubByte = Byte % ExtBytesPerElement;
4811  unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
4812  if (SubByte < MinSubByte ||
4813  SubByte + BytesPerElement > ExtBytesPerElement)
4814  break;
4815  // Get the byte offset of the unextended element
4816  Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
4817  // ...then add the byte offset relative to that element.
4818  Byte += SubByte - MinSubByte;
4819  if (Byte % BytesPerElement != 0)
4820  break;
4821  Op = Op.getOperand(0);
4822  Index = Byte / BytesPerElement;
4823  Force = true;
4824  } else
4825  break;
4826  }
4827  if (Force) {
4828  if (Op.getValueType() != VecVT) {
4829  Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
4830  DCI.AddToWorklist(Op.getNode());
4831  }
4832  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
4833  DAG.getConstant(Index, DL, MVT::i32));
4834  }
4835  return SDValue();
4836 }
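// A worked instance of the BUILD_VECTOR arm above (shapes assumed for
// illustration): extracting i8 element 7 from a v16i8 bitcast of
// (v4i32 BUILD_VECTOR a, b, c, d) gives BytesPerElement = 1,
// OpBytesPerElement = 4 and End = (7 + 1) * 1 = 8; since 8 % 4 == 0 the
// extracted byte is the last (least-significant, big-endian) byte of
// operand 8 / 4 - 1 = 1, so the whole extraction folds to
// (trunc i32 b to i8).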
4837 
4838 // Optimize vector operations in scalar value Op on the basis that Op
4839 // is truncated to TruncVT.
4840 SDValue SystemZTargetLowering::combineTruncateExtract(
4841  const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
4842  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
4843  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
4844  // of type TruncVT.
4845  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4846  TruncVT.getSizeInBits() % 8 == 0) {
4847  SDValue Vec = Op.getOperand(0);
4848  EVT VecVT = Vec.getValueType();
4849  if (canTreatAsByteVector(VecVT)) {
4850  if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
4851  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
4852  unsigned TruncBytes = TruncVT.getStoreSize();
4853  if (BytesPerElement % TruncBytes == 0) {
4854  // Calculate the value of Y' in the above description. We are
4855  // splitting the original elements into Scale equal-sized pieces
4856  // and for truncation purposes want the last (least-significant)
4857  // of these pieces for IndexN. This is easiest to do by calculating
4858  // the start index of the following element and then subtracting 1.
4859  unsigned Scale = BytesPerElement / TruncBytes;
4860  unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
4861 
4862  // Defer the creation of the bitcast from X to combineExtract,
4863  // which might be able to optimize the extraction.
4864  VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
4865  VecVT.getStoreSize() / TruncBytes);
4866  EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
4867  return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
4868  }
4869  }
4870  }
4871  }
4872  return SDValue();
4873 }
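// Hand-worked index arithmetic for the rewrite above (values assumed):
// for (trunc i32 -> i8 (extract_vector_elt v4i32 X, 1)), BytesPerElement
// = 4 and TruncBytes = 1, so Scale = 4 and NewIndex = (1 + 1) * 4 - 1 = 7.
// X is recast as v16i8 and byte 7 -- the least-significant byte of
// element 1 on this big-endian target -- is extracted; ResVT is widened
// to i32 because TruncBytes < 4.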
4874 
4875 SDValue SystemZTargetLowering::combineSIGN_EXTEND(
4876  SDNode *N, DAGCombinerInfo &DCI) const {
4877  // Convert (sext (ashr (shl X, C1), C2)) to
4878  // (ashr (shl (anyext X), C1'), C2'), since wider shifts are as
4879  // cheap as narrower ones.
4880  SelectionDAG &DAG = DCI.DAG;
4881  SDValue N0 = N->getOperand(0);
4882  EVT VT = N->getValueType(0);
4883  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
4884  auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4885  SDValue Inner = N0.getOperand(0);
4886  if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
4887  if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
4888  unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
4889  unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
4890  unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
4891  EVT ShiftVT = N0.getOperand(1).getValueType();
4892  SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
4893  Inner.getOperand(0));
4894  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
4895  DAG.getConstant(NewShlAmt, SDLoc(Inner),
4896  ShiftVT));
4897  return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
4898  DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
4899  }
4900  }
4901  }
4902  return SDValue();
4903 }
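// A hand-worked instance (constants assumed): with X : i32, lowering
// (sext i64 (ashr (shl X, 24), 24)) computes Extra = 64 - 32 = 32 and
// produces (ashr i64 (shl i64 (anyext X), 56), 56).  The same 8-bit field
// is sign-extended, but now with 64-bit shifts and no extension around
// the inner 32-bit operation.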
4904 
4905 SDValue SystemZTargetLowering::combineMERGE(
4906  SDNode *N, DAGCombinerInfo &DCI) const {
4907  SelectionDAG &DAG = DCI.DAG;
4908  unsigned Opcode = N->getOpcode();
4909  SDValue Op0 = N->getOperand(0);
4910  SDValue Op1 = N->getOperand(1);
4911  if (Op0.getOpcode() == ISD::BITCAST)
4912  Op0 = Op0.getOperand(0);
4913  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
4914  cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
4915  // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
4916  // for v4f32.
4917  if (Op1 == N->getOperand(0))
4918  return Op1;
4919  // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
4920  EVT VT = Op1.getValueType();
4921  unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
4922  if (ElemBytes <= 4) {
4923  Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
4924  SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
4925  EVT InVT = VT.changeVectorElementTypeToInteger();
4926  EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
4927  SystemZ::VectorBytes / ElemBytes / 2);
4928  if (VT != InVT) {
4929  Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
4930  DCI.AddToWorklist(Op1.getNode());
4931  }
4932  SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
4933  DCI.AddToWorklist(Op.getNode());
4934  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
4935  }
4936  }
4937  return SDValue();
4938 }
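// Sketch of the (z_merge_? 0, X) rewrite (types assumed): for
// (z_merge_high (v4i32 byte_mask 0), X : v4i32), ElemBytes = 4, so the
// opcode becomes UNPACKL_HIGH, InVT stays v4i32 and OutVT is v2i64
// (= 16 / 4 / 2 elements of 4 * 16 bits): each element from the high half
// of X is zero-extended into a 64-bit lane, matching the vector
// unpack-logical instructions.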
4939 
4940 SDValue SystemZTargetLowering::combineSTORE(
4941  SDNode *N, DAGCombinerInfo &DCI) const {
4942  SelectionDAG &DAG = DCI.DAG;
4943  auto *SN = cast<StoreSDNode>(N);
4944  auto &Op1 = N->getOperand(1);
4945  EVT MemVT = SN->getMemoryVT();
4946  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
4947  // for the extraction to be done on a vMiN value, so that we can use VSTE.
4948  // If X has wider elements, convert it to:
4949  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
4950  if (MemVT.isInteger()) {
4951  if (SDValue Value =
4952  combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
4953  DCI.AddToWorklist(Value.getNode());
4954 
4955  // Rewrite the store with the new form of stored value.
4956  return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
4957  SN->getBasePtr(), SN->getMemoryVT(),
4958  SN->getMemOperand());
4959  }
4960  }
4961  // Combine STORE (BSWAP) into STRVH/STRV/STRVG
4962  // See comment in combineBSWAP about volatile accesses.
4963  if (!SN->isVolatile() &&
4964  Op1.getOpcode() == ISD::BSWAP &&
4965  Op1.getNode()->hasOneUse() &&
4966  (Op1.getValueType() == MVT::i16 ||
4967  Op1.getValueType() == MVT::i32 ||
4968  Op1.getValueType() == MVT::i64)) {
4969 
4970  SDValue BSwapOp = Op1.getOperand(0);
4971 
4972  if (BSwapOp.getValueType() == MVT::i16)
4973  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
4974 
4975  SDValue Ops[] = {
4976  N->getOperand(0), BSwapOp, N->getOperand(2),
4977  DAG.getValueType(Op1.getValueType())
4978  };
4979 
4980  return
4981  DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
4982  Ops, MemVT, SN->getMemOperand());
4983  }
4984  return SDValue();
4985 }
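// Sketch of the byte-swapped-store rewrite (a hypothetical DAG, not
// generated output): (store (bswap X : i32), P) is rebuilt as a STRV
// memory intrinsic with operands {chain, X, P, ValueType:i32}, which
// instruction selection turns into the store-reversed instruction, so no
// explicit swap remains; an i16 value is first any-extended to i32.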
4986 
4987 SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
4988  SDNode *N, DAGCombinerInfo &DCI) const {
4989  // Try to simplify a vector extraction.
4990  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
4991  SDValue Op0 = N->getOperand(0);
4992  EVT VecVT = Op0.getValueType();
4993  return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
4994  IndexN->getZExtValue(), DCI, false);
4995  }
4996  return SDValue();
4997 }
4998 
4999 SDValue SystemZTargetLowering::combineJOIN_DWORDS(
5000  SDNode *N, DAGCombinerInfo &DCI) const {
5001  SelectionDAG &DAG = DCI.DAG;
5002  // (join_dwords X, X) == (replicate X)
5003  if (N->getOperand(0) == N->getOperand(1))
5004  return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
5005  N->getOperand(0));
5006  return SDValue();
5007 }
5008 
5009 SDValue SystemZTargetLowering::combineFP_ROUND(
5010  SDNode *N, DAGCombinerInfo &DCI) const {
5011  // (fpround (extract_vector_elt X 0))
5012  // (fpround (extract_vector_elt X 1)) ->
5013  // (extract_vector_elt (VROUND X) 0)
5014  // (extract_vector_elt (VROUND X) 1)
5015  //
5016  // This is a special case since the target doesn't really support v2f32s.
5017  SelectionDAG &DAG = DCI.DAG;
5018  SDValue Op0 = N->getOperand(0);
5019  if (N->getValueType(0) == MVT::f32 &&
5020  Op0.hasOneUse() &&
5021  Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5022  Op0.getOperand(0).getValueType() == MVT::v2f64 &&
5023  Op0.getOperand(1).getOpcode() == ISD::Constant &&
5024  cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
5025  SDValue Vec = Op0.getOperand(0);
5026  for (auto *U : Vec->uses()) {
5027  if (U != Op0.getNode() &&
5028  U->hasOneUse() &&
5029  U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5030  U->getOperand(0) == Vec &&
5031  U->getOperand(1).getOpcode() == ISD::Constant &&
5032  cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
5033  SDValue OtherRound = SDValue(*U->use_begin(), 0);
5034  if (OtherRound.getOpcode() == ISD::FP_ROUND &&
5035  OtherRound.getOperand(0) == SDValue(U, 0) &&
5036  OtherRound.getValueType() == MVT::f32) {
5037  SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
5038  MVT::v4f32, Vec);
5039  DCI.AddToWorklist(VRound.getNode());
5040  SDValue Extract1 =
5041  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
5042  VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
5043  DCI.AddToWorklist(Extract1.getNode());
5044  DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
5045  SDValue Extract0 =
5047  VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
5048  return Extract0;
5049  }
5050  }
5051  }
5052  }
5053  return SDValue();
5054 }
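// Sketch of the pattern being matched (names assumed): when both lanes of
// V : v2f64 are rounded,
//   %a = fpround (extract_vector_elt V, 0)
//   %b = fpround (extract_vector_elt V, 1)
// a single %r = (v4f32 VROUND V) is emitted and the scalars become
// (extract_vector_elt %r, 0) and (extract_vector_elt %r, 2); VROUND fills
// only the even f32 lanes, which is why indices 0 and 2 are used.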
5055 
5056 SDValue SystemZTargetLowering::combineBSWAP(
5057  SDNode *N, DAGCombinerInfo &DCI) const {
5058  SelectionDAG &DAG = DCI.DAG;
5059  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
5060  // These loads are allowed to access memory multiple times, and so we must check
5061  // that the loads are not volatile before performing the combine.
5062  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
5063  N->getOperand(0).hasOneUse() &&
5064  (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
5065  N->getValueType(0) == MVT::i64) &&
5066  !cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
5067  SDValue Load = N->getOperand(0);
5068  LoadSDNode *LD = cast<LoadSDNode>(Load);
5069 
5070  // Create the byte-swapping load.
5071  SDValue Ops[] = {
5072  LD->getChain(), // Chain
5073  LD->getBasePtr(), // Ptr
5074  DAG.getValueType(N->getValueType(0)) // VT
5075  };
5076  SDValue BSLoad =
5077  DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
5078  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
5079  MVT::i64 : MVT::i32, MVT::Other),
5080  Ops, LD->getMemoryVT(), LD->getMemOperand());
5081 
5082  // If this is an i16 load, insert the truncate.
5083  SDValue ResVal = BSLoad;
5084  if (N->getValueType(0) == MVT::i16)
5085  ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
5086 
5087  // First, combine the bswap away. This makes the value produced by the
5088  // load dead.
5089  DCI.CombineTo(N, ResVal);
5090 
5091  // Next, combine the load away, we give it a bogus result value but a real
5092  // chain result. The result value is dead because the bswap is dead.
5093  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
5094 
5095  // Return N so it doesn't get rechecked!
5096  return SDValue(N, 0);
5097  }
5098  return SDValue();
5099 }
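// Sketch of the load case (a hypothetical DAG): (i32 (bswap (load P)))
// becomes an LRV memory intrinsic over {chain, P, ValueType:i32}, i.e. a
// single load-reversed instruction.  For i16 the node is built with an
// i32 result and a TRUNCATE back to i16 is appended, matching LRVH.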
5100 
5101 SDValue SystemZTargetLowering::combineSHIFTROT(
5102  SDNode *N, DAGCombinerInfo &DCI) const {
5103 
5104  SelectionDAG &DAG = DCI.DAG;
5105 
5106  // Shift/rotate instructions only use the last 6 bits of the second operand
5107  // register. If the second operand is the result of an AND with an immediate
5108  // value that has its last 6 bits set, we can safely remove the AND operation.
5109  //
5110  // If the AND operation doesn't have the last 6 bits set, we can't remove it
5111  // entirely, but we can still truncate it to a 16-bit value. This prevents
5112  // us from ending up with a NILL with a signed operand, which will cause the
5113  // instruction printer to abort.
5114  SDValue N1 = N->getOperand(1);
5115  if (N1.getOpcode() == ISD::AND) {
5116  SDValue AndMaskOp = N1->getOperand(1);
5117  auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
5118 
5119  // The AND mask is constant
5120  if (AndMask) {
5121  auto AmtVal = AndMask->getZExtValue();
5122 
5123  // Bottom 6 bits are set
5124  if ((AmtVal & 0x3f) == 0x3f) {
5125  SDValue AndOp = N1->getOperand(0);
5126 
5127  // This is the only use, so remove the node
5128  if (N1.hasOneUse()) {
5129  // Combine the AND away
5130  DCI.CombineTo(N1.getNode(), AndOp);
5131 
5132  // Return N so it isn't rechecked
5133  return SDValue(N, 0);
5134 
5135  // The node will be reused, so create a new node for this one use
5136  } else {
5137  SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
5138  N->getValueType(0), N->getOperand(0),
5139  AndOp);
5140  DCI.AddToWorklist(Replace.getNode());
5141 
5142  return Replace;
5143  }
5144 
5145  // We can't remove the AND, but we can use NILL here (normally we would
5146  // use NILF). Only keep the last 16 bits of the mask. The actual
5147  // transformation will be handled by .td definitions.
5148  } else if (AmtVal >> 16 != 0) {
5149  SDValue AndOp = N1->getOperand(0);
5150 
5151  auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
5152  SDLoc(AndMaskOp),
5153  AndMaskOp.getValueType());
5154 
5155  auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
5156  AndOp, NewMask);
5157 
5158  SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
5159  N->getValueType(0), N->getOperand(0),
5160  NewAnd);
5161  DCI.AddToWorklist(Replace.getNode());
5162 
5163  return Replace;
5164  }
5165  }
5166  }
5167 
5168  return SDValue();
5169 }
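// Two hand-worked mask cases (constants assumed for illustration):
//   (srl X, (and Y, 0x3f))    -- the bottom six bits of the mask are all
//                                set, so the AND is simply dropped.
//   (srl X, (and Y, 0x7001f)) -- bits above bit 15 are set, so the mask
//                                is cut to 0x001f; the low six bits of
//                                the shift amount are unchanged and the
//                                .td patterns can now select NILL.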
5170 
5171 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
5172  DAGCombinerInfo &DCI) const {
5173  switch(N->getOpcode()) {
5174  default: break;
5175  case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
5176  case SystemZISD::MERGE_HIGH:
5177  case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
5178  case ISD::STORE: return combineSTORE(N, DCI);
5179  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
5180  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
5181  case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
5182  case ISD::BSWAP: return combineBSWAP(N, DCI);
5183  case ISD::SHL:
5184  case ISD::SRA:
5185  case ISD::SRL:
5186  case ISD::ROTL: return combineSHIFTROT(N, DCI);
5187  }
5188 
5189  return SDValue();
5190 }
5191 
5192 //===----------------------------------------------------------------------===//
5193 // Custom insertion
5194 //===----------------------------------------------------------------------===//
5195 
5196 // Create a new basic block after MBB.
5197 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
5198  MachineFunction &MF = *MBB->getParent();
5199  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
5200  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
5201  return NewMBB;
5202 }
5203 
5204 // Split MBB after MI and return the new block (the one that contains
5205 // instructions after MI).
5206 static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
5207  MachineBasicBlock *MBB) {
5208  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
5209  NewMBB->splice(NewMBB->begin(), MBB,
5210  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
5211  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
5212  return NewMBB;
5213 }
5214 
5215 // Split MBB before MI and return the new block (the one that contains MI).
5216 static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
5217  MachineBasicBlock *MBB) {
5218  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
5219  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
5220  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
5221  return NewMBB;
5222 }
5223 
5224 // Force base value Base into a register before MI. Return the register.
5225 static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
5226  const SystemZInstrInfo *TII) {
5227  if (Base.isReg())
5228  return Base.getReg();
5229 
5230  MachineBasicBlock *MBB = MI.getParent();
5231  MachineFunction &MF = *MBB->getParent();
5232  MachineRegisterInfo &MRI = MF.getRegInfo();
5233 
5234  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
5235  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
5236  .addOperand(Base)
5237  .addImm(0)
5238  .addReg(0);
5239  return Reg;
5240 }
5241 
5242 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
5243 MachineBasicBlock *
5244 SystemZTargetLowering::emitSelect(MachineInstr &MI,
5245  MachineBasicBlock *MBB,
5246  unsigned LOCROpcode) const {
5247  const SystemZInstrInfo *TII =
5248  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5249 
5250  unsigned DestReg = MI.getOperand(0).getReg();
5251  unsigned TrueReg = MI.getOperand(1).getReg();
5252  unsigned FalseReg = MI.getOperand(2).getReg();
5253  unsigned CCValid = MI.getOperand(3).getImm();
5254  unsigned CCMask = MI.getOperand(4).getImm();
5255  DebugLoc DL = MI.getDebugLoc();
5256 
5257  // Use LOCROpcode if possible.
5258  if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) {
5259  BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg)
5260  .addReg(FalseReg).addReg(TrueReg)
5261  .addImm(CCValid).addImm(CCMask);
5262  MI.eraseFromParent();
5263  return MBB;
5264  }
5265 
5266  MachineBasicBlock *StartMBB = MBB;
5267  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
5268  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
5269 
5270  // StartMBB:
5271  // BRC CCMask, JoinMBB
5272  // # fallthrough to FalseMBB
5273  MBB = StartMBB;
5274  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5275  .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
5276  MBB->addSuccessor(JoinMBB);
5277  MBB->addSuccessor(FalseMBB);
5278 
5279  // FalseMBB:
5280  // # fallthrough to JoinMBB
5281  MBB = FalseMBB;
5282  MBB->addSuccessor(JoinMBB);
5283 
5284  // JoinMBB:
5285  // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
5286  // ...
5287  MBB = JoinMBB;
5288  BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
5289  .addReg(TrueReg).addMBB(StartMBB)
5290  .addReg(FalseReg).addMBB(FalseMBB);
5291 
5292  MI.eraseFromParent();
5293  return JoinMBB;
5294 }
5295 
5296 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
5297 // StoreOpcode is the store to use and Invert says whether the store should
5298 // happen when the condition is false rather than true. If a STORE ON
5299 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
5300 MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
5301  MachineBasicBlock *MBB,
5302  unsigned StoreOpcode,
5303  unsigned STOCOpcode,
5304  bool Invert) const {
5305  const SystemZInstrInfo *TII =
5306  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5307 
5308  unsigned SrcReg = MI.getOperand(0).getReg();
5309  MachineOperand Base = MI.getOperand(1);
5310  int64_t Disp = MI.getOperand(2).getImm();
5311  unsigned IndexReg = MI.getOperand(3).getReg();
5312  unsigned CCValid = MI.getOperand(4).getImm();
5313  unsigned CCMask = MI.getOperand(5).getImm();
5314  DebugLoc DL = MI.getDebugLoc();
5315 
5316  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
5317 
5318  // Use STOCOpcode if possible. We could use different store patterns in
5319  // order to avoid matching the index register, but the performance trade-offs
5320  // might be more complicated in that case.
5321  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
5322  if (Invert)
5323  CCMask ^= CCValid;
5324  BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
5325  .addReg(SrcReg).addOperand(Base).addImm(Disp)
5326  .addImm(CCValid).addImm(CCMask);
5327  MI.eraseFromParent();
5328  return MBB;
5329  }
5330 
5331  // Get the condition needed to branch around the store.
5332  if (!Invert)
5333  CCMask ^= CCValid;
5334 
5335  MachineBasicBlock *StartMBB = MBB;
5336  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
5337  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
5338 
5339  // StartMBB:
5340  // BRC CCMask, JoinMBB
5341  // # fallthrough to FalseMBB
5342  MBB = StartMBB;
5343  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5344  .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
5345  MBB->addSuccessor(JoinMBB);
5346  MBB->addSuccessor(FalseMBB);
5347 
5348  // FalseMBB:
5349  // store %SrcReg, %Disp(%Index,%Base)
5350  // # fallthrough to JoinMBB
5351  MBB = FalseMBB;
5352  BuildMI(MBB, DL, TII->get(StoreOpcode))
5353  .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
5354  MBB->addSuccessor(JoinMBB);
5355 
5356  MI.eraseFromParent();
5357  return JoinMBB;
5358 }
5359 
5360 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
5361 // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
5362 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
5363 // BitSize is the width of the field in bits, or 0 if this is a partword
5364 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
5365 // is one of the operands. Invert says whether the field should be
5366 // inverted after performing BinOpcode (e.g. for NAND).
5367 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
5368  MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
5369  unsigned BitSize, bool Invert) const {
5370  MachineFunction &MF = *MBB->getParent();
5371  const SystemZInstrInfo *TII =
5372  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5373  MachineRegisterInfo &MRI = MF.getRegInfo();
5374  bool IsSubWord = (BitSize < 32);
5375 
5376  // Extract the operands. Base can be a register or a frame index.
5377  // Src2 can be a register or immediate.
5378  unsigned Dest = MI.getOperand(0).getReg();
5379  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
5380  int64_t Disp = MI.getOperand(2).getImm();
5381  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
5382  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
5383  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
5384  DebugLoc DL = MI.getDebugLoc();
5385  if (IsSubWord)
5386  BitSize = MI.getOperand(6).getImm();
5387 
5388  // Subword operations use 32-bit registers.
5389  const TargetRegisterClass *RC = (BitSize <= 32 ?
5390  &SystemZ::GR32BitRegClass :
5391  &SystemZ::GR64BitRegClass);
5392  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
5393  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
5394 
5395  // Get the right opcodes for the displacement.
5396  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
5397  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
5398  assert(LOpcode && CSOpcode && "Displacement out of range");
5399 
5400  // Create virtual registers for temporary results.
5401  unsigned OrigVal = MRI.createVirtualRegister(RC);
5402  unsigned OldVal = MRI.createVirtualRegister(RC);
5403  unsigned NewVal = (BinOpcode || IsSubWord ?
5404  MRI.createVirtualRegister(RC) : Src2.getReg());
5405  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
5406  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
5407 
5408  // Insert a basic block for the main loop.
5409  MachineBasicBlock *StartMBB = MBB;
5410  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
5411  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
5412 
5413  // StartMBB:
5414  // ...
5415  // %OrigVal = L Disp(%Base)
5416  // # fall through to LoopMBB
5417  MBB = StartMBB;
5418  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
5419  .addOperand(Base).addImm(Disp).addReg(0);
5420  MBB->addSuccessor(LoopMBB);
5421 
5422  // LoopMBB:
5423  // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
5424  // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
5425  // %RotatedNewVal = OP %RotatedOldVal, %Src2
5426  // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
5427  // %Dest = CS %OldVal, %NewVal, Disp(%Base)
5428  // JNE LoopMBB
5429  // # fall through to DoneMBB
5430  MBB = LoopMBB;
5431  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
5432  .addReg(OrigVal).addMBB(StartMBB)
5433  .addReg(Dest).addMBB(LoopMBB);
5434  if (IsSubWord)
5435  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
5436  .addReg(OldVal).addReg(BitShift).addImm(0);
5437  if (Invert) {
5438  // Perform the operation normally and then invert every bit of the field.
5439  unsigned Tmp = MRI.createVirtualRegister(RC);
5440  BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
5441  .addReg(RotatedOldVal).addOperand(Src2);
5442  if (BitSize <= 32)
5443  // XILF with the upper BitSize bits set.
5444  BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
5445  .addReg(Tmp).addImm(-1U << (32 - BitSize));
5446  else {
5447  // Use LCGR and add -1 to the result, which is more compact than
5448  // an XILF, XILH pair.
5449  unsigned Tmp2 = MRI.createVirtualRegister(RC);
5450  BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
5451  BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
5452  .addReg(Tmp2).addImm(-1);
5453  }
5454  } else if (BinOpcode)
5455  // A simple binary operation.
5456  BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
5457  .addReg(RotatedOldVal).addOperand(Src2);
5458  else if (IsSubWord)
5459  // Use RISBG to rotate Src2 into position and use it to replace the
5460  // field in RotatedOldVal.
5461  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
5462  .addReg(RotatedOldVal).addReg(Src2.getReg())
5463  .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
5464  if (IsSubWord)
5465  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
5466  .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
5467  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
5468  .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
5469  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5470  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
5471  MBB->addSuccessor(LoopMBB);
5472  MBB->addSuccessor(DoneMBB);
5473 
5474  MI.eraseFromParent();
5475  return DoneMBB;
5476 }
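// A worked constant for the Invert (NAND) path above, assuming an 8-bit
// field: the XILF immediate is -1U << (32 - 8) = 0xff000000, flipping
// exactly the eight field bits that RLL rotated to the top of the 32-bit
// register while leaving the neighbouring fields' bytes untouched.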
5477 
5478 // Implement EmitInstrWithCustomInserter for pseudo
5479 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
5480 // instruction that should be used to compare the current field with the
5481 // minimum or maximum value. KeepOldMask is the BRC condition-code mask
5482 // for when the current field should be kept. BitSize is the width of
5483 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
5484 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
5485  MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
5486  unsigned KeepOldMask, unsigned BitSize) const {
5487  MachineFunction &MF = *MBB->getParent();
5488  const SystemZInstrInfo *TII =
5489  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5490  MachineRegisterInfo &MRI = MF.getRegInfo();
5491  bool IsSubWord = (BitSize < 32);
5492 
5493  // Extract the operands. Base can be a register or a frame index.
5494  unsigned Dest = MI.getOperand(0).getReg();
5495  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
5496  int64_t Disp = MI.getOperand(2).getImm();
5497  unsigned Src2 = MI.getOperand(3).getReg();
5498  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
5499  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
5500  DebugLoc DL = MI.getDebugLoc();
5501  if (IsSubWord)
5502  BitSize = MI.getOperand(6).getImm();
5503 
5504  // Subword operations use 32-bit registers.
5505  const TargetRegisterClass *RC = (BitSize <= 32 ?
5506  &SystemZ::GR32BitRegClass :
5507  &SystemZ::GR64BitRegClass);
5508  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
5509  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
5510 
5511  // Get the right opcodes for the displacement.
5512  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
5513  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
5514  assert(LOpcode && CSOpcode && "Displacement out of range");
5515 
5516  // Create virtual registers for temporary results.
5517  unsigned OrigVal = MRI.createVirtualRegister(RC);
5518  unsigned OldVal = MRI.createVirtualRegister(RC);
5519  unsigned NewVal = MRI.createVirtualRegister(RC);
5520  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
5521  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
5522  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
5523 
5524  // Insert 3 basic blocks for the loop.
5525  MachineBasicBlock *StartMBB = MBB;
5526  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
5527  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
5528  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
5529  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
5530 
5531  // StartMBB:
5532  // ...
5533  // %OrigVal = L Disp(%Base)
5534  // # fall through to LoopMBB
5535  MBB = StartMBB;
5536  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
5537  .addOperand(Base).addImm(Disp).addReg(0);
5538  MBB->addSuccessor(LoopMBB);
5539 
5540  // LoopMBB:
5541  // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
5542  // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
5543  // CompareOpcode %RotatedOldVal, %Src2
5544  // BRC KeepOldMask, UpdateMBB
5545  MBB = LoopMBB;
5546  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
5547  .addReg(OrigVal).addMBB(StartMBB)
5548  .addReg(Dest).addMBB(UpdateMBB);
5549  if (IsSubWord)
5550  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
5551  .addReg(OldVal).addReg(BitShift).addImm(0);
5552  BuildMI(MBB, DL, TII->get(CompareOpcode))
5553  .addReg(RotatedOldVal).addReg(Src2);
5554  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5555  .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
5556  MBB->addSuccessor(UpdateMBB);
5557  MBB->addSuccessor(UseAltMBB);
5558 
5559  // UseAltMBB:
5560  // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
5561  // # fall through to UpdateMBB
5562  MBB = UseAltMBB;
5563  if (IsSubWord)
5564  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
5565  .addReg(RotatedOldVal).addReg(Src2)
5566  .addImm(32).addImm(31 + BitSize).addImm(0);
5567  MBB->addSuccessor(UpdateMBB);
5568 
5569  // UpdateMBB:
5570  // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
5571  // [ %RotatedAltVal, UseAltMBB ]
5572  // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
5573  // %Dest = CS %OldVal, %NewVal, Disp(%Base)
5574  // JNE LoopMBB
5575  // # fall through to DoneMBB
5576  MBB = UpdateMBB;
5577  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
5578  .addReg(RotatedOldVal).addMBB(LoopMBB)
5579  .addReg(RotatedAltVal).addMBB(UseAltMBB);
5580  if (IsSubWord)
5581  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
5582  .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
5583  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
5584  .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
5585  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5586  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
5587  MBB->addSuccessor(LoopMBB);
5588  MBB->addSuccessor(DoneMBB);
5589 
5590  MI.eraseFromParent();
5591  return DoneMBB;
5592 }
5593 
5594 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
5595 // instruction MI.
5596 MachineBasicBlock *
5597 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
5598  MachineBasicBlock *MBB) const {
5599 
5600  MachineFunction &MF = *MBB->getParent();
5601  const SystemZInstrInfo *TII =
5602  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5603  MachineRegisterInfo &MRI = MF.getRegInfo();
5604 
5605  // Extract the operands. Base can be a register or a frame index.
5606  unsigned Dest = MI.getOperand(0).getReg();
5607  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
5608  int64_t Disp = MI.getOperand(2).getImm();
5609  unsigned OrigCmpVal = MI.getOperand(3).getReg();
5610  unsigned OrigSwapVal = MI.getOperand(4).getReg();
5611  unsigned BitShift = MI.getOperand(5).getReg();
5612  unsigned NegBitShift = MI.getOperand(6).getReg();
5613  int64_t BitSize = MI.getOperand(7).getImm();
5614  DebugLoc DL = MI.getDebugLoc();
5615 
5616  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
5617 
5618  // Get the right opcodes for the displacement.
5619  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
5620  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
5621  assert(LOpcode && CSOpcode && "Displacement out of range");
5622 
5623  // Create virtual registers for temporary results.
5624  unsigned OrigOldVal = MRI.createVirtualRegister(RC);
5625  unsigned OldVal = MRI.createVirtualRegister(RC);
5626  unsigned CmpVal = MRI.createVirtualRegister(RC);
5627  unsigned SwapVal = MRI.createVirtualRegister(RC);
5628  unsigned StoreVal = MRI.createVirtualRegister(RC);
5629  unsigned RetryOldVal = MRI.createVirtualRegister(RC);
5630  unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
5631  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);
5632 
5633  // Insert 2 basic blocks for the loop.
5634  MachineBasicBlock *StartMBB = MBB;
5635  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
5636  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
5637  MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
5638 
5639  // StartMBB:
5640  // ...
5641  // %OrigOldVal = L Disp(%Base)
5642  // # fall through to LoopMBB
5643  MBB = StartMBB;
5644  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
5645  .addOperand(Base).addImm(Disp).addReg(0);
5646  MBB->addSuccessor(LoopMBB);
5647 
5648  // LoopMBB:
5649  // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
5650  // %CmpVal = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
5651  // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
5652  // %Dest = RLL %OldVal, BitSize(%BitShift)
5653  // ^^ The low BitSize bits contain the field
5654  // of interest.
5655  // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
5656  // ^^ Replace the upper 32-BitSize bits of the
5657  // comparison value with those that we loaded,
5658  // so that we can use a full word comparison.
5659  // CR %Dest, %RetryCmpVal
5660  // JNE DoneMBB
5661  // # Fall through to SetMBB
5662  MBB = LoopMBB;
5663  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
5664  .addReg(OrigOldVal).addMBB(StartMBB)
5665  .addReg(RetryOldVal).addMBB(SetMBB);
5666  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
5667  .addReg(OrigCmpVal).addMBB(StartMBB)
5668  .addReg(RetryCmpVal).addMBB(SetMBB);
5669  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
5670  .addReg(OrigSwapVal).addMBB(StartMBB)
5671  .addReg(RetrySwapVal).addMBB(SetMBB);
5672  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
5673  .addReg(OldVal).addReg(BitShift).addImm(BitSize);
5674  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
5675  .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
5676  BuildMI(MBB, DL, TII->get(SystemZ::CR))
5677  .addReg(Dest).addReg(RetryCmpVal);
5678  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5679  .addImm(SystemZ::CCMASK_ICMP)
5680  .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
5681  MBB->addSuccessor(DoneMBB);
5682  MBB->addSuccessor(SetMBB);
5683 
5684  // SetMBB:
5685  // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
5686  // ^^ Replace the upper 32-BitSize bits of the new
5687  // value with those that we loaded.
5688  // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
5689  // ^^ Rotate the new field to its proper position.
5690  // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
5691  // JNE LoopMBB
5692  // # fall through to DoneMBB
5693  MBB = SetMBB;
5694  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
5695  .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
5696  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
5697  .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
5698  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
5699  .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
5700  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5701  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
5702  MBB->addSuccessor(LoopMBB);
5703  MBB->addSuccessor(DoneMBB);
5704 
5705  MI.eraseFromParent();
5706  return DoneMBB;
5707 }
5708 
5709 // Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
5710 // if the high register of the GR128 value must be cleared or false if
5711 // it's "don't care". SubReg is subreg_l32 when extending a GR32
5712 // and subreg_l64 when extending a GR64.
5713 MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
5714  MachineBasicBlock *MBB,
5715  bool ClearEven,
5716  unsigned SubReg) const {
5717  MachineFunction &MF = *MBB->getParent();
5718  const SystemZInstrInfo *TII =
5719  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5720  MachineRegisterInfo &MRI = MF.getRegInfo();
5721  DebugLoc DL = MI.getDebugLoc();
5722 
5723  unsigned Dest = MI.getOperand(0).getReg();
5724  unsigned Src = MI.getOperand(1).getReg();
5725  unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
5726 
5727  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
5728  if (ClearEven) {
5729  unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
5730  unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
5731 
5732  BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
5733  .addImm(0);
5734  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
5735  .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
5736  In128 = NewIn128;
5737  }
5738  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
5739  .addReg(In128).addReg(Src).addImm(SubReg);
5740 
5741  MI.eraseFromParent();
5742  return MBB;
5743 }
5744 
5745 MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
5746  MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
5747  MachineFunction &MF = *MBB->getParent();
5748  const SystemZInstrInfo *TII =
5749  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5750  MachineRegisterInfo &MRI = MF.getRegInfo();
5751  DebugLoc DL = MI.getDebugLoc();
5752 
5753  MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
5754  uint64_t DestDisp = MI.getOperand(1).getImm();
5755  MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
5756  uint64_t SrcDisp = MI.getOperand(3).getImm();
5757  uint64_t Length = MI.getOperand(4).getImm();
5758 
5759  // When generating more than one CLC, all but the last will need to
5760  // branch to the end when a difference is found.
5761  MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
5762  splitBlockAfter(MI, MBB) : nullptr);
5763 
5764  // Check for the loop form, in which operand 5 is the trip count.
5765  if (MI.getNumExplicitOperands() > 5) {
5766  bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
5767 
5768  uint64_t StartCountReg = MI.getOperand(5).getReg();
5769  uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
5770  uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
5771  forceReg(MI, DestBase, TII));
5772 
5773  const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
5774  uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
5775  uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
5776  MRI.createVirtualRegister(RC));
5777  uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
5778  uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
5779  MRI.createVirtualRegister(RC));
5780 
5781  RC = &SystemZ::GR64BitRegClass;
5782  uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
5783  uint64_t NextCountReg = MRI.createVirtualRegister(RC);
5784 
5785  MachineBasicBlock *StartMBB = MBB;
5786  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
5787  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
5788  MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
5789 
5790  // StartMBB:
5791  // # fall through to LoopMBB
5792  MBB->addSuccessor(LoopMBB);
5793 
5794  // LoopMBB:
5795  // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
5796  // [ %NextDestReg, NextMBB ]
5797  // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
5798  // [ %NextSrcReg, NextMBB ]
5799  // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
5800  // [ %NextCountReg, NextMBB ]
5801  // ( PFD 2, 768+DestDisp(%ThisDestReg) )
5802  // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
5803  // ( JLH EndMBB )
5804  //
5805  // The prefetch is used only for MVC. The JLH is used only for CLC.
5806  MBB = LoopMBB;
5807 
5808  BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
5809  .addReg(StartDestReg).addMBB(StartMBB)
5810  .addReg(NextDestReg).addMBB(NextMBB);
5811  if (!HaveSingleBase)
5812  BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
5813  .addReg(StartSrcReg).addMBB(StartMBB)
5814  .addReg(NextSrcReg).addMBB(NextMBB);
5815  BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
5816  .addReg(StartCountReg).addMBB(StartMBB)
5817  .addReg(NextCountReg).addMBB(NextMBB);
5818  if (Opcode == SystemZ::MVC)
5819  BuildMI(MBB, DL, TII->get(SystemZ::PFD))
5820  .addImm(SystemZ::PFD_WRITE)
5821  .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
5822  BuildMI(MBB, DL, TII->get(Opcode))
5823  .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
5824  .addReg(ThisSrcReg).addImm(SrcDisp);
5825  if (EndMBB) {
5826  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5827  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
5828  .addMBB(EndMBB);
5829  MBB->addSuccessor(EndMBB);
5830  MBB->addSuccessor(NextMBB);
5831  }
5832 
5833  // NextMBB:
5834  // %NextDestReg = LA 256(%ThisDestReg)
5835  // %NextSrcReg = LA 256(%ThisSrcReg)
5836  // %NextCountReg = AGHI %ThisCountReg, -1
5837  // CGHI %NextCountReg, 0
5838  // JLH LoopMBB
5839  // # fall through to DoneMBB
5840  //
5841  // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
5842  MBB = NextMBB;
5843 
5844  BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
5845  .addReg(ThisDestReg).addImm(256).addReg(0);
5846  if (!HaveSingleBase)
5847  BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
5848  .addReg(ThisSrcReg).addImm(256).addReg(0);
5849  BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
5850  .addReg(ThisCountReg).addImm(-1);
5851  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
5852  .addReg(NextCountReg).addImm(0);
5853  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5854  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
5855  .addMBB(LoopMBB);
5856  MBB->addSuccessor(LoopMBB);
5857  MBB->addSuccessor(DoneMBB);
5858 
5859  DestBase = MachineOperand::CreateReg(NextDestReg, false);
5860  SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
5861  Length &= 255;
5862  MBB = DoneMBB;
5863  }
5864  // Handle any remaining bytes with straight-line code.
5865  while (Length > 0) {
5866  uint64_t ThisLength = std::min(Length, uint64_t(256));
5867  // The previous iteration might have created out-of-range displacements.
5868  // Apply them using LAY if so.
5869  if (!isUInt<12>(DestDisp)) {
5870  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
5871  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
5872  .addOperand(DestBase)
5873  .addImm(DestDisp)
5874  .addReg(0);
5875  DestBase = MachineOperand::CreateReg(Reg, false);
5876  DestDisp = 0;
5877  }
5878  if (!isUInt<12>(SrcDisp)) {
5879  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
5880  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
5881  .addOperand(SrcBase)
5882  .addImm(SrcDisp)
5883  .addReg(0);
5884  SrcBase = MachineOperand::CreateReg(Reg, false);
5885  SrcDisp = 0;
5886  }
5887  BuildMI(*MBB, MI, DL, TII->get(Opcode))
5888  .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
5889  .addOperand(SrcBase).addImm(SrcDisp);
5890  DestDisp += ThisLength;
5891  SrcDisp += ThisLength;
5892  Length -= ThisLength;
5893  // If there's another CLC to go, branch to the end if a difference
5894  // was found.
5895  if (EndMBB && Length > 0) {
5896  MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
5897  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5898  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
5899  .addMBB(EndMBB);
5900  MBB->addSuccessor(EndMBB);
5901  MBB->addSuccessor(NextMBB);
5902  MBB = NextMBB;
5903  }
5904  }
5905  if (EndMBB) {
5906  MBB->addSuccessor(EndMBB);
5907  MBB = EndMBB;
5908  MBB->addLiveIn(SystemZ::CC);
5909  }
5910 
5911  MI.eraseFromParent();
5912  return MBB;
5913 }
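// Hand-worked length split (a 600-byte MVC assumed for illustration):
// the loop form is entered with a trip count of 2, each iteration moving
// 256 bytes and advancing both bases with LA ...,256(...); afterwards
// Length &= 255 leaves 600 & 255 = 88 bytes for a single straight-line
// MVC.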
5914 
5915 // Decompose string pseudo-instruction MI into a loop that continually performs
5916 // Opcode until CC != 3.
5917 MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
5918  MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
5919  MachineFunction &MF = *MBB->getParent();
5920  const SystemZInstrInfo *TII =
5921  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5922  MachineRegisterInfo &MRI = MF.getRegInfo();
5923  DebugLoc DL = MI.getDebugLoc();
5924 
5925  uint64_t End1Reg = MI.getOperand(0).getReg();
5926  uint64_t Start1Reg = MI.getOperand(1).getReg();
5927  uint64_t Start2Reg = MI.getOperand(2).getReg();
5928  uint64_t CharReg = MI.getOperand(3).getReg();
5929 
5930  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
5931  uint64_t This1Reg = MRI.createVirtualRegister(RC);
5932  uint64_t This2Reg = MRI.createVirtualRegister(RC);
5933  uint64_t End2Reg = MRI.createVirtualRegister(RC);
5934 
5935  MachineBasicBlock *StartMBB = MBB;
5936  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
5937  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
5938 
5939  // StartMBB:
5940  // # fall through to LoopMBB
5941  MBB->addSuccessor(LoopMBB);
5942 
5943  // LoopMBB:
5944  // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
5945  // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
5946  // R0L = %CharReg
5947  // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
5948  // JO LoopMBB
5949  // # fall through to DoneMBB
5950  //
5951  // The load of R0L can be hoisted by post-RA LICM.
5952  MBB = LoopMBB;
5953 
5954  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
5955  .addReg(Start1Reg).addMBB(StartMBB)
5956  .addReg(End1Reg).addMBB(LoopMBB);
5957  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
5958  .addReg(Start2Reg).addMBB(StartMBB)
5959  .addReg(End2Reg).addMBB(LoopMBB);
5960  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
5961  BuildMI(MBB, DL, TII->get(Opcode))
5962  .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
5963  .addReg(This1Reg).addReg(This2Reg);
5964  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5965  .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
5966  MBB->addSuccessor(LoopMBB);
5967  MBB->addSuccessor(DoneMBB);
5968 
5969  DoneMBB->addLiveIn(SystemZ::CC);
5970 
5971  MI.eraseFromParent();
5972  return DoneMBB;
5973 }
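// Behavioural sketch (per the z/Architecture definition of CLST, stated
// here as an assumption): the instruction compares up to a CPU-determined
// number of bytes and sets CC 3 when it stops early, so the BRC on CC 3
// above simply reissues it from the updated addresses until a difference
// or a terminator decides the result.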
5974 
5975 // Update TBEGIN instruction with final opcode and register clobbers.
5976 MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
5977  MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
5978  bool NoFloat) const {
5979  MachineFunction &MF = *MBB->getParent();
5980  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
5981  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
5982 
5983  // Update opcode.
5984  MI.setDesc(TII->get(Opcode));
5985 
5986  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
5987  // Make sure to add the corresponding GRSM bits if they are missing.
5988  uint64_t Control = MI.getOperand(2).getImm();
5989  static const unsigned GPRControlBit[16] = {
5990  0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
5991  0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
5992  };
5993  Control |= GPRControlBit[15];
5994  if (TFI->hasFP(MF))
5995  Control |= GPRControlBit[11];
5996  MI.getOperand(2).setImm(Control);
5997 
5998  // Add GPR clobbers.
5999  for (int I = 0; I < 16; I++) {
6000  if ((Control & GPRControlBit[I]) == 0) {
6001  unsigned Reg = SystemZMC::GR64Regs[I];
6002  MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
6003  }
6004  }
6005 
6006  // Add FPR/VR clobbers.
6007  if (!NoFloat && (Control & 4) != 0) {
6008  if (Subtarget.hasVector()) {
6009  for (int I = 0; I < 32; I++) {
6010  unsigned Reg = SystemZMC::VR128Regs[I];
6011  MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
6012  }
6013  } else {
6014  for (int I = 0; I < 16; I++) {
6015  unsigned Reg = SystemZMC::FP64Regs[I];
6016  MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
6017  }
6018  }
6019  }
6020 
6021  return MBB;
6022 }
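// A worked control-mask case (register pairing assumed from the table
// above): GPRControlBit[15] = 0x0100 covers the %r14/%r15 pair and is
// always set so the stack pointer survives the transaction; with a frame
// pointer, GPRControlBit[11] = 0x0400 adds %r10/%r11.  Every pair whose
// bit stays clear is appended as an implicit def, i.e. a clobber.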
6023 
6024 MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
6025  MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
6026  MachineFunction &MF = *MBB->getParent();
6027  MachineRegisterInfo *MRI = &MF.getRegInfo();
6028  const SystemZInstrInfo *TII =
6029  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6030  DebugLoc DL = MI.getDebugLoc();
6031 
6032  unsigned SrcReg = MI.getOperand(0).getReg();
6033 
6034  // Create new virtual register of the same class as source.
6035  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
6036  unsigned DstReg = MRI->createVirtualRegister(RC);
6037 
6038  // Replace pseudo with a normal load-and-test that models the def as
6039  // well.
6040  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
6041  .addReg(SrcReg);
6042  MI.eraseFromParent();
6043 
6044  return MBB;
6045 }
6046 
6047 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
6048  MachineInstr &MI, MachineBasicBlock *MBB) const {
6049  switch (MI.getOpcode()) {
6050  case SystemZ::Select32Mux:
6051  return emitSelect(MI, MBB,
6052  Subtarget.hasLoadStoreOnCond2()? SystemZ::LOCRMux : 0);
6053  case SystemZ::Select32:
6054  return emitSelect(MI, MBB, SystemZ::LOCR);
6055  case SystemZ::Select64:
6056  return emitSelect(MI, MBB, SystemZ::LOCGR);
6057  case SystemZ::SelectF32:
6058  case SystemZ::SelectF64:
6059  case SystemZ::SelectF128:
6060  return emitSelect(MI, MBB, 0);
6061 
6062  case SystemZ::CondStore8Mux:
6063  return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
6064  case SystemZ::CondStore8MuxInv:
6065  return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
6066  case SystemZ::CondStore16Mux:
6067  return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
6068  case SystemZ::CondStore16MuxInv:
6069  return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
6070  case SystemZ::CondStore32Mux:
6071  return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
6072  case SystemZ::CondStore32MuxInv:
6073  return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
6074  case SystemZ::CondStore8:
6075  return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
6076  case SystemZ::CondStore8Inv:
6077  return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
6078  case SystemZ::CondStore16:
6079  return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
6080  case SystemZ::CondStore16Inv:
6081  return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
6082  case SystemZ::CondStore32:
6083  return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
6084  case SystemZ::CondStore32Inv:
6085  return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
6086  case SystemZ::CondStore64:
6087  return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
6088  case SystemZ::CondStore64Inv:
6089  return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
6090  case SystemZ::CondStoreF32:
6091  return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
6092  case SystemZ::CondStoreF32Inv:
6093  return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
6094  case SystemZ::CondStoreF64:
6095  return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
6096  case SystemZ::CondStoreF64Inv:
6097  return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
6098 
6099  case SystemZ::AEXT128_64:
6100  return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
6101  case SystemZ::ZEXT128_32:
6102  return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
6103  case SystemZ::ZEXT128_64:
6104  return emitExt128(MI, MBB, true, SystemZ::subreg_l64);
6105 
6106  case SystemZ::ATOMIC_SWAPW:
6107  return emitAtomicLoadBinary(MI, MBB, 0, 0);
6108  case SystemZ::ATOMIC_SWAP_32:
6109  return emitAtomicLoadBinary(MI, MBB, 0, 32);
6110  case SystemZ::ATOMIC_SWAP_64:
6111  return emitAtomicLoadBinary(MI, MBB, 0, 64);
6112 
6113  case SystemZ::ATOMIC_LOADW_AR:
6114  return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
6115  case SystemZ::ATOMIC_LOADW_AFI:
6116  return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
6117  case SystemZ::ATOMIC_LOAD_AR:
6118  return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
6119  case SystemZ::ATOMIC_LOAD_AHI:
6120  return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
6121  case SystemZ::ATOMIC_LOAD_AFI:
6122  return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
6123  case SystemZ::ATOMIC_LOAD_AGR:
6124  return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
6125  case SystemZ::ATOMIC_LOAD_AGHI:
6126  return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
6127  case SystemZ::ATOMIC_LOAD_AGFI:
6128  return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
6129 
6130  case SystemZ::ATOMIC_LOADW_SR:
6131  return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
6132  case SystemZ::ATOMIC_LOAD_SR:
6133  return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
6134  case SystemZ::ATOMIC_LOAD_SGR:
6135  return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
6136 
6137  case SystemZ::ATOMIC_LOADW_NR:
6138  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
6139  case SystemZ::ATOMIC_LOADW_NILH:
6140  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
6141  case SystemZ::ATOMIC_LOAD_NR:
6142  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
6143  case SystemZ::ATOMIC_LOAD_NILL:
6144  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
6145  case SystemZ::ATOMIC_LOAD_NILH:
6146  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
6147  case SystemZ::ATOMIC_LOAD_NILF:
6148  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
6149  case SystemZ::ATOMIC_LOAD_NGR:
6150  return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
6151  case SystemZ::ATOMIC_LOAD_NILL64:
6152  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
6153  case SystemZ::ATOMIC_LOAD_NILH64:
6154  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
6155  case SystemZ::ATOMIC_LOAD_NIHL64:
6156  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
6157  case SystemZ::ATOMIC_LOAD_NIHH64:
6158  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
6159  case SystemZ::ATOMIC_LOAD_NILF64:
6160  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
6161  case SystemZ::ATOMIC_LOAD_NIHF64:
6162  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
6163 
6164  case SystemZ::ATOMIC_LOADW_OR:
6165  return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
6166  case SystemZ::ATOMIC_LOADW_OILH:
6167  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
6168  case SystemZ::ATOMIC_LOAD_OR:
6169  return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
6170  case SystemZ::ATOMIC_LOAD_OILL:
6171  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
6172  case SystemZ::ATOMIC_LOAD_OILH:
6173  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
6174  case SystemZ::ATOMIC_LOAD_OILF:
6175  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
6176  case SystemZ::ATOMIC_LOAD_OGR:
6177  return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
6178  case SystemZ::ATOMIC_LOAD_OILL64:
6179  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
6180  case SystemZ::ATOMIC_LOAD_OILH64:
6181  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
6182  case SystemZ::ATOMIC_LOAD_OIHL64:
6183  return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
6184  case SystemZ::ATOMIC_LOAD_OIHH64:
6185  return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
6186  case SystemZ::ATOMIC_LOAD_OILF64:
6187  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
6188  case SystemZ::ATOMIC_LOAD_OIHF64:
6189  return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
6190 
6191  case SystemZ::ATOMIC_LOADW_XR:
6192  return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
6193  case SystemZ::ATOMIC_LOADW_XILF:
6194  return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
6195  case SystemZ::ATOMIC_LOAD_XR:
6196  return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
6197  case SystemZ::ATOMIC_LOAD_XILF:
6198  return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
6199  case SystemZ::ATOMIC_LOAD_XGR:
6200  return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
6201  case SystemZ::ATOMIC_LOAD_XILF64:
6202  return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
6203  case SystemZ::ATOMIC_LOAD_XIHF64:
6204  return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
6205 
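// Annotation: the "i"-suffixed pseudos pass Invert=true, which
// complements the result of the binary operation before it is stored
// back; this is how atomic NAND is built on top of the AND opcodes used
// above.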
6206  case SystemZ::ATOMIC_LOADW_NRi:
6207  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
6208  case SystemZ::ATOMIC_LOADW_NILHi:
6209  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
6210  case SystemZ::ATOMIC_LOAD_NRi:
6211  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
6212  case SystemZ::ATOMIC_LOAD_NILLi:
6213  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
6214  case SystemZ::ATOMIC_LOAD_NILHi:
6215  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
6216  case SystemZ::ATOMIC_LOAD_NILFi:
6217  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
6218  case SystemZ::ATOMIC_LOAD_NGRi:
6219  return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
6220  case SystemZ::ATOMIC_LOAD_NILL64i:
6221  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
6222  case SystemZ::ATOMIC_LOAD_NILH64i:
6223  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
6224  case SystemZ::ATOMIC_LOAD_NIHL64i:
6225  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
6226  case SystemZ::ATOMIC_LOAD_NIHH64i:
6227  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
6228  case SystemZ::ATOMIC_LOAD_NILF64i:
6229  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
6230  case SystemZ::ATOMIC_LOAD_NIHF64i:
6231  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
6232 
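// Annotation: emitAtomicLoadMinMax(MI, MBB, CompareOpcode, KeepOldMask,
// BitSize) emits a compare-and-swap loop that keeps the old value
// whenever the comparison (CR/CGR signed, CLR/CLGR unsigned) sets a CC
// in KeepOldMask; hence MIN keeps on CCMASK_CMP_LE and MAX on
// CCMASK_CMP_GE.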
6233  case SystemZ::ATOMIC_LOADW_MIN:
6234  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
6235                              SystemZ::CCMASK_CMP_LE, 0);
6236  case SystemZ::ATOMIC_LOAD_MIN_32:
6237  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
6238                              SystemZ::CCMASK_CMP_LE, 32);
6239  case SystemZ::ATOMIC_LOAD_MIN_64:
6240  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
6241                              SystemZ::CCMASK_CMP_LE, 64);
6242 
6243  case SystemZ::ATOMIC_LOADW_MAX:
6244  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
6245                              SystemZ::CCMASK_CMP_GE, 0);
6246  case SystemZ::ATOMIC_LOAD_MAX_32:
6247  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
6248                              SystemZ::CCMASK_CMP_GE, 32);
6249  case SystemZ::ATOMIC_LOAD_MAX_64:
6250  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
6251                              SystemZ::CCMASK_CMP_GE, 64);
6252 
6253  case SystemZ::ATOMIC_LOADW_UMIN:
6254  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
6255                              SystemZ::CCMASK_CMP_LE, 0);
6256  case SystemZ::ATOMIC_LOAD_UMIN_32:
6257  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
6258                              SystemZ::CCMASK_CMP_LE, 32);
6259  case SystemZ::ATOMIC_LOAD_UMIN_64:
6260  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
6261                              SystemZ::CCMASK_CMP_LE, 64);
6262 
6263  case SystemZ::ATOMIC_LOADW_UMAX:
6264  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
6265                              SystemZ::CCMASK_CMP_GE, 0);
6266  case SystemZ::ATOMIC_LOAD_UMAX_32:
6267  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
6268                              SystemZ::CCMASK_CMP_GE, 32);
6269  case SystemZ::ATOMIC_LOAD_UMAX_64:
6270  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
6271                              SystemZ::CCMASK_CMP_GE, 64);
6272 
6273  case SystemZ::ATOMIC_CMP_SWAPW:
6274  return emitAtomicCmpSwapW(MI, MBB);
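// Annotation: emitMemMemWrapper expands the *Sequence/*Loop pseudos into
// the corresponding memory-to-memory instruction (MVC, NC, OC, XC, CLC),
// emitting a loop when the length is too large, or not known at compile
// time, to be covered by straight-line copies.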
6275  case SystemZ::MVCSequence:
6276  case SystemZ::MVCLoop:
6277  return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
6278  case SystemZ::NCSequence:
6279  case SystemZ::NCLoop:
6280  return emitMemMemWrapper(MI, MBB, SystemZ::NC);
6281  case SystemZ::OCSequence:
6282  case SystemZ::OCLoop:
6283  return emitMemMemWrapper(MI, MBB, SystemZ::OC);
6284  case SystemZ::XCSequence:
6285  case SystemZ::XCLoop:
6286  return emitMemMemWrapper(MI, MBB, SystemZ::XC);
6287  case SystemZ::CLCSequence:
6288  case SystemZ::CLCLoop:
6289  return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
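// Annotation: emitStringWrapper wraps CLST/MVST/SRST in a loop, since
// these string instructions may end before completion (condition code 3)
// and must then be re-executed.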
6290  case SystemZ::CLSTLoop:
6291  return emitStringWrapper(MI, MBB, SystemZ::CLST);
6292  case SystemZ::MVSTLoop:
6293  return emitStringWrapper(MI, MBB, SystemZ::MVST);
6294  case SystemZ::SRSTLoop:
6295  return emitStringWrapper(MI, MBB, SystemZ::SRST);
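// Annotation: emitTransactionBegin marks the registers that a
// transaction abort can clobber; the final argument distinguishes the
// _nofloat forms, which avoid marking the FPRs as clobbered.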
6296  case SystemZ::TBEGIN:
6297  return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
6298  case SystemZ::TBEGIN_nofloat:
6299  return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
6300  case SystemZ::TBEGINC:
6301  return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
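// Annotation: with the vector facility, floating-point compares against
// zero go through these pseudos so emitLoadAndTestCmp0 can replace them
// with the matching load-and-test instruction (LTEBR/LTDBR/LTXBR).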
6302  case SystemZ::LTEBRCompare_VecPseudo:
6303  return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
6304  case SystemZ::LTDBRCompare_VecPseudo:
6305  return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
6306  case SystemZ::LTXBRCompare_VecPseudo:
6307  return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
6308 
6309  default:
6310  llvm_unreachable("Unexpected instr type to insert");
6311  }
6312 }