//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Intrinsics.h"
#include <cctype>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {
// Represents a sequence for extracting a 0/1 value from an IPM result:
// (((X ^ XORValue) + AddValue) >> Bit)
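// For example, getIPMConversion() below returns {XORValue = 0,
// AddValue = -(1 << SystemZ::IPM_CC), Bit = 31} to test for CC 0: IPM leaves
// CC in the two bits above bit SystemZ::IPM_CC with all higher bits zero, so
// the addition makes bit 31 (the i32 sign bit) 1 exactly when CC was 0, and
// the shift then yields the required 0/1 value.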
struct IPMConversion {
  IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
    : XORValue(xorValue), AddValue(addValue), Bit(bit) {}

  int64_t XORValue;
  int64_t AddValue;
  unsigned Bit;
};

// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In)
    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
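// (The early use would otherwise inherit the operand's kill flag, marking
// the register dead before the final use of the expanded sequence.)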
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  auto &DL = *TM.getDataLayout();
  MVT PtrVT = getPointerTy(DL);

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setExceptionPointerRegister(SystemZ::R6D);
  setExceptionSelectorRegister(SystemZ::R7D);
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC,     VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD,  VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ,            VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::ROTR,            VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
  setOperationAction(ISD::JumpTable,        PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Legal);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
  }

  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32,  Legal);
  setOperationAction(ISD::FMA, MVT::f64,  Legal);
  setOperationAction(ISD::FMA, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return false;
  default:
    break;
  }

  return false;
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  return Imm.isZero() || Imm.isNegZero();
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                           unsigned,
                                                           unsigned,
                                                           bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}
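
// The addressing modes accepted below are base + 20-bit signed displacement,
// optionally with a single (unscaled) index register: for example,
// 10(%r3,%r2) is representable, while a scaled form such as base + 4*index
// is not and must be computed separately.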
bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                  const AddrMode &AM, Type *Ty,
                                                  unsigned AS) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  // Indexing is OK but no scale factor can be applied.
  return AM.Scale == 0 || AM.Scale == 1;
}

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < 16 && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  if (!CI->isTailCall())
    return false;
  return true;
}

// We do not yet support 128-bit single-element vector types.  If the user
// attempts to use such types as function argument or return type, prefer
// to error out instead of emitting code violating the ABI.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (ArgVT.isVector() && !VT.isVector())
    report_fatal_error("Unsupported vector argument or return type");
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
  for (unsigned i = 0; i < Ins.size(); ++i)
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  for (unsigned i = 0; i < Outs.size(); ++i)
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::Indirect)
    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
                        MachinePointerInfo(), false, false, false, 0);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
                        Value, DAG.getUNDEF(MVT::i64));
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

SDValue SystemZTargetLowering::
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                     const SmallVectorImpl<ISD::InputArg> &Ins,
                     SDLoc DL, SelectionDAG &DAG,
                     SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
                                      VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, false, 0);
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(FI),
                                 false, false, 0);
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
    }
  }

  return Chain;
}

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the call-saved argument register R6.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    unsigned Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
  }
  return true;
}

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, DL, PtrVT, true),
                                 DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, 0));
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall)
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}

bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
               MachineFunction &MF, bool isVarArg,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
               LLVMContext &Context) const {
  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
                                   CallingConv::ID CallConv, bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   SDLoc DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    unsigned Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}

SDValue SystemZTargetLowering::
prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
  return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}

// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}

// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfcedbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
    Opcode = SystemZISD::VFTCI;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  default:
    return false;
  }
}

// Emit an intrinsic with chain with a glued value instead of its CC result.
static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
                                             unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  Ops.push_back(Op.getOperand(0));
  for (unsigned I = 2; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
  SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
  SDValue OldChain = SDValue(Op.getNode(), 1);
  SDValue NewChain = SDValue(Intr.getNode(), 0);
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
  return Intr;
}

// Emit an intrinsic with a glued value instead of its CC result.
static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
                                     unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  for (unsigned I = 1; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  if (Op->getNumValues() == 1)
    return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
  assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
  SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
  return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
}

// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
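// For example, CONV(LT) below expands to the three cases SETLT and SETOLT
// (both CCMASK_CMP_LT) and SETULT (CCMASK_CMP_UO | CCMASK_CMP_LT).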
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}

// Return a sequence for getting a 1 from an IPM result when CC has a
// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
// The handling of CC values outside CCValid doesn't matter.
static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
  // Deal with cases where the result can be taken directly from a bit
  // of the IPM result.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC);
  if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC + 1);

  // Deal with cases where we can add a value to force the sign bit
  // to contain the right value.  Putting the bit in 31 means we can
  // use SRL rather than RISBG(L), and also makes it easier to get a
  // 0/-1 value, so it has priority over the other tests below.
  //
  // These sequences rely on the fact that the upper two bits of the
  // IPM result are zero.
  uint64_t TopBit = uint64_t(1) << 31;
  if (CCMask == (CCValid & SystemZ::CCMASK_0))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
    return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2)))
    return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_3))
    return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);

  // Next try inverting the value and testing a bit.  0/1 could be
  // handled this way too, but we dealt with that case above.
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
    return IPMConversion(-1, 0, SystemZ::IPM_CC);

  // Handle cases where adding a value forces a non-sign bit to contain
  // the right value.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
    return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);

  // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All these can
  // be done by inverting the low CC bit and applying one of the
  // sign-based extractions above.
  if (CCMask == (CCValid & SystemZ::CCMASK_1))
    return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_2))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (1 << SystemZ::IPM_CC), 31);

  llvm_unreachable("Unexpected CC combination");
}

// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
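// For example, a signed X > -1 is true in exactly the same cases as X >= 0,
// so the code below flips the equality bit of the CC mask and replaces the
// constant with zero.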
static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}

// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
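// For example, a signed "sign-extended byte < 0" test depends only on the
// top bit of the byte, so it is rewritten below as the unsigned test
// "byte > 127", which CLI can implement directly.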
static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    assert(C.ICmpType == SystemZICMP::Any &&
           "Signedness shouldn't matter here.");
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType)
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
                           Load->getChain(), Load->getBasePtr(),
                           Load->getPointerInfo(), Load->getMemoryVT(),
                           Load->isVolatile(), Load->isNonTemporal(),
                           Load->isInvariant(), Load->getAlignment());

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}

// Return true if Op is either an unextended load, or a load suitable
// for integer register-memory comparisons of type ICmpType.
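// For example, C(G)H compares a register against a sign-extended halfword in
// memory, so a SEXTLOAD operand is "natural" for any comparison that is not
// unsigned-only.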
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
  if (Load) {
    // There are no instructions to compare a register with a memory byte.
    if (Load->getMemoryVT() == MVT::i8)
      return false;
    // Otherwise decide on extension type.
    switch (Load->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      return true;
    case ISD::SEXTLOAD:
      return ICmpType != SystemZICMP::UnsignedOnly;
    case ISD::ZEXTLOAD:
      return ICmpType != SystemZICMP::SignedOnly;
    default:
      break;
    }
  }
  return false;
}

// Return true if it is better to swap the operands of C.
static bool shouldSwapCmpOperands(const Comparison &C) {
  // Leave f128 comparisons alone, since they have no memory forms.
  if (C.Op0.getValueType() == MVT::f128)
    return false;

  // Always keep a floating-point constant second, since comparisons with
  // zero can use LOAD TEST and comparisons with other constants make a
  // natural memory operand.
  if (isa<ConstantFPSDNode>(C.Op1))
    return false;

  // Never swap comparisons with zero since there are many ways to optimize
  // those later.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
    return false;

  // Also keep natural memory operands second if the loaded value is
  // only used here.  Several comparisons have memory forms.
  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
    return false;

  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
  // In that case we generally prefer the memory to be second.
  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
    // The only exceptions are when the second operand is a constant and
    // we can use things like CHHSI.
    if (!ConstOp1)
      return true;
    // The unsigned memory-immediate instructions can handle 16-bit
    // unsigned integers.
    if (C.ICmpType != SystemZICMP::SignedOnly &&
        isUInt<16>(ConstOp1->getZExtValue()))
      return false;
    // The signed memory-immediate instructions can handle 16-bit
    // signed integers.
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
        isInt<16>(ConstOp1->getSExtValue()))
      return false;
    return true;
  }

  // Try to promote the use of CGFR and CLGFR.
  unsigned Opcode0 = C.Op0.getOpcode();
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly &&
      Opcode0 == ISD::AND &&
      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
    return true;

  return false;
}

// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.
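// For example, CCMASK_CMP_LE (EQ | LT) becomes CCMASK_CMP_GE (EQ | GT),
// which is the mask needed after the comparison operands are swapped.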
static unsigned reverseCCMask(unsigned CCMask) {
  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_UO));
}

// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed.  In that case it's better to compare the
// result of the subtraction against zero.
static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::SUB &&
          ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
           (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
        C.Op0 = SDValue(N, 0);
        C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
        return;
      }
    }
  }
}

// Check whether C compares a floating-point value with zero and if that
// floating-point value is also negated.  In this case we can use the
// negation to set CC, so avoiding separate LOAD AND TEST and
// LOAD (NEGATIVE/COMPLEMENT) instructions.
static void adjustForFNeg(Comparison &C) {
  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
  if (C1 && C1->isZero()) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::FNEG) {
        C.Op0 = SDValue(N, 0);
        C.CCMask = reverseCCMask(C.CCMask);
        return;
      }
    }
  }
}

// Check whether C compares (shl X, 32) with 0 and whether X is
// also sign-extended.  In that case it is better to test the result
// of the sign extension using LTGFR.
//
// This case is important because InstCombine transforms a comparison
// with (sext (trunc X)) into a comparison with (shl X, 32).
static void adjustForLTGFR(Comparison &C) {
  // Check for a comparison between (shl X, 32) and 0.
  if (C.Op0.getOpcode() == ISD::SHL &&
      C.Op0.getValueType() == MVT::i64 &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
    if (C1 && C1->getZExtValue() == 32) {
      SDValue ShlOp0 = C.Op0.getOperand(0);
      // See whether X has any SIGN_EXTEND_INREG uses.
      for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
        SDNode *N = *I;
        if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
          C.Op0 = SDValue(N, 0);
          return;
        }
      }
    }
  }
}
1787 
1788 // If C compares the truncation of an extending load, try to compare
1789 // the untruncated value instead. This exposes more opportunities to
1790 // reuse CC.
1791 static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
1792  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
1793  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
1794  C.Op1.getOpcode() == ISD::Constant &&
1795  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
1796  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
1797  if (L->getMemoryVT().getStoreSizeInBits()
1798  <= C.Op0.getValueType().getSizeInBits()) {
1799  unsigned Type = L->getExtensionType();
1800  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
1801  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
1802  C.Op0 = C.Op0.getOperand(0);
1803  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
1804  }
1805  }
1806  }
1807 }
1808 
1809 // Return true if shift operation N has an in-range constant shift value.
1810 // Store it in ShiftVal if so.
1811 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
1812  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
1813  if (!Shift)
1814  return false;
1815 
1816  uint64_t Amount = Shift->getZExtValue();
1817  if (Amount >= N.getValueType().getSizeInBits())
1818  return false;
1819 
1820  ShiftVal = Amount;
1821  return true;
1822 }
1823 
1824 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
1825 // instruction and whether the CC value is descriptive enough to handle
1826 // a comparison of type Opcode between the AND result and CmpVal.
1827 // CCMask says which comparison result is being tested and BitSize is
1828 // the number of bits in the operands. If TEST UNDER MASK can be used,
1829 // return the corresponding CC mask, otherwise return 0.
1830 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
1831  uint64_t Mask, uint64_t CmpVal,
1832  unsigned ICmpType) {
1833  assert(Mask != 0 && "ANDs with zero should have been removed by now");
1834 
1835  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
1836  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
1837  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
1838  return 0;
1839 
1840  // Work out the masks for the lowest and highest bits.
1841  unsigned HighShift = 63 - countLeadingZeros(Mask);
1842  uint64_t High = uint64_t(1) << HighShift;
1843  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
1844 
1845  // Signed ordered comparisons are effectively unsigned if the sign
1846  // bit is dropped.
1847  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
1848 
1849  // Check for equality comparisons with 0, or the equivalent.
1850  if (CmpVal == 0) {
1851  if (CCMask == SystemZ::CCMASK_CMP_EQ)
1852  return SystemZ::CCMASK_TM_ALL_0;
1853  if (CCMask == SystemZ::CCMASK_CMP_NE)
1854  return SystemZ::CCMASK_TM_SOME_1;
1855  }
1856  if (EffectivelyUnsigned && CmpVal <= Low) {
1857  if (CCMask == SystemZ::CCMASK_CMP_LT)
1858  return SystemZ::CCMASK_TM_ALL_0;
1859  if (CCMask == SystemZ::CCMASK_CMP_GE)
1860  return SystemZ::CCMASK_TM_SOME_1;
1861  }
1862  if (EffectivelyUnsigned && CmpVal < Low) {
1863  if (CCMask == SystemZ::CCMASK_CMP_LE)
1864  return SystemZ::CCMASK_TM_ALL_0;
1865  if (CCMask == SystemZ::CCMASK_CMP_GT)
1866  return SystemZ::CCMASK_TM_SOME_1;
1867  }
1868 
1869  // Check for equality comparisons with the mask, or the equivalent.
1870  if (CmpVal == Mask) {
1871  if (CCMask == SystemZ::CCMASK_CMP_EQ)
1872  return SystemZ::CCMASK_TM_ALL_1;
1873  if (CCMask == SystemZ::CCMASK_CMP_NE)
1874  return SystemZ::CCMASK_TM_SOME_0;
1875  }
1876  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
1877  if (CCMask == SystemZ::CCMASK_CMP_GT)
1878  return SystemZ::CCMASK_TM_ALL_1;
1879  if (CCMask == SystemZ::CCMASK_CMP_LE)
1880  return SystemZ::CCMASK_TM_SOME_0;
1881  }
1882  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
1883  if (CCMask == SystemZ::CCMASK_CMP_GE)
1884  return SystemZ::CCMASK_TM_ALL_1;
1885  if (CCMask == SystemZ::CCMASK_CMP_LT)
1886  return SystemZ::CCMASK_TM_SOME_0;
1887  }
1888 
1889  // Check for ordered comparisons with the top bit.
1890  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
1891  if (CCMask == SystemZ::CCMASK_CMP_LE)
1892  return SystemZ::CCMASK_TM_MSB_0;
1893  if (CCMask == SystemZ::CCMASK_CMP_GT)
1894  return SystemZ::CCMASK_TM_MSB_1;
1895  }
1896  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
1897  if (CCMask == SystemZ::CCMASK_CMP_LT)
1898  return SystemZ::CCMASK_TM_MSB_0;
1899  if (CCMask == SystemZ::CCMASK_CMP_GE)
1900  return SystemZ::CCMASK_TM_MSB_1;
1901  }
1902 
1903  // If there are just two bits, we can do equality checks for Low and High
1904  // as well.
1905  if (Mask == Low + High) {
1906  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
1907  return SystemZ::CCMASK_TM_MIXED_MSB_0;
1908  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
1909  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
1910  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
1911  return SystemZ::CCMASK_TM_MIXED_MSB_1;
1912  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
1913  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
1914  }
1915 
1916  // Looks like we've exhausted our options.
1917  return 0;
1918 }
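// [Editorial sketch -- not part of the original file.]  The Low/High
// extraction above, restated with portable bit tricks: for Mask = 0x0030,
// High is 0x20 and Low is 0x10, so e.g. an unsigned "CmpVal <= 0x10" test
// maps onto the TM all-zero/some-one conditions.  countLeadingZeros and
// countTrailingZeros come from llvm/Support/MathExtras.h; __builtin_clzll
// below is a GCC/Clang builtin used purely for illustration.
static inline void maskBoundsSketch(uint64_t Mask, uint64_t &Low,
                                    uint64_t &High) {
  High = uint64_t(1) << (63 - __builtin_clzll(Mask)); // highest set bit
  Low = Mask & -Mask;                                 // lowest set bit
}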
1919 
1920 // See whether C can be implemented as a TEST UNDER MASK instruction.
1921 // Update the arguments with the TM version if so.
1922 static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
1923  // Check that we have a comparison with a constant.
1924  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
1925  if (!ConstOp1)
1926  return;
1927  uint64_t CmpVal = ConstOp1->getZExtValue();
1928 
1929  // Check whether the nonconstant input is an AND with a constant mask.
1930  Comparison NewC(C);
1931  uint64_t MaskVal;
1932  ConstantSDNode *Mask = nullptr;
1933  if (C.Op0.getOpcode() == ISD::AND) {
1934  NewC.Op0 = C.Op0.getOperand(0);
1935  NewC.Op1 = C.Op0.getOperand(1);
1936  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
1937  if (!Mask)
1938  return;
1939  MaskVal = Mask->getZExtValue();
1940  } else {
1941  // There is no instruction to compare with a 64-bit immediate
1942  // so use TMHH instead if possible. We need an unsigned ordered
1943  // comparison with an i64 immediate.
1944  if (NewC.Op0.getValueType() != MVT::i64 ||
1945  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
1946  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
1947  NewC.ICmpType == SystemZICMP::SignedOnly)
1948  return;
1949  // Convert LE and GT comparisons into LT and GE.
1950  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
1951  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
1952  if (CmpVal == uint64_t(-1))
1953  return;
1954  CmpVal += 1;
1955  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
1956  }
1957  // If the low N bits of Op1 are zero then the low N bits of Op0 can
1958  // be masked off without changing the result.
1959  MaskVal = -(CmpVal & -CmpVal);
1960  NewC.ICmpType = SystemZICMP::UnsignedOnly;
1961  }
1962  if (!MaskVal)
1963  return;
1964 
1965  // Check whether the combination of mask, comparison value and comparison
1966  // type are suitable.
1967  unsigned BitSize = NewC.Op0.getValueType().getSizeInBits();
1968  unsigned NewCCMask, ShiftVal;
1969  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
1970  NewC.Op0.getOpcode() == ISD::SHL &&
1971  isSimpleShift(NewC.Op0, ShiftVal) &&
1972  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
1973  MaskVal >> ShiftVal,
1974  CmpVal >> ShiftVal,
1975  SystemZICMP::Any))) {
1976  NewC.Op0 = NewC.Op0.getOperand(0);
1977  MaskVal >>= ShiftVal;
1978  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
1979  NewC.Op0.getOpcode() == ISD::SRL &&
1980  isSimpleShift(NewC.Op0, ShiftVal) &&
1981  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
1982  MaskVal << ShiftVal,
1983  CmpVal << ShiftVal,
1984  SystemZICMP::UnsignedOnly))) {
1985  NewC.Op0 = NewC.Op0.getOperand(0);
1986  MaskVal <<= ShiftVal;
1987  } else {
1988  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
1989  NewC.ICmpType);
1990  if (!NewCCMask)
1991  return;
1992  }
1993 
1994  // Go ahead and make the change.
1995  C.Opcode = SystemZISD::TM;
1996  C.Op0 = NewC.Op0;
1997  if (Mask && Mask->getZExtValue() == MaskVal)
1998  C.Op1 = SDValue(Mask, 0);
1999  else
2000  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2001  C.CCValid = SystemZ::CCMASK_TM;
2002  C.CCMask = NewCCMask;
2003 }
2004 
2005 // Return a Comparison that tests the condition-code result of intrinsic
2006 // node Call against constant integer CC using comparison code Cond.
2007 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2008 // and CCValid is the set of possible condition-code results.
2009 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2010  SDValue Call, unsigned CCValid, uint64_t CC,
2011  ISD::CondCode Cond) {
2012  Comparison C(Call, SDValue());
2013  C.Opcode = Opcode;
2014  C.CCValid = CCValid;
2015  if (Cond == ISD::SETEQ)
2016  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2017  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2018  else if (Cond == ISD::SETNE)
2019  // ...and the inverse of that.
2020  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2021  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2022  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2023  // always true for CC>3.
2024  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2025  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2026  // ...and the inverse of that.
2027  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2028  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2029  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2030  // always true for CC>3.
2031  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2032  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2033  // ...and the inverse of that.
2034  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2035  else
2036  llvm_unreachable("Unexpected integer comparison type");
2037  C.CCMask &= CCValid;
2038  return C;
2039 }
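// [Editorial sketch -- not part of the original file.]  The bit arithmetic
// above builds "CC is in this set" masks, with bit 3 testing CC==0 down to
// bit 0 testing CC==3.  Worked through for a comparison against CC==2
// (before the final & CCValid):
static_assert((1u << (3 - 2)) == 0x2, "SETEQ vs 2: only CC==2");
static_assert(((~0u << (4 - 2)) & 0xfu) == 0xc, "SETLT vs 2: CC==0 or CC==1");
static_assert(((~0u << (3 - 2)) & 0xfu) == 0xe, "SETLE vs 2: CC in {0,1,2}");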
2040 
2041 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2042 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2043  ISD::CondCode Cond, SDLoc DL) {
2044  if (CmpOp1.getOpcode() == ISD::Constant) {
2045  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2046  unsigned Opcode, CCValid;
2047  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2048  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2049  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2050  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2051  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2052  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2053  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2054  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2055  }
2056  Comparison C(CmpOp0, CmpOp1);
2057  C.CCMask = CCMaskForCondCode(Cond);
2058  if (C.Op0.getValueType().isFloatingPoint()) {
2059  C.CCValid = SystemZ::CCMASK_FCMP;
2060  C.Opcode = SystemZISD::FCMP;
2061  adjustForFNeg(C);
2062  } else {
2063  C.CCValid = SystemZ::CCMASK_ICMP;
2064  C.Opcode = SystemZISD::ICMP;
2065  // Choose the type of comparison. Equality and inequality tests can
2066  // use either signed or unsigned comparisons. The choice also doesn't
2067  // matter if both sign bits are known to be clear. In those cases we
2068  // want to give the main isel code the freedom to choose whichever
2069  // form fits best.
2070  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2071  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2072  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2073  C.ICmpType = SystemZICMP::Any;
2074  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2075  C.ICmpType = SystemZICMP::UnsignedOnly;
2076  else
2077  C.ICmpType = SystemZICMP::SignedOnly;
2078  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2079  adjustZeroCmp(DAG, DL, C);
2080  adjustSubwordCmp(DAG, DL, C);
2081  adjustForSubtraction(DAG, DL, C);
2082  adjustForLTGFR(C);
2083  adjustICmpTruncate(DAG, DL, C);
2084  }
2085 
2086  if (shouldSwapCmpOperands(C)) {
2087  std::swap(C.Op0, C.Op1);
2088  C.CCMask = reverseCCMask(C.CCMask);
2089  }
2090 
2091  adjustForTestUnderMask(DAG, DL, C);
2092  return C;
2093 }
2094 
2095 // Emit the comparison instruction described by C.
2096 static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
2097  if (!C.Op1.getNode()) {
2098  SDValue Op;
2099  switch (C.Op0.getOpcode()) {
2100  case ISD::INTRINSIC_W_CHAIN:
2101  Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
2102  break;
2103  case ISD::INTRINSIC_WO_CHAIN:
2104  Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
2105  break;
2106  default:
2107  llvm_unreachable("Invalid comparison operands");
2108  }
2109  return SDValue(Op.getNode(), Op->getNumValues() - 1);
2110  }
2111  if (C.Opcode == SystemZISD::ICMP)
2112  return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
2113  DAG.getConstant(C.ICmpType, DL, MVT::i32));
2114  if (C.Opcode == SystemZISD::TM) {
2115  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2116  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2117  return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
2118  DAG.getConstant(RegisterOnly, DL, MVT::i32));
2119  }
2120  return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
2121 }
2122 
2123 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2124 // 64 bits. Extend is the extension type to use. Store the high part
2125 // in Hi and the low part in Lo.
2126 static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
2127  unsigned Extend, SDValue Op0, SDValue Op1,
2128  SDValue &Hi, SDValue &Lo) {
2129  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2130  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2131  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2132  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2133  DAG.getConstant(32, DL, MVT::i64));
2134  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2135  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2136 }
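// [Editorial sketch -- not part of the original file.]  The scalar analogue
// of lowerMUL_LOHI32 for the unsigned case: one widening 64-bit multiply
// yields both halves of a 32-bit UMUL_LOHI.  (uint32_t/uint64_t are already
// in scope in this file via the LLVM headers.)
static inline void umulLoHi32Sketch(uint32_t A, uint32_t B,
                                    uint32_t &Hi, uint32_t &Lo) {
  uint64_t Product = uint64_t(A) * uint64_t(B); // the single MUL node
  Hi = uint32_t(Product >> 32);                 // the SRL + TRUNCATE pair
  Lo = uint32_t(Product);                       // the plain TRUNCATE
}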
2137 
2138 // Lower a binary operation that produces two VT results, one in each
2139 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2140 // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
2141 // on the extended Op0 and (unextended) Op1. Store the even register result
2142 // in Even and the odd register result in Odd.
2143 static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
2144  unsigned Extend, unsigned Opcode,
2145  SDValue Op0, SDValue Op1,
2146  SDValue &Even, SDValue &Odd) {
2147  SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
2148  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
2149  SDValue(In128, 0), Op1);
2150  bool Is32Bit = is32Bit(VT);
2151  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2152  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2153 }
2154 
2155 // Return an i32 value that is 1 if the CC value produced by Glue is
2156 // in the mask CCMask and 0 otherwise. CC is known to have a value
2157 // in CCValid, so other values can be ignored.
2158 static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
2159  unsigned CCValid, unsigned CCMask) {
2160  IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
2161  SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
2162 
2163  if (Conversion.XORValue)
2164  Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
2165  DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
2166 
2167  if (Conversion.AddValue)
2168  Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
2169  DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
2170 
2171  // The SHR/AND sequence should get optimized to an RISBG.
2172  Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
2173  DAG.getConstant(Conversion.Bit, DL, MVT::i32));
2174  if (Conversion.Bit != 31)
2175  Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
2176  DAG.getConstant(1, DL, MVT::i32));
2177  return Result;
2178 }
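// [Editorial sketch -- not part of the original file.]  IPM places CC in
// bits 28-29 of the i32 result, so the (((X ^ XORValue) + AddValue) >> Bit)
// recipe can project simple CC predicates to 0/1.  For example, the
// predicate "CC is 2 or 3" is just the upper CC bit and needs no XOR or
// ADD at all -- Bit is simply 29:
static inline unsigned ccAtLeast2Sketch(unsigned IPMResult) {
  return (IPMResult >> 29) & 1; // the SRL plus the final AND with 1
}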
2179 
2180 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2181 // be done directly. IsFP is true if CC is for a floating-point rather than
2182 // integer comparison.
2183 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
2184  switch (CC) {
2185  case ISD::SETOEQ:
2186  case ISD::SETEQ:
2187  return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
2188 
2189  case ISD::SETOGE:
2190  case ISD::SETGE:
2191  return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
2192 
2193  case ISD::SETOGT:
2194  case ISD::SETGT:
2195  return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
2196 
2197  case ISD::SETUGT:
2198  return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
2199 
2200  default:
2201  return 0;
2202  }
2203 }
2204 
2205 // Return the SystemZISD vector comparison operation for CC or its inverse,
2206 // or 0 if neither can be done directly. Indicate in Invert whether the
2207 // result is for the inverse of CC. IsFP is true if CC is for a
2208 // floating-point rather than integer comparison.
2209 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
2210  bool &Invert) {
2211  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2212  Invert = false;
2213  return Opcode;
2214  }
2215 
2216  CC = ISD::getSetCCInverse(CC, !IsFP);
2217  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2218  Invert = true;
2219  return Opcode;
2220  }
2221 
2222  return 0;
2223 }
2224 
2225 // Return a v2f64 that contains the extended form of elements Start and Start+1
2226 // of v4f32 value Op.
2227 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
2228  SDValue Op) {
2229  int Mask[] = { Start, -1, Start + 1, -1 };
2230  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2231  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2232 }
2233 
2234 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2235 // producing a result of type VT.
2236 static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
2237  EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
2238  // There is no hardware support for v4f32, so extend the vector into
2239  // two v2f64s and compare those.
2240  if (CmpOp0.getValueType() == MVT::v4f32) {
2241  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
2242  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
2243  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
2244  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
2245  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2246  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2247  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2248  }
2249  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2250 }
2251 
2252 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2253 // an integer mask of type VT.
2254 static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
2255  ISD::CondCode CC, SDValue CmpOp0,
2256  SDValue CmpOp1) {
2257  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2258  bool Invert = false;
2259  SDValue Cmp;
2260  switch (CC) {
2261  // Handle tests for order using (or (ogt y x) (oge x y)).
2262  case ISD::SETUO:
2263  Invert = true;
2264  case ISD::SETO: {
2265  assert(IsFP && "Unexpected integer comparison");
2266  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2267  SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
2268  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2269  break;
2270  }
2271 
2272  // Handle <> tests using (or (ogt y x) (ogt x y)).
2273  case ISD::SETUEQ:
2274  Invert = true;
2275  case ISD::SETONE: {
2276  assert(IsFP && "Unexpected integer comparison");
2277  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2278  SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
2279  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2280  break;
2281  }
2282 
2283  // Otherwise a single comparison is enough. It doesn't really
2284  // matter whether we try the inversion or the swap first, since
2285  // there are no cases where both work.
2286  default:
2287  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2288  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
2289  else {
2290  CC = ISD::getSetCCSwappedOperands(CC);
2291  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2292  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
2293  else
2294  llvm_unreachable("Unhandled comparison");
2295  }
2296  break;
2297  }
2298  if (Invert) {
2299  SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
2300  DAG.getConstant(65535, DL, MVT::i32));
2301  Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
2302  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2303  }
2304  return Cmp;
2305 }
2306 
2307 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2308  SelectionDAG &DAG) const {
2309  SDValue CmpOp0 = Op.getOperand(0);
2310  SDValue CmpOp1 = Op.getOperand(1);
2311  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2312  SDLoc DL(Op);
2313  EVT VT = Op.getValueType();
2314  if (VT.isVector())
2315  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2316 
2317  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2318  SDValue Glue = emitCmp(DAG, DL, C);
2319  return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
2320 }
2321 
2322 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2323  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2324  SDValue CmpOp0 = Op.getOperand(2);
2325  SDValue CmpOp1 = Op.getOperand(3);
2326  SDValue Dest = Op.getOperand(4);
2327  SDLoc DL(Op);
2328 
2329  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2330  SDValue Glue = emitCmp(DAG, DL, C);
2331  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
2332  Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
2333  DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
2334 }
2335 
2336 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2337 // allowing Pos and Neg to be wider than CmpOp.
2338 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
2339  return (Neg.getOpcode() == ISD::SUB &&
2340  Neg.getOperand(0).getOpcode() == ISD::Constant &&
2341  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
2342  Neg.getOperand(1) == Pos &&
2343  (Pos == CmpOp ||
2344  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
2345  Pos.getOperand(0) == CmpOp)));
2346 }
2347 
2348 // Return the absolute or negative absolute of Op; IsNegative decides which.
2349 static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
2350  bool IsNegative) {
2351  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
2352  if (IsNegative)
2353  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
2354  DAG.getConstant(0, DL, Op.getValueType()), Op);
2355  return Op;
2356 }
2357 
2358 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
2359  SelectionDAG &DAG) const {
2360  SDValue CmpOp0 = Op.getOperand(0);
2361  SDValue CmpOp1 = Op.getOperand(1);
2362  SDValue TrueOp = Op.getOperand(2);
2363  SDValue FalseOp = Op.getOperand(3);
2364  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2365  SDLoc DL(Op);
2366 
2367  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2368 
2369  // Check for absolute and negative-absolute selections, including those
2370  // where the comparison value is sign-extended (for LPGFR and LNGFR).
2371  // This check supplements the one in DAGCombiner.
2372  if (C.Opcode == SystemZISD::ICMP &&
2373  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
2374  C.CCMask != SystemZ::CCMASK_CMP_NE &&
2375  C.Op1.getOpcode() == ISD::Constant &&
2376  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2377  if (isAbsolute(C.Op0, TrueOp, FalseOp))
2378  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
2379  if (isAbsolute(C.Op0, FalseOp, TrueOp))
2380  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
2381  }
2382 
2383  SDValue Glue = emitCmp(DAG, DL, C);
2384 
2385  // Special case for handling -1/0 results. The shifts we use here
2386  // should get optimized with the IPM conversion sequence.
2387  auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
2388  auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
2389  if (TrueC && FalseC) {
2390  int64_t TrueVal = TrueC->getSExtValue();
2391  int64_t FalseVal = FalseC->getSExtValue();
2392  if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
2393  // Invert the condition if we want -1 on false.
2394  if (TrueVal == 0)
2395  C.CCMask ^= C.CCValid;
2396  SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
2397  EVT VT = Op.getValueType();
2398  // Extend the result to VT. Upper bits are ignored.
2399  if (!is32Bit(VT))
2400  Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
2401  // Sign-extend from the low bit.
2402  SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
2403  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
2404  return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
2405  }
2406  }
2407 
2408  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
2409  DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
2410 
2411  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
2412  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
2413 }
2414 
2415 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
2416  SelectionDAG &DAG) const {
2417  SDLoc DL(Node);
2418  const GlobalValue *GV = Node->getGlobal();
2419  int64_t Offset = Node->getOffset();
2420  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2421  Reloc::Model RM = DAG.getTarget().getRelocationModel();
2422  CodeModel::Model CM = DAG.getTarget().getCodeModel();
2423 
2424  SDValue Result;
2425  if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
2426  // Assign anchors at 1<<12 byte boundaries.
2427  uint64_t Anchor = Offset & ~uint64_t(0xfff);
2428  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
2429  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2430 
2431  // The offset can be folded into the address if it is aligned to a halfword.
2432  Offset -= Anchor;
2433  if (Offset != 0 && (Offset & 1) == 0) {
2434  SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
2435  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
2436  Offset = 0;
2437  }
2438  } else {
2439  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
2440  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2441  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2442  MachinePointerInfo::getGOT(), false, false, false, 0);
2443  }
2444 
2445  // If there was a non-zero offset that we didn't fold, create an explicit
2446  // addition for it.
2447  if (Offset != 0)
2448  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
2449  DAG.getConstant(Offset, DL, PtrVT));
2450 
2451  return Result;
2452 }
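// [Editorial sketch -- not part of the original file.]  The anchor split
// used above: PC-relative (LARL-style) addresses must be halfword aligned,
// so the remainder is folded into the address only when it is even; any
// leftover becomes an explicit ADD.
static inline void splitGlobalOffsetSketch(int64_t Offset, int64_t &Anchor,
                                           int64_t &Remainder) {
  Anchor = Offset & ~int64_t(0xfff); // anchors sit at 1<<12 byte boundaries
  Remainder = Offset - Anchor;       // in [0, 0xfff]; foldable when even
}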
2453 
2454 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
2455  SelectionDAG &DAG,
2456  unsigned Opcode,
2457  SDValue GOTOffset) const {
2458  SDLoc DL(Node);
2459  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2460  SDValue Chain = DAG.getEntryNode();
2461  SDValue Glue;
2462 
2463  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
2464  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2465  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
2466  Glue = Chain.getValue(1);
2467  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
2468  Glue = Chain.getValue(1);
2469 
2470  // The first call operand is the chain and the second is the TLS symbol.
2471  SmallVector<SDValue, 8> Ops;
2472  Ops.push_back(Chain);
2473  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
2474  Node->getValueType(0),
2475  0, 0));
2476 
2477  // Add argument registers to the end of the list so that they are
2478  // known live into the call.
2479  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
2480  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
2481 
2482  // Add a register mask operand representing the call-preserved registers.
2483  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2484  const uint32_t *Mask =
2485  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
2486  assert(Mask && "Missing call preserved mask for calling convention");
2487  Ops.push_back(DAG.getRegisterMask(Mask));
2488 
2489  // Glue the call to the argument copies.
2490  Ops.push_back(Glue);
2491 
2492  // Emit the call.
2493  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2494  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
2495  Glue = Chain.getValue(1);
2496 
2497  // Copy the return value from %r2.
2498  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
2499 }
2500 
2501 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
2502  SelectionDAG &DAG) const {
2503  SDLoc DL(Node);
2504  const GlobalValue *GV = Node->getGlobal();
2505  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2506  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
2507 
2508  // The high part of the thread pointer is in access register 0.
2509  SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
2510  DAG.getConstant(0, DL, MVT::i32));
2511  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
2512 
2513  // The low part of the thread pointer is in access register 1.
2514  SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
2515  DAG.getConstant(1, DL, MVT::i32));
2516  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
2517 
2518  // Merge them into a single 64-bit address.
2519  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
2520  DAG.getConstant(32, DL, PtrVT));
2521  SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
2522 
2523  // Get the offset of GA from the thread pointer, based on the TLS model.
2524  SDValue Offset;
2525  switch (model) {
2526  case TLSModel::GeneralDynamic: {
2527  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
2528  SystemZConstantPoolValue *CPV =
2529  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
2530 
2531  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2532  Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
2533  Offset, MachinePointerInfo::getConstantPool(),
2534  false, false, false, 0);
2535 
2536  // Call __tls_get_offset to retrieve the offset.
2537  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
2538  break;
2539  }
2540 
2541  case TLSModel::LocalDynamic: {
2542  // Load the GOT offset of the module ID.
2543  SystemZConstantPoolValue *CPV =
2544  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
2545 
2546  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2547  Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
2548  Offset, MachinePointerInfo::getConstantPool(),
2549  false, false, false, 0);
2550 
2551  // Call __tls_get_offset to retrieve the module base offset.
2552  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
2553 
2554  // Note: The SystemZLDCleanupPass will remove redundant computations
2555  // of the module base offset. Count total number of local-dynamic
2556  // accesses to trigger execution of that pass.
2557  SystemZMachineFunctionInfo* MFI =
2558  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
2559  MFI->incNumLocalDynamicTLSAccesses();
2560 
2561  // Add the per-symbol offset.
2562  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
2563 
2564  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
2565  DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
2566  DTPOffset, MachinePointerInfo::getConstantPool(),
2567  false, false, false, 0);
2568 
2569  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
2570  break;
2571  }
2572 
2573  case TLSModel::InitialExec: {
2574  // Load the offset from the GOT.
2575  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2576  SystemZII::MO_INDNTPOFF);
2577  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
2578  Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
2579  Offset, MachinePointerInfo::getGOT(),
2580  false, false, false, 0);
2581  break;
2582  }
2583 
2584  case TLSModel::LocalExec: {
2585  // Force the offset into the constant pool and load it from there.
2586  SystemZConstantPoolValue *CPV =
2587  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
2588 
2589  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2590  Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
2591  Offset, MachinePointerInfo::getConstantPool(),
2592  false, false, false, 0);
2593  break;
2594  }
2595  }
2596 
2597  // Add the base and offset together.
2598  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
2599 }
2600 
2601 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
2602  SelectionDAG &DAG) const {
2603  SDLoc DL(Node);
2604  const BlockAddress *BA = Node->getBlockAddress();
2605  int64_t Offset = Node->getOffset();
2606  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2607 
2608  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
2609  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2610  return Result;
2611 }
2612 
2613 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
2614  SelectionDAG &DAG) const {
2615  SDLoc DL(JT);
2616  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2617  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2618 
2619  // Use LARL to load the address of the table.
2620  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2621 }
2622 
2623 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
2624  SelectionDAG &DAG) const {
2625  SDLoc DL(CP);
2626  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2627 
2628  SDValue Result;
2629  if (CP->isMachineConstantPoolEntry())
2630  Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2631  CP->getAlignment());
2632  else
2633  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2634  CP->getAlignment(), CP->getOffset());
2635 
2636  // Use LARL to load the address of the constant pool entry.
2637  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2638 }
2639 
2640 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
2641  SelectionDAG &DAG) const {
2642  SDLoc DL(Op);
2643  SDValue In = Op.getOperand(0);
2644  EVT InVT = In.getValueType();
2645  EVT ResVT = Op.getValueType();
2646 
2647  // Convert loads directly. This is normally done by DAGCombiner,
2648  // but we need this case for bitcasts that are created during lowering
2649  // and which are then lowered themselves.
2650  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
2651  return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
2652  LoadN->getMemOperand());
2653 
2654  if (InVT == MVT::i32 && ResVT == MVT::f32) {
2655  SDValue In64;
2656  if (Subtarget.hasHighWord()) {
2657  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
2658  MVT::i64);
2659  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
2660  MVT::i64, SDValue(U64, 0), In);
2661  } else {
2662  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
2663  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
2664  DAG.getConstant(32, DL, MVT::i64));
2665  }
2666  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
2667  return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
2668  DL, MVT::f32, Out64);
2669  }
2670  if (InVT == MVT::f32 && ResVT == MVT::i32) {
2671  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
2672  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
2673  MVT::f64, SDValue(U64, 0), In);
2674  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
2675  if (Subtarget.hasHighWord())
2676  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
2677  MVT::i32, Out64);
2678  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
2679  DAG.getConstant(32, DL, MVT::i64));
2680  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
2681  }
2682  llvm_unreachable("Unexpected bitcast combination");
2683 }
2684 
2685 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
2686  SelectionDAG &DAG) const {
2687  MachineFunction &MF = DAG.getMachineFunction();
2688  SystemZMachineFunctionInfo *FuncInfo =
2689  MF.getInfo<SystemZMachineFunctionInfo>();
2690  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2691 
2692  SDValue Chain = Op.getOperand(0);
2693  SDValue Addr = Op.getOperand(1);
2694  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2695  SDLoc DL(Op);
2696 
2697  // The initial values of each field.
2698  const unsigned NumFields = 4;
2699  SDValue Fields[NumFields] = {
2700  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
2701  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
2702  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
2703  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
2704  };
2705 
2706  // Store each field into its respective slot.
2707  SDValue MemOps[NumFields];
2708  unsigned Offset = 0;
2709  for (unsigned I = 0; I < NumFields; ++I) {
2710  SDValue FieldAddr = Addr;
2711  if (Offset != 0)
2712  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
2713  DAG.getIntPtrConstant(Offset, DL));
2714  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
2715  MachinePointerInfo(SV, Offset),
2716  false, false, 0);
2717  Offset += 8;
2718  }
2719  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
2720 }
2721 
2722 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
2723  SelectionDAG &DAG) const {
2724  SDValue Chain = Op.getOperand(0);
2725  SDValue DstPtr = Op.getOperand(1);
2726  SDValue SrcPtr = Op.getOperand(2);
2727  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
2728  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
2729  SDLoc DL(Op);
2730 
2731  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
2732  /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
2733  /*isTailCall*/false,
2734  MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
2735 }
2736 
2737 SDValue SystemZTargetLowering::
2738 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
2739  SDValue Chain = Op.getOperand(0);
2740  SDValue Size = Op.getOperand(1);
2741  SDLoc DL(Op);
2742 
2743  unsigned SPReg = getStackPointerRegisterToSaveRestore();
2744 
2745  // Get a reference to the stack pointer.
2746  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
2747 
2748  // Get the new stack pointer value.
2749  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);
2750 
2751  // Copy the new stack pointer back.
2752  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
2753 
2754  // The allocated data lives above the 160 bytes allocated for the standard
2755  // frame, plus any outgoing stack arguments. We don't know how much that
2756  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
2757  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
2758  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
2759 
2760  SDValue Ops[2] = { Result, Chain };
2761  return DAG.getMergeValues(Ops, DL);
2762 }
2763 
2764 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
2765  SelectionDAG &DAG) const {
2766  EVT VT = Op.getValueType();
2767  SDLoc DL(Op);
2768  SDValue Ops[2];
2769  if (is32Bit(VT))
2770  // Just do a normal 64-bit multiplication and extract the results.
2771  // We define this so that it can be used for constant division.
2772  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
2773  Op.getOperand(1), Ops[1], Ops[0]);
2774  else {
2775  // Do a full 128-bit multiplication based on UMUL_LOHI64:
2776  //
2777  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
2778  //
2779  // but using the fact that the upper halves are either all zeros
2780  // or all ones:
2781  //
2782  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
2783  //
2784  // and grouping the right terms together since they are quicker than the
2785  // multiplication:
2786  //
2787  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
2788  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
2789  SDValue LL = Op.getOperand(0);
2790  SDValue RL = Op.getOperand(1);
2791  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
2792  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
2793  // UMUL_LOHI64 returns the low result in the odd register and the high
2794  // result in the even register. SMUL_LOHI is defined to return the
2795  // low half first, so the results are in reverse order.
2796  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
2797  LL, RL, Ops[1], Ops[0]);
2798  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
2799  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
2800  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
2801  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
2802  }
2803  return DAG.getMergeValues(Ops, DL);
2804 }
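// [Editorial sketch -- not part of the original file.]  The identity used
// above, checked at a narrower width: the low 16 bits of a signed 8x8
// product equal the unsigned 8x8 product minus ((lh & rl) + (ll & rh)) << 8,
// where lh/rh are the sign-extended upper halves (all zeros or all ones).
static inline int16_t smul8x8Sketch(int8_t L, int8_t R) {
  uint8_t LL = uint8_t(L), RL = uint8_t(R);
  uint8_t LH = uint8_t(L >> 7), RH = uint8_t(R >> 7);  // 0x00 or 0xff
  uint16_t Product = uint16_t(LL * RL);                // UMUL_LOHI analogue
  uint16_t Fixup = uint16_t(((LH & RL) + (LL & RH)) << 8);
  return int16_t(uint16_t(Product - Fixup));           // e.g. -2 * -3 == 6
}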
2805 
2806 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
2807  SelectionDAG &DAG) const {
2808  EVT VT = Op.getValueType();
2809  SDLoc DL(Op);
2810  SDValue Ops[2];
2811  if (is32Bit(VT))
2812  // Just do a normal 64-bit multiplication and extract the results.
2813  // We define this so that it can be used for constant division.
2814  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
2815  Op.getOperand(1), Ops[1], Ops[0]);
2816  else
2817  // UMUL_LOHI64 returns the low result in the odd register and the high
2818  // result in the even register. UMUL_LOHI is defined to return the
2819  // low half first, so the results are in reverse order.
2820  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
2821  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
2822  return DAG.getMergeValues(Ops, DL);
2823 }
2824 
2825 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
2826  SelectionDAG &DAG) const {
2827  SDValue Op0 = Op.getOperand(0);
2828  SDValue Op1 = Op.getOperand(1);
2829  EVT VT = Op.getValueType();
2830  SDLoc DL(Op);
2831  unsigned Opcode;
2832 
2833  // We use DSGF for 32-bit division.
2834  if (is32Bit(VT)) {
2835  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
2836  Opcode = SystemZISD::SDIVREM32;
2837  } else if (DAG.ComputeNumSignBits(Op1) > 32) {
2838  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
2839  Opcode = SystemZISD::SDIVREM32;
2840  } else
2841  Opcode = SystemZISD::SDIVREM64;
2842 
2843  // DSG(F) takes a 64-bit dividend, so the even register in the GR128
2844  // input is "don't care". The instruction returns the remainder in
2845  // the even register and the quotient in the odd register.
2846  SDValue Ops[2];
2847  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
2848  Op0, Op1, Ops[1], Ops[0]);
2849  return DAG.getMergeValues(Ops, DL);
2850 }
2851 
2852 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
2853  SelectionDAG &DAG) const {
2854  EVT VT = Op.getValueType();
2855  SDLoc DL(Op);
2856 
2857  // DL(G) uses a double-width dividend, so we need to clear the even
2858  // register in the GR128 input. The instruction returns the remainder
2859  // in the even register and the quotient in the odd register.
2860  SDValue Ops[2];
2861  if (is32Bit(VT))
2862  lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
2863  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
2864  else
2865  lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
2866  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
2867  return DAG.getMergeValues(Ops, DL);
2868 }
2869 
2870 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
2871  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
2872 
2873  // Get the known-zero masks for each operand.
2874  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
2875  APInt KnownZero[2], KnownOne[2];
2876  DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]);
2877  DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]);
2878 
2879  // See if the upper 32 bits of one operand and the lower 32 bits of the
2880  // other are known zero. They are the low and high operands respectively.
2881  uint64_t Masks[] = { KnownZero[0].getZExtValue(),
2882  KnownZero[1].getZExtValue() };
2883  unsigned High, Low;
2884  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
2885  High = 1, Low = 0;
2886  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
2887  High = 0, Low = 1;
2888  else
2889  return Op;
2890 
2891  SDValue LowOp = Ops[Low];
2892  SDValue HighOp = Ops[High];
2893 
2894  // If the high part is a constant, we're better off using IILH.
2895  if (HighOp.getOpcode() == ISD::Constant)
2896  return Op;
2897 
2898  // If the low part is a constant that is outside the range of LHI,
2899  // then we're better off using IILF.
2900  if (LowOp.getOpcode() == ISD::Constant) {
2901  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
2902  if (!isInt<16>(Value))
2903  return Op;
2904  }
2905 
2906  // Check whether the high part is an AND that doesn't change the
2907  // high 32 bits and just masks out low bits. We can skip it if so.
2908  if (HighOp.getOpcode() == ISD::AND &&
2909  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
2910  SDValue HighOp0 = HighOp.getOperand(0);
2911  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
2912  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
2913  HighOp = HighOp0;
2914  }
2915 
2916  // Take advantage of the fact that all GR32 operations only change the
2917  // low 32 bits by truncating Low to an i32 and inserting it directly
2918  // using a subreg. The interesting cases are those where the truncation
2919  // can be folded.
2920  SDLoc DL(Op);
2921  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
2922  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
2923  MVT::i64, HighOp, Low32);
2924 }
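// [Editorial sketch -- not part of the original file.]  What the final
// subreg insertion computes: once known-bits analysis proves one operand
// contributes only the high word and the other only the low word, the OR
// degenerates to "replace the low 32 bits of HighOp with LowOp".
static inline uint64_t orAsInsertSketch(uint64_t HighOp, uint64_t LowOp) {
  // Preconditions mirrored from the known-zero checks above:
  //   (HighOp & 0xffffffff) == 0  and  (LowOp >> 32) == 0.
  return (HighOp & ~uint64_t(0xffffffff)) | uint32_t(LowOp);
}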
2925 
2926 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
2927  SelectionDAG &DAG) const {
2928  EVT VT = Op.getValueType();
2929  SDLoc DL(Op);
2930  Op = Op.getOperand(0);
2931 
2932  // Handle vector types via VPOPCT.
2933  if (VT.isVector()) {
2934  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
2935  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
2936  switch (VT.getVectorElementType().getSizeInBits()) {
2937  case 8:
2938  break;
2939  case 16: {
2940  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
2941  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
2942  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
2943  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
2944  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
2945  break;
2946  }
2947  case 32: {
2948  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
2949  DAG.getConstant(0, DL, MVT::i32));
2950  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
2951  break;
2952  }
2953  case 64: {
2954  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
2955  DAG.getConstant(0, DL, MVT::i32));
2956  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
2957  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
2958  break;
2959  }
2960  default:
2961  llvm_unreachable("Unexpected type");
2962  }
2963  return Op;
2964  }
2965 
2966  // Get the known-zero mask for the operand.
2967  APInt KnownZero, KnownOne;
2968  DAG.computeKnownBits(Op, KnownZero, KnownOne);
2969  unsigned NumSignificantBits = (~KnownZero).getActiveBits();
2970  if (NumSignificantBits == 0)
2971  return DAG.getConstant(0, DL, VT);
2972 
2973  // Skip known-zero high parts of the operand.
2974  int64_t OrigBitSize = VT.getSizeInBits();
2975  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
2976  BitSize = std::min(BitSize, OrigBitSize);
2977 
2978  // The POPCNT instruction counts the number of bits in each byte.
2979  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
2980  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
2981  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
2982 
2983  // Add up per-byte counts in a binary tree. All bits of Op at
2984  // position larger than BitSize remain zero throughout.
2985  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
2986  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
2987  if (BitSize != OrigBitSize)
2988  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
2989  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
2990  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
2991  }
2992 
2993  // Extract overall result from high byte.
2994  if (BitSize > 8)
2995  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
2996  DAG.getConstant(BitSize - 8, DL, VT));
2997 
2998  return Op;
2999 }
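// [Editorial sketch -- not part of the original file.]  The binary-tree
// accumulation above in scalar form, for a full 64-bit operand whose bytes
// already hold per-byte population counts (what POPCNT produces).  Partial
// sums never exceed 64, so no carries cross byte boundaries.
static inline uint64_t sumPopcntBytesSketch(uint64_t PerByteCounts) {
  uint64_t Op = PerByteCounts;
  for (int I = 32; I >= 8; I /= 2)
    Op += Op << I;  // the SHL + ADD pair of each tree level
  return Op >> 56;  // the total ends up in the most significant byte
}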
3000 
3001 // Op is an atomic load. Lower it into a normal volatile load.
3002 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3003  SelectionDAG &DAG) const {
3004  auto *Node = cast<AtomicSDNode>(Op.getNode());
3005  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3006  Node->getChain(), Node->getBasePtr(),
3007  Node->getMemoryVT(), Node->getMemOperand());
3008 }
3009 
3010 // Op is an atomic store. Lower it into a normal volatile store followed
3011 // by a serialization.
3012 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3013  SelectionDAG &DAG) const {
3014  auto *Node = cast<AtomicSDNode>(Op.getNode());
3015  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3016  Node->getBasePtr(), Node->getMemoryVT(),
3017  Node->getMemOperand());
3018  return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
3019  Chain), 0);
3020 }
3021 
3022 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the 8- and
3023 // 16-bit cases into the fullword ATOMIC_LOADW_* operation given by Opcode.
3024 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3025  SelectionDAG &DAG,
3026  unsigned Opcode) const {
3027  auto *Node = cast<AtomicSDNode>(Op.getNode());
3028 
3029  // 32-bit operations need no code outside the main loop.
3030  EVT NarrowVT = Node->getMemoryVT();
3031  EVT WideVT = MVT::i32;
3032  if (NarrowVT == WideVT)
3033  return Op;
3034 
3035  int64_t BitSize = NarrowVT.getSizeInBits();
3036  SDValue ChainIn = Node->getChain();
3037  SDValue Addr = Node->getBasePtr();
3038  SDValue Src2 = Node->getVal();
3039  MachineMemOperand *MMO = Node->getMemOperand();
3040  SDLoc DL(Node);
3041  EVT PtrVT = Addr.getValueType();
3042 
3043  // Convert atomic subtracts of constants into additions.
3044  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3045  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3046  Opcode = SystemZISD::ATOMIC_LOADW_ADD;
3047  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3048  }
3049 
3050  // Get the address of the containing word.
3051  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3052  DAG.getConstant(-4, DL, PtrVT));
3053 
3054  // Get the number of bits that the word must be rotated left in order
3055  // to bring the field to the top bits of a GR32.
3056  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3057  DAG.getConstant(3, DL, PtrVT));
3058  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3059 
3060  // Get the complementing shift amount, for rotating a field in the top
3061  // bits back to its proper position.
3062  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3063  DAG.getConstant(0, DL, WideVT), BitShift);
3064 
3065  // Extend the source operand to 32 bits and prepare it for the inner loop.
3066  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3067  // operations require the source to be shifted in advance. (This shift
3068  // can be folded if the source is constant.) For AND and NAND, the lower
3069  // bits must be set, while for other opcodes they should be left clear.
3070  if (Opcode != SystemZISD::ATOMIC_SWAPW)
3071  Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3072  DAG.getConstant(32 - BitSize, DL, WideVT));
3073  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3074  Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3075  Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3076  DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3077 
3078  // Construct the ATOMIC_LOADW_* node.
3079  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3080  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
3081  DAG.getConstant(BitSize, DL, WideVT) };
3082  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
3083  NarrowVT, MMO);
3084 
3085  // Rotate the result of the final CS so that the field is in the lower
3086  // bits of a GR32, then truncate it.
3087  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
3088  DAG.getConstant(BitSize, DL, WideVT));
3089  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
3090 
3091  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
3092  return DAG.getMergeValues(RetOps, DL);
3093 }
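// [Editorial sketch -- not part of the original file.]  The address and
// shift arithmetic used above, in scalar form: for a byte or halfword at
// Addr, locate the containing aligned word and the left-rotate amount that
// brings the field to the top of a big-endian GR32.
static inline void subwordShiftsSketch(uint64_t Addr, uint64_t &AlignedAddr,
                                       unsigned &BitShift,
                                       unsigned &NegBitShift) {
  AlignedAddr = Addr & ~uint64_t(3);   // the AND with -4
  BitShift = unsigned(Addr << 3) & 31; // 8 * byte offset, modulo 32
  NegBitShift = (32 - BitShift) & 31;  // the SUB from zero, modulo 32
}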
3094 
3095 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
3096 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3097 // operations into additions.
3098 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
3099  SelectionDAG &DAG) const {
3100  auto *Node = cast<AtomicSDNode>(Op.getNode());
3101  EVT MemVT = Node->getMemoryVT();
3102  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
3103  // A full-width operation.
3104  assert(Op.getValueType() == MemVT && "Mismatched VTs");
3105  SDValue Src2 = Node->getVal();
3106  SDValue NegSrc2;
3107  SDLoc DL(Src2);
3108 
3109  if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
3110  // Use an addition if the operand is constant and either LAA(G) is
3111  // available or the negative value is in the range of A(G)FHI.
3112  int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
3113  if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
3114  NegSrc2 = DAG.getConstant(Value, DL, MemVT);
3115  } else if (Subtarget.hasInterlockedAccess1())
3116  // Use LAA(G) if available.
3117  NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
3118  Src2);
3119 
3120  if (NegSrc2.getNode())
3121  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
3122  Node->getChain(), Node->getBasePtr(), NegSrc2,
3123  Node->getMemOperand(), Node->getOrdering(),
3124  Node->getSynchScope());
3125 
3126  // Use the node as-is.
3127  return Op;
3128  }
3129 
3130  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
3131 }
3132 
3133 // Op is an 8-, 16- or 32-bit ATOMIC_CMP_SWAP operation. Lower the 8- and
3134 // 16-bit cases into a fullword ATOMIC_CMP_SWAPW operation.
3135 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
3136  SelectionDAG &DAG) const {
3137  auto *Node = cast<AtomicSDNode>(Op.getNode());
3138 
3139  // We have native support for 32-bit compare and swap.
3140  EVT NarrowVT = Node->getMemoryVT();
3141  EVT WideVT = MVT::i32;
3142  if (NarrowVT == WideVT)
3143  return Op;
3144 
3145  int64_t BitSize = NarrowVT.getSizeInBits();
3146  SDValue ChainIn = Node->getOperand(0);
3147  SDValue Addr = Node->getOperand(1);
3148  SDValue CmpVal = Node->getOperand(2);
3149  SDValue SwapVal = Node->getOperand(3);
3150  MachineMemOperand *MMO = Node->getMemOperand();
3151  SDLoc DL(Node);
3152  EVT PtrVT = Addr.getValueType();
3153 
3154  // Get the address of the containing word.
3155  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3156  DAG.getConstant(-4, DL, PtrVT));
3157 
3158  // Get the number of bits that the word must be rotated left in order
3159  // to bring the field to the top bits of a GR32.
3160  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3161  DAG.getConstant(3, DL, PtrVT));
3162  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3163 
3164  // Get the complementing shift amount, for rotating a field in the top
3165  // bits back to its proper position.
3166  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3167  DAG.getConstant(0, DL, WideVT), BitShift);
3168 
3169  // Construct the ATOMIC_CMP_SWAPW node.
3170  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3171  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
3172  NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
3173  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
3174  VTList, Ops, NarrowVT, MMO);
3175  return AtomicOp;
3176 }
3177 
3178 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
3179  SelectionDAG &DAG) const {
3180  MachineFunction &MF = DAG.getMachineFunction();
3181  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3182  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
3183  SystemZ::R15D, Op.getValueType());
3184 }
3185 
3186 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
3187  SelectionDAG &DAG) const {
3188  MachineFunction &MF = DAG.getMachineFunction();
3189  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3190  return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
3191  SystemZ::R15D, Op.getOperand(1));
3192 }
3193 
3194 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
3195  SelectionDAG &DAG) const {
3196  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3197  if (!IsData)
3198  // Just preserve the chain.
3199  return Op.getOperand(0);
3200 
3201  SDLoc DL(Op);
3202  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3203  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
3204  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
3205  SDValue Ops[] = {
3206  Op.getOperand(0),
3207  DAG.getConstant(Code, DL, MVT::i32),
3208  Op.getOperand(1)
3209  };
3210  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
3211  Node->getVTList(), Ops,
3212  Node->getMemoryVT(), Node->getMemOperand());
3213 }
3214 
3215 // Return an i32 that contains the value of CC immediately after After,
3216 // whose final result value must be MVT::Glue.
3217 static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
3218  SDLoc DL(After);
3219  SDValue Glue = SDValue(After, After->getNumValues() - 1);
3220  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
3221  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
3222  DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
3223 }
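// --- Editor's sketch (not part of the LLVM source): IPM leaves CC in
// bits 28-29 of the result (the bits above are zero, the program mask sits
// below), so the SRL by SystemZ::IPM_CC (28) above yields the raw 0-3
// condition code. A standalone check in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t CC = 0; CC < 4; ++CC) {
    uint32_t IPMResult = (CC << 28) | 0x0f000000; // arbitrary program mask
    assert((IPMResult >> 28) == CC);              // mask bits shift out
  }
  return 0;
}
// --- end of editor's sketch ---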
3224 
3225 SDValue
3226 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3227  SelectionDAG &DAG) const {
3228  unsigned Opcode, CCValid;
3229  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
3230  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
3231  SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
3232  SDValue CC = getCCResult(DAG, Glued.getNode());
3233  DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
3234  return SDValue();
3235  }
3236 
3237  return SDValue();
3238 }
3239 
3240 SDValue
3241 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3242  SelectionDAG &DAG) const {
3243  unsigned Opcode, CCValid;
3244  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
3245  SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
3246  SDValue CC = getCCResult(DAG, Glued.getNode());
3247  if (Op->getNumValues() == 1)
3248  return CC;
3249  assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
3250  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
3251  Glued, CC);
3252  }
3253 
3254  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3255  switch (Id) {
3256  case Intrinsic::s390_vpdi:
3257  return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
3258  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3259 
3260  case Intrinsic::s390_vperm:
3261  return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
3262  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3263 
3264  case Intrinsic::s390_vuphb:
3265  case Intrinsic::s390_vuphh:
3266  case Intrinsic::s390_vuphf:
3267  return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
3268  Op.getOperand(1));
3269 
3270  case Intrinsic::s390_vuplhb:
3271  case Intrinsic::s390_vuplhh:
3272  case Intrinsic::s390_vuplhf:
3273  return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
3274  Op.getOperand(1));
3275 
3276  case Intrinsic::s390_vuplb:
3277  case Intrinsic::s390_vuplhw:
3278  case Intrinsic::s390_vuplf:
3279  return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
3280  Op.getOperand(1));
3281 
3282  case Intrinsic::s390_vupllb:
3283  case Intrinsic::s390_vupllh:
3284  case Intrinsic::s390_vupllf:
3285  return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
3286  Op.getOperand(1));
3287 
3288  case Intrinsic::s390_vsumb:
3289  case Intrinsic::s390_vsumh:
3290  case Intrinsic::s390_vsumgh:
3291  case Intrinsic::s390_vsumgf:
3292  case Intrinsic::s390_vsumqf:
3293  case Intrinsic::s390_vsumqg:
3294  return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
3295  Op.getOperand(1), Op.getOperand(2));
3296  }
3297 
3298  return SDValue();
3299 }
3300 
3301 namespace {
3302 // Says that SystemZISD operation Opcode can be used to perform the equivalent
3303 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
3304 // Operand is the constant third operand, otherwise it is the number of
3305 // bytes in each element of the result.
3306 struct Permute {
3307  unsigned Opcode;
3308  unsigned Operand;
3309  unsigned char Bytes[SystemZ::VectorBytes];
3310 };
3311 }
3312 
3313 static const Permute PermuteForms[] = {
3314  // VMRHG
3315  { SystemZISD::MERGE_HIGH, 8,
3316  { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
3317  // VMRHF
3318  { SystemZISD::MERGE_HIGH, 4,
3319  { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
3320  // VMRHH
3321  { SystemZISD::MERGE_HIGH, 2,
3322  { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
3323  // VMRHB
3324  { SystemZISD::MERGE_HIGH, 1,
3325  { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
3326  // VMRLG
3327  { SystemZISD::MERGE_LOW, 8,
3328  { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
3329  // VMRLF
3330  { SystemZISD::MERGE_LOW, 4,
3331  { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
3332  // VMRLH
3333  { SystemZISD::MERGE_LOW, 2,
3334  { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
3335  // VMRLB
3336  { SystemZISD::MERGE_LOW, 1,
3337  { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
3338  // VPKG
3339  { SystemZISD::PACK, 4,
3340  { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
3341  // VPKF
3342  { SystemZISD::PACK, 2,
3343  { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
3344  // VPKH
3345  { SystemZISD::PACK, 1,
3346  { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
3347  // VPDI V1, V2, 4 (low half of V1, high half of V2)
3348  { SystemZISD::PERMUTE_DWORDS, 4,
3349  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
3350  // VPDI V1, V2, 1 (high half of V1, low half of V2)
3351  { SystemZISD::PERMUTE_DWORDS, 1,
3352  { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
3353 };
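// --- Editor's sketch (not part of the LLVM source): in the table above,
// byte selectors 0-15 name bytes of operand 0 and 16-31 name bytes of
// operand 1. A standalone check that the VMRHB row is the byte-wise
// interleaving of the two operands' high halves:

#include <cassert>

int main() {
  unsigned char VMRHB[16];
  for (unsigned I = 0; I < 8; ++I) {
    VMRHB[2 * I] = I;          // byte I of operand 0
    VMRHB[2 * I + 1] = 16 + I; // byte I of operand 1
  }
  const unsigned char Expected[16] = { 0, 16, 1, 17, 2, 18, 3, 19,
                                       4, 20, 5, 21, 6, 22, 7, 23 };
  for (unsigned I = 0; I < 16; ++I)
    assert(VMRHB[I] == Expected[I]);
  return 0;
}
// --- end of editor's sketch ---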
3354 
3355 // Called after matching a vector shuffle against a particular pattern.
3356 // Both the original shuffle and the pattern have two vector operands.
3357 // OpNos[0] is the operand of the original shuffle that should be used for
3358 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
3359 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
3360 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
3361 // for operands 0 and 1 of the pattern.
3362 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
3363  if (OpNos[0] < 0) {
3364  if (OpNos[1] < 0)
3365  return false;
3366  OpNo0 = OpNo1 = OpNos[1];
3367  } else if (OpNos[1] < 0) {
3368  OpNo0 = OpNo1 = OpNos[0];
3369  } else {
3370  OpNo0 = OpNos[0];
3371  OpNo1 = OpNos[1];
3372  }
3373  return true;
3374 }
3375 
3376 // Bytes is a VPERM-like permute vector, except that -1 is used for
3377 // undefined bytes. Return true if the VPERM can be implemented using P.
3378 // When returning true set OpNo0 to the VPERM operand that should be
3379 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
3380 //
3381 // For example, if swapping the VPERM operands allows P to match, OpNo0
3382 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
3383 // operand, but rewriting it to use two duplicated operands allows it to
3384 // match P, then OpNo0 and OpNo1 will be the same.
3385 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
3386  unsigned &OpNo0, unsigned &OpNo1) {
3387  int OpNos[] = { -1, -1 };
3388  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
3389  int Elt = Bytes[I];
3390  if (Elt >= 0) {
3391  // Make sure that the two permute vectors use the same suboperand
3392  // byte number. Only the operand numbers (the high bits) are
3393  // allowed to differ.
3394  if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
3395  return false;
3396  int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
3397  int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
3398  // Make sure that the operand mappings are consistent with previous
3399  // elements.
3400  if (OpNos[ModelOpNo] == 1 - RealOpNo)
3401  return false;
3402  OpNos[ModelOpNo] = RealOpNo;
3403  }
3404  }
3405  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3406 }
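// --- Editor's sketch (not part of the LLVM source): the core test in
// matchPermute compares selectors modulo the vector width (16 bytes). For
// example, shuffle byte 20 (byte 4 of operand 1) matches pattern byte 4
// (byte 4 of operand 0) provided the operands are used swapped:

#include <cassert>

int main() {
  int Elt = 20, PatternByte = 4;
  assert(((Elt ^ PatternByte) & 15) == 0); // same byte within an operand
  assert(PatternByte / 16 == 0);           // the pattern names operand 0...
  assert(Elt / 16 == 1);                   // ...the shuffle supplies operand 1
  // matchPermute would record OpNos[0] = 1, i.e. run P with operands swapped.
  return 0;
}
// --- end of editor's sketch ---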
3407 
3408 // As above, but search for a matching permute.
3409 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
3410  unsigned &OpNo0, unsigned &OpNo1) {
3411  for (auto &P : PermuteForms)
3412  if (matchPermute(Bytes, P, OpNo0, OpNo1))
3413  return &P;
3414  return nullptr;
3415 }
3416 
3417 // Bytes is a VPERM-like permute vector, except that -1 is used for
3418 // undefined bytes. This permute is an operand of an outer permute.
3419 // See whether redistributing the -1 bytes gives a shuffle that can be
3420 // implemented using P. If so, set Transform to a VPERM-like permute vector
3421 // that, when applied to the result of P, gives the original permute in Bytes.
3422 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3423  const Permute &P,
3424  SmallVectorImpl<int> &Transform) {
3425  unsigned To = 0;
3426  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
3427  int Elt = Bytes[From];
3428  if (Elt < 0)
3429  // Byte number From of the result is undefined.
3430  Transform[From] = -1;
3431  else {
3432  while (P.Bytes[To] != Elt) {
3433  To += 1;
3434  if (To == SystemZ::VectorBytes)
3435  return false;
3436  }
3437  Transform[From] = To;
3438  }
3439  }
3440  return true;
3441 }
3442 
3443 // As above, but search for a matching permute.
3444 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3445  SmallVectorImpl<int> &Transform) {
3446  for (auto &P : PermuteForms)
3447  if (matchDoublePermute(Bytes, P, Transform))
3448  return &P;
3449  return nullptr;
3450 }
3451 
3452 // Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
3453 // as if it had type vNi8.
3454 static void getVPermMask(ShuffleVectorSDNode *VSN,
3455  SmallVectorImpl<int> &Bytes) {
3456  EVT VT = VSN->getValueType(0);
3457  unsigned NumElements = VT.getVectorNumElements();
3458  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3459  Bytes.resize(NumElements * BytesPerElement, -1);
3460  for (unsigned I = 0; I < NumElements; ++I) {
3461  int Index = VSN->getMaskElt(I);
3462  if (Index >= 0)
3463  for (unsigned J = 0; J < BytesPerElement; ++J)
3464  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
3465  }
3466 }
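// --- Editor's sketch (not part of the LLVM source): expanding a v4i32
// shuffle mask to the byte level as getVPermMask does, for the
// hypothetical mask { 5, -1, 0, 1 }:

#include <cassert>
#include <vector>

int main() {
  int Mask[4] = { 5, -1, 0, 1 };
  std::vector<int> Bytes(16, -1);
  for (unsigned I = 0; I < 4; ++I)
    if (Mask[I] >= 0)
      for (unsigned J = 0; J < 4; ++J)
        Bytes[I * 4 + J] = Mask[I] * 4 + J;
  assert(Bytes[0] == 20 && Bytes[3] == 23); // element 5 covers bytes 20-23
  assert(Bytes[4] == -1);                   // undefined elements stay -1
  assert(Bytes[8] == 0 && Bytes[12] == 4);
  return 0;
}
// --- end of editor's sketch ---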
3467 
3468 // Bytes is a VPERM-like permute vector, except that -1 is used for
3469 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
3470 // the result come from a contiguous sequence of bytes from one input.
3471 // Set Base to the selector for the first byte if so.
3472 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
3473  unsigned BytesPerElement, int &Base) {
3474  Base = -1;
3475  for (unsigned I = 0; I < BytesPerElement; ++I) {
3476  if (Bytes[Start + I] >= 0) {
3477  unsigned Elem = Bytes[Start + I];
3478  if (Base < 0) {
3479  Base = Elem - I;
3480  // Make sure the bytes would come from one input operand.
3481  if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
3482  return false;
3483  } else if (unsigned(Base) != Elem - I)
3484  return false;
3485  }
3486  }
3487  return true;
3488 }
3489 
3490 // Bytes is a VPERM-like permute vector, except that -1 is used for
3491 // undefined bytes. Return true if it can be performed using VSLDI.
3492 // When returning true, set StartIndex to the shift amount and OpNo0
3493 // and OpNo1 to the VPERM operands that should be used as the first
3494 // and second shift operand respectively.
3495 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
3496  unsigned &StartIndex, unsigned &OpNo0,
3497  unsigned &OpNo1) {
3498  int OpNos[] = { -1, -1 };
3499  int Shift = -1;
3500  for (unsigned I = 0; I < 16; ++I) {
3501  int Index = Bytes[I];
3502  if (Index >= 0) {
3503  int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
3504  int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
3505  int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
3506  if (Shift < 0)
3507  Shift = ExpectedShift;
3508  else if (Shift != ExpectedShift)
3509  return false;
3510  // Make sure that the operand mappings are consistent with previous
3511  // elements.
3512  if (OpNos[ModelOpNo] == 1 - RealOpNo)
3513  return false;
3514  OpNos[ModelOpNo] = RealOpNo;
3515  }
3516  }
3517  StartIndex = Shift;
3518  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3519 }
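// --- Editor's sketch (not part of the LLVM source): a VSLDI with shift
// amount 3 selects bytes 3..18 of the concatenation Op0||Op1, so every
// defined result byte implies the same shift. Reproducing that invariant
// from isShlDoublePermute in plain C++:

#include <cassert>

int main() {
  int Bytes[16];
  for (unsigned I = 0; I < 16; ++I)
    Bytes[I] = 3 + I; // result byte I is byte 3 + I of Op0||Op1
  int Shift = -1;
  for (unsigned I = 0; I < 16; ++I) {
    int ExpectedShift = (Bytes[I] - I) % 16; // modulo SystemZ::VectorBytes
    if (Shift < 0)
      Shift = ExpectedShift;
    assert(Shift == ExpectedShift); // all bytes agree on the shift
  }
  assert(Shift == 3);
  return 0;
}
// --- end of editor's sketch ---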
3520 
3521 // Create a node that performs P on operands Op0 and Op1, casting the
3522 // operands to the appropriate type. The type of the result is determined by P.
3523 static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
3524  const Permute &P, SDValue Op0, SDValue Op1) {
3525  // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
3526  // elements of a PACK are twice as wide as the outputs.
3527  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
3528  P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
3529  P.Operand);
3530  // Cast both operands to the appropriate type.
3531  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
3532  SystemZ::VectorBytes / InBytes);
3533  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
3534  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
3535  SDValue Op;
3536  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
3537  SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
3538  Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
3539  } else if (P.Opcode == SystemZISD::PACK) {
3540  MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
3541  SystemZ::VectorBytes / P.Operand);
3542  Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
3543  } else {
3544  Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
3545  }
3546  return Op;
3547 }
3548 
3549 // Bytes is a VPERM-like permute vector, except that -1 is used for
3550 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using
3551 // VSLDI or VPERM.
3552 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
3553  const SmallVectorImpl<int> &Bytes) {
3554  for (unsigned I = 0; I < 2; ++I)
3555  Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
3556 
3557  // First see whether VSLDI can be used.
3558  unsigned StartIndex, OpNo0, OpNo1;
3559  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
3560  return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
3561  Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
3562 
3563  // Fall back on VPERM. Construct an SDNode for the permute vector.
3564  SDValue IndexNodes[SystemZ::VectorBytes];
3565  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
3566  if (Bytes[I] >= 0)
3567  IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
3568  else
3569  IndexNodes[I] = DAG.getUNDEF(MVT::i32);
3570  SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
3571  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
3572 }
3573 
3574 namespace {
3575 // Describes a general N-operand vector shuffle.
3576 struct GeneralShuffle {
3577  GeneralShuffle(EVT vt) : VT(vt) {}
3578  void addUndef();
3579  void add(SDValue, unsigned);
3580  SDValue getNode(SelectionDAG &, SDLoc);
3581 
3582  // The operands of the shuffle.
3583  SmallVector<SDValue, SystemZ::VectorBytes> Ops;
3584 
3585  // Index I is -1 if byte I of the result is undefined. Otherwise the
3586  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
3587  // Bytes[I] / SystemZ::VectorBytes.
3588  SmallVector<int, SystemZ::VectorBytes> Bytes;
3589 
3590  // The type of the shuffle result.
3591  EVT VT;
3592 };
3593 }
3594 
3595 // Add an extra undefined element to the shuffle.
3596 void GeneralShuffle::addUndef() {
3597  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3598  for (unsigned I = 0; I < BytesPerElement; ++I)
3599  Bytes.push_back(-1);
3600 }
3601 
3602 // Add an extra element to the shuffle, taking it from element Elem of Op.
3603 // A null Op indicates a vector input whose value will be calculated later;
3604 // there is at most one such input per shuffle and it always has the same
3605 // type as the result.
3606 void GeneralShuffle::add(SDValue Op, unsigned Elem) {
3607  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3608 
3609  // The source vector can have wider elements than the result,
3610  // either through an explicit TRUNCATE or because of type legalization.
3611  // We want the least significant part.
3612  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
3613  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
3614  assert(FromBytesPerElement >= BytesPerElement &&
3615  "Invalid EXTRACT_VECTOR_ELT");
3616  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
3617  (FromBytesPerElement - BytesPerElement));
3618 
3619  // Look through things like shuffles and bitcasts.
3620  while (Op.getNode()) {
3621  if (Op.getOpcode() == ISD::BITCAST)
3622  Op = Op.getOperand(0);
3623  else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
3624  // See whether the bytes we need come from a contiguous part of one
3625  // operand.
3626  SmallVector<int, SystemZ::VectorBytes> OpBytes;
3627  getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
3628  int NewByte;
3629  if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
3630  break;
3631  if (NewByte < 0) {
3632  addUndef();
3633  return;
3634  }
3635  Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
3636  Byte = unsigned(NewByte) % SystemZ::VectorBytes;
3637  } else if (Op.getOpcode() == ISD::UNDEF) {
3638  addUndef();
3639  return;
3640  } else
3641  break;
3642  }
3643 
3644  // Make sure that the source of the extraction is in Ops.
3645  unsigned OpNo = 0;
3646  for (; OpNo < Ops.size(); ++OpNo)
3647  if (Ops[OpNo] == Op)
3648  break;
3649  if (OpNo == Ops.size())
3650  Ops.push_back(Op);
3651 
3652  // Add the element to Bytes.
3653  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
3654  for (unsigned I = 0; I < BytesPerElement; ++I)
3655  Bytes.push_back(Base + I);
3656 }
3657 
3658 // Return SDNodes for the completed shuffle.
3659 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
3660  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
3661 
3662  if (Ops.size() == 0)
3663  return DAG.getUNDEF(VT);
3664 
3665  // Make sure that there are at least two shuffle operands.
3666  if (Ops.size() == 1)
3667  Ops.push_back(DAG.getUNDEF(MVT::v16i8));
3668 
3669  // Create a tree of shuffles, deferring root node until after the loop.
3670  // Try to redistribute the undefined elements of non-root nodes so that
3671  // the non-root shuffles match something like a pack or merge, then adjust
3672  // the parent node's permute vector to compensate for the new order.
3673  // Among other things, this copes with vectors like <2 x i16> that were
3674  // padded with undefined elements during type legalization.
3675  //
3676  // In the best case this redistribution will lead to the whole tree
3677  // using packs and merges. It should rarely be a loss in other cases.
3678  unsigned Stride = 1;
3679  for (; Stride * 2 < Ops.size(); Stride *= 2) {
3680  for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
3681  SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
3682 
3683  // Create a mask for just these two operands.
3684  SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
3685  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
3686  unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
3687  unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
3688  if (OpNo == I)
3689  NewBytes[J] = Byte;
3690  else if (OpNo == I + Stride)
3691  NewBytes[J] = SystemZ::VectorBytes + Byte;
3692  else
3693  NewBytes[J] = -1;
3694  }
3695  // See if it would be better to reorganize NewBytes to avoid using VPERM.
3696  SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
3697  if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
3698  Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
3699  // Applying NewBytesMap to Ops[I] gets back to NewBytes.
3700  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
3701  if (NewBytes[J] >= 0) {
3702  assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
3703  "Invalid double permute");
3704  Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
3705  } else
3706  assert(NewBytesMap[J] < 0 && "Invalid double permute");
3707  }
3708  } else {
3709  // Just use NewBytes on the operands.
3710  Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
3711  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
3712  if (NewBytes[J] >= 0)
3713  Bytes[J] = I * SystemZ::VectorBytes + J;
3714  }
3715  }
3716  }
3717 
3718  // Now we just have 2 inputs. Put the second operand in Ops[1].
3719  if (Stride > 1) {
3720  Ops[1] = Ops[Stride];
3721  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
3722  if (Bytes[I] >= int(SystemZ::VectorBytes))
3723  Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
3724  }
3725 
3726  // Look for an instruction that can do the permute without resorting
3727  // to VPERM.
3728  unsigned OpNo0, OpNo1;
3729  SDValue Op;
3730  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
3731  Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
3732  else
3733  Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
3734  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
3735 }
3736 
3737 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
3738 static bool isScalarToVector(SDValue Op) {
3739  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
3740  if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
3741  return false;
3742  return true;
3743 }
3744 
3745 // Return a vector of type VT that contains Value in the first element.
3746 // The other elements don't matter.
3747 static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
3748  SDValue Value) {
3749  // If we have a constant, replicate it to all elements and let the
3750  // BUILD_VECTOR lowering take care of it.
3751  if (Value.getOpcode() == ISD::Constant ||
3752  Value.getOpcode() == ISD::ConstantFP) {
3753  SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
3754  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
3755  }
3756  if (Value.getOpcode() == ISD::UNDEF)
3757  return DAG.getUNDEF(VT);
3758  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
3759 }
3760 
3761 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
3762 // element 1. Used for cases in which replication is cheap.
3763 static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
3764  SDValue Op0, SDValue Op1) {
3765  if (Op0.getOpcode() == ISD::UNDEF) {
3766  if (Op1.getOpcode() == ISD::UNDEF)
3767  return DAG.getUNDEF(VT);
3768  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
3769  }
3770  if (Op1.getOpcode() == ISD::UNDEF)
3771  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
3772  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
3773  buildScalarToVector(DAG, DL, VT, Op0),
3774  buildScalarToVector(DAG, DL, VT, Op1));
3775 }
3776 
3777 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
3778 // vector for them.
3779 static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
3780  SDValue Op1) {
3781  if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
3782  return DAG.getUNDEF(MVT::v2i64);
3783  // If one of the two inputs is undefined then replicate the other one,
3784  // in order to avoid using another register unnecessarily.
3785  if (Op0.getOpcode() == ISD::UNDEF)
3786  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
3787  else if (Op1.getOpcode() == ISD::UNDEF)
3788  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3789  else {
3790  Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3791  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
3792  }
3793  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
3794 }
3795 
3796 // Try to represent constant BUILD_VECTOR node BVN using a
3797 // SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
3798 // on success.
3799 static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
3800  EVT ElemVT = BVN->getValueType(0).getVectorElementType();
3801  unsigned BytesPerElement = ElemVT.getStoreSize();
3802  for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
3803  SDValue Op = BVN->getOperand(I);
3804  if (Op.getOpcode() != ISD::UNDEF) {
3805  uint64_t Value;
3806  if (Op.getOpcode() == ISD::Constant)
3807  Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
3808  else if (Op.getOpcode() == ISD::ConstantFP)
3809  Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
3810  .getZExtValue());
3811  else
3812  return false;
3813  for (unsigned J = 0; J < BytesPerElement; ++J) {
3814  uint64_t Byte = (Value >> (J * 8)) & 0xff;
3815  if (Byte == 0xff)
3816  Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
3817  else if (Byte != 0)
3818  return false;
3819  }
3820  }
3821  }
3822  return true;
3823 }
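// --- Editor's sketch (not part of the LLVM source): VECTOR GENERATE BYTE
// MASK encodes one bit per vector byte, most significant bit first. For a
// hypothetical v4i32 constant { 0xffffffff, 0, 0, 0xff } the mask computed
// by the loop above is 0xf001:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Elems[4] = { 0xffffffff, 0, 0, 0xff };
  unsigned E = 4, BytesPerElement = 4;
  uint64_t Mask = 0;
  for (unsigned I = 0; I < E; ++I)
    for (unsigned J = 0; J < BytesPerElement; ++J)
      if (((Elems[I] >> (J * 8)) & 0xff) == 0xff)
        Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
  assert(Mask == 0xf001); // bits 15-12: element 0; bit 0: low byte of elem 3
  return 0;
}
// --- end of editor's sketch ---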
3824 
3825 // Try to load a vector constant in which BitsPerElement-bit value Value
3826 // is replicated to fill the vector. VT is the type of the resulting
3827 // constant, which may have elements of a different size from BitsPerElement.
3828 // Return the SDValue of the constant on success, otherwise return
3829 // an empty value.
3830 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
3831  const SystemZInstrInfo *TII,
3832  SDLoc DL, EVT VT, uint64_t Value,
3833  unsigned BitsPerElement) {
3834  // Signed 16-bit values can be replicated using VREPI.
3835  int64_t SignedValue = SignExtend64(Value, BitsPerElement);
3836  if (isInt<16>(SignedValue)) {
3837  MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
3838  SystemZ::VectorBits / BitsPerElement);
3839  SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
3840  DAG.getConstant(SignedValue, DL, MVT::i32));
3841  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
3842  }
3843  // See whether rotating the constant left some N places gives a value that
3844  // is one less than a power of 2 (i.e. all zeros followed by all ones).
3845  // If so we can use VGM.
3846  unsigned Start, End;
3847  if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
3848  // isRxSBGMask returns the bit numbers for a full 64-bit value,
3849  // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
3850  // bit numbers for a BitsPerElement value, so that 0 denotes
3851  // 1 << (BitsPerElement-1).
3852  Start -= 64 - BitsPerElement;
3853  End -= 64 - BitsPerElement;
3854  MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
3855  SystemZ::VectorBits / BitsPerElement);
3856  SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
3857  DAG.getConstant(Start, DL, MVT::i32),
3858  DAG.getConstant(End, DL, MVT::i32));
3859  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
3860  }
3861  return SDValue();
3862 }
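// --- Editor's sketch (not part of the LLVM source): the VREPI path above
// applies whenever the element value sign-extends from 16 bits. E.g. the
// hypothetical 32-bit element value 0xfffffffd sign-extends from -3 and so
// qualifies:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Value = 0xfffffffd;
  unsigned BitsPerElement = 32;
  int64_t SignedValue =
      int64_t(Value << (64 - BitsPerElement)) >> (64 - BitsPerElement);
  assert(SignedValue == -3);                              // SignExtend64
  assert(SignedValue >= -32768 && SignedValue <= 32767);  // isInt<16>
  return 0;
}
// --- end of editor's sketch ---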
3863 
3864 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
3865 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
3866 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
3867 // would benefit from this representation and return it if so.
3868 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
3869  BuildVectorSDNode *BVN) {
3870  EVT VT = BVN->getValueType(0);
3871  unsigned NumElements = VT.getVectorNumElements();
3872 
3873  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
3874  // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
3875  // need a BUILD_VECTOR, add an additional placeholder operand for that
3876  // BUILD_VECTOR and store its operands in ResidueOps.
3877  GeneralShuffle GS(VT);
3878  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
3879  bool FoundOne = false;
3880  for (unsigned I = 0; I < NumElements; ++I) {
3881  SDValue Op = BVN->getOperand(I);
3882  if (Op.getOpcode() == ISD::TRUNCATE)
3883  Op = Op.getOperand(0);
3884  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3885  Op.getOperand(1).getOpcode() == ISD::Constant) {
3886  unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
3887  GS.add(Op.getOperand(0), Elem);
3888  FoundOne = true;
3889  } else if (Op.getOpcode() == ISD::UNDEF) {
3890  GS.addUndef();
3891  } else {
3892  GS.add(SDValue(), ResidueOps.size());
3893  ResidueOps.push_back(Op);
3894  }
3895  }
3896 
3897  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
3898  if (!FoundOne)
3899  return SDValue();
3900 
3901  // Create the BUILD_VECTOR for the remaining elements, if any.
3902  if (!ResidueOps.empty()) {
3903  while (ResidueOps.size() < NumElements)
3904  ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType()));
3905  for (auto &Op : GS.Ops) {
3906  if (!Op.getNode()) {
3907  Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
3908  break;
3909  }
3910  }
3911  }
3912  return GS.getNode(DAG, SDLoc(BVN));
3913 }
3914 
3915 // Combine GPR scalar values Elems into a vector of type VT.
3916 static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
3917  SmallVectorImpl<SDValue> &Elems) {
3918  // See whether there is a single replicated value.
3919  SDValue Single;
3920  unsigned int NumElements = Elems.size();
3921  unsigned int Count = 0;
3922  for (auto Elem : Elems) {
3923  if (Elem.getOpcode() != ISD::UNDEF) {
3924  if (!Single.getNode())
3925  Single = Elem;
3926  else if (Elem != Single) {
3927  Single = SDValue();
3928  break;
3929  }
3930  Count += 1;
3931  }
3932  }
3933  // There are three cases here:
3934  //
3935  // - if the only defined element is a loaded one, the best sequence
3936  // is a replicating load.
3937  //
3938  // - otherwise, if the only defined element is an i64 value, we will
3939  // end up with the same VLVGP sequence regardless of whether we short-cut
3940  // for replication or fall through to the later code.
3941  //
3942  // - otherwise, if the only defined element is an i32 or smaller value,
3943  // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
3944  // This is only a win if the single defined element is used more than once.
3945  // In other cases we're better off using a single VLVGx.
3946  if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
3947  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
3948 
3949  // The best way of building a v2i64 from two i64s is to use VLVGP.
3950  if (VT == MVT::v2i64)
3951  return joinDwords(DAG, DL, Elems[0], Elems[1]);
3952 
3953  // Use a 64-bit merge high to combine two doubles.
3954  if (VT == MVT::v2f64)
3955  return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
3956 
3957  // Build v4f32 values directly from the FPRs:
3958  //
3959  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
3960  //      V       V              VMRHF
3961  //    <ABxx>  <CDxx>
3962  //        V     V              VMRHG
3963  //      <ABCD>
3964  if (VT == MVT::v4f32) {
3965  SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
3966  SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
3967  // Avoid unnecessary undefs by reusing the other operand.
3968  if (Op01.getOpcode() == ISD::UNDEF)
3969  Op01 = Op23;
3970  else if (Op23.getOpcode() == ISD::UNDEF)
3971  Op23 = Op01;
3972  // Merging identical replications is a no-op.
3973  if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
3974  return Op01;
3975  Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
3976  Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
3977  SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
3978  DL, MVT::v2i64, Op01, Op23);
3979  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
3980  }
3981 
3982  // Collect the constant terms.
3983  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
3984  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
3985 
3986  unsigned NumConstants = 0;
3987  for (unsigned I = 0; I < NumElements; ++I) {
3988  SDValue Elem = Elems[I];
3989  if (Elem.getOpcode() == ISD::Constant ||
3990  Elem.getOpcode() == ISD::ConstantFP) {
3991  NumConstants += 1;
3992  Constants[I] = Elem;
3993  Done[I] = true;
3994  }
3995  }
3996  // If there was at least one constant, fill in the other elements of
3997  // Constants with undefs to get a full vector constant and use that
3998  // as the starting point.
3999  SDValue Result;
4000  if (NumConstants > 0) {
4001  for (unsigned I = 0; I < NumElements; ++I)
4002  if (!Constants[I].getNode())
4003  Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
4004  Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
4005  } else {
4006  // Otherwise try to use VLVGP to start the sequence in order to
4007  // avoid a false dependency on any previous contents of the vector
4008  // register. This only makes sense if one of the associated elements
4009  // is defined.
4010  unsigned I1 = NumElements / 2 - 1;
4011  unsigned I2 = NumElements - 1;
4012  bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
4013  bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
4014  if (Def1 || Def2) {
4015  SDValue Elem1 = Elems[Def1 ? I1 : I2];
4016  SDValue Elem2 = Elems[Def2 ? I2 : I1];
4017  Result = DAG.getNode(ISD::BITCAST, DL, VT,
4018  joinDwords(DAG, DL, Elem1, Elem2));
4019  Done[I1] = true;
4020  Done[I2] = true;
4021  } else
4022  Result = DAG.getUNDEF(VT);
4023  }
4024 
4025  // Use VLVGx to insert the other elements.
4026  for (unsigned I = 0; I < NumElements; ++I)
4027  if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
4028  Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
4029  DAG.getConstant(I, DL, MVT::i32));
4030  return Result;
4031 }
4032 
4033 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
4034  SelectionDAG &DAG) const {
4035  const SystemZInstrInfo *TII =
4036  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
4037  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
4038  SDLoc DL(Op);
4039  EVT VT = Op.getValueType();
4040 
4041  if (BVN->isConstant()) {
4042  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
4043  // preferred way of creating all-zero and all-one vectors so give it
4044  // priority over other methods below.
4045  uint64_t Mask = 0;
4046  if (tryBuildVectorByteMask(BVN, Mask)) {
4047  SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
4048  DAG.getConstant(Mask, DL, MVT::i32));
4049  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4050  }
4051 
4052  // Try using some form of replication.
4053  APInt SplatBits, SplatUndef;
4054  unsigned SplatBitSize;
4055  bool HasAnyUndefs;
4056  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4057  8, true) &&
4058  SplatBitSize <= 64) {
4059  // First try assuming that any undefined bits above the highest set bit
4060  // and below the lowest set bit are 1s. This increases the likelihood of
4061  // being able to use a sign-extended element value in VECTOR REPLICATE
4062  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
4063  uint64_t SplatBitsZ = SplatBits.getZExtValue();
4064  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
4065  uint64_t Lower = (SplatUndefZ
4066  & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
4067  uint64_t Upper = (SplatUndefZ
4068  & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
4069  uint64_t Value = SplatBitsZ | Upper | Lower;
4070  SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
4071  SplatBitSize);
4072  if (Op.getNode())
4073  return Op;
4074 
4075  // Now try assuming that any undefined bits between the first and
4076  // last defined set bits are set. This increases the chances of
4077  // using a non-wraparound mask.
4078  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
4079  Value = SplatBitsZ | Middle;
4080  Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
4081  if (Op.getNode())
4082  return Op;
4083  }
4084 
4085  // Fall back to loading it from memory.
4086  return SDValue();
4087  }
4088 
4089  // See if we should use shuffles to construct the vector from other vectors.
4090  SDValue Res = tryBuildVectorShuffle(DAG, BVN);
4091  if (Res.getNode())
4092  return Res;
4093 
4094  // Detect SCALAR_TO_VECTOR conversions.
4095  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
4096  return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
4097 
4098  // Otherwise use buildVector to build the vector up from GPRs.
4099  unsigned NumElements = Op.getNumOperands();
4100  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
4101  for (unsigned I = 0; I < NumElements; ++I)
4102  Ops[I] = Op.getOperand(I);
4103  return buildVector(DAG, DL, VT, Ops);
4104 }
4105 
4106 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
4107  SelectionDAG &DAG) const {
4108  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
4109  SDLoc DL(Op);
4110  EVT VT = Op.getValueType();
4111  unsigned NumElements = VT.getVectorNumElements();
4112 
4113  if (VSN->isSplat()) {
4114  SDValue Op0 = Op.getOperand(0);
4115  unsigned Index = VSN->getSplatIndex();
4116  assert(Index < VT.getVectorNumElements() &&
4117  "Splat index should be defined and in first operand");
4118  // See whether the value we're splatting is directly available as a scalar.
4119  if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4120  Op0.getOpcode() == ISD::BUILD_VECTOR)
4121  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
4122  // Otherwise keep it as a vector-to-vector operation.
4123  return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
4124  DAG.getConstant(Index, DL, MVT::i32));
4125  }
4126 
4127  GeneralShuffle GS(VT);
4128  for (unsigned I = 0; I < NumElements; ++I) {
4129  int Elt = VSN->getMaskElt(I);
4130  if (Elt < 0)
4131  GS.addUndef();
4132  else
4133  GS.add(Op.getOperand(unsigned(Elt) / NumElements),
4134  unsigned(Elt) % NumElements);
4135  }
4136  return GS.getNode(DAG, SDLoc(VSN));
4137 }
4138 
4139 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
4140  SelectionDAG &DAG) const {
4141  SDLoc DL(Op);
4142  // Just insert the scalar into element 0 of an undefined vector.
4143  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4144  Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
4145  Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
4146 }
4147 
4148 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
4149  SelectionDAG &DAG) const {
4150  // Handle insertions of floating-point values.
4151  SDLoc DL(Op);
4152  SDValue Op0 = Op.getOperand(0);
4153  SDValue Op1 = Op.getOperand(1);
4154  SDValue Op2 = Op.getOperand(2);
4155  EVT VT = Op.getValueType();
4156 
4157  // Insertions into constant indices of a v2f64 can be done using VPDI.
4158  // However, if the inserted value is a bitcast or a constant then it's
4159  // better to use GPRs, as below.
4160  if (VT == MVT::v2f64 &&
4161  Op1.getOpcode() != ISD::BITCAST &&
4162  Op1.getOpcode() != ISD::ConstantFP &&
4163  Op2.getOpcode() == ISD::Constant) {
4164  uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue();
4165  unsigned Mask = VT.getVectorNumElements() - 1;
4166  if (Index <= Mask)
4167  return Op;
4168  }
4169 
4170  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
4171  MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
4172  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
4173  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
4174  DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
4175  DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
4176  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4177 }
4178 
4179 SDValue
4180 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
4181  SelectionDAG &DAG) const {
4182  // Handle extractions of floating-point values.
4183  SDLoc DL(Op);
4184  SDValue Op0 = Op.getOperand(0);
4185  SDValue Op1 = Op.getOperand(1);
4186  EVT VT = Op.getValueType();
4187  EVT VecVT = Op0.getValueType();
4188 
4189  // Extractions of constant indices can be done directly.
4190  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
4191  uint64_t Index = CIndexN->getZExtValue();
4192  unsigned Mask = VecVT.getVectorNumElements() - 1;
4193  if (Index <= Mask)
4194  return Op;
4195  }
4196 
4197  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
4198  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
4199  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
4200  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
4201  DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
4202  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4203 }
4204 
4205 SDValue
4206 SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
4207  unsigned UnpackHigh) const {
4208  SDValue PackedOp = Op.getOperand(0);
4209  EVT OutVT = Op.getValueType();
4210  EVT InVT = PackedOp.getValueType();
4211  unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
4212  unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
4213  do {
4214  FromBits *= 2;
4215  EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
4216  SystemZ::VectorBits / FromBits);
4217  PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
4218  } while (FromBits != ToBits);
4219  return PackedOp;
4220 }
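// --- Editor's sketch (not part of the LLVM source): the do/while loop
// above emits one unpack node per doubling of the element width, so
// extending v16i8 elements to i32 takes two steps (8 -> 16 -> 32):

#include <cassert>

int main() {
  unsigned FromBits = 8, ToBits = 32, Steps = 0;
  do {
    FromBits *= 2;
    ++Steps; // one UNPACK(L)_HIGH node per iteration
  } while (FromBits != ToBits);
  assert(Steps == 2);
  return 0;
}
// --- end of editor's sketch ---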
4221 
4222 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
4223  unsigned ByScalar) const {
4224  // Look for cases where a vector shift can use the *_BY_SCALAR form.
4225  SDValue Op0 = Op.getOperand(0);
4226  SDValue Op1 = Op.getOperand(1);
4227  SDLoc DL(Op);
4228  EVT VT = Op.getValueType();
4229  unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
4230 
4231  // See whether the shift vector is a splat represented as BUILD_VECTOR.
4232  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
4233  APInt SplatBits, SplatUndef;
4234  unsigned SplatBitSize;
4235  bool HasAnyUndefs;
4236  // Check for constant splats. Use ElemBitSize as the minimum element
4237  // width and reject splats that need wider elements.
4238  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4239  ElemBitSize, true) &&
4240  SplatBitSize == ElemBitSize) {
4241  SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
4242  DL, MVT::i32);
4243  return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4244  }
4245  // Check for variable splats.
4246  BitVector UndefElements;
4247  SDValue Splat = BVN->getSplatValue(&UndefElements);
4248  if (Splat) {
4249  // Since i32 is the smallest legal type, we either need a no-op
4250  // or a truncation.
4251  SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
4252  return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4253  }
4254  }
4255 
4256  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
4257  // and the shift amount is directly available in a GPR.
4258  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
4259  if (VSN->isSplat()) {
4260  SDValue VSNOp0 = VSN->getOperand(0);
4261  unsigned Index = VSN->getSplatIndex();
4262  assert(Index < VT.getVectorNumElements() &&
4263  "Splat index should be defined and in first operand");
4264  if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4265  VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
4266  // Since i32 is the smallest legal type, we either need a no-op
4267  // or a truncation.
4268  SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
4269  VSNOp0.getOperand(Index));
4270  return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4271  }
4272  }
4273  }
4274 
4275  // Otherwise just treat the current form as legal.
4276  return Op;
4277 }
4278 
4279 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
4280  SelectionDAG &DAG) const {
4281  switch (Op.getOpcode()) {
4282  case ISD::BR_CC:
4283  return lowerBR_CC(Op, DAG);
4284  case ISD::SELECT_CC:
4285  return lowerSELECT_CC(Op, DAG);
4286  case ISD::SETCC:
4287  return lowerSETCC(Op, DAG);
4288  case ISD::GlobalAddress:
4289  return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
4290  case ISD::GlobalTLSAddress:
4291  return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
4292  case ISD::BlockAddress:
4293  return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
4294  case ISD::JumpTable:
4295  return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
4296  case ISD::ConstantPool:
4297  return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
4298  case ISD::BITCAST:
4299  return lowerBITCAST(Op, DAG);
4300  case ISD::VASTART:
4301  return lowerVASTART(Op, DAG);
4302  case ISD::VACOPY:
4303  return lowerVACOPY(Op, DAG);
4304  case ISD::DYNAMIC_STACKALLOC:
4305  return lowerDYNAMIC_STACKALLOC(Op, DAG);
4306  case ISD::SMUL_LOHI:
4307  return lowerSMUL_LOHI(Op, DAG);
4308  case ISD::UMUL_LOHI:
4309  return lowerUMUL_LOHI(Op, DAG);
4310  case ISD::SDIVREM:
4311  return lowerSDIVREM(Op, DAG);
4312  case ISD::UDIVREM:
4313  return lowerUDIVREM(Op, DAG);
4314  case ISD::OR:
4315  return lowerOR(Op, DAG);
4316  case ISD::CTPOP:
4317  return lowerCTPOP(Op, DAG);
4318  case ISD::CTLZ_ZERO_UNDEF:
4319  return DAG.getNode(ISD::CTLZ, SDLoc(Op),
4320  Op.getValueType(), Op.getOperand(0));
4321  case ISD::CTTZ_ZERO_UNDEF:
4322  return DAG.getNode(ISD::CTTZ, SDLoc(Op),
4323  Op.getValueType(), Op.getOperand(0));
4324  case ISD::ATOMIC_SWAP:
4325  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
4326  case ISD::ATOMIC_STORE:
4327  return lowerATOMIC_STORE(Op, DAG);
4328  case ISD::ATOMIC_LOAD:
4329  return lowerATOMIC_LOAD(Op, DAG);
4330  case ISD::ATOMIC_LOAD_ADD:
4331  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
4332  case ISD::ATOMIC_LOAD_SUB:
4333  return lowerATOMIC_LOAD_SUB(Op, DAG);
4334  case ISD::ATOMIC_LOAD_AND:
4335  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
4336  case ISD::ATOMIC_LOAD_OR:
4337  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
4338  case ISD::ATOMIC_LOAD_XOR:
4339  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
4340  case ISD::ATOMIC_LOAD_NAND:
4341  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
4342  case ISD::ATOMIC_LOAD_MIN:
4343  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
4344  case ISD::ATOMIC_LOAD_MAX:
4345  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
4346  case ISD::ATOMIC_LOAD_UMIN:
4347  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
4348  case ISD::ATOMIC_LOAD_UMAX:
4349  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
4350  case ISD::ATOMIC_CMP_SWAP:
4351  return lowerATOMIC_CMP_SWAP(Op, DAG);
4352  case ISD::STACKSAVE:
4353  return lowerSTACKSAVE(Op, DAG);
4354  case ISD::STACKRESTORE:
4355  return lowerSTACKRESTORE(Op, DAG);
4356  case ISD::PREFETCH:
4357  return lowerPREFETCH(Op, DAG);
4358  case ISD::INTRINSIC_W_CHAIN:
4359  return lowerINTRINSIC_W_CHAIN(Op, DAG);
4360  case ISD::INTRINSIC_WO_CHAIN:
4361  return lowerINTRINSIC_WO_CHAIN(Op, DAG);
4362  case ISD::BUILD_VECTOR:
4363  return lowerBUILD_VECTOR(Op, DAG);
4364  case ISD::VECTOR_SHUFFLE:
4365  return lowerVECTOR_SHUFFLE(Op, DAG);
4366  case ISD::SCALAR_TO_VECTOR:
4367  return lowerSCALAR_TO_VECTOR(Op, DAG);
4368  case ISD::INSERT_VECTOR_ELT:
4369  return lowerINSERT_VECTOR_ELT(Op, DAG);
4370  case ISD::EXTRACT_VECTOR_ELT:
4371  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
4372  case ISD::SIGN_EXTEND_VECTOR_INREG:
4373  return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
4374  case ISD::ZERO_EXTEND_VECTOR_INREG:
4375  return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
4376  case ISD::SHL:
4377  return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
4378  case ISD::SRL:
4379  return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
4380  case ISD::SRA:
4381  return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
4382  default:
4383  llvm_unreachable("Unexpected node to lower");
4384  }
4385 }
4386 
4387 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
4388 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
4389  switch ((SystemZISD::NodeType)Opcode) {
4390  case SystemZISD::FIRST_NUMBER: break;
4391  OPCODE(RET_FLAG);
4392  OPCODE(CALL);
4393  OPCODE(SIBCALL);
4394  OPCODE(TLS_GDCALL);
4395  OPCODE(TLS_LDCALL);
4396  OPCODE(PCREL_WRAPPER);
4397  OPCODE(PCREL_OFFSET);
4398  OPCODE(IABS);
4399  OPCODE(ICMP);
4400  OPCODE(FCMP);
4401  OPCODE(TM);
4402  OPCODE(BR_CCMASK);
4403  OPCODE(SELECT_CCMASK);
4404  OPCODE(ADJDYNALLOC);
4405  OPCODE(EXTRACT_ACCESS);
4406  OPCODE(POPCNT);
4407  OPCODE(UMUL_LOHI64);
4408  OPCODE(SDIVREM32);
4409  OPCODE(SDIVREM64);
4410  OPCODE(UDIVREM32);
4411  OPCODE(UDIVREM64);
4412  OPCODE(MVC);
4413  OPCODE(MVC_LOOP);
4414  OPCODE(NC);
4415  OPCODE(NC_LOOP);
4416  OPCODE(OC);
4417  OPCODE(OC_LOOP);
4418  OPCODE(XC);
4419  OPCODE(XC_LOOP);
4420  OPCODE(CLC);
4421  OPCODE(CLC_LOOP);
4422  OPCODE(STPCPY);
4423  OPCODE(STRCMP);
4424  OPCODE(SEARCH_STRING);
4425  OPCODE(IPM);
4426  OPCODE(SERIALIZE);
4427  OPCODE(TBEGIN);
4428  OPCODE(TBEGIN_NOFLOAT);
4429  OPCODE(TEND);
4430  OPCODE(BYTE_MASK);
4431  OPCODE(ROTATE_MASK);
4432  OPCODE(REPLICATE);
4433  OPCODE(JOIN_DWORDS);
4434  OPCODE(SPLAT);
4435  OPCODE(MERGE_HIGH);
4436  OPCODE(MERGE_LOW);
4437  OPCODE(SHL_DOUBLE);
4438  OPCODE(PERMUTE_DWORDS);
4439  OPCODE(PERMUTE);
4440  OPCODE(PACK);
4441  OPCODE(PACKS_CC);
4442  OPCODE(PACKLS_CC);
4443  OPCODE(UNPACK_HIGH);
4444  OPCODE(UNPACKL_HIGH);
4445  OPCODE(UNPACK_LOW);
4446  OPCODE(UNPACKL_LOW);
4447  OPCODE(VSHL_BY_SCALAR);
4448  OPCODE(VSRL_BY_SCALAR);
4449  OPCODE(VSRA_BY_SCALAR);
4450  OPCODE(VSUM);
4451  OPCODE(VICMPE);
4452  OPCODE(VICMPH);
4453  OPCODE(VICMPHL);
4454  OPCODE(VICMPES);
4455  OPCODE(VICMPHS);
4456  OPCODE(VICMPHLS);
4457  OPCODE(VFCMPE);
4458  OPCODE(VFCMPH);
4459  OPCODE(VFCMPHE);
4460  OPCODE(VFCMPES);
4461  OPCODE(VFCMPHS);
4462  OPCODE(VFCMPHES);
4463  OPCODE(VFTCI);
4464  OPCODE(VEXTEND);
4465  OPCODE(VROUND);
4466  OPCODE(VTM);
4467  OPCODE(VFAE_CC);
4468  OPCODE(VFAEZ_CC);
4469  OPCODE(VFEE_CC);
4470  OPCODE(VFEEZ_CC);
4471  OPCODE(VFENE_CC);
4472  OPCODE(VFENEZ_CC);
4473  OPCODE(VISTR_CC);
4474  OPCODE(VSTRC_CC);
4475  OPCODE(VSTRCZ_CC);
4476  OPCODE(ATOMIC_SWAPW);
4477  OPCODE(ATOMIC_LOADW_ADD);
4478  OPCODE(ATOMIC_LOADW_SUB);
4479  OPCODE(ATOMIC_LOADW_AND);
4480  OPCODE(ATOMIC_LOADW_OR);
4481  OPCODE(ATOMIC_LOADW_XOR);
4482  OPCODE(ATOMIC_LOADW_NAND);
4483  OPCODE(ATOMIC_LOADW_MIN);
4484  OPCODE(ATOMIC_LOADW_MAX);
4485  OPCODE(ATOMIC_LOADW_UMIN);
4486  OPCODE(ATOMIC_LOADW_UMAX);
4487  OPCODE(ATOMIC_CMP_SWAPW);
4488  OPCODE(PREFETCH);
4489  }
4490  return nullptr;
4491 #undef OPCODE
4492 }
4493 
4494 // Return true if VT is a vector whose elements are a whole number of bytes
4495 // in width.
4496 static bool canTreatAsByteVector(EVT VT) {
4497  return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0;
4498 }
4499 
4500 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
4501 // producing a result of type ResVT. Op is a possibly bitcast version
4502 // of the input vector and Index is the index (based on type VecVT) that
4503 // should be extracted. Return the new extraction if a simplification
4504 // was possible or if Force is true.
4505 SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
4506  SDValue Op, unsigned Index,
4507  DAGCombinerInfo &DCI,
4508  bool Force) const {
4509  SelectionDAG &DAG = DCI.DAG;
4510 
4511  // The number of bytes being extracted.
4512  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
4513 
4514  for (;;) {
4515  unsigned Opcode = Op.getOpcode();
4516  if (Opcode == ISD::BITCAST)
4517  // Look through bitcasts.
4518  Op = Op.getOperand(0);
4519  else if (Opcode == ISD::VECTOR_SHUFFLE &&
4520  canTreatAsByteVector(Op.getValueType())) {
4521  // Get a VPERM-like permute mask and see whether the bytes covered
4522  // by the extracted element are a contiguous sequence from one
4523  // source operand.
4524  SmallVector<int, SystemZ::VectorBytes> Bytes;
4525  getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
4526  int First;
4527  if (!getShuffleInput(Bytes, Index * BytesPerElement,
4528  BytesPerElement, First))
4529  break;
4530  if (First < 0)
4531  return DAG.getUNDEF(ResVT);
4532  // Make sure the contiguous sequence starts at a multiple of the
4533  // original element size.
4534  unsigned Byte = unsigned(First) % Bytes.size();
4535  if (Byte % BytesPerElement != 0)
4536  break;
4537  // We can get the extracted value directly from an input.
4538  Index = Byte / BytesPerElement;
4539  Op = Op.getOperand(unsigned(First) / Bytes.size());
4540  Force = true;
4541  } else if (Opcode == ISD::BUILD_VECTOR &&
4542  canTreatAsByteVector(Op.getValueType())) {
4543  // We can only optimize this case if the BUILD_VECTOR elements are
4544  // at least as wide as the extracted value.
4545  EVT OpVT = Op.getValueType();
4546  unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
4547  if (OpBytesPerElement < BytesPerElement)
4548  break;
4549  // Make sure that the least-significant bit of the extracted value
4550  // is the least significant bit of an input.
4551  unsigned End = (Index + 1) * BytesPerElement;
4552  if (End % OpBytesPerElement != 0)
4553  break;
4554  // We're extracting the low part of one operand of the BUILD_VECTOR.
4555  Op = Op.getOperand(End / OpBytesPerElement - 1);
4556  if (!Op.getValueType().isInteger()) {
4557  EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits());
4558  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4559  DCI.AddToWorklist(Op.getNode());
4560  }
4561  EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
4562  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4563  if (VT != ResVT) {
4564  DCI.AddToWorklist(Op.getNode());
4565  Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
4566  }
4567  return Op;
4568  } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
4569  Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
4570  Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
4571  canTreatAsByteVector(Op.getValueType()) &&
4572  canTreatAsByteVector(Op.getOperand(0).getValueType())) {
4573  // Make sure that only the unextended bits are significant.
4574  EVT ExtVT = Op.getValueType();
4575  EVT OpVT = Op.getOperand(0).getValueType();
4576  unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
4577  unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
4578  unsigned Byte = Index * BytesPerElement;
4579  unsigned SubByte = Byte % ExtBytesPerElement;
4580  unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
4581  if (SubByte < MinSubByte ||
4582  SubByte + BytesPerElement > ExtBytesPerElement)
4583  break;
4584  // Get the byte offset of the unextended element
4585  Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
4586  // ...then add the byte offset relative to that element.
4587  Byte += SubByte - MinSubByte;
4588  if (Byte % BytesPerElement != 0)
4589  break;
4590  Op = Op.getOperand(0);
4591  Index = Byte / BytesPerElement;
4592  Force = true;
4593  } else
4594  break;
4595  }
4596  if (Force) {
4597  if (Op.getValueType() != VecVT) {
4598  Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
4599  DCI.AddToWorklist(Op.getNode());
4600  }
4601  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
4602  DAG.getConstant(Index, DL, MVT::i32));
4603  }
4604  return SDValue();
4605 }
4606 
4607 // Optimize vector operations in scalar value Op on the basis that Op
4608 // is truncated to TruncVT.
4609 SDValue
4610 SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
4611  DAGCombinerInfo &DCI) const {
4612  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
4613  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
4614  // of type TruncVT.
4615  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4616  TruncVT.getSizeInBits() % 8 == 0) {
4617  SDValue Vec = Op.getOperand(0);
4618  EVT VecVT = Vec.getValueType();
4619  if (canTreatAsByteVector(VecVT)) {
4620  if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
4621  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
4622  unsigned TruncBytes = TruncVT.getStoreSize();
4623  if (BytesPerElement % TruncBytes == 0) {
4624  // Calculate the value of Y' in the above description. We are
4625  // splitting the original elements into Scale equal-sized pieces
4626  // and for truncation purposes want the last (least-significant)
4627  // of these pieces for IndexN. This is easiest to do by calculating
4628  // the start index of the following element and then subtracting 1.
4629  unsigned Scale = BytesPerElement / TruncBytes;
4630  unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
4631 
4632  // Defer the creation of the bitcast from X to combineExtract,
4633  // which might be able to optimize the extraction.
4634  VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
4635  VecVT.getStoreSize() / TruncBytes);
4636  EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
4637  return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
4638  }
4639  }
4640  }
4641  }
4642  return SDValue();
4643 }
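// --- Editor's sketch (not part of the LLVM source): the index arithmetic
// above, for truncating element 1 of a v4i32 to i8. The vector is recast
// as v16i8 (Scale = 4 pieces per element) and the wanted piece is the
// last, least-significant one of that element:

#include <cassert>

int main() {
  unsigned Index = 1, BytesPerElement = 4, TruncBytes = 1;
  unsigned Scale = BytesPerElement / TruncBytes;
  unsigned NewIndex = (Index + 1) * Scale - 1;
  assert(Scale == 4);
  assert(NewIndex == 7); // element 1 ends at byte 7 (big-endian layout)
  return 0;
}
// --- end of editor's sketch ---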
4644 
4645 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
4646  DAGCombinerInfo &DCI) const {
4647  SelectionDAG &DAG = DCI.DAG;
4648  unsigned Opcode = N->getOpcode();
4649  if (Opcode == ISD::SIGN_EXTEND) {
4650  // Convert (sext (ashr (shl X, C1), C2)) to
4651  // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
4652  // cheap as narrower ones.
4653  SDValue N0 = N->getOperand(0);
4654  EVT VT = N->getValueType(0);
4655  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
4656  auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4657  SDValue Inner = N0.getOperand(0);
4658  if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
4659  if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
4660  unsigned Extra = (VT.getSizeInBits() -
4661  N0.getValueType().getSizeInBits());
4662  unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
4663  unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
4664  EVT ShiftVT = N0.getOperand(1).getValueType();
4665  SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
4666  Inner.getOperand(0));
4667  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
4668  DAG.getConstant(NewShlAmt, SDLoc(Inner),
4669  ShiftVT));
4670  return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
4671  DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
4672  }
4673  }
4674  }
4675  }
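  // Editor's note (illustrative, not from the original source): for an
  // i32 -> i64 sign extension, Extra = 32; e.g. (sext (ashr (shl X, 24), 25))
  // becomes (ashr (shl (anyext X), 56), 57). Both forms extract and
  // sign-extend the same 7-bit field (bits 1-7 of X); the wider form saves
  // the separate sign-extension since 64-bit shifts cost no more.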
4676  if (Opcode == SystemZISD::MERGE_HIGH ||
4677  Opcode == SystemZISD::MERGE_LOW) {
4678  SDValue Op0 = N->getOperand(0);
4679  SDValue Op1 = N->getOperand(1);
4680  if (Op0.getOpcode() == ISD::BITCAST)
4681  Op0 = Op0.getOperand(0);
4682  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
4683  cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
4684  // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
4685  // for v4f32.
4686  if (Op1 == N->getOperand(0))
4687  return Op1;
4688  // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
4689  EVT VT = Op1.getValueType();
4690  unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
4691  if (ElemBytes <= 4) {
4692  Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
4693  SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
4694  EVT InVT = VT.changeVectorElementTypeToInteger();
4695  EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
4696  SystemZ::VectorBytes / ElemBytes / 2);
4697  if (VT != InVT) {
4698  Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
4699  DCI.AddToWorklist(Op1.getNode());
4700  }
4701  SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
4702  DCI.AddToWorklist(Op.getNode());
4703  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
4704  }
4705  }
4706  }
4707  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
4708  // for the extraction to be done on a vMiN value, so that we can use VSTE.
4709  // If X has wider elements then convert it to:
4710  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
4711  if (Opcode == ISD::STORE) {
4712  auto *SN = cast<StoreSDNode>(N);
4713  EVT MemVT = SN->getMemoryVT();
4714  if (MemVT.isInteger()) {
4715  SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
4716  SN->getValue(), DCI);
4717  if (Value.getNode()) {
4718  DCI.AddToWorklist(Value.getNode());
4719 
4720  // Rewrite the store with the new form of stored value.
4721  return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
4722  SN->getBasePtr(), SN->getMemoryVT(),
4723  SN->getMemOperand());
4724  }
4725  }
4726  }
4727  // Try to simplify a vector extraction.
4728  if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
4729  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
4730  SDValue Op0 = N->getOperand(0);
4731  EVT VecVT = Op0.getValueType();
4732  return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
4733  IndexN->getZExtValue(), DCI, false);
4734  }
4735  }
4736  // (join_dwords X, X) == (replicate X)
4737  if (Opcode == SystemZISD::JOIN_DWORDS &&
4738  N->getOperand(0) == N->getOperand(1))
4739  return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
4740  N->getOperand(0));
4741  // (fround (extract_vector_elt X 0))
4742  // (fround (extract_vector_elt X 1)) ->
4743  // (extract_vector_elt (VROUND X) 0)
4744  // (extract_vector_elt (VROUND X) 1)
4745  //
4746  // This is a special case since the target doesn't really support v2f32s.
4747  if (Opcode == ISD::FP_ROUND) {
4748  SDValue Op0 = N->getOperand(0);
4749  if (N->getValueType(0) == MVT::f32 &&
4750  Op0.hasOneUse() &&
4751  Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4752  Op0.getOperand(0).getValueType() == MVT::v2f64 &&
4753  Op0.getOperand(1).getOpcode() == ISD::Constant &&
4754  cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
4755  SDValue Vec = Op0.getOperand(0);
4756  for (auto *U : Vec->uses()) {
4757  if (U != Op0.getNode() &&
4758  U->hasOneUse() &&
4759  U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4760  U->getOperand(0) == Vec &&
4761  U->getOperand(1).getOpcode() == ISD::Constant &&
4762  cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
4763  SDValue OtherRound = SDValue(*U->use_begin(), 0);
4764  if (OtherRound.getOpcode() == ISD::FP_ROUND &&
4765  OtherRound.getOperand(0) == SDValue(U, 0) &&
4766  OtherRound.getValueType() == MVT::f32) {
4767  SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
4768  MVT::v4f32, Vec);
4769  DCI.AddToWorklist(VRound.getNode());
4770  SDValue Extract1 =
4771  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
4772  VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
4773  DCI.AddToWorklist(Extract1.getNode());
4774  DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
4775  SDValue Extract0 =
4776  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
4777  VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
4778  return Extract0;
4779  }
4780  }
4781  }
4782  }
4783  }
4784  return SDValue();
4785 }
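// Worked example of the SIGN_EXTEND combine above, with an i32 source X,
// an 8-bit field (C1 = C2 = 24), and extension to i64 (Extra = 32):
//   (i64 (sext (sra (shl X, 24), 24)))
// becomes
//   (sra (shl (anyext X), 56), 56)
// which yields the same sign-extended 8-bit value while performing both
// shifts at the 64-bit width.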
4786 
4787 //===----------------------------------------------------------------------===//
4788 // Custom insertion
4789 //===----------------------------------------------------------------------===//
4790 
4791 // Create a new basic block after MBB.
4792 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
4793  MachineFunction &MF = *MBB->getParent();
4794  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
4795  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
4796  return NewMBB;
4797 }
4798 
4799 // Split MBB after MI and return the new block (the one that contains
4800 // instructions after MI).
4801 static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
4802  MachineBasicBlock *MBB) {
4803  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
4804  NewMBB->splice(NewMBB->begin(), MBB,
4805  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
4806  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
4807  return NewMBB;
4808 }
4809 
4810 // Split MBB before MI and return the new block (the one that contains MI).
4811 static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
4812  MachineBasicBlock *MBB) {
4813  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
4814  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
4815  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
4816  return NewMBB;
4817 }
4818 
4819 // Force base value Base into a register before MI. Return the register.
4820 static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
4821  const SystemZInstrInfo *TII) {
4822  if (Base.isReg())
4823  return Base.getReg();
4824 
4825  MachineBasicBlock *MBB = MI->getParent();
4826  MachineFunction &MF = *MBB->getParent();
4827  MachineRegisterInfo &MRI = MF.getRegInfo();
4828 
4829  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
4830  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
4831  .addOperand(Base).addImm(0).addReg(0);
4832  return Reg;
4833 }
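// Note for illustration: when Base is a frame index rather than a
// register, the LA above materializes the address, roughly
//   LA %Reg, 0(%FrameIndexOperand)
// so the loop emitters below can always assume an ADDR64 register base.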
4834 
4835 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
4836 MachineBasicBlock *
4837 SystemZTargetLowering::emitSelect(MachineInstr *MI,
4838  MachineBasicBlock *MBB) const {
4839  const SystemZInstrInfo *TII =
4840  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
4841 
4842  unsigned DestReg = MI->getOperand(0).getReg();
4843  unsigned TrueReg = MI->getOperand(1).getReg();
4844  unsigned FalseReg = MI->getOperand(2).getReg();
4845  unsigned CCValid = MI->getOperand(3).getImm();
4846  unsigned CCMask = MI->getOperand(4).getImm();
4847  DebugLoc DL = MI->getDebugLoc();
4848 
4849  MachineBasicBlock *StartMBB = MBB;
4850  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
4851  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
4852 
4853  // StartMBB:
4854  // BRC CCMask, JoinMBB
4855  // # fallthrough to FalseMBB
4856  MBB = StartMBB;
4857  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
4858  .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
4859  MBB->addSuccessor(JoinMBB);
4860  MBB->addSuccessor(FalseMBB);
4861 
4862  // FalseMBB:
4863  // # fallthrough to JoinMBB
4864  MBB = FalseMBB;
4865  MBB->addSuccessor(JoinMBB);
4866 
4867  // JoinMBB:
4868  // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
4869  // ...
4870  MBB = JoinMBB;
4871  BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
4872  .addReg(TrueReg).addMBB(StartMBB)
4873  .addReg(FalseReg).addMBB(FalseMBB);
4874 
4875  MI->eraseFromParent();
4876  return JoinMBB;
4877 }
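// Worked example: a Select32 created for (select (icmp eq ...), a, b)
// arrives here with CCValid = CCMASK_ICMP and CCMask = CCMASK_CMP_EQ, so
// the BRC above acts as a "jump on equal" to JoinMBB (keeping %TrueReg),
// while the not-equal path falls through FalseMBB and the PHI selects
// %FalseReg.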
4878 
4879 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
4880 // StoreOpcode is the store to use and Invert says whether the store should
4881 // happen when the condition is false rather than true. If a STORE ON
4882 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
4883 MachineBasicBlock *
4884 SystemZTargetLowering::emitCondStore(MachineInstr *MI,
4885  MachineBasicBlock *MBB,
4886  unsigned StoreOpcode, unsigned STOCOpcode,
4887  bool Invert) const {
4888  const SystemZInstrInfo *TII =
4889  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
4890 
4891  unsigned SrcReg = MI->getOperand(0).getReg();
4892  MachineOperand Base = MI->getOperand(1);
4893  int64_t Disp = MI->getOperand(2).getImm();
4894  unsigned IndexReg = MI->getOperand(3).getReg();
4895  unsigned CCValid = MI->getOperand(4).getImm();
4896  unsigned CCMask = MI->getOperand(5).getImm();
4897  DebugLoc DL = MI->getDebugLoc();
4898 
4899  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
4900 
4901  // Use STOCOpcode if possible. We could use different store patterns in
4902  // order to avoid matching the index register, but the performance trade-offs
4903  // might be more complicated in that case.
4904  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
4905  if (Invert)
4906  CCMask ^= CCValid;
4907  BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
4908  .addReg(SrcReg).addOperand(Base).addImm(Disp)
4909  .addImm(CCValid).addImm(CCMask);
4910  MI->eraseFromParent();
4911  return MBB;
4912  }
4913 
4914  // Get the condition needed to branch around the store.
4915  if (!Invert)
4916  CCMask ^= CCValid;
4917 
4918  MachineBasicBlock *StartMBB = MBB;
4919  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
4920  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
4921 
4922  // StartMBB:
4923  // BRC CCMask, JoinMBB
4924  // # fallthrough to FalseMBB
4925  MBB = StartMBB;
4926  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
4927  .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
4928  MBB->addSuccessor(JoinMBB);
4929  MBB->addSuccessor(FalseMBB);
4930 
4931  // FalseMBB:
4932  // store %SrcReg, %Disp(%Index,%Base)
4933  // # fallthrough to JoinMBB
4934  MBB = FalseMBB;
4935  BuildMI(MBB, DL, TII->get(StoreOpcode))
4936  .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
4937  MBB->addSuccessor(JoinMBB);
4938 
4939  MI->eraseFromParent();
4940  return JoinMBB;
4941 }
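// Worked example: on subtargets with the load/store-on-condition
// facility, a CondStore32 with no index register takes the early path
// above and becomes a single
//   STOC %SrcReg, Disp(%Base), CCMask
// while other cases get the branch-around sequence with a plain ST in
// FalseMBB.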
4942 
4943 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
4944 // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
4945 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
4946 // BitSize is the width of the field in bits, or 0 if this is a partword
4947 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
4948 // is one of the operands. Invert says whether the field should be
4949 // inverted after performing BinOpcode (e.g. for NAND).
4950 MachineBasicBlock *
4951 SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
4952  MachineBasicBlock *MBB,
4953  unsigned BinOpcode,
4954  unsigned BitSize,
4955  bool Invert) const {
4956  MachineFunction &MF = *MBB->getParent();
4957  const SystemZInstrInfo *TII =
4958  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
4959  MachineRegisterInfo &MRI = MF.getRegInfo();
4960  bool IsSubWord = (BitSize < 32);
4961 
4962  // Extract the operands. Base can be a register or a frame index.
4963  // Src2 can be a register or immediate.
4964  unsigned Dest = MI->getOperand(0).getReg();
4965  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
4966  int64_t Disp = MI->getOperand(2).getImm();
4967  MachineOperand Src2 = earlyUseOperand(MI->getOperand(3));
4968  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
4969  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
4970  DebugLoc DL = MI->getDebugLoc();
4971  if (IsSubWord)
4972  BitSize = MI->getOperand(6).getImm();
4973 
4974  // Subword operations use 32-bit registers.
4975  const TargetRegisterClass *RC = (BitSize <= 32 ?
4976  &SystemZ::GR32BitRegClass :
4977  &SystemZ::GR64BitRegClass);
4978  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
4979  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
4980 
4981  // Get the right opcodes for the displacement.
4982  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
4983  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
4984  assert(LOpcode && CSOpcode && "Displacement out of range");
4985 
4986  // Create virtual registers for temporary results.
4987  unsigned OrigVal = MRI.createVirtualRegister(RC);
4988  unsigned OldVal = MRI.createVirtualRegister(RC);
4989  unsigned NewVal = (BinOpcode || IsSubWord ?
4990  MRI.createVirtualRegister(RC) : Src2.getReg());
4991  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
4992  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
4993 
4994  // Insert a basic block for the main loop.
4995  MachineBasicBlock *StartMBB = MBB;
4996  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
4997  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
4998 
4999  // StartMBB:
5000  // ...
5001  // %OrigVal = L Disp(%Base)
5002  // # fall through to LoopMBB
5003  MBB = StartMBB;
5004  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
5005  .addOperand(Base).addImm(Disp).addReg(0);
5006  MBB->addSuccessor(LoopMBB);
5007 
5008  // LoopMBB:
5009  // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
5010  // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
5011  // %RotatedNewVal = OP %RotatedOldVal, %Src2
5012  // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
5013  // %Dest = CS %OldVal, %NewVal, Disp(%Base)
5014  // JNE LoopMBB
5015  // # fall through to DoneMBB
5016  MBB = LoopMBB;
5017  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
5018  .addReg(OrigVal).addMBB(StartMBB)
5019  .addReg(Dest).addMBB(LoopMBB);
5020  if (IsSubWord)
5021  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
5022  .addReg(OldVal).addReg(BitShift).addImm(0);
5023  if (Invert) {
5024  // Perform the operation normally and then invert every bit of the field.
5025  unsigned Tmp = MRI.createVirtualRegister(RC);
5026  BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
5027  .addReg(RotatedOldVal).addOperand(Src2);
5028  if (BitSize <= 32)
5029  // XILF with the upper BitSize bits set.
5030  BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
5031  .addReg(Tmp).addImm(-1U << (32 - BitSize));
5032  else {
5033  // Use LCGR and add -1 to the result, which is more compact than
5034  // an XILF, XILH pair.
5035  unsigned Tmp2 = MRI.createVirtualRegister(RC);
5036  BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
5037  BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
5038  .addReg(Tmp2).addImm(-1);
5039  }
5040  } else if (BinOpcode)
5041  // A simple binary operation.
5042  BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
5043  .addReg(RotatedOldVal).addOperand(Src2);
5044  else if (IsSubWord)
5045  // Use RISBG to rotate Src2 into position and use it to replace the
5046  // field in RotatedOldVal.
5047  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
5048  .addReg(RotatedOldVal).addReg(Src2.getReg())
5049  .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
5050  if (IsSubWord)
5051  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
5052  .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
5053  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
5054  .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
5055  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5056  .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
5057  MBB->addSuccessor(LoopMBB);
5058  MBB->addSuccessor(DoneMBB);
5059 
5060  MI->eraseFromParent();
5061  return DoneMBB;
5062 }
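// Rough sketch of the loop for a full-word atomic NAND (BinOpcode = NR,
// Invert = true, BitSize = 32), where no rotation is needed:
//   L     %OrigVal, Disp(%Base)
// Loop:
//   LR    %Tmp, %OldVal
//   NR    %Tmp, %Src2              # the binary operation
//   XILF  %Tmp, 4294967295         # Invert: flip all 32 bits
//   CS    %OldVal, %Tmp, Disp(%Base)
//   JNE   Loop                     # retry if the memory word changed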
5063 
5064 // Implement EmitInstrWithCustomInserter for pseudo
5065 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
5066 // instruction that should be used to compare the current field with the
5067 // minimum or maximum value. KeepOldMask is the BRC condition-code mask
5068 // for when the current field should be kept. BitSize is the width of
5069 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
5070 MachineBasicBlock *
5071 SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
5072  MachineBasicBlock *MBB,
5073  unsigned CompareOpcode,
5074  unsigned KeepOldMask,
5075  unsigned BitSize) const {
5076  MachineFunction &MF = *MBB->getParent();
5077  const SystemZInstrInfo *TII =
5078  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5079  MachineRegisterInfo &MRI = MF.getRegInfo();
5080  bool IsSubWord = (BitSize < 32);
5081 
5082  // Extract the operands. Base can be a register or a frame index.
5083  unsigned Dest = MI->getOperand(0).getReg();
5084  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
5085  int64_t Disp = MI->getOperand(2).getImm();
5086  unsigned Src2 = MI->getOperand(3).getReg();
5087  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
5088  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
5089  DebugLoc DL = MI->getDebugLoc();
5090  if (IsSubWord)
5091  BitSize = MI->getOperand(6).getImm();
5092 
5093  // Subword operations use 32-bit registers.
5094  const TargetRegisterClass *RC = (BitSize <= 32 ?
5095  &SystemZ::GR32BitRegClass :
5096  &SystemZ::GR64BitRegClass);
5097  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
5098  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
5099 
5100  // Get the right opcodes for the displacement.
5101  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
5102  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
5103  assert(LOpcode && CSOpcode && "Displacement out of range");
5104 
5105  // Create virtual registers for temporary results.
5106  unsigned OrigVal = MRI.createVirtualRegister(RC);
5107  unsigned OldVal = MRI.createVirtualRegister(RC);
5108  unsigned NewVal = MRI.createVirtualRegister(RC);
5109  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
5110  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
5111  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
5112 
5113  // Insert 3 basic blocks for the loop.
5114  MachineBasicBlock *StartMBB = MBB;
5115  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
5116  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
5117  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
5118  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
5119 
5120  // StartMBB:
5121  // ...
5122  // %OrigVal = L Disp(%Base)
5123  // # fall through to LoopMBB
5124  MBB = StartMBB;
5125  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
5126  .addOperand(Base).addImm(Disp).addReg(0);
5127  MBB->addSuccessor(LoopMBB);
5128 
5129  // LoopMBB:
5130  // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
5131  // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
5132  // CompareOpcode %RotatedOldVal, %Src2
5133  // BRC KeepOldMask, UpdateMBB
5134  MBB = LoopMBB;
5135  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
5136  .addReg(OrigVal).addMBB(StartMBB)
5137  .addReg(Dest).addMBB(UpdateMBB);
5138  if (IsSubWord)
5139  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
5140  .addReg(OldVal).addReg(BitShift).addImm(0);
5141  BuildMI(MBB, DL, TII->get(CompareOpcode))
5142  .addReg(RotatedOldVal).addReg(Src2);
5143  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5144  .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
5145  MBB->addSuccessor(UpdateMBB);
5146  MBB->addSuccessor(UseAltMBB);
5147 
5148  // UseAltMBB:
5149  // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
5150  // # fall through to UpdateMBB
5151  MBB = UseAltMBB;
5152  if (IsSubWord)
5153  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
5154  .addReg(RotatedOldVal).addReg(Src2)
5155  .addImm(32).addImm(31 + BitSize).addImm(0);
5156  MBB->addSuccessor(UpdateMBB);
5157 
5158  // UpdateMBB:
5159  // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
5160  // [ %RotatedAltVal, UseAltMBB ]
5161  // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
5162  // %Dest = CS %OldVal, %NewVal, Disp(%Base)
5163  // JNE LoopMBB
5164  // # fall through to DoneMBB
5165  MBB = UpdateMBB;
5166  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
5167  .addReg(RotatedOldVal).addMBB(LoopMBB)
5168  .addReg(RotatedAltVal).addMBB(UseAltMBB);
5169  if (IsSubWord)
5170  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
5171  .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
5172  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
5173  .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
5174  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5175  .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
5176  MBB->addSuccessor(LoopMBB);
5177  MBB->addSuccessor(DoneMBB);
5178 
5179  MI->eraseFromParent();
5180  return DoneMBB;
5181 }
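// Worked example: ATOMIC_LOAD_MIN_32 calls this with CompareOpcode = CR
// and KeepOldMask = CCMASK_CMP_LE, so LoopMBB branches straight to
// UpdateMBB (keeping the loaded value) whenever OldVal <= Src2 and only
// goes through UseAltMBB, which selects Src2, when the loaded value is
// larger.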
5182 
5183 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
5184 // instruction MI.
5185 MachineBasicBlock *
5186 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
5187  MachineBasicBlock *MBB) const {
5188  MachineFunction &MF = *MBB->getParent();
5189  const SystemZInstrInfo *TII =
5190  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5191  MachineRegisterInfo &MRI = MF.getRegInfo();
5192 
5193  // Extract the operands. Base can be a register or a frame index.
5194  unsigned Dest = MI->getOperand(0).getReg();
5195  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
5196  int64_t Disp = MI->getOperand(2).getImm();
5197  unsigned OrigCmpVal = MI->getOperand(3).getReg();
5198  unsigned OrigSwapVal = MI->getOperand(4).getReg();
5199  unsigned BitShift = MI->getOperand(5).getReg();
5200  unsigned NegBitShift = MI->getOperand(6).getReg();
5201  int64_t BitSize = MI->getOperand(7).getImm();
5202  DebugLoc DL = MI->getDebugLoc();
5203 
5204  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
5205 
5206  // Get the right opcodes for the displacement.
5207  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
5208  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
5209  assert(LOpcode && CSOpcode && "Displacement out of range");
5210 
5211  // Create virtual registers for temporary results.
5212  unsigned OrigOldVal = MRI.createVirtualRegister(RC);
5213  unsigned OldVal = MRI.createVirtualRegister(RC);
5214  unsigned CmpVal = MRI.createVirtualRegister(RC);
5215  unsigned SwapVal = MRI.createVirtualRegister(RC);
5216  unsigned StoreVal = MRI.createVirtualRegister(RC);
5217  unsigned RetryOldVal = MRI.createVirtualRegister(RC);
5218  unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
5219  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);
5220 
5221  // Insert 2 basic blocks for the loop.
5222  MachineBasicBlock *StartMBB = MBB;
5223  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
5224  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
5225  MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
5226 
5227  // StartMBB:
5228  // ...
5229  // %OrigOldVal = L Disp(%Base)
5229  // # fall through to LoopMBB
5231  MBB = StartMBB;
5232  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
5233  .addOperand(Base).addImm(Disp).addReg(0);
5234  MBB->addSuccessor(LoopMBB);
5235 
5236  // LoopMBB:
5237  // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
5238  // %CmpVal = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
5239  // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
5240  // %Dest = RLL %OldVal, BitSize(%BitShift)
5241  // ^^ The low BitSize bits contain the field
5242  // of interest.
5243  // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
5244  // ^^ Replace the upper 32-BitSize bits of the
5245  // comparison value with those that we loaded,
5246  // so that we can use a full word comparison.
5247  // CR %Dest, %RetryCmpVal
5248  // JNE DoneMBB
5249  // # Fall through to SetMBB
5250  MBB = LoopMBB;
5251  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
5252  .addReg(OrigOldVal).addMBB(StartMBB)
5253  .addReg(RetryOldVal).addMBB(SetMBB);
5254  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
5255  .addReg(OrigCmpVal).addMBB(StartMBB)
5256  .addReg(RetryCmpVal).addMBB(SetMBB);
5257  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
5258  .addReg(OrigSwapVal).addMBB(StartMBB)
5259  .addReg(RetrySwapVal).addMBB(SetMBB);
5260  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
5261  .addReg(OldVal).addReg(BitShift).addImm(BitSize);
5262  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
5263  .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
5264  BuildMI(MBB, DL, TII->get(SystemZ::CR))
5265  .addReg(Dest).addReg(RetryCmpVal);
5266  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5267  .addImm(SystemZ::CCMASK_ICMP)
5268  .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
5269  MBB->addSuccessor(DoneMBB);
5270  MBB->addSuccessor(SetMBB);
5271 
5272  // SetMBB:
5273  // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
5274  // ^^ Replace the upper 32-BitSize bits of the new
5275  // value with those that we loaded.
5276  // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
5277  // ^^ Rotate the new field to its proper position.
5278  // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
5279  // JNE LoopMBB
5280  // # fall through to DoneMBB
5281  MBB = SetMBB;
5282  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
5283  .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
5284  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
5285  .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
5286  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
5287  .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
5288  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5289  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
5290  MBB->addSuccessor(LoopMBB);
5291  MBB->addSuccessor(DoneMBB);
5292 
5293  MI->eraseFromParent();
5294  return DoneMBB;
5295 }
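// Worked example: for a 1-byte compare-and-swap (BitSize = 8) the RISBG32
// masks above are 32, 55, 0, i.e. the upper 24 bits of the loaded word
// are copied into both the comparison and the swap value, so the
// full-word CR and CS can only ever differ in the 8 bits actually being
// exchanged.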
5296 
5297 // Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
5298 // if the high register of the GR128 value must be cleared or false if
5299 // it's "don't care". SubReg is subreg_l32 when extending a GR32
5300 // and subreg_l64 when extending a GR64.
5301 MachineBasicBlock *
5302 SystemZTargetLowering::emitExt128(MachineInstr *MI,
5303  MachineBasicBlock *MBB,
5304  bool ClearEven, unsigned SubReg) const {
5305  MachineFunction &MF = *MBB->getParent();
5306  const SystemZInstrInfo *TII =
5307  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5308  MachineRegisterInfo &MRI = MF.getRegInfo();
5309  DebugLoc DL = MI->getDebugLoc();
5310 
5311  unsigned Dest = MI->getOperand(0).getReg();
5312  unsigned Src = MI->getOperand(1).getReg();
5313  unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
5314 
5315  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
5316  if (ClearEven) {
5317  unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
5318  unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
5319 
5320  BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
5321  .addImm(0);
5322  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
5323  .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
5324  In128 = NewIn128;
5325  }
5326  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
5327  .addReg(In128).addReg(Src).addImm(SubReg);
5328 
5329  MI->eraseFromParent();
5330  return MBB;
5331 }
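// Worked example: ZEXT128_32 reaches here with ClearEven = true and
// SubReg = subreg_l32, giving roughly:
//   %In128    = IMPLICIT_DEF
//   %Zero64   = LLILL 0
//   %NewIn128 = INSERT_SUBREG %In128, %Zero64, subreg_h64
//   %Dest     = INSERT_SUBREG %NewIn128, %Src, subreg_l32
// which zeroes the even (high) half of the 128-bit pair before inserting
// the 32-bit source into the low word.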
5332 
5333 MachineBasicBlock *
5334 SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
5335  MachineBasicBlock *MBB,
5336  unsigned Opcode) const {
5337  MachineFunction &MF = *MBB->getParent();
5338  const SystemZInstrInfo *TII =
5339  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5340  MachineRegisterInfo &MRI = MF.getRegInfo();
5341  DebugLoc DL = MI->getDebugLoc();
5342 
5343  MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
5344  uint64_t DestDisp = MI->getOperand(1).getImm();
5345  MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2));
5346  uint64_t SrcDisp = MI->getOperand(3).getImm();
5347  uint64_t Length = MI->getOperand(4).getImm();
5348 
5349  // When generating more than one CLC, all but the last will need to
5350  // branch to the end when a difference is found.
5351  MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
5352  splitBlockAfter(MI, MBB) : nullptr);
5353 
5354  // Check for the loop form, in which operand 5 is the trip count.
5355  if (MI->getNumExplicitOperands() > 5) {
5356  bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
5357 
5358  uint64_t StartCountReg = MI->getOperand(5).getReg();
5359  uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
5360  uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
5361  forceReg(MI, DestBase, TII));
5362 
5363  const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
5364  uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
5365  uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
5366  MRI.createVirtualRegister(RC));
5367  uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
5368  uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
5369  MRI.createVirtualRegister(RC));
5370 
5371  RC = &SystemZ::GR64BitRegClass;
5372  uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
5373  uint64_t NextCountReg = MRI.createVirtualRegister(RC);
5374 
5375  MachineBasicBlock *StartMBB = MBB;
5376  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
5377  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
5378  MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
5379 
5380  // StartMBB:
5381  // # fall through to LoopMBB
5382  MBB->addSuccessor(LoopMBB);
5383 
5384  // LoopMBB:
5385  // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
5386  // [ %NextDestReg, NextMBB ]
5387  // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
5388  // [ %NextSrcReg, NextMBB ]
5389  // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
5390  // [ %NextCountReg, NextMBB ]
5391  // ( PFD 2, 768+DestDisp(%ThisDestReg) )
5392  // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
5393  // ( JLH EndMBB )
5394  //
5395  // The prefetch is used only for MVC. The JLH is used only for CLC.
5396  MBB = LoopMBB;
5397 
5398  BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
5399  .addReg(StartDestReg).addMBB(StartMBB)
5400  .addReg(NextDestReg).addMBB(NextMBB);
5401  if (!HaveSingleBase)
5402  BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
5403  .addReg(StartSrcReg).addMBB(StartMBB)
5404  .addReg(NextSrcReg).addMBB(NextMBB);
5405  BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
5406  .addReg(StartCountReg).addMBB(StartMBB)
5407  .addReg(NextCountReg).addMBB(NextMBB);
5408  if (Opcode == SystemZ::MVC)
5409  BuildMI(MBB, DL, TII->get(SystemZ::PFD))
5410  .addImm(SystemZ::PFD_WRITE)
5411  .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
5412  BuildMI(MBB, DL, TII->get(Opcode))
5413  .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
5414  .addReg(ThisSrcReg).addImm(SrcDisp);
5415  if (EndMBB) {
5416  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5417  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
5418  .addMBB(EndMBB);
5419  MBB->addSuccessor(EndMBB);
5420  MBB->addSuccessor(NextMBB);
5421  }
5422 
5423  // NextMBB:
5424  // %NextDestReg = LA 256(%ThisDestReg)
5425  // %NextSrcReg = LA 256(%ThisSrcReg)
5426  // %NextCountReg = AGHI %ThisCountReg, -1
5427  // CGHI %NextCountReg, 0
5428  // JLH LoopMBB
5429  // # fall through to DoneMBB
5430  //
5431  // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
5432  MBB = NextMBB;
5433 
5434  BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
5435  .addReg(ThisDestReg).addImm(256).addReg(0);
5436  if (!HaveSingleBase)
5437  BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
5438  .addReg(ThisSrcReg).addImm(256).addReg(0);
5439  BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
5440  .addReg(ThisCountReg).addImm(-1);
5441  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
5442  .addReg(NextCountReg).addImm(0);
5443  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5444  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
5445  .addMBB(LoopMBB);
5446  MBB->addSuccessor(LoopMBB);
5447  MBB->addSuccessor(DoneMBB);
5448 
5449  DestBase = MachineOperand::CreateReg(NextDestReg, false);
5450  SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
5451  Length &= 255;
5452  MBB = DoneMBB;
5453  }
5454  // Handle any remaining bytes with straight-line code.
5455  while (Length > 0) {
5456  uint64_t ThisLength = std::min(Length, uint64_t(256));
5457  // The previous iteration might have created out-of-range displacements.
5458  // Apply them using LAY if so.
5459  if (!isUInt<12>(DestDisp)) {
5460  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
5461  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
5462  .addOperand(DestBase).addImm(DestDisp).addReg(0);
5463  DestBase = MachineOperand::CreateReg(Reg, false);
5464  DestDisp = 0;
5465  }
5466  if (!isUInt<12>(SrcDisp)) {
5467  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
5468  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
5469  .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
5470  SrcBase = MachineOperand::CreateReg(Reg, false);
5471  SrcDisp = 0;
5472  }
5473  BuildMI(*MBB, MI, DL, TII->get(Opcode))
5474  .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
5475  .addOperand(SrcBase).addImm(SrcDisp);
5476  DestDisp += ThisLength;
5477  SrcDisp += ThisLength;
5478  Length -= ThisLength;
5479  // If there's another CLC to go, branch to the end if a difference
5480  // was found.
5481  if (EndMBB && Length > 0) {
5482  MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
5483  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5484  .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
5485  .addMBB(EndMBB);
5486  MBB->addSuccessor(EndMBB);
5487  MBB->addSuccessor(NextMBB);
5488  MBB = NextMBB;
5489  }
5490  }
5491  if (EndMBB) {
5492  MBB->addSuccessor(EndMBB);
5493  MBB = EndMBB;
5494  MBB->addLiveIn(SystemZ::CC);
5495  }
5496 
5497  MI->eraseFromParent();
5498  return MBB;
5499 }
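// Worked example, assuming the selector chose the loop form for a
// 1600-byte MVC-based memcpy: operand 5 carries the trip count
// 1600 / 256 = 6, the loop above copies six 256-byte blocks, and the
// straight-line code at the end issues one MVC for the remaining
// 1600 & 255 = 64 bytes.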
5500 
5501 // Decompose string pseudo-instruction MI into a loop that continually performs
5502 // Opcode until CC != 3.
5503 MachineBasicBlock *
5504 SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
5505  MachineBasicBlock *MBB,
5506  unsigned Opcode) const {
5507  MachineFunction &MF = *MBB->getParent();
5508  const SystemZInstrInfo *TII =
5509  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
5510  MachineRegisterInfo &MRI = MF.getRegInfo();
5511  DebugLoc DL = MI->getDebugLoc();
5512 
5513  uint64_t End1Reg = MI->getOperand(0).getReg();
5514  uint64_t Start1Reg = MI->getOperand(1).getReg();
5515  uint64_t Start2Reg = MI->getOperand(2).getReg();
5516  uint64_t CharReg = MI->getOperand(3).getReg();
5517 
5518  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
5519  uint64_t This1Reg = MRI.createVirtualRegister(RC);
5520  uint64_t This2Reg = MRI.createVirtualRegister(RC);
5521  uint64_t End2Reg = MRI.createVirtualRegister(RC);
5522 
5523  MachineBasicBlock *StartMBB = MBB;
5524  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
5525  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
5526 
5527  // StartMBB:
5528  // # fall through to LoopMBB
5529  MBB->addSuccessor(LoopMBB);
5530 
5531  // LoopMBB:
5532  // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
5533  // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
5534  // R0L = %CharReg
5535  // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
5536  // JO LoopMBB
5537  // # fall through to DoneMBB
5538  //
5539  // The load of R0L can be hoisted by post-RA LICM.
5540  MBB = LoopMBB;
5541 
5542  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
5543  .addReg(Start1Reg).addMBB(StartMBB)
5544  .addReg(End1Reg).addMBB(LoopMBB);
5545  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
5546  .addReg(Start2Reg).addMBB(StartMBB)
5547  .addReg(End2Reg).addMBB(LoopMBB);
5548  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
5549  BuildMI(MBB, DL, TII->get(Opcode))
5550  .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
5551  .addReg(This1Reg).addReg(This2Reg);
5552  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5553  .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
5554  MBB->addSuccessor(LoopMBB);
5555  MBB->addSuccessor(DoneMBB);
5556 
5557  DoneMBB->addLiveIn(SystemZ::CC);
5558 
5559  MI->eraseFromParent();
5560  return DoneMBB;
5561 }
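// Note for illustration: CLST, MVST and SRST set CC 3 when they stop
// after a CPU-determined number of bytes instead of producing an answer,
// so the JO above (branch on CC 3) simply re-executes the instruction
// from the addresses it has advanced to until it reports a final result.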
5562 
5563 // Update TBEGIN instruction with final opcode and register clobbers.
5564 MachineBasicBlock *
5565 SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
5566  MachineBasicBlock *MBB,
5567  unsigned Opcode,
5568  bool NoFloat) const {
5569  MachineFunction &MF = *MBB->getParent();
5570  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
5571  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
5572 
5573  // Update opcode.
5574  MI->setDesc(TII->get(Opcode));
5575 
5576  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
5577  // Make sure to add the corresponding GRSM bits if they are missing.
5578  uint64_t Control = MI->getOperand(2).getImm();
5579  static const unsigned GPRControlBit[16] = {
5580  0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
5581  0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
5582  };
5583  Control |= GPRControlBit[15];
5584  if (TFI->hasFP(MF))
5585  Control |= GPRControlBit[11];
5586  MI->getOperand(2).setImm(Control);
5587 
5588  // Add GPR clobbers.
5589  for (int I = 0; I < 16; I++) {
5590  if ((Control & GPRControlBit[I]) == 0) {
5591  unsigned Reg = SystemZMC::GR64Regs[I];
5592  MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
5593  }
5594  }
5595 
5596  // Add FPR/VR clobbers.
5597  if (!NoFloat && (Control & 4) != 0) {
5598  if (Subtarget.hasVector()) {
5599  for (int I = 0; I < 32; I++) {
5600  unsigned Reg = SystemZMC::VR128Regs[I];
5601  MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
5602  }
5603  } else {
5604  for (int I = 0; I < 16; I++) {
5605  unsigned Reg = SystemZMC::FP64Regs[I];
5606  MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
5607  }
5608  }
5609  }
5610 
5611  return MBB;
5612 }
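// Worked example: with an incoming GRSM of 0xC000 (only r0-r3 saved),
// the code above ORs in 0x0100 for the r14/r15 pair (plus 0x0400 for
// r10/r11 when a frame pointer is in use) and then adds every GPR pair
// whose bit is still clear as an implicit-def clobber, keeping live
// values out of registers the transaction could trash on abort.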
5613 
5614 MachineBasicBlock *SystemZTargetLowering::
5615 EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
5616  switch (MI->getOpcode()) {
5617  case SystemZ::Select32Mux:
5618  case SystemZ::Select32:
5619  case SystemZ::SelectF32:
5620  case SystemZ::Select64:
5621  case SystemZ::SelectF64:
5622  case SystemZ::SelectF128:
5623  return emitSelect(MI, MBB);
5624 
5625  case SystemZ::CondStore8Mux:
5626  return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
5627  case SystemZ::CondStore8MuxInv:
5628  return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
5629  case SystemZ::CondStore16Mux:
5630  return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
5631  case SystemZ::CondStore16MuxInv:
5632  return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
5633  case SystemZ::CondStore8:
5634  return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
5635  case SystemZ::CondStore8Inv:
5636  return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
5637  case SystemZ::CondStore16:
5638  return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
5639  case SystemZ::CondStore16Inv:
5640  return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
5641  case SystemZ::CondStore32:
5642  return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
5643  case SystemZ::CondStore32Inv:
5644  return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
5645  case SystemZ::CondStore64:
5646  return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
5647  case SystemZ::CondStore64Inv:
5648  return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
5649  case SystemZ::CondStoreF32:
5650  return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
5651  case SystemZ::CondStoreF32Inv:
5652  return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
5653  case SystemZ::CondStoreF64:
5654  return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
5655  case SystemZ::CondStoreF64Inv:
5656  return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
5657 
5658  case SystemZ::AEXT128_64:
5659  return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
5660  case SystemZ::ZEXT128_32:
5661  return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
5662  case SystemZ::ZEXT128_64:
5663  return emitExt128(MI, MBB, true, SystemZ::subreg_l64);
5664 
5665  case SystemZ::ATOMIC_SWAPW:
5666  return emitAtomicLoadBinary(MI, MBB, 0, 0);
5667  case SystemZ::ATOMIC_SWAP_32:
5668  return emitAtomicLoadBinary(MI, MBB, 0, 32);
5669  case SystemZ::ATOMIC_SWAP_64:
5670  return emitAtomicLoadBinary(MI, MBB, 0, 64);
5671 
5672  case SystemZ::ATOMIC_LOADW_AR:
5673  return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
5674  case SystemZ::ATOMIC_LOADW_AFI:
5675  return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
5676  case SystemZ::ATOMIC_LOAD_AR:
5677  return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
5678  case SystemZ::ATOMIC_LOAD_AHI:
5679  return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
5680  case SystemZ::ATOMIC_LOAD_AFI:
5681  return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
5682  case SystemZ::ATOMIC_LOAD_AGR:
5683  return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
5684  case SystemZ::ATOMIC_LOAD_AGHI:
5685  return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
5686  case SystemZ::ATOMIC_LOAD_AGFI:
5687  return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
5688 
5689  case SystemZ::ATOMIC_LOADW_SR:
5690  return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
5691  case SystemZ::ATOMIC_LOAD_SR:
5692  return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
5693  case SystemZ::ATOMIC_LOAD_SGR:
5694  return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
5695 
5696  case SystemZ::ATOMIC_LOADW_NR:
5697  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
5698  case SystemZ::ATOMIC_LOADW_NILH:
5699  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
5700  case SystemZ::ATOMIC_LOAD_NR:
5701  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
5702  case SystemZ::ATOMIC_LOAD_NILL:
5703  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
5704  case SystemZ::ATOMIC_LOAD_NILH:
5705  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
5706  case SystemZ::ATOMIC_LOAD_NILF:
5707  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
5708  case SystemZ::ATOMIC_LOAD_NGR:
5709  return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
5710  case SystemZ::ATOMIC_LOAD_NILL64:
5711  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
5712  case SystemZ::ATOMIC_LOAD_NILH64:
5713  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
5714  case SystemZ::ATOMIC_LOAD_NIHL64:
5715  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
5716  case SystemZ::ATOMIC_LOAD_NIHH64:
5717  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
5718  case SystemZ::ATOMIC_LOAD_NILF64:
5719  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
5720  case SystemZ::ATOMIC_LOAD_NIHF64:
5721  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
5722 
5723  case SystemZ::ATOMIC_LOADW_OR:
5724  return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
5725  case SystemZ::ATOMIC_LOADW_OILH:
5726  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
5727  case SystemZ::ATOMIC_LOAD_OR:
5728  return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
5729  case SystemZ::ATOMIC_LOAD_OILL:
5730  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
5731  case SystemZ::ATOMIC_LOAD_OILH:
5732  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
5733  case SystemZ::ATOMIC_LOAD_OILF:
5734  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
5735  case SystemZ::ATOMIC_LOAD_OGR:
5736  return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
5737  case SystemZ::ATOMIC_LOAD_OILL64:
5738  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
5739  case SystemZ::ATOMIC_LOAD_OILH64:
5740  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
5741  case SystemZ::ATOMIC_LOAD_OIHL64:
5742  return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
5743  case SystemZ::ATOMIC_LOAD_OIHH64:
5744  return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
5745  case SystemZ::ATOMIC_LOAD_OILF64:
5746  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
5747  case SystemZ::ATOMIC_LOAD_OIHF64:
5748  return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
5749 
5750  case SystemZ::ATOMIC_LOADW_XR:
5751  return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
5752  case SystemZ::ATOMIC_LOADW_XILF:
5753  return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
5754  case SystemZ::ATOMIC_LOAD_XR:
5755  return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
5756  case SystemZ::ATOMIC_LOAD_XILF:
5757  return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
5758  case SystemZ::ATOMIC_LOAD_XGR:
5759  return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
5760  case SystemZ::ATOMIC_LOAD_XILF64:
5761  return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
5762  case SystemZ::ATOMIC_LOAD_XIHF64:
5763  return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
5764 
5765  case SystemZ::ATOMIC_LOADW_NRi:
5766  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
5767  case SystemZ::ATOMIC_LOADW_NILHi:
5768  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
5769  case SystemZ::ATOMIC_LOAD_NRi:
5770  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
5771  case SystemZ::ATOMIC_LOAD_NILLi:
5772  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
5773  case SystemZ::ATOMIC_LOAD_NILHi:
5774  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
5775  case SystemZ::ATOMIC_LOAD_NILFi:
5776  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
5777  case SystemZ::ATOMIC_LOAD_NGRi:
5778  return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
5779  case SystemZ::ATOMIC_LOAD_NILL64i:
5780  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
5781  case SystemZ::ATOMIC_LOAD_NILH64i:
5782  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
5783  case SystemZ::ATOMIC_LOAD_NIHL64i:
5784  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
5785  case SystemZ::ATOMIC_LOAD_NIHH64i:
5786  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
5787  case SystemZ::ATOMIC_LOAD_NILF64i:
5788  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
5789  case SystemZ::ATOMIC_LOAD_NIHF64i:
5790  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
5791 
5792  case SystemZ::ATOMIC_LOADW_MIN:
5793  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
5794  SystemZ::CCMASK_CMP_LE, 0);
5795  case SystemZ::ATOMIC_LOAD_MIN_32:
5796  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
5797  SystemZ::CCMASK_CMP_LE, 32);
5798  case SystemZ::ATOMIC_LOAD_MIN_64:
5799  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
5800  SystemZ::CCMASK_CMP_LE, 64);
5801 
5802  case SystemZ::ATOMIC_LOADW_MAX:
5803  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
5804  SystemZ::CCMASK_CMP_GE, 0);
5805  case SystemZ::ATOMIC_LOAD_MAX_32:
5806  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
5807  SystemZ::CCMASK_CMP_GE, 32);
5808  case SystemZ::ATOMIC_LOAD_MAX_64:
5809  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
5810  SystemZ::CCMASK_CMP_GE, 64);
5811 
5812  case SystemZ::ATOMIC_LOADW_UMIN:
5813  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
5814  SystemZ::CCMASK_CMP_LE, 0);
5815  case SystemZ::ATOMIC_LOAD_UMIN_32:
5816  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
5817  SystemZ::CCMASK_CMP_LE, 32);
5818  case SystemZ::ATOMIC_LOAD_UMIN_64:
5819  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
5820  SystemZ::CCMASK_CMP_LE, 64);
5821 
5822  case SystemZ::ATOMIC_LOADW_UMAX:
5823  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
5824  SystemZ::CCMASK_CMP_GE, 0);
5825  case SystemZ::ATOMIC_LOAD_UMAX_32:
5826  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
5827  SystemZ::CCMASK_CMP_GE, 32);
5828  case SystemZ::ATOMIC_LOAD_UMAX_64:
5829  return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
5830  SystemZ::CCMASK_CMP_GE, 64);
5831 
5832  case SystemZ::ATOMIC_CMP_SWAPW:
5833  return emitAtomicCmpSwapW(MI, MBB);
5834  case SystemZ::MVCSequence:
5835  case SystemZ::MVCLoop:
5836  return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
5837  case SystemZ::NCSequence:
5838  case SystemZ::NCLoop:
5839  return emitMemMemWrapper(MI, MBB, SystemZ::NC);
5840  case SystemZ::OCSequence:
5841  case SystemZ::OCLoop:
5842  return emitMemMemWrapper(MI, MBB, SystemZ::OC);
5843  case SystemZ::XCSequence:
5844  case SystemZ::XCLoop:
5845  return emitMemMemWrapper(MI, MBB, SystemZ::XC);
5846  case SystemZ::CLCSequence:
5847  case SystemZ::CLCLoop:
5848  return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
5849  case SystemZ::CLSTLoop:
5850  return emitStringWrapper(MI, MBB, SystemZ::CLST);
5851  case SystemZ::MVSTLoop:
5852  return emitStringWrapper(MI, MBB, SystemZ::MVST);
5853  case SystemZ::SRSTLoop:
5854  return emitStringWrapper(MI, MBB, SystemZ::SRST);
5855  case SystemZ::TBEGIN:
5856  return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
5857  case SystemZ::TBEGIN_nofloat:
5858  return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
5859  case SystemZ::TBEGINC:
5860  return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
5861  default:
5862  llvm_unreachable("Unexpected instr type to insert");
5863  }
5864 }
bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:276
SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, bool isNonTemporal, bool isVolatile, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
static unsigned CCMaskForCondCode(ISD::CondCode CC)
bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:294
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:477
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:450
static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static MVT getIntegerVT(unsigned BitWidth)
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
unsigned Log2_32_Ceil(uint32_t Value)
Log2_32_Ceil - This function returns the ceil log base 2 of the specified value, 32 if the value is z...
Definition: MathExtras.h:481
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:724
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:104
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static SDValue getCCResult(SelectionDAG &DAG, SDNode *After)
SDValue getValue(unsigned R) const
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
static void VerifyVectorTypes(const SmallVectorImpl< ISD::InputArg > &Ins)
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
MVT getValVT() const
T findLastSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the last set bit starting from the least significant bit.
Definition: MathExtras.h:206
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
const unsigned PFD_READ
Definition: SystemZ.h:94
LLVMContext * getContext() const
Definition: SelectionDAG.h:289
const unsigned GR32Regs[16]
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:522
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1327
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, SDLoc DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:646
const int64_t CallFrameSize
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
size_t size() const
size - Get the string size.
Definition: StringRef.h:113
static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask)
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:554
static const Permute PermuteForms[]
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
LocInfo getLocInfo() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasOneUse() const
Return true if there is exactly one use of this node.
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned FP128Regs[16]
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:301
unsigned odd128(bool Is32bit)
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDVTList getVTList() const
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:432
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(unsigned Reg, unsigned vreg=0)
addLiveIn - Add the specified register as a live-in.
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:120
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, CodeModel::Model CM) const
SDValue getMergeValues(ArrayRef< SDValue > Ops, SDLoc dl)
Create a MERGE_VALUES node from the given operands.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:285
static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op, bool IsNegative)
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:585
CallInst - This class represents a function call, abstracting a target machine's calling convention...
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
const GlobalValue * getGlobal() const
bool mayBeEmittedAsTailCall(CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
void addLiveIn(unsigned Reg)
Adds the specified register as a live in.
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs)
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
bool hasInterlockedAccess1() const
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
unsigned getNumOperands() const
Return the number of values used by this operation.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
unsigned getNumOperands() const
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
A debug info location.
Definition: DebugLoc.h:34
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB)
transferSuccessorsAndUpdatePHIs - Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor blocks which refer to fromMBB to refer to this.
const SDValue & getOperand(unsigned Num) const
static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C)
const unsigned FP32Regs[16]
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:423
static MachinePointerInfo getConstantPool()
getConstantPool - Return a MachinePointerInfo record that refers to the constant pool.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &ArgsFlags, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void reserve(size_type N)
Definition: SmallVector.h:401
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:178
static bool shouldSwapCmpOperands(const Comparison &C)
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:357
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
unsigned getResNo() const
get the index which selects a specific result in the SDNode
static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP)
bool isRegLoc() const
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
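An illustrative (not SystemZ-specific) constructor call using this hook:

// Storing an f64 as a truncated f32 is not a native operation here; expand it
// into an FP_ROUND followed by a plain f32 store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);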
SDValue getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
static MachinePointerInfo getFixedStack(int FI, int64_t offset=0)
getFixedStack - Return a MachinePointerInfo record that refers to the specified FrameIndex...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:150
static MachineBasicBlock * splitBlockAfter(MachineInstr *MI, MachineBasicBlock *MBB)
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const unsigned PFD_WRITE
Definition: SystemZ.h:95
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:200
COPY - Target-independent register copy.
Definition: TargetOpcodes.h:86
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:371
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:115
bool hasLoadStoreOnCond() const
lazy value info
BlockAddress - The address of a basic block.
Definition: Constants.h:802
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
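The store-side counterpart of the getLoad sketch earlier; DAG, DL, Chain, Val and Ptr are assumed in scope:

// Store Val through Ptr; unlike a load, a store yields only the new chain.
Chain = DAG.getStore(Chain, DL, Val, Ptr, MachinePointerInfo(),
                     /*isVolatile*/ false, /*isNonTemporal*/ false,
                     /*Alignment*/ 0);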
MachineMemOperand - A description of a memory reference used in the backend.
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:308
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
Shift and rotation operations.
Definition: ISDOpcodes.h:332
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register...
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:67
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:283
const unsigned CCMASK_2
Definition: SystemZ.h:29
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:467
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C)
Reg
All possible values of the reg field in the ModR/M byte.
SimpleValueType SimpleTy
static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT, SDValue Value)
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:210
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) For double-word atomic operations: ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi) ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi) These correspond to the atomicrmw instruction.
Definition: ISDOpcodes.h:687
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, CCValAssign &VA, SDValue Chain, SDValue Value)
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
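Two illustrative constructor-time uses of this hook (the choices here are examples, not SystemZ's actual settings):

// Expand i64 signed division into a sequence the target can handle.
setOperationAction(ISD::SDIV, MVT::i64, Expand);
// Route i32 SETCC nodes through this target's LowerOperation hook.
setOperationAction(ISD::SETCC, MVT::i32, Custom);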
int getMaskElt(unsigned Idx) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
#define G(x, y, z)
Definition: MD5.cpp:52
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:110
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< ISD::InputArg, 32 > Ins
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:581
static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT, ISD::CondCode CC, SDValue CmpOp0, SDValue CmpOp1)
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:216
static bool isScalarToVector(SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, SDLoc DL)
Return a new CALLSEQ_START node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:637
unsigned getLocReg() const
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:393
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
SDValue getRegisterMask(const uint32_t *RegMask)
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:351
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
This contains information for each constraint that we are lowering.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
Return an ISD::VECTOR_SHUFFLE node.
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:76
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array y and places the result in dest...
Definition: APInt.cpp:238
SmallVector< ISD::OutputArg, 32 > Outs
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL, CCValAssign &VA, SDValue Value)
const unsigned NumArgFPRs
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
bool isFloatingPointTy() const
isFloatingPointTy - Return true if this is one of the six floating point types
Definition: Type.h:159
MachineConstantPoolValue * getMachineCPVal() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:659
static mvt_range integer_vector_valuetypes()
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:804
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
#define EQ(a, b)
Definition: regexec.c:112
int64_t getImm() const
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:410
const unsigned VectorBits
Definition: SystemZ.h:98
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type Ty1 to type Ty2.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:52
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:65
const unsigned CCMASK_CS
Definition: SystemZ.h:53
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:142
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:284
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:64
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
const BasicBlock * getBasicBlock() const
getBasicBlock - Return the LLVM basic block that this instance corresponded to originally.
UNDEF - An undefined node.
Definition: ISDOpcodes.h:169
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:436
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:120
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:262
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
SDNode * getNode() const
get the SDNode which holds the desired result
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
Definition: TargetOpcodes.h:52
static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
bundle_iterator< MachineInstr, instr_iterator > iterator
A self-contained host- and target-independent arbitrary-precision floating-point software implementat...
Definition: APFloat.h:122
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:245
#define P(N)
static void adjustForLTGFR(Comparison &C)
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isNegZero() const
Definition: APFloat.h:439
unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const
static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, const SystemZInstrInfo *TII, SDLoc DL, EVT VT, uint64_t Value, unsigned BitsPerElement)
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0s from the least significant bit to the most, stopping at the first 1...
Definition: MathExtras.h:109
bool isMachineConstantPoolEntry() const
CodeModel::Model getCodeModel() const
Returns the code model.
MVT - Machine Value Type.
static void getVPermMask(ShuffleVectorSDNode *VSN, SmallVectorImpl< int > &Bytes)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:88
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
Simple binary floating point operators.
Definition: ISDOpcodes.h:237
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
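For example, a target constructor can opt in to combining sign extensions:

// The DAG combiner will now hand every ISD::SIGN_EXTEND node to
// PerformDAGCombine so the target can fold it into a native operation.
setTargetDAGCombine(ISD::SIGN_EXTEND);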
bool isExtInLoc() const
const unsigned CCMASK_TM
Definition: SystemZ.h:69
MVT getLocVT() const
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
This is an important base class in LLVM.
Definition: Constant.h:41
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:607
bool isVector() const
isVector - Return true if this is a vector value type.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:780
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:267
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, SDLoc DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map)
const Constant * getConstVal() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C)
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
INSERT_SUBREG - This instruction takes three operands: a register that has subregisters, a register providing an insert value, and a subregister index.
Definition: TargetOpcodes.h:49
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
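A typical constructor call, assuming the common zero-or-one convention:

// Boolean results widen to 0 or +1 on this target.
setBooleanContents(ZeroOrOneBooleanContent);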
#define CONV(X)
static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C)
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
static mvt_range fp_valuetypes()
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:547
SDValue getTargetConstant(uint64_t Val, SDLoc DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:436
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL, EVT VT, SDValue CmpOp0, SDValue CmpOp1)
unsigned getOpcode() const
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:68
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
const unsigned CCMASK_3
Definition: SystemZ.h:30
static mvt_range vector_valuetypes()
CondCode getSetCCSwappedOperands(CondCode Operation)
getSetCCSwappedOperands - Return the operation corresponding to (Y op X) when given the operation for...
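A small self-contained example of the mapping:

#include "llvm/CodeGen/SelectionDAGNodes.h"
// (X < Y) with operands swapped becomes (Y > X).
ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(ISD::SETLT); // ISD::SETGT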
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:338
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
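A sketch of the fluent style the BuildMI overloads enable inside a custom inserter; TII, MBB, MI, DL, DestReg and SrcReg are assumed in scope, and SystemZ::SomeOpcode stands in for a real opcode:

// Emit "DestReg = SomeOpcode SrcReg, 42" immediately before MI.
BuildMI(*MBB, MI, DL, TII->get(SystemZ::SomeOpcode), DestReg)
    .addReg(SrcReg)
    .addImm(42);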
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:673
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
bool isRxSBGMask(uint64_t Mask, unsigned BitSize, unsigned &Start, unsigned &End) const
unsigned getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL, SDValue Op)
EVT - Extended Value Type.
Definition: ValueTypes.h:31
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define OPCODE(NAME)
This structure contains all information that is necessary for lowering calls.
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:110
T findFirstSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the first set bit starting from the least significant bit.
Definition: MathExtras.h:192
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *Fast) const override
Determine if the target supports unaligned memory accesses.
MachinePointerInfo - This class contains a discriminated union of information about pointers in memor...
const BlockAddress * getBlockAddress() const
SDValue CreateStackTemporary(EVT VT, unsigned minAlign=1)
Create a stack temporary, suitable for holding the specified value type.
const SystemZInstrInfo * getInstrInfo() const override
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:62
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from Ty1 to Ty2 is permitted when deciding whether a call is in tail posi...
void setIsKill(bool Val=true)
bool hasPopulationCount() const
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:484
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:478
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, SDLoc DL)
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
const TargetFrameLowering * getFrameLowering() const override
CCState - This class holds information needed while lowering arguments and return values...
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
int64_t SignExtend64(uint64_t x)
SignExtend64 - Sign extend B-bit number x to 64-bit int.
Definition: MathExtras.h:634
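A quick self-contained example of the templated form from llvm/Support/MathExtras.h:

#include "llvm/Support/MathExtras.h"
// Interpret the low 16 bits as a signed quantity: 0xFFF0 -> -16.
int64_t V = llvm::SignExtend64<16>(0xFFF0);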
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:273
void setExceptionPointerRegister(unsigned R)
If set to a physical register, this sets the register that receives the exception address on entry to...
const DataLayout * getDataLayout() const
Deprecated in 3.7, will be removed in 3.8.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
SDValue getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
Type * getType() const
All values are typed; get the type of this value.
Definition: Value.h:222
const unsigned ArgFPRs[NumArgFPRs]
static bool canTreatAsByteVector(EVT VT)
CCValAssign - Represent assignment of one arg/retval to a location.
const unsigned FP64Regs[16]
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:548
Information about stack frame layout on the target.
static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask)
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned GR128Regs[16]
static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0, SDValue Op1)
SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, SynchronizationScope SynchScope)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands...
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const override
This callback is used to prepare for a volatile or atomic load.
MachineFrameInfo * getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
getSetCCInverse - Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operat...
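Example of the mapping, using the integer flavor:

// !(X == Y) on integers is (X != Y).
ISD::CondCode Inv = ISD::getSetCCInverse(ISD::SETEQ, /*isInteger*/ true); // ISD::SETNE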
#define NC
Definition: regutils.h:42
const unsigned GR64Regs[16]
static mvt_range integer_valuetypes()
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:63
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
const unsigned CCMASK_1
Definition: SystemZ.h:28
Class for arbitrary precision integers.
Definition: APInt.h:73
bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:302
void setExceptionSelectorRegister(unsigned R)
If set to a physical register, this sets the register that receives the exception typeid on entry to ...
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getCallPreservedMask - Return a mask of call-preserved registers for the given calling convention on ...
const unsigned VectorBytes
Definition: SystemZ.h:102
iterator_range< use_iterator > uses()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, unsigned CCValid, unsigned CCMask)
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:342
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
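Note the log2 encoding; an illustrative call:

// The argument is log2(bytes): 2 requests 4-byte function alignment.
setMinFunctionAlignment(2);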
int64_t getSExtValue() const
bool isIntegerTy() const
isIntegerTy - True if this is an instance of IntegerType.
Definition: Type.h:193
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
static void VerifyVectorType(MVT VT, EVT ArgVT)
bool isMemLoc() const
const unsigned CCMASK_0
Definition: SystemZ.h:27
static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL, const Permute &P, SDValue Op0, SDValue Op1)
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:386
static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:238
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static MachineOperand earlyUseOperand(MachineOperand Op)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static MachineBasicBlock * splitBlockBefore(MachineInstr *MI, MachineBasicBlock *MBB)
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:542
Representation of each machine instruction.
Definition: MachineInstr.h:51
static MachinePointerInfo getGOT()
getGOT - Return a MachinePointerInfo record that refers to a GOT entry.
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:603
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:516
SmallVector< SDValue, 32 > OutVals
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT, unsigned Extend, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:196
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:401
static bool is32Bit(EVT VT)
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:233
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
bool isTailCall() const
MachineSDNode * getMachineNode(unsigned Opcode, SDLoc dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:272
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
static unsigned reverseCCMask(unsigned CCMask)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
EVT getValueType() const
Return the ValueType of the referenced return value.
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
Definition: SelectionDAG.h:664
unsigned getReg() const
getReg - Returns the register number.
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:105
void insert(iterator MBBI, MachineBasicBlock *MBB)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:94
unsigned getAlignment() const
static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
SDValue getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
getPrimitiveSizeInBits - Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:121
LLVM Value Representation.
Definition: Value.h:69
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:240
SDValue getRegister(unsigned Reg, EVT VT)
unsigned even128(bool Is32bit)
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition: APFloat.h:414
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
SDValue getValueType(EVT)
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:653
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:298
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
iterator end() const
Definition: StringRef.h:92
BasicBlockListType::iterator iterator
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
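The typical use inside a target combine; N and Replacement are placeholders for a node being combined and its computed substitute:

// Rewire every user of result 0 of N to Replacement, leaving N's other
// results (for example its chain) untouched.
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Replacement);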
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:105
Primary interface to the complete machine description for the target machine.
A SystemZ-specific constant pool value.
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:365
static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
bool hasFPExtension() const
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for the call to memset, used for functions with OptSize attribute.
const unsigned IPM_CC
Definition: SystemZ.h:91
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:434
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:188
unsigned getLocMemOffset() const
Conversion operators.
Definition: ISDOpcodes.h:380
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:338
static void adjustForFNeg(Comparison &C)
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:666
bool isIdenticalTo(const MachineOperand &Other) const
isIdenticalTo - Return true if this operand is identical to the specified operand.
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:61
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
const unsigned CCMASK_TEND
Definition: SystemZ.h:81
static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, SmallVectorImpl< SDValue > &Elems)
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:506
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
addSuccessor - Add succ as a successor of this MachineBasicBlock.
const unsigned VR128Regs[32]
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:662
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT, SDValue Op0, SDValue Op1)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, bool &Invert)
const SystemZRegisterInfo * getRegisterInfo() const override
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:309
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:66
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
SDValue getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget=false)
SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
uint64_t getZExtValue() const
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:314
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:225
void resize(size_type N)
Definition: SmallVector.h:376
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:527
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:115