//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Intrinsics.h"
#include <cctype>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {
// Represents a sequence for extracting a 0/1 value from an IPM result:
// (((X ^ XORValue) + AddValue) >> Bit)
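// For example (an illustrative combination; the concrete values are chosen
// where this struct is used later in the file): XORValue = 0,
// AddValue = -(1 << SystemZ::IPM_CC) and Bit = SystemZ::IPM_CC + 1 yield 1
// when CC is 0 and 0 when CC is 1 or 2, i.e. a "CC == 0" test for
// comparisons that never produce CC 3.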
struct IPMConversion {
  IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
    : XORValue(xorValue), AddValue(addValue), Bit(bit) {}

  int64_t XORValue;
  int64_t AddValue;
  unsigned Bit;
};

// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In)
    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
                                             const SystemZSubtarget &STI)
    : TargetLowering(tm), Subtarget(STI) {
  MVT PtrVT = getPointerTy();

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setExceptionPointerRegister(SystemZ::R6D);
  setExceptionSelectorRegister(SystemZ::R7D);
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);
  // TODO: It may be better to default to latency-oriented scheduling; however,
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ's CC, so use the register-pressure scheduler, which can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC,     VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD,  VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ,            VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::ROTR,            VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
  setOperationAction(ISD::JumpTable,        PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
  }

  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32,  Legal);
  setOperationAction(ISD::FMA, MVT::f64,  Legal);
  setOperationAction(ISD::FMA, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
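  // (Illustrative sketch, assuming the usual SystemZ idiom: a memset of
  // N > 1 copies of a variable byte in %rVal to %rDst expands to roughly
  //   STC %rVal, 0(%rDst)         ; store the byte value once
  //   MVC 1(N-1,%rDst), 0(%rDst)  ; overlapping copy propagates it
  // where %rVal and %rDst are placeholder register names.)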
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}

EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return false;
  default:
    break;
  }

  return false;
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  return Imm.isZero() || Imm.isNegZero();
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
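  // (For example, an add of -1 can be done as SLGFI with immediate 1.)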
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                           unsigned,
                                                           unsigned,
                                                           bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                                  Type *Ty) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  // Indexing is OK but no scale factor can be applied.
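  // For example, LG %r1, 8(%r2,%r3) combines a base, an index and a
  // displacement, but there is no scaled form such as base + 4*index.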
  return AM.Scale == 0 || AM.Scale == 1;
}

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
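// (For example, the constraint "{r5}" for an i32 operand reaches here with
// RC == &SystemZ::GR32BitRegClass and Map == SystemZMC::GR32Regs, so
// Map[5] names the low 32-bit half of r5.)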
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(const std::string &Constraint,
                    const TargetRegisterClass *RC, const unsigned *Map) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    std::string Suffix(Constraint.data() + 2, Constraint.size() - 2);
    unsigned Index = atoi(Suffix.c_str());
    if (Index < 16 && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, const std::string &Constraint,
    MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);
    }
  }
  if (Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  if (!CI->isTailCall())
    return false;
  return true;
}

// We do not yet support 128-bit single-element vector types.  If the user
// attempts to use such a type as a function argument or return type, prefer
// to error out instead of emitting code that violates the ABI.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (ArgVT.isVector() && !VT.isVector())
    report_fatal_error("Unsupported vector argument or return type");
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
  for (unsigned i = 0; i < Ins.size(); ++i)
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  for (unsigned i = 0; i < Outs.size(); ++i)
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::Indirect)
    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
                        MachinePointerInfo(), false, false, false, 0);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
                        Value, DAG.getUNDEF(MVT::i64));
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

SDValue SystemZTargetLowering::
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                     const SmallVectorImpl<ISD::InputArg> &Ins,
                     SDLoc DL, SelectionDAG &DAG,
                     SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
                                      VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      EVT PtrVT = getPointerTy();
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, false, 0);
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(FI),
                                 false, false, 0);
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
    }
  }

  return Chain;
}

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the call-saved argument register R6.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    unsigned Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
  }
  return true;
}

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy();

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, DL, PtrVT, true),
                                 DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, 0));
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall)
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
                                   CallingConv::ID CallConv, bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   SDLoc DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    unsigned Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}

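// Insert a memory-serialization barrier on the chain before a volatile or
// atomic load.  (The SERIALIZE node is expected to be selected to a BCR-based
// serialization instruction, e.g. "bcr 14,0" when the fast-serialization
// facility is available and "bcr 15,0" otherwise.)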
SDValue SystemZTargetLowering::
prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
  return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}

// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}

// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfcedbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
01385     Opcode = SystemZISD::VFTCI;
01386     CCValid = SystemZ::CCMASK_VCMP;
01387     return true;
01388 
01389   default:
01390     return false;
01391   }
01392 }
01393 
01394 // Emit an intrinsic with chain with a glued value instead of its CC result.
01395 static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
01396                                              unsigned Opcode) {
01397   // Copy all operands except the intrinsic ID.
01398   unsigned NumOps = Op.getNumOperands();
01399   SmallVector<SDValue, 6> Ops;
01400   Ops.reserve(NumOps - 1);
01401   Ops.push_back(Op.getOperand(0));
01402   for (unsigned I = 2; I < NumOps; ++I)
01403     Ops.push_back(Op.getOperand(I));
01404 
01405   assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
01406   SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01407   SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
01408   SDValue OldChain = SDValue(Op.getNode(), 1);
01409   SDValue NewChain = SDValue(Intr.getNode(), 0);
01410   DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
01411   return Intr;
01412 }
01413 
01414 // Emit an intrinsic with a glued value instead of its CC result.
01415 static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
01416                                      unsigned Opcode) {
01417   // Copy all operands except the intrinsic ID.
01418   unsigned NumOps = Op.getNumOperands();
01419   SmallVector<SDValue, 6> Ops;
01420   Ops.reserve(NumOps - 1);
01421   for (unsigned I = 1; I < NumOps; ++I)
01422     Ops.push_back(Op.getOperand(I));
01423 
01424   if (Op->getNumValues() == 1)
01425     return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
01426   assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
01427   SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
01428   return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
01429 }
01430 
01431 // CC is a comparison that will be implemented using an integer or
01432 // floating-point comparison.  Return the condition code mask for
01433 // a branch on true.  In the integer case, CCMASK_CMP_UO is set for
01434 // unsigned comparisons and clear for signed ones.  In the floating-point
01435 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
01436 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
01437 #define CONV(X) \
01438   case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
01439   case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
01440   case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
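
// As a worked example, CONV(EQ) expands to:
//   case ISD::SETEQ:  return SystemZ::CCMASK_CMP_EQ;
//   case ISD::SETOEQ: return SystemZ::CCMASK_CMP_EQ;
//   case ISD::SETUEQ: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_EQ;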
01441 
01442   switch (CC) {
01443   default:
01444     llvm_unreachable("Invalid integer condition!");
01445 
01446   CONV(EQ);
01447   CONV(NE);
01448   CONV(GT);
01449   CONV(GE);
01450   CONV(LT);
01451   CONV(LE);
01452 
01453   case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
01454   case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
01455   }
01456 #undef CONV
01457 }
01458 
01459 // Return a sequence for getting a 1 from an IPM result when CC has a
01460 // value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
01461 // The handling of CC values outside CCValid doesn't matter.
01462 static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
01463   // Deal with cases where the result can be taken directly from a bit
01464   // of the IPM result.
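  // For example (assuming SystemZ::IPM_CC == 28, so that IPM leaves the
  // CC in bits 29:28 of the result and bits 31:30 are zero), a CCMask
  // selecting CC values 1 and 3 is exactly a test of the low CC bit,
  // which is bit IPM_CC of the IPM result.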
01465   if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
01466     return IPMConversion(0, 0, SystemZ::IPM_CC);
01467   if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
01468     return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
01469 
01470   // Deal with cases where we can add a value to force the sign bit
01471   // to contain the right value.  Putting the bit in 31 means we can
01472   // use SRL rather than RISBG(L), and also makes it easier to get a
01473   // 0/-1 value, so it has priority over the other tests below.
01474   //
01475   // These sequences rely on the fact that the upper two bits of the
01476   // IPM result are zero.
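  // For example, when CCMask selects only CC 0, the IPM result is less
  // than (1 << IPM_CC) exactly when CC is 0, so adding -(1 << IPM_CC)
  // sets bit 31 precisely in that case; the shift right by 31 below
  // then produces the required 0/1 value.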
01477   uint64_t TopBit = uint64_t(1) << 31;
01478   if (CCMask == (CCValid & SystemZ::CCMASK_0))
01479     return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
01480   if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
01481     return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
01482   if (CCMask == (CCValid & (SystemZ::CCMASK_0
01483                             | SystemZ::CCMASK_1
01484                             | SystemZ::CCMASK_2)))
01485     return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
01486   if (CCMask == (CCValid & SystemZ::CCMASK_3))
01487     return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
01488   if (CCMask == (CCValid & (SystemZ::CCMASK_1
01489                             | SystemZ::CCMASK_2
01490                             | SystemZ::CCMASK_3)))
01491     return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
01492 
01493   // Next try inverting the value and testing a bit.  0/1 could be
01494   // handled this way too, but we dealt with that case above.
01495   if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
01496     return IPMConversion(-1, 0, SystemZ::IPM_CC);
01497 
01498   // Handle cases where adding a value forces a non-sign bit to contain
01499   // the right value.
01500   if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
01501     return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
01502   if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
01503     return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
01504 
01505   // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All of these
01506   // can be done by inverting the low CC bit and applying one of the
01507   // sign-based extractions above.
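  // For example, for CCMASK_1 the XOR with (1 << IPM_CC) maps CC 1 to 0
  // (and 0 to 1, 2 to 3, 3 to 2), after which the CC-0 sequence above
  // (add -(1 << IPM_CC), then shift right by 31) yields 1 exactly when
  // the original CC was 1.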
01508   if (CCMask == (CCValid & SystemZ::CCMASK_1))
01509     return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
01510   if (CCMask == (CCValid & SystemZ::CCMASK_2))
01511     return IPMConversion(1 << SystemZ::IPM_CC,
01512                          TopBit - (3 << SystemZ::IPM_CC), 31);
01513   if (CCMask == (CCValid & (SystemZ::CCMASK_0
01514                             | SystemZ::CCMASK_1
01515                             | SystemZ::CCMASK_3)))
01516     return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
01517   if (CCMask == (CCValid & (SystemZ::CCMASK_0
01518                             | SystemZ::CCMASK_2
01519                             | SystemZ::CCMASK_3)))
01520     return IPMConversion(1 << SystemZ::IPM_CC,
01521                          TopBit - (1 << SystemZ::IPM_CC), 31);
01522 
01523   llvm_unreachable("Unexpected CC combination");
01524 }
01525 
01526 // If C can be converted to a comparison against zero, adjust the operands
01527 // as necessary.
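// For example, a signed "X > -1" is equivalent to "X >= 0": Op1 is
// rewritten to 0 and CCMASK_CMP_EQ is XORed into the mask, turning
// CCMASK_CMP_GT into CCMASK_CMP_GE.  Comparisons against zero can often
// be folded into earlier instructions or use LOAD AND TEST.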
01528 static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01529   if (C.ICmpType == SystemZICMP::UnsignedOnly)
01530     return;
01531 
01532   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
01533   if (!ConstOp1)
01534     return;
01535 
01536   int64_t Value = ConstOp1->getSExtValue();
01537   if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
01538       (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
01539       (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
01540       (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
01541     C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
01542     C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
01543   }
01544 }
01545 
01546 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
01547 // adjust the operands as necessary.
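// For example, an equality test between an 8-bit zero-extending load
// and the constant 100 can then be lowered to a single CLI (compare
// logical immediate) against the byte in memory, instead of a separate
// load and register comparison.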
01548 static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01549   // For us to make any changes, it must be a comparison between a single-use
01550   // load and a constant.
01551   if (!C.Op0.hasOneUse() ||
01552       C.Op0.getOpcode() != ISD::LOAD ||
01553       C.Op1.getOpcode() != ISD::Constant)
01554     return;
01555 
01556   // We must have an 8- or 16-bit load.
01557   auto *Load = cast<LoadSDNode>(C.Op0);
01558   unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
01559   if (NumBits != 8 && NumBits != 16)
01560     return;
01561 
01562   // The load must be an extending one and the constant must be within the
01563   // range of the unextended value.
01564   auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
01565   uint64_t Value = ConstOp1->getZExtValue();
01566   uint64_t Mask = (1 << NumBits) - 1;
01567   if (Load->getExtensionType() == ISD::SEXTLOAD) {
01568     // Make sure that ConstOp1 is in range of C.Op0.
01569     int64_t SignedValue = ConstOp1->getSExtValue();
01570     if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
01571       return;
01572     if (C.ICmpType != SystemZICMP::SignedOnly) {
01573       // Unsigned comparison between two sign-extended values is equivalent
01574       // to unsigned comparison between two zero-extended values.
01575       Value &= Mask;
01576     } else if (NumBits == 8) {
01577       // Try to treat the comparison as unsigned, so that we can use CLI.
01578       // Adjust CCMask and Value as necessary.
01579       if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
01580         // Test whether the high bit of the byte is set.
01581         Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
01582       else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
01583         // Test whether the high bit of the byte is clear.
01584         Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
01585       else
01586         // No instruction exists for this combination.
01587         return;
01588       C.ICmpType = SystemZICMP::UnsignedOnly;
01589     }
01590   } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
01591     if (Value > Mask)
01592       return;
01593     assert(C.ICmpType == SystemZICMP::Any &&
01594            "Signedness shouldn't matter here.");
01595   } else
01596     return;
01597 
01598   // Make sure that the first operand is an i32 of the right extension type.
01599   ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
01600                               ISD::SEXTLOAD :
01601                               ISD::ZEXTLOAD);
01602   if (C.Op0.getValueType() != MVT::i32 ||
01603       Load->getExtensionType() != ExtType)
01604     C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
01605                            Load->getChain(), Load->getBasePtr(),
01606                            Load->getPointerInfo(), Load->getMemoryVT(),
01607                            Load->isVolatile(), Load->isNonTemporal(),
01608                            Load->isInvariant(), Load->getAlignment());
01609 
01610   // Make sure that the second operand is an i32 with the right value.
01611   if (C.Op1.getValueType() != MVT::i32 ||
01612       Value != ConstOp1->getZExtValue())
01613     C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
01614 }
01615 
01616 // Return true if Op is either an unextended load, or a load suitable
01617 // for integer register-memory comparisons of type ICmpType.
01618 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
01619   auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
01620   if (Load) {
01621     // There are no instructions to compare a register with a memory byte.
01622     if (Load->getMemoryVT() == MVT::i8)
01623       return false;
01624     // Otherwise decide on extension type.
01625     switch (Load->getExtensionType()) {
01626     case ISD::NON_EXTLOAD:
01627       return true;
01628     case ISD::SEXTLOAD:
01629       return ICmpType != SystemZICMP::UnsignedOnly;
01630     case ISD::ZEXTLOAD:
01631       return ICmpType != SystemZICMP::SignedOnly;
01632     default:
01633       break;
01634     }
01635   }
01636   return false;
01637 }
01638 
01639 // Return true if it is better to swap the operands of C.
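// For example, for "(sext i32 X to i64) < Y" it is better to swap the
// operands (reversing the CC mask to test '>') so that the extension
// ends up on the second operand, where it can be folded into CGFR.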
01640 static bool shouldSwapCmpOperands(const Comparison &C) {
01641   // Leave f128 comparisons alone, since they have no memory forms.
01642   if (C.Op0.getValueType() == MVT::f128)
01643     return false;
01644 
01645   // Always keep a floating-point constant second, since comparisons with
01646   // zero can use LOAD TEST and comparisons with other constants make a
01647   // natural memory operand.
01648   if (isa<ConstantFPSDNode>(C.Op1))
01649     return false;
01650 
01651   // Never swap comparisons with zero since there are many ways to optimize
01652   // those later.
01653   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
01654   if (ConstOp1 && ConstOp1->getZExtValue() == 0)
01655     return false;
01656 
01657   // Also keep natural memory operands second if the loaded value is
01658   // only used here.  Several comparisons have memory forms.
01659   if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
01660     return false;
01661 
01662   // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
01663   // In that case we generally prefer the memory to be second.
01664   if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
01665     // The only exceptions are when the second operand is a constant and
01666     // we can use things like CHHSI.
01667     if (!ConstOp1)
01668       return true;
01669     // The unsigned memory-immediate instructions can handle 16-bit
01670     // unsigned integers.
01671     if (C.ICmpType != SystemZICMP::SignedOnly &&
01672         isUInt<16>(ConstOp1->getZExtValue()))
01673       return false;
01674     // The signed memory-immediate instructions can handle 16-bit
01675     // signed integers.
01676     if (C.ICmpType != SystemZICMP::UnsignedOnly &&
01677         isInt<16>(ConstOp1->getSExtValue()))
01678       return false;
01679     return true;
01680   }
01681 
01682   // Try to promote the use of CGFR and CLGFR.
01683   unsigned Opcode0 = C.Op0.getOpcode();
01684   if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
01685     return true;
01686   if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
01687     return true;
01688   if (C.ICmpType != SystemZICMP::SignedOnly &&
01689       Opcode0 == ISD::AND &&
01690       C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
01691       cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
01692     return true;
01693 
01694   return false;
01695 }
01696 
01697 // Return a version of comparison CC mask CCMask in which the LT and GT
01698 // actions are swapped.
01699 static unsigned reverseCCMask(unsigned CCMask) {
01700   return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
01701           (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
01702           (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
01703           (CCMask & SystemZ::CCMASK_CMP_UO));
01704 }
01705 
01706 // Check whether C tests for equality between X and Y and whether X - Y
01707 // or Y - X is also computed.  In that case it's better to compare the
01708 // result of the subtraction against zero.
01709 static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01710   if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
01711       C.CCMask == SystemZ::CCMASK_CMP_NE) {
01712     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
01713       SDNode *N = *I;
01714       if (N->getOpcode() == ISD::SUB &&
01715           ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
01716            (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
01717         C.Op0 = SDValue(N, 0);
01718         C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
01719         return;
01720       }
01721     }
01722   }
01723 }
01724 
01725 // Check whether C compares a floating-point value with zero and if that
01726 // floating-point value is also negated.  In this case we can use the
01727 // negation to set CC, so avoiding separate LOAD AND TEST and
01728 // LOAD (NEGATIVE/COMPLEMENT) instructions.
01729 static void adjustForFNeg(Comparison &C) {
01730   auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
01731   if (C1 && C1->isZero()) {
01732     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
01733       SDNode *N = *I;
01734       if (N->getOpcode() == ISD::FNEG) {
01735         C.Op0 = SDValue(N, 0);
01736         C.CCMask = reverseCCMask(C.CCMask);
01737         return;
01738       }
01739     }
01740   }
01741 }
01742 
01743 // Check whether C compares (shl X, 32) with 0 and whether X is
01744 // also sign-extended.  In that case it is better to test the result
01745 // of the sign extension using LTGFR.
01746 //
01747 // This case is important because InstCombine transforms a comparison
01748 // with (sext (trunc X)) into a comparison with (shl X, 32).
01749 static void adjustForLTGFR(Comparison &C) {
01750   // Check for a comparison between (shl X, 32) and 0.
01751   if (C.Op0.getOpcode() == ISD::SHL &&
01752       C.Op0.getValueType() == MVT::i64 &&
01753       C.Op1.getOpcode() == ISD::Constant &&
01754       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
01755     auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
01756     if (C1 && C1->getZExtValue() == 32) {
01757       SDValue ShlOp0 = C.Op0.getOperand(0);
01758       // See whether X has any SIGN_EXTEND_INREG uses.
01759       for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
01760         SDNode *N = *I;
01761         if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
01762             cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
01763           C.Op0 = SDValue(N, 0);
01764           return;
01765         }
01766       }
01767     }
01768   }
01769 }
01770 
01771 // If C compares the truncation of an extending load, try to compare
01772 // the untruncated value instead.  This exposes more opportunities to
01773 // reuse CC.
01774 static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01775   if (C.Op0.getOpcode() == ISD::TRUNCATE &&
01776       C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
01777       C.Op1.getOpcode() == ISD::Constant &&
01778       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
01779     auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
01780     if (L->getMemoryVT().getStoreSizeInBits()
01781         <= C.Op0.getValueType().getSizeInBits()) {
01782       unsigned Type = L->getExtensionType();
01783       if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
01784           (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
01785         C.Op0 = C.Op0.getOperand(0);
01786         C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
01787       }
01788     }
01789   }
01790 }
01791 
01792 // Return true if shift operation N has an in-range constant shift value.
01793 // Store it in ShiftVal if so.
01794 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
01795   auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
01796   if (!Shift)
01797     return false;
01798 
01799   uint64_t Amount = Shift->getZExtValue();
01800   if (Amount >= N.getValueType().getSizeInBits())
01801     return false;
01802 
01803   ShiftVal = Amount;
01804   return true;
01805 }
01806 
01807 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
01808 // instruction and whether the CC value is descriptive enough to handle
01809 // a comparison of type Opcode between the AND result and CmpVal.
01810 // CCMask says which comparison result is being tested and BitSize is
01811 // the number of bits in the operands.  If TEST UNDER MASK can be used,
01812 // return the corresponding CC mask, otherwise return 0.
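// For example, for "(X & 0xff00) == 0" the mask fits TMLL, CmpVal is 0
// and CCMask is CCMASK_CMP_EQ, so the function returns CCMASK_TM_ALL_0:
// TEST UNDER MASK sets CC 0 exactly when all selected bits are zero.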
01813 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
01814                                      uint64_t Mask, uint64_t CmpVal,
01815                                      unsigned ICmpType) {
01816   assert(Mask != 0 && "ANDs with zero should have been removed by now");
01817 
01818   // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
01819   if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
01820       !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
01821     return 0;
01822 
01823   // Work out the masks for the lowest and highest bits.
01824   unsigned HighShift = 63 - countLeadingZeros(Mask);
01825   uint64_t High = uint64_t(1) << HighShift;
01826   uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
01827 
01828   // Signed ordered comparisons are effectively unsigned if the sign
01829   // bit is dropped.
01830   bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
01831 
01832   // Check for equality comparisons with 0, or the equivalent.
01833   if (CmpVal == 0) {
01834     if (CCMask == SystemZ::CCMASK_CMP_EQ)
01835       return SystemZ::CCMASK_TM_ALL_0;
01836     if (CCMask == SystemZ::CCMASK_CMP_NE)
01837       return SystemZ::CCMASK_TM_SOME_1;
01838   }
01839   if (EffectivelyUnsigned && CmpVal <= Low) {
01840     if (CCMask == SystemZ::CCMASK_CMP_LT)
01841       return SystemZ::CCMASK_TM_ALL_0;
01842     if (CCMask == SystemZ::CCMASK_CMP_GE)
01843       return SystemZ::CCMASK_TM_SOME_1;
01844   }
01845   if (EffectivelyUnsigned && CmpVal < Low) {
01846     if (CCMask == SystemZ::CCMASK_CMP_LE)
01847       return SystemZ::CCMASK_TM_ALL_0;
01848     if (CCMask == SystemZ::CCMASK_CMP_GT)
01849       return SystemZ::CCMASK_TM_SOME_1;
01850   }
01851 
01852   // Check for equality comparisons with the mask, or the equivalent.
01853   if (CmpVal == Mask) {
01854     if (CCMask == SystemZ::CCMASK_CMP_EQ)
01855       return SystemZ::CCMASK_TM_ALL_1;
01856     if (CCMask == SystemZ::CCMASK_CMP_NE)
01857       return SystemZ::CCMASK_TM_SOME_0;
01858   }
01859   if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
01860     if (CCMask == SystemZ::CCMASK_CMP_GT)
01861       return SystemZ::CCMASK_TM_ALL_1;
01862     if (CCMask == SystemZ::CCMASK_CMP_LE)
01863       return SystemZ::CCMASK_TM_SOME_0;
01864   }
01865   if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
01866     if (CCMask == SystemZ::CCMASK_CMP_GE)
01867       return SystemZ::CCMASK_TM_ALL_1;
01868     if (CCMask == SystemZ::CCMASK_CMP_LT)
01869       return SystemZ::CCMASK_TM_SOME_0;
01870   }
01871 
01872   // Check for ordered comparisons with the top bit.
01873   if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
01874     if (CCMask == SystemZ::CCMASK_CMP_LE)
01875       return SystemZ::CCMASK_TM_MSB_0;
01876     if (CCMask == SystemZ::CCMASK_CMP_GT)
01877       return SystemZ::CCMASK_TM_MSB_1;
01878   }
01879   if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
01880     if (CCMask == SystemZ::CCMASK_CMP_LT)
01881       return SystemZ::CCMASK_TM_MSB_0;
01882     if (CCMask == SystemZ::CCMASK_CMP_GE)
01883       return SystemZ::CCMASK_TM_MSB_1;
01884   }
01885 
01886   // If there are just two bits, we can do equality checks for Low and High
01887   // as well.
01888   if (Mask == Low + High) {
01889     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
01890       return SystemZ::CCMASK_TM_MIXED_MSB_0;
01891     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
01892       return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
01893     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
01894       return SystemZ::CCMASK_TM_MIXED_MSB_1;
01895     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
01896       return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
01897   }
01898 
01899   // Looks like we've exhausted our options.
01900   return 0;
01901 }
01902 
01903 // See whether C can be implemented as a TEST UNDER MASK instruction.
01904 // Update the arguments with the TM version if so.
01905 static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01906   // Check that we have a comparison with a constant.
01907   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
01908   if (!ConstOp1)
01909     return;
01910   uint64_t CmpVal = ConstOp1->getZExtValue();
01911 
01912   // Check whether the nonconstant input is an AND with a constant mask.
01913   Comparison NewC(C);
01914   uint64_t MaskVal;
01915   ConstantSDNode *Mask = nullptr;
01916   if (C.Op0.getOpcode() == ISD::AND) {
01917     NewC.Op0 = C.Op0.getOperand(0);
01918     NewC.Op1 = C.Op0.getOperand(1);
01919     Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
01920     if (!Mask)
01921       return;
01922     MaskVal = Mask->getZExtValue();
01923   } else {
01924     // There is no instruction to compare with a 64-bit immediate
01925     // so use TMHH instead if possible.  We need an unsigned ordered
01926     // comparison with an i64 immediate.
01927     if (NewC.Op0.getValueType() != MVT::i64 ||
01928         NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
01929         NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
01930         NewC.ICmpType == SystemZICMP::SignedOnly)
01931       return;
01932     // Convert LE and GT comparisons into LT and GE.
01933     if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
01934         NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
01935       if (CmpVal == uint64_t(-1))
01936         return;
01937       CmpVal += 1;
01938       NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
01939     }
01940     // If the low N bits of Op1 are zero then the low N bits of Op0 can
01941     // be masked off without changing the result.
01942     MaskVal = -(CmpVal & -CmpVal);
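    // For example, an unsigned i64 test "X < (1 << 48)" is equivalent to
    // "(X & 0xffff000000000000) == 0", which TMHH can test directly:
    // CmpVal & -CmpVal isolates the lowest set bit of CmpVal and negating
    // it gives the mask of all bits at or above that bit.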
01943     NewC.ICmpType = SystemZICMP::UnsignedOnly;
01944   }
01945   if (!MaskVal)
01946     return;
01947 
01948   // Check whether the combination of mask, comparison value and comparison
01949   // type are suitable.
01950   unsigned BitSize = NewC.Op0.getValueType().getSizeInBits();
01951   unsigned NewCCMask, ShiftVal;
01952   if (NewC.ICmpType != SystemZICMP::SignedOnly &&
01953       NewC.Op0.getOpcode() == ISD::SHL &&
01954       isSimpleShift(NewC.Op0, ShiftVal) &&
01955       (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
01956                                         MaskVal >> ShiftVal,
01957                                         CmpVal >> ShiftVal,
01958                                         SystemZICMP::Any))) {
01959     NewC.Op0 = NewC.Op0.getOperand(0);
01960     MaskVal >>= ShiftVal;
01961   } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
01962              NewC.Op0.getOpcode() == ISD::SRL &&
01963              isSimpleShift(NewC.Op0, ShiftVal) &&
01964              (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
01965                                                MaskVal << ShiftVal,
01966                                                CmpVal << ShiftVal,
01967                                                SystemZICMP::UnsignedOnly))) {
01968     NewC.Op0 = NewC.Op0.getOperand(0);
01969     MaskVal <<= ShiftVal;
01970   } else {
01971     NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
01972                                      NewC.ICmpType);
01973     if (!NewCCMask)
01974       return;
01975   }
01976 
01977   // Go ahead and make the change.
01978   C.Opcode = SystemZISD::TM;
01979   C.Op0 = NewC.Op0;
01980   if (Mask && Mask->getZExtValue() == MaskVal)
01981     C.Op1 = SDValue(Mask, 0);
01982   else
01983     C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
01984   C.CCValid = SystemZ::CCMASK_TM;
01985   C.CCMask = NewCCMask;
01986 }
01987 
01988 // Return a Comparison that tests the condition-code result of intrinsic
01989 // node Call against constant integer CC using comparison code Cond.
01990 // Opcode is the opcode of the SystemZISD operation for the intrinsic
01991 // and CCValid is the set of possible condition-code results.
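// For example, a SETEQ test against CC == 1 produces
// CCMask = 1 << (3 - 1) = 4, which is SystemZ::CCMASK_1 (before the
// final mask with CCValid).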
01992 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
01993                                   SDValue Call, unsigned CCValid, uint64_t CC,
01994                                   ISD::CondCode Cond) {
01995   Comparison C(Call, SDValue());
01996   C.Opcode = Opcode;
01997   C.CCValid = CCValid;
01998   if (Cond == ISD::SETEQ)
01999     // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
02000     C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
02001   else if (Cond == ISD::SETNE)
02002     // ...and the inverse of that.
02003     C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
02004   else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
02005     // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
02006     // always true for CC>3.
02007     C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
02008   else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
02009     // ...and the inverse of that.
02010     C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
02011   else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
02012     // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
02013     // always true for CC>3.
02014     C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
02015   else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
02016     // ...and the inverse of that.
02017     C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
02018   else
02019     llvm_unreachable("Unexpected integer comparison type");
02020   C.CCMask &= CCValid;
02021   return C;
02022 }
02023 
02024 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
02025 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
02026                          ISD::CondCode Cond, SDLoc DL) {
02027   if (CmpOp1.getOpcode() == ISD::Constant) {
02028     uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
02029     unsigned Opcode, CCValid;
02030     if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
02031         CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
02032         isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
02033       return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
02034     if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
02035         CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
02036         isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
02037       return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
02038   }
02039   Comparison C(CmpOp0, CmpOp1);
02040   C.CCMask = CCMaskForCondCode(Cond);
02041   if (C.Op0.getValueType().isFloatingPoint()) {
02042     C.CCValid = SystemZ::CCMASK_FCMP;
02043     C.Opcode = SystemZISD::FCMP;
02044     adjustForFNeg(C);
02045   } else {
02046     C.CCValid = SystemZ::CCMASK_ICMP;
02047     C.Opcode = SystemZISD::ICMP;
02048     // Choose the type of comparison.  Equality and inequality tests can
02049     // use either signed or unsigned comparisons.  The choice also doesn't
02050     // matter if both sign bits are known to be clear.  In those cases we
02051     // want to give the main isel code the freedom to choose whichever
02052     // form fits best.
02053     if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
02054         C.CCMask == SystemZ::CCMASK_CMP_NE ||
02055         (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
02056       C.ICmpType = SystemZICMP::Any;
02057     else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
02058       C.ICmpType = SystemZICMP::UnsignedOnly;
02059     else
02060       C.ICmpType = SystemZICMP::SignedOnly;
02061     C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
02062     adjustZeroCmp(DAG, DL, C);
02063     adjustSubwordCmp(DAG, DL, C);
02064     adjustForSubtraction(DAG, DL, C);
02065     adjustForLTGFR(C);
02066     adjustICmpTruncate(DAG, DL, C);
02067   }
02068 
02069   if (shouldSwapCmpOperands(C)) {
02070     std::swap(C.Op0, C.Op1);
02071     C.CCMask = reverseCCMask(C.CCMask);
02072   }
02073 
02074   adjustForTestUnderMask(DAG, DL, C);
02075   return C;
02076 }
02077 
02078 // Emit the comparison instruction described by C.
02079 static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
02080   if (!C.Op1.getNode()) {
02081     SDValue Op;
02082     switch (C.Op0.getOpcode()) {
02083     case ISD::INTRINSIC_W_CHAIN:
02084       Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
02085       break;
02086     case ISD::INTRINSIC_WO_CHAIN:
02087       Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
02088       break;
02089     default:
02090       llvm_unreachable("Invalid comparison operands");
02091     }
02092     return SDValue(Op.getNode(), Op->getNumValues() - 1);
02093   }
02094   if (C.Opcode == SystemZISD::ICMP)
02095     return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
02096                        DAG.getConstant(C.ICmpType, DL, MVT::i32));
02097   if (C.Opcode == SystemZISD::TM) {
02098     bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
02099                          bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
02100     return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
02101                        DAG.getConstant(RegisterOnly, DL, MVT::i32));
02102   }
02103   return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
02104 }
02105 
02106 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
02107 // 64 bits.  Extend is the extension type to use.  Store the high part
02108 // in Hi and the low part in Lo.
02109 static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
02110                             unsigned Extend, SDValue Op0, SDValue Op1,
02111                             SDValue &Hi, SDValue &Lo) {
02112   Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
02113   Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
02114   SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
02115   Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
02116                    DAG.getConstant(32, DL, MVT::i64));
02117   Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
02118   Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
02119 }
02120 
02121 // Lower a binary operation that produces two VT results, one in each
02122 // half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
02123 // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
02124 // on the extended Op0 and (unextended) Op1.  Store the even register result
02125 // in Even and the odd register result in Odd.
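// For example (a sketch of one user of this helper): 64-bit UMUL_LOHI
// is implemented with MLGR, which multiplies the odd register of a
// 128-bit pair by Op1 and leaves the high half of the product in the
// even register and the low half in the odd register.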
02126 static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
02127                              unsigned Extend, unsigned Opcode,
02128                              SDValue Op0, SDValue Op1,
02129                              SDValue &Even, SDValue &Odd) {
02130   SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
02131   SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
02132                                SDValue(In128, 0), Op1);
02133   bool Is32Bit = is32Bit(VT);
02134   Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
02135   Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
02136 }
02137 
02138 // Return an i32 value that is 1 if the CC value produced by Glue is
02139 // in the mask CCMask and 0 otherwise.  CC is known to have a value
02140 // in CCValid, so other values can be ignored.
02141 static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
02142                          unsigned CCValid, unsigned CCMask) {
02143   IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
02144   SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
02145 
02146   if (Conversion.XORValue)
02147     Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
02148                          DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
02149 
02150   if (Conversion.AddValue)
02151     Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
02152                          DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
02153 
02154   // The SHR/AND sequence should get optimized to an RISBG.
02155   Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
02156                        DAG.getConstant(Conversion.Bit, DL, MVT::i32));
02157   if (Conversion.Bit != 31)
02158     Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
02159                          DAG.getConstant(1, DL, MVT::i32));
02160   return Result;
02161 }
02162 
02163 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
02164 // be done directly.  IsFP is true if CC is for a floating-point rather than
02165 // integer comparison.
02166 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
02167   switch (CC) {
02168   case ISD::SETOEQ:
02169   case ISD::SETEQ:
02170     return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
02171 
02172   case ISD::SETOGE:
02173   case ISD::SETGE:
02174     return IsFP ? SystemZISD::VFCMPHE : 0;
02175 
02176   case ISD::SETOGT:
02177   case ISD::SETGT:
02178     return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
02179 
02180   case ISD::SETUGT:
02181     return IsFP ? 0 : SystemZISD::VICMPHL;
02182 
02183   default:
02184     return 0;
02185   }
02186 }
02187 
02188 // Return the SystemZISD vector comparison operation for CC or its inverse,
02189 // or 0 if neither can be done directly.  Indicate in Invert whether the
02190 // result is for the inverse of CC.  IsFP is true if CC is for a
02191 // floating-point rather than integer comparison.
02192 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
02193                                             bool &Invert) {
02194   if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
02195     Invert = false;
02196     return Opcode;
02197   }
02198 
02199   CC = ISD::getSetCCInverse(CC, !IsFP);
02200   if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
02201     Invert = true;
02202     return Opcode;
02203   }
02204 
02205   return 0;
02206 }
02207 
02208 // Return a v2f64 that contains the extended form of elements Start and Start+1
02209 // of v4f32 value Op.
02210 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
02211                                   SDValue Op) {
02212   int Mask[] = { Start, -1, Start + 1, -1 };
02213   Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
02214   return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
02215 }
02216 
02217 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
02218 // producing a result of type VT.
02219 static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
02220                             EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
02221   // There is no hardware support for v4f32, so extend the vector into
02222   // two v2f64s and compare those.
02223   if (CmpOp0.getValueType() == MVT::v4f32) {
02224     SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
02225     SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
02226     SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
02227     SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
02228     SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
02229     SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
02230     return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
02231   }
02232   return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
02233 }
02234 
02235 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
02236 // an integer mask of type VT.
02237 static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
02238                                 ISD::CondCode CC, SDValue CmpOp0,
02239                                 SDValue CmpOp1) {
02240   bool IsFP = CmpOp0.getValueType().isFloatingPoint();
02241   bool Invert = false;
02242   SDValue Cmp;
02243   switch (CC) {
02244     // Handle tests for order using (or (ogt y x) (oge x y)).
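    // If either operand is a NaN, both comparisons below are false;
    // otherwise exactly one of "y > x" and "x >= y" holds.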
02245   case ISD::SETUO:
02246     Invert = true;  // Fall through: unordered is the inverse of ordered.
02247   case ISD::SETO: {
02248     assert(IsFP && "Unexpected integer comparison");
02249     SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
02250     SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
02251     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
02252     break;
02253   }
02254 
02255     // Handle <> tests using (or (ogt y x) (ogt x y)).
02256   case ISD::SETUEQ:
02257     Invert = true;  // Fall through: ueq is the inverse of one.
02258   case ISD::SETONE: {
02259     assert(IsFP && "Unexpected integer comparison");
02260     SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
02261     SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
02262     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
02263     break;
02264   }
02265 
02266     // Otherwise a single comparison is enough.  It doesn't really
02267     // matter whether we try the inversion or the swap first, since
02268     // there are no cases where both work.
02269   default:
02270     if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
02271       Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
02272     else {
02273       CC = ISD::getSetCCSwappedOperands(CC);
02274       if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
02275         Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
02276       else
02277         llvm_unreachable("Unhandled comparison");
02278     }
02279     break;
02280   }
02281   if (Invert) {
02282     SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
02283                                DAG.getConstant(65535, DL, MVT::i32));
02284     Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
02285     Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
02286   }
02287   return Cmp;
02288 }
02289 
02290 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
02291                                           SelectionDAG &DAG) const {
02292   SDValue CmpOp0   = Op.getOperand(0);
02293   SDValue CmpOp1   = Op.getOperand(1);
02294   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
02295   SDLoc DL(Op);
02296   EVT VT = Op.getValueType();
02297   if (VT.isVector())
02298     return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
02299 
02300   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
02301   SDValue Glue = emitCmp(DAG, DL, C);
02302   return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
02303 }
02304 
02305 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
02306   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
02307   SDValue CmpOp0   = Op.getOperand(2);
02308   SDValue CmpOp1   = Op.getOperand(3);
02309   SDValue Dest     = Op.getOperand(4);
02310   SDLoc DL(Op);
02311 
02312   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
02313   SDValue Glue = emitCmp(DAG, DL, C);
02314   return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
02315                      Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
02316                      DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
02317 }
02318 
02319 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
02320 // allowing Pos and Neg to be wider than CmpOp.
02321 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
02322   return (Neg.getOpcode() == ISD::SUB &&
02323           Neg.getOperand(0).getOpcode() == ISD::Constant &&
02324           cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
02325           Neg.getOperand(1) == Pos &&
02326           (Pos == CmpOp ||
02327            (Pos.getOpcode() == ISD::SIGN_EXTEND &&
02328             Pos.getOperand(0) == CmpOp)));
02329 }
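
// For example, IR of the form "select (icmp slt X, 0), (sub 0, X), X"
// reaches lowerSELECT_CC below with TrueOp == (0 - X) and FalseOp == X;
// the second isAbsolute check there matches and getAbsolute then emits
// a single IABS to compute |X|.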
02330 
02331 // Return the absolute or negative absolute of Op; IsNegative decides which.
02332 static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
02333                            bool IsNegative) {
02334   Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
02335   if (IsNegative)
02336     Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
02337                      DAG.getConstant(0, DL, Op.getValueType()), Op);
02338   return Op;
02339 }
02340 
02341 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
02342                                               SelectionDAG &DAG) const {
02343   SDValue CmpOp0   = Op.getOperand(0);
02344   SDValue CmpOp1   = Op.getOperand(1);
02345   SDValue TrueOp   = Op.getOperand(2);
02346   SDValue FalseOp  = Op.getOperand(3);
02347   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
02348   SDLoc DL(Op);
02349 
02350   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
02351 
02352   // Check for absolute and negative-absolute selections, including those
02353   // where the comparison value is sign-extended (for LPGFR and LNGFR).
02354   // This check supplements the one in DAGCombiner.
02355   if (C.Opcode == SystemZISD::ICMP &&
02356       C.CCMask != SystemZ::CCMASK_CMP_EQ &&
02357       C.CCMask != SystemZ::CCMASK_CMP_NE &&
02358       C.Op1.getOpcode() == ISD::Constant &&
02359       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
02360     if (isAbsolute(C.Op0, TrueOp, FalseOp))
02361       return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
02362     if (isAbsolute(C.Op0, FalseOp, TrueOp))
02363       return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
02364   }
02365 
02366   SDValue Glue = emitCmp(DAG, DL, C);
02367 
02368   // Special case for handling -1/0 results.  The shifts we use here
02369   // should get optimized with the IPM conversion sequence.
02370   auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
02371   auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
02372   if (TrueC && FalseC) {
02373     int64_t TrueVal = TrueC->getSExtValue();
02374     int64_t FalseVal = FalseC->getSExtValue();
02375     if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
02376       // Invert the condition if we want -1 on false.
02377       if (TrueVal == 0)
02378         C.CCMask ^= C.CCValid;
02379       SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
02380       EVT VT = Op.getValueType();
02381       // Extend the result to VT.  Upper bits are ignored.
02382       if (!is32Bit(VT))
02383         Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
02384       // Sign-extend from the low bit.
02385       SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
02386       SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
02387       return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
02388     }
02389   }
02390 
02391   SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
02392                    DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
02393 
02394   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
02395   return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
02396 }
02397 
02398 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
02399                                                   SelectionDAG &DAG) const {
02400   SDLoc DL(Node);
02401   const GlobalValue *GV = Node->getGlobal();
02402   int64_t Offset = Node->getOffset();
02403   EVT PtrVT = getPointerTy();
02404   Reloc::Model RM = DAG.getTarget().getRelocationModel();
02405   CodeModel::Model CM = DAG.getTarget().getCodeModel();
02406 
02407   SDValue Result;
02408   if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
02409     // Assign anchors at 1<<12 byte boundaries.
02410     uint64_t Anchor = Offset & ~uint64_t(0xfff);
02411     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
02412     Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02413 
02414     // The offset can be folded into the address if it is aligned to a halfword.
02415     Offset -= Anchor;
02416     if (Offset != 0 && (Offset & 1) == 0) {
02417       SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
02418       Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
02419       Offset = 0;
02420     }
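    // For example, with Offset == 0x1234 the anchor is GV + 0x1000 and
    // the remaining 0x234 is halfword-aligned, so it is folded here;
    // an odd remainder such as 0x235 is instead added explicitly below.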
02421   } else {
02422     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
02423     Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02424     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
02425                          MachinePointerInfo::getGOT(), false, false, false, 0);
02426   }
02427 
02428   // If there was a non-zero offset that we didn't fold, create an explicit
02429   // addition for it.
02430   if (Offset != 0)
02431     Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
02432                          DAG.getConstant(Offset, DL, PtrVT));
02433 
02434   return Result;
02435 }
02436 
02437 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
02438                                                  SelectionDAG &DAG,
02439                                                  unsigned Opcode,
02440                                                  SDValue GOTOffset) const {
02441   SDLoc DL(Node);
02442   EVT PtrVT = getPointerTy();
02443   SDValue Chain = DAG.getEntryNode();
02444   SDValue Glue;
02445 
02446   // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
02447   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
02448   Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
02449   Glue = Chain.getValue(1);
02450   Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
02451   Glue = Chain.getValue(1);
02452 
02453   // The first call operand is the chain and the second is the TLS symbol.
02454   SmallVector<SDValue, 8> Ops;
02455   Ops.push_back(Chain);
02456   Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
02457                                            Node->getValueType(0),
02458                                            0, 0));
02459 
02460   // Add argument registers to the end of the list so that they are
02461   // known live into the call.
02462   Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
02463   Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
02464 
02465   // Add a register mask operand representing the call-preserved registers.
02466   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
02467   const uint32_t *Mask =
02468       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
02469   assert(Mask && "Missing call preserved mask for calling convention");
02470   Ops.push_back(DAG.getRegisterMask(Mask));
02471 
02472   // Glue the call to the argument copies.
02473   Ops.push_back(Glue);
02474 
02475   // Emit the call.
02476   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
02477   Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
02478   Glue = Chain.getValue(1);
02479 
02480   // Copy the return value from %r2.
02481   return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
02482 }
02483 
02484 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
02485                  SelectionDAG &DAG) const {
02486   SDLoc DL(Node);
02487   const GlobalValue *GV = Node->getGlobal();
02488   EVT PtrVT = getPointerTy();
02489   TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
02490 
02491   // The high part of the thread pointer is in access register 0.
02492   SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
02493                              DAG.getConstant(0, DL, MVT::i32));
02494   TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
02495 
02496   // The low part of the thread pointer is in access register 1.
02497   SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
02498                              DAG.getConstant(1, DL, MVT::i32));
02499   TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
02500 
02501   // Merge them into a single 64-bit address.
02502   SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
02503                                     DAG.getConstant(32, DL, PtrVT));
02504   SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
02505 
02506   // Get the offset of GA from the thread pointer, based on the TLS model.
02507   SDValue Offset;
02508   switch (model) {
02509     case TLSModel::GeneralDynamic: {
02510       // Load the GOT offset of the tls_index (module ID / per-symbol offset).
02511       SystemZConstantPoolValue *CPV =
02512         SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
02513 
02514       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
02515       Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
02516                            Offset, MachinePointerInfo::getConstantPool(),
02517                            false, false, false, 0);
02518 
02519       // Call __tls_get_offset to retrieve the offset.
02520       Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
02521       break;
02522     }
02523 
02524     case TLSModel::LocalDynamic: {
02525       // Load the GOT offset of the module ID.
02526       SystemZConstantPoolValue *CPV =
02527         SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
02528 
02529       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
02530       Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
02531                            Offset, MachinePointerInfo::getConstantPool(),
02532                            false, false, false, 0);
02533 
02534       // Call __tls_get_offset to retrieve the module base offset.
02535       Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
02536 
02537       // Note: The SystemZLDCleanupPass will remove redundant computations
02538       // of the module base offset.  Count total number of local-dynamic
02539       // accesses to trigger execution of that pass.
02540       SystemZMachineFunctionInfo *MFI =
02541         DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
02542       MFI->incNumLocalDynamicTLSAccesses();
02543 
02544       // Add the per-symbol offset.
02545       CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
02546 
02547       SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
02548       DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
02549                               DTPOffset, MachinePointerInfo::getConstantPool(),
02550                               false, false, false, 0);
02551 
02552       Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
02553       break;
02554     }
02555 
02556     case TLSModel::InitialExec: {
02557       // Load the offset from the GOT.
02558       Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
02559                                           SystemZII::MO_INDNTPOFF);
02560       Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
02561       Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
02562                            Offset, MachinePointerInfo::getGOT(),
02563                            false, false, false, 0);
02564       break;
02565     }
02566 
02567     case TLSModel::LocalExec: {
02568       // Force the offset into the constant pool and load it from there.
02569       SystemZConstantPoolValue *CPV =
02570         SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
02571 
02572       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
02573       Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
02574                            Offset, MachinePointerInfo::getConstantPool(),
02575                            false, false, false, 0);
02576       break;
02577     }
02578   }
02579 
02580   // Add the base and offset together.
02581   return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
02582 }
02583 
02584 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
02585                                                  SelectionDAG &DAG) const {
02586   SDLoc DL(Node);
02587   const BlockAddress *BA = Node->getBlockAddress();
02588   int64_t Offset = Node->getOffset();
02589   EVT PtrVT = getPointerTy();
02590 
02591   SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
02592   Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02593   return Result;
02594 }
02595 
02596 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
02597                                               SelectionDAG &DAG) const {
02598   SDLoc DL(JT);
02599   EVT PtrVT = getPointerTy();
02600   SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
02601 
02602   // Use LARL to load the address of the table.
02603   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02604 }
02605 
02606 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
02607                                                  SelectionDAG &DAG) const {
02608   SDLoc DL(CP);
02609   EVT PtrVT = getPointerTy();
02610 
02611   SDValue Result;
02612   if (CP->isMachineConstantPoolEntry())
02613     Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
02614                CP->getAlignment());
02615   else
02616     Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
02617                CP->getAlignment(), CP->getOffset());
02618 
02619   // Use LARL to load the address of the constant pool entry.
02620   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02621 }
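      // In both functions above, the PCREL_WRAPPER node is what later
      // matches LARL, so the address is typically materialized by a single
      // "larl %rX, <label>" (register and label illustrative only).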
02622 
02623 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
02624                                             SelectionDAG &DAG) const {
02625   SDLoc DL(Op);
02626   SDValue In = Op.getOperand(0);
02627   EVT InVT = In.getValueType();
02628   EVT ResVT = Op.getValueType();
02629 
02630   // Convert loads directly.  This is normally done by DAGCombiner,
02631   // but we need this case for bitcasts that are created during lowering
02632   // and which are then lowered themselves.
02633   if (auto *LoadN = dyn_cast<LoadSDNode>(In))
02634     return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
02635                        LoadN->getMemOperand());
02636 
02637   if (InVT == MVT::i32 && ResVT == MVT::f32) {
02638     SDValue In64;
02639     if (Subtarget.hasHighWord()) {
02640       SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
02641                                        MVT::i64);
02642       In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
02643                                        MVT::i64, SDValue(U64, 0), In);
02644     } else {
02645       In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
02646       In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
02647                          DAG.getConstant(32, DL, MVT::i64));
02648     }
02649     SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
02650     return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
02651                                       DL, MVT::f32, Out64);
02652   }
02653   if (InVT == MVT::f32 && ResVT == MVT::i32) {
02654     SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
02655     SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
02656                                              MVT::f64, SDValue(U64, 0), In);
02657     SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
02658     if (Subtarget.hasHighWord())
02659       return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
02660                                         MVT::i32, Out64);
02661     SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
02662                                 DAG.getConstant(32, DL, MVT::i64));
02663     return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
02664   }
02665   llvm_unreachable("Unexpected bitcast combination");
02666 }
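      // Note on the subregister choices above: a 32-bit FP value occupies
      // the left (high) half of a 64-bit FPR on SystemZ, so the i32 is
      // placed in the high 32 bits of a GR64 before the f64 bitcast, and is
      // extracted from the same position on the way back.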
02667 
02668 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
02669                                             SelectionDAG &DAG) const {
02670   MachineFunction &MF = DAG.getMachineFunction();
02671   SystemZMachineFunctionInfo *FuncInfo =
02672     MF.getInfo<SystemZMachineFunctionInfo>();
02673   EVT PtrVT = getPointerTy();
02674 
02675   SDValue Chain   = Op.getOperand(0);
02676   SDValue Addr    = Op.getOperand(1);
02677   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02678   SDLoc DL(Op);
02679 
02680   // The initial values of each field.
02681   const unsigned NumFields = 4;
02682   SDValue Fields[NumFields] = {
02683     DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
02684     DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
02685     DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
02686     DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
02687   };
02688 
02689   // Store each field into its respective slot.
02690   SDValue MemOps[NumFields];
02691   unsigned Offset = 0;
02692   for (unsigned I = 0; I < NumFields; ++I) {
02693     SDValue FieldAddr = Addr;
02694     if (Offset != 0)
02695       FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
02696                               DAG.getIntPtrConstant(Offset, DL));
02697     MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
02698                              MachinePointerInfo(SV, Offset),
02699                              false, false, 0);
02700     Offset += 8;
02701   }
02702   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
02703 }
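      // For reference, the four fields stored above correspond to the s390x
      // ABI va_list, roughly:
      //
      //   struct __va_list_tag {
      //     long __gpr;                // number of GPR arguments consumed
      //     long __fpr;                // number of FPR arguments consumed
      //     void *__overflow_arg_area; // next argument passed on the stack
      //     void *__reg_save_area;     // where argument registers are saved
      //   };
      //
      // with each field taking 8 bytes, hence the Offset += 8 stride.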
02704 
02705 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
02706                                            SelectionDAG &DAG) const {
02707   SDValue Chain      = Op.getOperand(0);
02708   SDValue DstPtr     = Op.getOperand(1);
02709   SDValue SrcPtr     = Op.getOperand(2);
02710   const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
02711   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
02712   SDLoc DL(Op);
02713 
02714   return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
02715                        /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
02716                        /*isTailCall*/false,
02717                        MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
02718 }
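      // The 32-byte length matches the four 8-byte va_list fields written by
      // lowerVASTART above, so va_copy reduces to a fixed-size memcpy (often
      // a single MVC).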
02719 
02720 SDValue SystemZTargetLowering::
02721 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
02722   SDValue Chain = Op.getOperand(0);
02723   SDValue Size  = Op.getOperand(1);
02724   SDLoc DL(Op);
02725 
02726   unsigned SPReg = getStackPointerRegisterToSaveRestore();
02727 
02728   // Get a reference to the stack pointer.
02729   SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
02730 
02731   // Get the new stack pointer value.
02732   SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);
02733 
02734   // Copy the new stack pointer back.
02735   Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
02736 
02737   // The allocated data lives above the 160 bytes allocated for the standard
02738   // frame, plus any outgoing stack arguments.  We don't know how much that
02739   // amounts to yet, so emit a special ADJDYNALLOC placeholder.
02740   SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
02741   SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
02742 
02743   SDValue Ops[2] = { Result, Chain };
02744   return DAG.getMergeValues(Ops, DL);
02745 }
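      // Illustrative example: for an alloca of 64 bytes with the 160-byte
      // standard frame and, say, 32 bytes of outgoing arguments, this
      // computes NewSP = OldSP - 64 and returns NewSP + (160 + 32), with
      // ADJDYNALLOC standing in for the parenthesized amount until the
      // final frame layout is known.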
02746 
02747 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
02748                                               SelectionDAG &DAG) const {
02749   EVT VT = Op.getValueType();
02750   SDLoc DL(Op);
02751   SDValue Ops[2];
02752   if (is32Bit(VT))
02753     // Just do a normal 64-bit multiplication and extract the results.
02754     // We define this so that it can be used for constant division.
02755     lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
02756                     Op.getOperand(1), Ops[1], Ops[0]);
02757   else {
02758     // Do a full 128-bit multiplication based on UMUL_LOHI64:
02759     //
02760     //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
02761     //
02762     // but using the fact that the upper halves are either all zeros
02763     // or all ones:
02764     //
02765     //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
02766     //
02767     // and grouping the two subtracted terms together, since the ANDs and
02768     // the ADD complete more quickly than the multiplication:
02769     //
02770     //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
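          //
          // The middle-term rewrite relies on lh and rh each being 0 or all
          // ones: when lh is all ones, lh * rl == -rl (mod 2^64) and
          // lh & rl == rl, while lh == 0 makes both terms zero, so
          // (lh * rl) << 64 == -((lh & rl) << 64) in either case.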
02771     SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
02772     SDValue LL = Op.getOperand(0);
02773     SDValue RL = Op.getOperand(1);
02774     SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
02775     SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
02776     // UMUL_LOHI64 returns the low result in the odd register and the high
02777     // result in the even register.  SMUL_LOHI is defined to return the
02778     // low half first, so the results are in reverse order.
02779     lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
02780                      LL, RL, Ops[1], Ops[0]);
02781     SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
02782     SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
02783     SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
02784     Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
02785   }
02786   return DAG.getMergeValues(Ops, DL);
02787 }
02788 
02789 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
02790                                               SelectionDAG &DAG) const {
02791   EVT VT = Op.getValueType();
02792   SDLoc DL(Op);
02793   SDValue Ops[2];
02794   if (is32Bit(VT))
02795     // Just do a normal 64-bit multiplication and extract the results.
02796     // We define this so that it can be used for constant division.
02797     lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
02798                     Op.getOperand(1), Ops[1], Ops[0]);
02799   else
02800     // UMUL_LOHI64 returns the low result in the odd register and the high
02801     // result in the even register.  UMUL_LOHI is defined to return the
02802     // low half first, so the results are in reverse order.
02803     lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
02804                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
02805   return DAG.getMergeValues(Ops, DL);
02806 }
02807 
02808 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
02809                                             SelectionDAG &DAG) const {
02810   SDValue Op0 = Op.getOperand(0);
02811   SDValue Op1 = Op.getOperand(1);
02812   EVT VT = Op.getValueType();
02813   SDLoc DL(Op);
02814   unsigned Opcode;
02815 
02816   // We use DSGF for 32-bit division.
02817   if (is32Bit(VT)) {
02818     Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
02819     Opcode = SystemZISD::SDIVREM32;
02820   } else if (DAG.ComputeNumSignBits(Op1) > 32) {
02821     Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
02822     Opcode = SystemZISD::SDIVREM32;
02823   } else
02824     Opcode = SystemZISD::SDIVREM64;
02825 
02826   // DSG(F) takes a 64-bit dividend, so the even register in the GR128
02827   // input is "don't care".  The instruction returns the remainder in
02828   // the even register and the quotient in the odd register.
02829   SDValue Ops[2];
02830   lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
02831                    Op0, Op1, Ops[1], Ops[0]);
02832   return DAG.getMergeValues(Ops, DL);
02833 }
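      // Note: because SDIVREM computes both results at once, an IR pair such
      // as "sdiv i32 %x, %y" together with "srem i32 %x, %y" costs a single
      // DSG(F)-based sequence; Ops[0] above receives the quotient and Ops[1]
      // the remainder.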
02834 
02835 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
02836                                             SelectionDAG &DAG) const {
02837   EVT VT = Op.getValueType();
02838   SDLoc DL(Op);
02839 
02840   // DL(G) uses a double-width dividend, so we need to clear the even
02841   // register in the GR128 input.  The instruction returns the remainder
02842   // in the even register and the quotient in the odd register.
02843   SDValue Ops[2];
02844   if (is32Bit(VT))
02845     lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
02846                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
02847   else
02848     lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
02849                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
02850   return DAG.getMergeValues(Ops, DL);
02851 }
02852 
02853 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
02854   assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
02855 
02856   // Get the known-zero masks for each operand.
02857   SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
02858   APInt KnownZero[2], KnownOne[2];
02859   DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]);
02860   DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]);
02861 
02862   // See if the upper 32 bits of one operand and the lower 32 bits of the
02863   // other are known zero.  They are the low and high operands respectively.
02864   uint64_t Masks[] = { KnownZero[0].getZExtValue(),
02865                        KnownZero[1].getZExtValue() };
02866   unsigned High, Low;
02867   if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
02868     High = 1, Low = 0;
02869   else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
02870     High = 0, Low = 1;
02871   else
02872     return Op;
02873 
02874   SDValue LowOp = Ops[Low];
02875   SDValue HighOp = Ops[High];
02876 
02877   // If the high part is a constant, we're better off using IILH.
02878   if (HighOp.getOpcode() == ISD::Constant)
02879     return Op;
02880 
02881   // If the low part is a constant that is outside the range of LHI,
02882   // then we're better off using IILF.
02883   if (LowOp.getOpcode() == ISD::Constant) {
02884     int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
02885     if (!isInt<16>(Value))
02886       return Op;
02887   }
02888 
02889   // Check whether the high part is an AND that doesn't change the
02890   // high 32 bits and just masks out low bits.  We can skip it if so.
02891   if (HighOp.getOpcode() == ISD::AND &&
02892       HighOp.getOperand(1).getOpcode() == ISD::Constant) {
02893     SDValue HighOp0 = HighOp.getOperand(0);
02894     uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
02895     if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
02896       HighOp = HighOp0;
02897   }
02898 
02899   // Take advantage of the fact that all GR32 operations only change the
02900   // low 32 bits by truncating Low to an i32 and inserting it directly
02901   // using a subreg.  The interesting cases are those where the truncation
02902   // can be folded.
02903   SDLoc DL(Op);
02904   SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
02905   return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
02906                                    MVT::i64, HighOp, Low32);
02907 }
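      // Example of the pattern handled above: in
      //   (or (shl X, 32), (zext i32 Y to i64))
      // the shift clears the low half of the first operand and the zext
      // clears the high half of the second, so the OR becomes an insertion
      // of Y into the low 32 bits of the shifted X, with the TRUNCATE of
      // the zext folding away.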
02908 
02909 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
02910                                           SelectionDAG &DAG) const {
02911   EVT VT = Op.getValueType();
02912   SDLoc DL(Op);
02913   Op = Op.getOperand(0);
02914 
02915   // Handle vector types via VPOPCT.
02916   if (VT.isVector()) {
02917     Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
02918     Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
02919     switch (VT.getVectorElementType().getSizeInBits()) {
02920     case 8:
02921       break;
02922     case 16: {
02923       Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
02924       SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
02925       SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
02926       Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
02927       Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
02928       break;
02929     }
02930     case 32: {
02931       SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
02932                                 DAG.getConstant(0, DL, MVT::i32));
02933       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
02934       break;
02935     }
02936     case 64: {
02937       SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
02938                                 DAG.getConstant(0, DL, MVT::i32));
02939       Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
02940       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
02941       break;
02942     }
02943     default:
02944       llvm_unreachable("Unexpected type");
02945     }
02946     return Op;
02947   }
02948 
02949   // Get the known-zero mask for the operand.
02950   APInt KnownZero, KnownOne;
02951   DAG.computeKnownBits(Op, KnownZero, KnownOne);
02952   unsigned NumSignificantBits = (~KnownZero).getActiveBits();
02953   if (NumSignificantBits == 0)
02954     return DAG.getConstant(0, DL, VT);
02955 
02956   // Skip known-zero high parts of the operand.
02957   int64_t OrigBitSize = VT.getSizeInBits();
02958   int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
02959   BitSize = std::min(BitSize, OrigBitSize);
02960 
02961   // The POPCNT instruction counts the number of bits in each byte.
02962   Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
02963   Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
02964   Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
02965 
02966   // Add up per-byte counts in a binary tree.  All bits of Op at
02967   // position larger than BitSize remain zero throughout.
02968   for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
02969     SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
02970     if (BitSize != OrigBitSize)
02971       Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
02972                         DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
02973     Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
02974   }
02975 
02976   // Extract overall result from high byte.
02977   if (BitSize > 8)
02978     Op = DAG.getNode(ISD::SRL, DL, VT, Op,
02979                      DAG.getConstant(BitSize - 8, DL, VT));
02980 
02981   return Op;
02982 }
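      // Worked example for the scalar path above (i32, all 32 bits
      // significant): Op = 0x01010101 has one set bit per byte, so POPCNT
      // yields 0x01010101.  The tree then computes
      // 0x01010101 + 0x01010000 = 0x02020101 and then
      // 0x02020101 + 0x02010100 = 0x04030201; the high byte 0x04 holds the
      // total, extracted by the final shift right of BitSize - 8 = 24.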
02983 
02984 // Op is an atomic load.  Lower it into a normal volatile load.
02985 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
02986                                                 SelectionDAG &DAG) const {
02987   auto *Node = cast<AtomicSDNode>(Op.getNode());
02988   return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
02989                         Node->getChain(), Node->getBasePtr(),
02990                         Node->getMemoryVT(), Node->getMemOperand());
02991 }
02992 
02993 // Op is an atomic store.  Lower it into a normal volatile store followed
02994 // by a serialization.
02995 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
02996                                                  SelectionDAG &DAG) const {
02997   auto *Node = cast<AtomicSDNode>(Op.getNode());
02998   SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
02999                                     Node->getBasePtr(), Node->getMemoryVT(),
03000                                     Node->getMemOperand());
03001   return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
03002                                     Chain), 0);
03003 }
03004 
03005 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation.  Lower the 8- and
03006 // 16-bit cases into the fullword ATOMIC_LOADW_* operation given by Opcode.
03007 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
03008                                                    SelectionDAG &DAG,
03009                                                    unsigned Opcode) const {
03010   auto *Node = cast<AtomicSDNode>(Op.getNode());
03011 
03012   // 32-bit operations need no code outside the main loop.
03013   EVT NarrowVT = Node->getMemoryVT();
03014   EVT WideVT = MVT::i32;
03015   if (NarrowVT == WideVT)
03016     return Op;
03017 
03018   int64_t BitSize = NarrowVT.getSizeInBits();
03019   SDValue ChainIn = Node->getChain();
03020   SDValue Addr = Node->getBasePtr();
03021   SDValue Src2 = Node->getVal();
03022   MachineMemOperand *MMO = Node->getMemOperand();
03023   SDLoc DL(Node);
03024   EVT PtrVT = Addr.getValueType();
03025 
03026   // Convert atomic subtracts of constants into additions.
03027   if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
03028     if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
03029       Opcode = SystemZISD::ATOMIC_LOADW_ADD;
03030       Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
03031     }
03032 
03033   // Get the address of the containing word.
03034   SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
03035                                     DAG.getConstant(-4, DL, PtrVT));
03036 
03037   // Get the number of bits that the word must be rotated left in order
03038   // to bring the field to the top bits of a GR32.
03039   SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
03040                                  DAG.getConstant(3, DL, PtrVT));
03041   BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
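        // For example, an i8 field at byte offset 1 within its aligned word
        // gives Addr * 8 == 8 (mod 32); rotating the big-endian word left by
        // 8 bits brings that byte to the top of the GR32.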
03042 
03043   // Get the complementing shift amount, for rotating a field in the top
03044   // bits back to its proper position.
03045   SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
03046                                     DAG.getConstant(0, DL, WideVT), BitShift);
03047 
03048   // Extend the source operand to 32 bits and prepare it for the inner loop.
03049   // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
03050   // operations require the source to be shifted in advance.  (This shift
03051   // can be folded if the source is constant.)  For AND and NAND, the lower
03052   // bits must be set, while for other opcodes they should be left clear.
03053   if (Opcode != SystemZISD::ATOMIC_SWAPW)
03054     Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
03055                        DAG.getConstant(32 - BitSize, DL, WideVT));
03056   if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
03057       Opcode == SystemZISD::ATOMIC_LOADW_NAND)
03058     Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
03059                        DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
03060 
03061   // Construct the ATOMIC_LOADW_* node.
03062   SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
03063   SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
03064                     DAG.getConstant(BitSize, DL, WideVT) };
03065   SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
03066                                              NarrowVT, MMO);
03067 
03068   // Rotate the result of the final CS so that the field is in the lower
03069   // bits of a GR32, then truncate it.
03070   SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
03071                                     DAG.getConstant(BitSize, DL, WideVT));
03072   SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
03073 
03074   SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
03075   return DAG.getMergeValues(RetOps, DL);
03076 }
03077 
03078 // Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
03079 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
03080 // operations into additions.
03081 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
03082                                                     SelectionDAG &DAG) const {
03083   auto *Node = cast<AtomicSDNode>(Op.getNode());
03084   EVT MemVT = Node->getMemoryVT();
03085   if (MemVT == MVT::i32 || MemVT == MVT::i64) {
03086     // A full-width operation.
03087     assert(Op.getValueType() == MemVT && "Mismatched VTs");
03088     SDValue Src2 = Node->getVal();
03089     SDValue NegSrc2;
03090     SDLoc DL(Src2);
03091 
03092     if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
03093       // Use an addition if the operand is constant and either LAA(G) is
03094       // available or the negative value is in the range of A(G)FHI.
03095       int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
03096       if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
03097         NegSrc2 = DAG.getConstant(Value, DL, MemVT);
03098     } else if (Subtarget.hasInterlockedAccess1())
03099       // Use LAA(G) if available.
03100       NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
03101                             Src2);
03102 
03103     if (NegSrc2.getNode())
03104       return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
03105                            Node->getChain(), Node->getBasePtr(), NegSrc2,
03106                            Node->getMemOperand(), Node->getOrdering(),
03107                            Node->getSynchScope());
03108 
03109     // Use the node as-is.
03110     return Op;
03111   }
03112 
03113   return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
03114 }
03115 
03116 // Op is an 8-, 16- or 32-bit ATOMIC_CMP_SWAP operation.  Lower the 8- and
03117 // 16-bit cases into a fullword ATOMIC_CMP_SWAPW operation.
03118 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
03119                                                     SelectionDAG &DAG) const {
03120   auto *Node = cast<AtomicSDNode>(Op.getNode());
03121 
03122   // We have native support for 32-bit compare and swap.
03123   EVT NarrowVT = Node->getMemoryVT();
03124   EVT WideVT = MVT::i32;
03125   if (NarrowVT == WideVT)
03126     return Op;
03127 
03128   int64_t BitSize = NarrowVT.getSizeInBits();
03129   SDValue ChainIn = Node->getOperand(0);
03130   SDValue Addr = Node->getOperand(1);
03131   SDValue CmpVal = Node->getOperand(2);
03132   SDValue SwapVal = Node->getOperand(3);
03133   MachineMemOperand *MMO = Node->getMemOperand();
03134   SDLoc DL(Node);
03135   EVT PtrVT = Addr.getValueType();
03136 
03137   // Get the address of the containing word.
03138   SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
03139                                     DAG.getConstant(-4, DL, PtrVT));
03140 
03141   // Get the number of bits that the word must be rotated left in order
03142   // to bring the field to the top bits of a GR32.
03143   SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
03144                                  DAG.getConstant(3, DL, PtrVT));
03145   BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
03146 
03147   // Get the complementing shift amount, for rotating a field in the top
03148   // bits back to its proper position.
03149   SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
03150                                     DAG.getConstant(0, DL, WideVT), BitShift);
03151 
03152   // Construct the ATOMIC_CMP_SWAPW node.
03153   SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
03154   SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
03155                     NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
03156   SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
03157                                              VTList, Ops, NarrowVT, MMO);
03158   return AtomicOp;
03159 }
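      // The ATOMIC_CMP_SWAPW pseudo built here is expanded later (see
      // emitAtomicCmpSwapW elsewhere in this file) into a CS loop that uses
      // RLL to rotate the field into and out of position.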
03160 
03161 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
03162                                               SelectionDAG &DAG) const {
03163   MachineFunction &MF = DAG.getMachineFunction();
03164   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
03165   return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
03166                             SystemZ::R15D, Op.getValueType());
03167 }
03168 
03169 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
03170                                                  SelectionDAG &DAG) const {
03171   MachineFunction &MF = DAG.getMachineFunction();
03172   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
03173   return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
03174                           SystemZ::R15D, Op.getOperand(1));
03175 }
03176 
03177 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
03178                                              SelectionDAG &DAG) const {
03179   bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
03180   if (!IsData)
03181     // Just preserve the chain.
03182     return Op.getOperand(0);
03183 
03184   SDLoc DL(Op);
03185   bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
03186   unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
03187   auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
03188   SDValue Ops[] = {
03189     Op.getOperand(0),
03190     DAG.getConstant(Code, DL, MVT::i32),
03191     Op.getOperand(1)
03192   };
03193   return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
03194                                  Node->getVTList(), Ops,
03195                                  Node->getMemoryVT(), Node->getMemOperand());
03196 }
03197 
03198 // Return an i32 that contains the value of CC immediately after After,
03199 // whose final result value must be of type MVT::Glue.
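      // IPM places the 2-bit condition code at bit position SystemZ::IPM_CC
      // (bits 29:28 of the 32-bit result), so the SRL leaves CC as a value
      // in the range [0, 3].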
03200 static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
03201   SDLoc DL(After);
03202   SDValue Glue = SDValue(After, After->getNumValues() - 1);
03203   SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
03204   return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
03205                      DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
03206 }
03207 
03208 SDValue
03209 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
03210                                               SelectionDAG &DAG) const {
03211   unsigned Opcode, CCValid;
03212   if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
03213     assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
03214     SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
03215     SDValue CC = getCCResult(DAG, Glued.getNode());
03216     DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
03217     return SDValue();
03218   }
03219 
03220   return SDValue();
03221 }
03222 
03223 SDValue
03224 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
03225                                                SelectionDAG &DAG) const {
03226   unsigned Opcode, CCValid;
03227   if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
03228     SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
03229     SDValue CC = getCCResult(DAG, Glued.getNode());
03230     if (Op->getNumValues() == 1)
03231       return CC;
03232     assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
03233     return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
03234         Glued, CC);
03235   }
03236 
03237   unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
03238   switch (Id) {
03239   case Intrinsic::s390_vpdi:
03240     return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
03241                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
03242 
03243   case Intrinsic::s390_vperm:
03244     return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
03245                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
03246 
03247   case Intrinsic::s390_vuphb:
03248   case Intrinsic::s390_vuphh:
03249   case Intrinsic::s390_vuphf:
03250     return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
03251                        Op.getOperand(1));
03252 
03253   case Intrinsic::s390_vuplhb:
03254   case Intrinsic::s390_vuplhh:
03255   case Intrinsic::s390_vuplhf:
03256     return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
03257                        Op.getOperand(1));
03258 
03259   case Intrinsic::s390_vuplb:
03260   case Intrinsic::s390_vuplhw:
03261   case Intrinsic::s390_vuplf:
03262     return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
03263                        Op.getOperand(1));
03264 
03265   case Intrinsic::s390_vupllb:
03266   case Intrinsic::s390_vupllh:
03267   case Intrinsic::s390_vupllf:
03268     return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
03269                        Op.getOperand(1));
03270 
03271   case Intrinsic::s390_vsumb:
03272   case Intrinsic::s390_vsumh:
03273   case Intrinsic::s390_vsumgh:
03274   case Intrinsic::s390_vsumgf:
03275   case Intrinsic::s390_vsumqf:
03276   case Intrinsic::s390_vsumqg:
03277     return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
03278                        Op.getOperand(1), Op.getOperand(2));
03279   }
03280 
03281   return SDValue();
03282 }
03283 
03284 namespace {
03285 // Says that SystemZISD operation Opcode can be used to perform the equivalent
03286 // of a VPERM with permute vector Bytes.  If Opcode takes three operands,
03287 // Operand is the constant third operand, otherwise it is the number of
03288 // bytes in each element of the result.
03289 struct Permute {
03290   unsigned Opcode;
03291   unsigned Operand;
03292   unsigned char Bytes[SystemZ::VectorBytes];
03293 };
03294 } // end anonymous namespace
03295 
03296 static const Permute PermuteForms[] = {
03297   // VMRHG
03298   { SystemZISD::MERGE_HIGH, 8,
03299     { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
03300   // VMRHF
03301   { SystemZISD::MERGE_HIGH, 4,
03302     { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
03303   // VMRHH
03304   { SystemZISD::MERGE_HIGH, 2,
03305     { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
03306   // VMRHB
03307   { SystemZISD::MERGE_HIGH, 1,
03308     { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
03309   // VMRLG
03310   { SystemZISD::MERGE_LOW, 8,
03311     { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
03312   // VMRLF
03313   { SystemZISD::MERGE_LOW, 4,
03314     { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
03315   // VMRLH
03316   { SystemZISD::MERGE_LOW, 2,
03317     { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
03318   // VMRLB
03319   { SystemZISD::MERGE_LOW, 1,
03320     { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
03321   // VPKG
03322   { SystemZISD::PACK, 4,
03323     { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
03324   // VPKF
03325   { SystemZISD::PACK, 2,
03326     { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
03327   // VPKH
03328   { SystemZISD::PACK, 1,
03329     { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
03330   // VPDI V1, V2, 4  (low half of V1, high half of V2)
03331   { SystemZISD::PERMUTE_DWORDS, 4,
03332     { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
03333   // VPDI V1, V2, 1  (high half of V1, low half of V2)
03334   { SystemZISD::PERMUTE_DWORDS, 1,
03335     { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
03336 };
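      // For example, the VMRHG entry above matches a v2i64 shuffle with LLVM
      // mask <0, 2>: selector bytes 0-7 take element 0 of operand 0 and
      // selector bytes 16-23 take element 0 of operand 1.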
03337 
03338 // Called after matching a vector shuffle against a particular pattern.
03339 // Both the original shuffle and the pattern have two vector operands.
03340 // OpNos[0] is the operand of the original shuffle that should be used for
03341 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
03342 // OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
03343 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
03344 // for operands 0 and 1 of the pattern.
03345 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
03346   if (OpNos[0] < 0) {
03347     if (OpNos[1] < 0)
03348       return false;
03349     OpNo0 = OpNo1 = OpNos[1];
03350   } else if (OpNos[1] < 0) {
03351     OpNo0 = OpNo1 = OpNos[0];
03352   } else {
03353     OpNo0 = OpNos[0];
03354     OpNo1 = OpNos[1];
03355   }
03356   return true;
03357 }
03358 
03359 // Bytes is a VPERM-like permute vector, except that -1 is used for
03360 // undefined bytes.  Return true if the VPERM can be implemented using P.
03361 // When returning true set OpNo0 to the VPERM operand that should be
03362 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
03363 //
03364 // For example, if swapping the VPERM operands allows P to match, OpNo0
03365 // will be 1 and OpNo1 will be 0.  If instead Bytes only refers to one
03366 // operand, but rewriting it to use two duplicated operands allows it to
03367 // match P, then OpNo0 and OpNo1 will be the same.
03368 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
03369                          unsigned &OpNo0, unsigned &OpNo1) {
03370   int OpNos[] = { -1, -1 };
03371   for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
03372     int Elt = Bytes[I];
03373     if (Elt >= 0) {
03374       // Make sure that the two permute vectors use the same suboperand
03375       // byte number.  Only the operand numbers (the high bits) are
03376       // allowed to differ.
03377       if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
03378         return false;
03379       int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
03380       int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
03381       // Make sure that the operand mappings are consistent with previous
03382       // elements.
03383       if (OpNos[ModelOpNo] == 1 - RealOpNo)
03384         return false;
03385       OpNos[ModelOpNo] = RealOpNo;
03386     }
03387   }
03388   return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
03389 }
03390 
03391 // As above, but search for a matching permute.
03392 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
03393                                    unsigned &OpNo0, unsigned &OpNo1) {
03394   for (auto &P : PermuteForms)
03395     if (matchPermute(Bytes, P, OpNo0, OpNo1))
03396       return &P;
03397   return nullptr;
03398 }
03399 
03400 // Bytes is a VPERM-like permute vector, except that -1 is used for
03401 // undefined bytes.  This permute is an operand of an outer permute.
03402 // See whether redistributing the -1 bytes gives a shuffle that can be
03403 // implemented using P.  If so, set Transform to a VPERM-like permute vector
03404 // that, when applied to the result of P, gives the original permute in Bytes.
03405 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
03406                                const Permute &P,
03407                                SmallVectorImpl<int> &Transform) {
03408   unsigned To = 0;
03409   for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
03410     int Elt = Bytes[From];
03411     if (Elt < 0)
03412       // Byte number From of the result is undefined.
03413       Transform[From] = -1;
03414     else {
03415       while (P.Bytes[To] != Elt) {
03416         To += 1;
03417         if (To == SystemZ::VectorBytes)
03418           return false;
03419       }
03420       Transform[From] = To;
03421     }
03422   }
03423   return true;
03424 }
03425 
03426 // As above, but search for a matching permute.
03427 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
03428                                          SmallVectorImpl<int> &Transform) {
03429   for (auto &P : PermuteForms)
03430     if (matchDoublePermute(Bytes, P, Transform))
03431       return &P;
03432   return nullptr;
03433 }
03434 
03435 // Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
03436 // as if it had type vNi8.
03437 static void getVPermMask(ShuffleVectorSDNode *VSN,
03438                          SmallVectorImpl<int> &Bytes) {
03439   EVT VT = VSN->getValueType(0);
03440   unsigned NumElements = VT.getVectorNumElements();
03441   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
03442   Bytes.resize(NumElements * BytesPerElement, -1);
03443   for (unsigned I = 0; I < NumElements; ++I) {
03444     int Index = VSN->getMaskElt(I);
03445     if (Index >= 0)
03446       for (unsigned J = 0; J < BytesPerElement; ++J)
03447         Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
03448   }
03449 }
03450 
03451 // Bytes is a VPERM-like permute vector, except that -1 is used for
03452 // undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
03453 // the result come from a contiguous sequence of bytes from one input.
03454 // Set Base to the selector for the first byte if so.
03455 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
03456                             unsigned BytesPerElement, int &Base) {
03457   Base = -1;
03458   for (unsigned I = 0; I < BytesPerElement; ++I) {
03459     if (Bytes[Start + I] >= 0) {
03460       unsigned Elem = Bytes[Start + I];
03461       if (Base < 0) {
03462         Base = Elem - I;
03463         // Make sure the bytes would come from one input operand.
03464         if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
03465           return false;
03466       } else if (unsigned(Base) != Elem - I)
03467         return false;
03468     }
03469   }
03470   return true;
03471 }
03472 
03473 // Bytes is a VPERM-like permute vector, except that -1 is used for
03474 // undefined bytes.  Return true if it can be performed using VSLDI.
03475 // When returning true, set StartIndex to the shift amount and OpNo0
03476 // and OpNo1 to the VPERM operands that should be used as the first
03477 // and second shift operand respectively.
03478 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
03479                                unsigned &StartIndex, unsigned &OpNo0,
03480                                unsigned &OpNo1) {
03481   int OpNos[] = { -1, -1 };
03482   int Shift = -1;
03483   for (unsigned I = 0; I < 16; ++I) {
03484     int Index = Bytes[I];
03485     if (Index >= 0) {
03486       int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
03487       int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
03488       int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
03489       if (Shift < 0)
03490         Shift = ExpectedShift;
03491       else if (Shift != ExpectedShift)
03492         return false;
03493       // Make sure that the operand mappings are consistent with previous
03494       // elements.
03495       if (OpNos[ModelOpNo] == 1 - RealOpNo)
03496         return false;
03497       OpNos[ModelOpNo] = RealOpNo;
03498     }
03499   }
03500   StartIndex = Shift;
03501   return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
03502 }
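      // For example, Bytes == { 1, 2, ..., 15, 16 } has a uniform shift of 1
      // and selects the last 15 bytes of operand 0 followed by the first
      // byte of operand 1: exactly VSLDI with StartIndex == 1, OpNo0 == 0
      // and OpNo1 == 1.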
03503 
03504 // Create a node that performs P on operands Op0 and Op1, casting the
03505 // operands to the appropriate type.  The type of the result is determined by P.
03506 static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
03507                               const Permute &P, SDValue Op0, SDValue Op1) {
03508   // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
03509   // elements of a PACK are twice as wide as the outputs.
03510   unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
03511                       P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
03512                       P.Operand);
03513   // Cast both operands to the appropriate type.
03514   MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
03515                               SystemZ::VectorBytes / InBytes);
03516   Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
03517   Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
03518   SDValue Op;
03519   if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
03520     SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
03521     Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
03522   } else if (P.Opcode == SystemZISD::PACK) {
03523     MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
03524                                  SystemZ::VectorBytes / P.Operand);
03525     Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
03526   } else {
03527     Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
03528   }
03529   return Op;
03530 }
03531 
03532 // Bytes is a VPERM-like permute vector, except that -1 is used for
03533 // undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
03534 // VSLDI or VPERM.
03535 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
03536                                      const SmallVectorImpl<int> &Bytes) {
03537   for (unsigned I = 0; I < 2; ++I)
03538     Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
03539 
03540   // First see whether VSLDI can be used.
03541   unsigned StartIndex, OpNo0, OpNo1;
03542   if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
03543     return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
03544                        Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
03545 
03546   // Fall back on VPERM.  Construct an SDNode for the permute vector.
03547   SDValue IndexNodes[SystemZ::VectorBytes];
03548   for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
03549     if (Bytes[I] >= 0)
03550       IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
03551     else
03552       IndexNodes[I] = DAG.getUNDEF(MVT::i32);
03553   SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
03554   return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
03555 }
03556 
03557 namespace {
03558 // Describes a general N-operand vector shuffle.
03559 struct GeneralShuffle {
03560   GeneralShuffle(EVT vt) : VT(vt) {}
03561   void addUndef();
03562   void add(SDValue, unsigned);
03563   SDValue getNode(SelectionDAG &, SDLoc);
03564 
03565   // The operands of the shuffle.
03566   SmallVector<SDValue, SystemZ::VectorBytes> Ops;
03567 
03568   // Index I is -1 if byte I of the result is undefined.  Otherwise the
03569   // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
03570   // Bytes[I] / SystemZ::VectorBytes.
03571   SmallVector<int, SystemZ::VectorBytes> Bytes;
03572 
03573   // The type of the shuffle result.
03574   EVT VT;
03575 };
03576 } // end anonymous namespace
03577 
03578 // Add an extra undefined element to the shuffle.
03579 void GeneralShuffle::addUndef() {
03580   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
03581   for (unsigned I = 0; I < BytesPerElement; ++I)
03582     Bytes.push_back(-1);
03583 }
03584 
03585 // Add an extra element to the shuffle, taking it from element Elem of Op.
03586 // A null Op indicates a vector input whose value will be calculated later;
03587 // there is at most one such input per shuffle and it always has the same
03588 // type as the result.
03589 void GeneralShuffle::add(SDValue Op, unsigned Elem) {
03590   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
03591 
03592   // The source vector can have wider elements than the result,
03593   // either through an explicit TRUNCATE or because of type legalization.
03594   // We want the least significant part.
03595   EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
03596   unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
03597   assert(FromBytesPerElement >= BytesPerElement &&
03598          "Invalid EXTRACT_VECTOR_ELT");
03599   unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
03600                    (FromBytesPerElement - BytesPerElement));
03601 
03602   // Look through things like shuffles and bitcasts.
03603   while (Op.getNode()) {
03604     if (Op.getOpcode() == ISD::BITCAST)
03605       Op = Op.getOperand(0);
03606     else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
03607       // See whether the bytes we need come from a contiguous part of one
03608       // operand.
03609       SmallVector<int, SystemZ::VectorBytes> OpBytes;
03610       getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
03611       int NewByte;
03612       if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
03613         break;
03614       if (NewByte < 0) {
03615         addUndef();
03616         return;
03617       }
03618       Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
03619       Byte = unsigned(NewByte) % SystemZ::VectorBytes;
03620     } else if (Op.getOpcode() == ISD::UNDEF) {
03621       addUndef();
03622       return;
03623     } else
03624       break;
03625   }
03626 
03627   // Make sure that the source of the extraction is in Ops.
03628   unsigned OpNo = 0;
03629   for (; OpNo < Ops.size(); ++OpNo)
03630     if (Ops[OpNo] == Op)
03631       break;
03632   if (OpNo == Ops.size())
03633     Ops.push_back(Op);
03634 
03635   // Add the element to Bytes.
03636   unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
03637   for (unsigned I = 0; I < BytesPerElement; ++I)
03638     Bytes.push_back(Base + I);
03639 }
03640 
03641 // Return SDNodes for the completed shuffle.
03642 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
03643   assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
03644 
03645   if (Ops.empty())
03646     return DAG.getUNDEF(VT);
03647 
03648   // Make sure that there are at least two shuffle operands.
03649   if (Ops.size() == 1)
03650     Ops.push_back(DAG.getUNDEF(MVT::v16i8));
03651 
03652   // Create a tree of shuffles, deferring root node until after the loop.
03653   // Try to redistribute the undefined elements of non-root nodes so that
03654   // the non-root shuffles match something like a pack or merge, then adjust
03655   // the parent node's permute vector to compensate for the new order.
03656   // Among other things, this copes with vectors like <2 x i16> that were
03657   // padded with undefined elements during type legalization.
03658   //
03659   // In the best case this redistribution will lead to the whole tree
03660   // using packs and merges.  It should rarely be a loss in other cases.
03661   unsigned Stride = 1;
03662   for (; Stride * 2 < Ops.size(); Stride *= 2) {
03663     for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
03664       SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
03665 
03666       // Create a mask for just these two operands.
03667       SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
03668       for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
03669         unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
03670         unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
03671         if (OpNo == I)
03672           NewBytes[J] = Byte;
03673         else if (OpNo == I + Stride)
03674           NewBytes[J] = SystemZ::VectorBytes + Byte;
03675         else
03676           NewBytes[J] = -1;
03677       }
03678       // See if it would be better to reorganize NewMask to avoid using VPERM.
03679       SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
03680       if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
03681         Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
03682         // Applying NewBytesMap to Ops[I] gets back to NewBytes.
03683         for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
03684           if (NewBytes[J] >= 0) {
03685             assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
03686                    "Invalid double permute");
03687             Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
03688           } else
03689             assert(NewBytesMap[J] < 0 && "Invalid double permute");
03690         }
03691       } else {
03692         // Just use NewBytes on the operands.
03693         Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
03694         for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
03695           if (NewBytes[J] >= 0)
03696             Bytes[J] = I * SystemZ::VectorBytes + J;
03697       }
03698     }
03699   }
03700 
03701   // Now we just have 2 inputs.  Put the second operand in Ops[1].
03702   if (Stride > 1) {
03703     Ops[1] = Ops[Stride];
03704     for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
03705       if (Bytes[I] >= int(SystemZ::VectorBytes))
03706         Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
03707   }
03708 
03709   // Look for an instruction that can do the permute without resorting
03710   // to VPERM.
03711   unsigned OpNo0, OpNo1;
03712   SDValue Op;
03713   if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
03714     Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
03715   else
03716     Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
03717   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
03718 }
03719 
03720 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
03721 static bool isScalarToVector(SDValue Op) {
03722   for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
03723     if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
03724       return false;
03725   return true;
03726 }
03727 
03728 // Return a vector of type VT that contains Value in the first element.
03729 // The other elements don't matter.
03730 static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
03731                                    SDValue Value) {
03732   // If we have a constant, replicate it to all elements and let the
03733   // BUILD_VECTOR lowering take care of it.
03734   if (Value.getOpcode() == ISD::Constant ||
03735       Value.getOpcode() == ISD::ConstantFP) {
03736     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
03737     return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
03738   }
03739   if (Value.getOpcode() == ISD::UNDEF)
03740     return DAG.getUNDEF(VT);
03741   return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
03742 }
03743 
03744 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
03745 // element 1.  Used for cases in which replication is cheap.
03746 static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
03747                                  SDValue Op0, SDValue Op1) {
03748   if (Op0.getOpcode() == ISD::UNDEF) {
03749     if (Op1.getOpcode() == ISD::UNDEF)
03750       return DAG.getUNDEF(VT);
03751     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
03752   }
03753   if (Op1.getOpcode() == ISD::UNDEF)
03754     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
03755   return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
03756                      buildScalarToVector(DAG, DL, VT, Op0),
03757                      buildScalarToVector(DAG, DL, VT, Op1));
03758 }
03759 
03760 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
03761 // vector for them.
03762 static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
03763                           SDValue Op1) {
03764   if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
03765     return DAG.getUNDEF(MVT::v2i64);
03766   // If one of the two inputs is undefined then replicate the other one,
03767   // in order to avoid using another register unnecessarily.
03768   if (Op0.getOpcode() == ISD::UNDEF)
03769     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
03770   else if (Op1.getOpcode() == ISD::UNDEF)
03771     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
03772   else {
03773     Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
03774     Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
03775   }
03776   return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
03777 }
03778 
03779 // Try to represent constant BUILD_VECTOR node BVN using a
03780 // SystemZISD::BYTE_MASK-style mask.  Store the mask value in Mask
03781 // on success.
03782 static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
03783   EVT ElemVT = BVN->getValueType(0).getVectorElementType();
03784   unsigned BytesPerElement = ElemVT.getStoreSize();
03785   for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
03786     SDValue Op = BVN->getOperand(I);
03787     if (Op.getOpcode() != ISD::UNDEF) {
03788       uint64_t Value;
03789       if (Op.getOpcode() == ISD::Constant)
03790         Value = cast<ConstantSDNode>(Op)->getZExtValue();
03791       else if (Op.getOpcode() == ISD::ConstantFP)
03792         Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
03793                  .getZExtValue());
03794       else
03795         return false;
03796       for (unsigned J = 0; J < BytesPerElement; ++J) {
03797         uint64_t Byte = (Value >> (J * 8)) & 0xff;
03798         if (Byte == 0xff)
03799           Mask |= 1 << ((E - I - 1) * BytesPerElement + J);
03800         else if (Byte != 0)
03801           return false;
03802       }
03803     }
03804   }
03805   return true;
03806 }
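// Illustrative sketch (editor's example, not from the original source):
// for a v4i32 BUILD_VECTOR <0, -1, 0, -1>, BytesPerElement is 4 and the
// loop above sets one mask bit per 0xff byte, with bit 0 corresponding
// to the last byte of the vector:
//   I = 1 sets bits (4 - 1 - 1) * 4 + {0..3}, i.e. bits 8..11
//   I = 3 sets bits (4 - 3 - 1) * 4 + {0..3}, i.e. bits 0..3
// so Mask becomes 0x0f0f.  Any byte other than 0x00 or 0xff causes the
// match to fail.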
03807 
03808 // Try to load a vector constant in which BitsPerElement-bit value Value
03809 // is replicated to fill the vector.  VT is the type of the resulting
03810 // constant, which may have elements of a different size from BitsPerElement.
03811 // Return the SDValue of the constant on success, otherwise return
03812 // an empty value.
03813 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
03814                                        const SystemZInstrInfo *TII,
03815                                        SDLoc DL, EVT VT, uint64_t Value,
03816                                        unsigned BitsPerElement) {
03817   // Signed 16-bit values can be replicated using VREPI.
03818   int64_t SignedValue = SignExtend64(Value, BitsPerElement);
03819   if (isInt<16>(SignedValue)) {
03820     MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
03821                                  SystemZ::VectorBits / BitsPerElement);
03822     SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
03823                              DAG.getConstant(SignedValue, DL, MVT::i32));
03824     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
03825   }
03826   // See whether rotating the constant left some N places gives a value that
03827   // is one less than a power of 2 (i.e. all zeros followed by all ones).
03828   // If so, we can use VGM.
03829   unsigned Start, End;
03830   if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
03831     // isRxSBGMask returns the bit numbers for a full 64-bit value,
03832     // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to
03833     // bit numbers for a BitsPerElement value, so that 0 denotes
03834     // 1 << (BitsPerElement-1).
03835     Start -= 64 - BitsPerElement;
03836     End -= 64 - BitsPerElement;
03837     MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
03838                                  SystemZ::VectorBits / BitsPerElement);
03839     SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
03840                              DAG.getConstant(Start, DL, MVT::i32),
03841                              DAG.getConstant(End, DL, MVT::i32));
03842     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
03843   }
03844   return SDValue();
03845 }
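// Illustrative sketch (editor's example): for Value = 0x0000ffff with
// BitsPerElement = 32, SignedValue is 65535 and fails isInt<16>, but
// isRxSBGMask recognizes a contiguous mask with Start = 48 and End = 63
// in 64-bit numbering.  The adjustment above turns these into 16 and 31,
// so the constant is built as a ROTATE_MASK (VGM) setting bits 16..31
// of each 32-bit element.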
03846 
03847 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
03848 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
03849 // the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
03850 // would benefit from this representation and return it if so.
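// For example (an illustrative case, not from the original comments):
// a v4i32 BUILD_VECTOR of (extract_vector_elt X, 0),
// (extract_vector_elt X, 2), A and B becomes a shuffle of X with a
// residual BUILD_VECTOR (A, B, undef, undef), taking elements 0 and 2
// from X and elements 0 and 1 from the residue.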
03851 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
03852                                      BuildVectorSDNode *BVN) {
03853   EVT VT = BVN->getValueType(0);
03854   unsigned NumElements = VT.getVectorNumElements();
03855 
03856   // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
03857   // on byte vectors.  If there are non-EXTRACT_VECTOR_ELT elements that still
03858   // need a BUILD_VECTOR, add an additional placeholder operand for that
03859   // BUILD_VECTOR and store its operands in ResidueOps.
03860   GeneralShuffle GS(VT);
03861   SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
03862   bool FoundOne = false;
03863   for (unsigned I = 0; I < NumElements; ++I) {
03864     SDValue Op = BVN->getOperand(I);
03865     if (Op.getOpcode() == ISD::TRUNCATE)
03866       Op = Op.getOperand(0);
03867     if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
03868         Op.getOperand(1).getOpcode() == ISD::Constant) {
03869       unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
03870       GS.add(Op.getOperand(0), Elem);
03871       FoundOne = true;
03872     } else if (Op.getOpcode() == ISD::UNDEF) {
03873       GS.addUndef();
03874     } else {
03875       GS.add(SDValue(), ResidueOps.size());
03876       ResidueOps.push_back(Op);
03877     }
03878   }
03879 
03880   // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
03881   if (!FoundOne)
03882     return SDValue();
03883 
03884   // Create the BUILD_VECTOR for the remaining elements, if any.
03885   if (!ResidueOps.empty()) {
03886     while (ResidueOps.size() < NumElements)
03887       ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType()));
03888     for (auto &Op : GS.Ops) {
03889       if (!Op.getNode()) {
03890         Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
03891         break;
03892       }
03893     }
03894   }
03895   return GS.getNode(DAG, SDLoc(BVN));
03896 }
03897 
03898 // Combine GPR scalar values Elems into a vector of type VT.
03899 static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
03900                            SmallVectorImpl<SDValue> &Elems) {
03901   // See whether there is a single replicated value.
03902   SDValue Single;
03903   unsigned NumElements = Elems.size();
03904   unsigned Count = 0;
03905   for (auto Elem : Elems) {
03906     if (Elem.getOpcode() != ISD::UNDEF) {
03907       if (!Single.getNode())
03908         Single = Elem;
03909       else if (Elem != Single) {
03910         Single = SDValue();
03911         break;
03912       }
03913       Count += 1;
03914     }
03915   }
03916   // There are three cases here:
03917   //
03918   // - if the only defined element is a loaded one, the best sequence
03919   //   is a replicating load.
03920   //
03921   // - otherwise, if the only defined element is an i64 value, we will
03922   //   end up with the same VLVGP sequence regardless of whether we short-cut
03923   //   for replication or fall through to the later code.
03924   //
03925   // - otherwise, if the only defined element is an i32 or smaller value,
03926   //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
03927   //   This is only a win if the single defined element is used more than once.
03928   //   In other cases we're better off using a single VLVGx.
03929   if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
03930     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
03931 
03932   // The best way of building a v2i64 from two i64s is to use VLVGP.
03933   if (VT == MVT::v2i64)
03934     return joinDwords(DAG, DL, Elems[0], Elems[1]);
03935 
03936   // Use a 64-bit merge high to combine two doubles.
03937   if (VT == MVT::v2f64)
03938     return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
03939 
03940   // Build v4f32 values directly from the FPRs:
03941   //
03942   //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
03943   //         V              V         VMRHF
03944   //      <ABxx>         <CDxx>
03945   //                V                 VMRHG
03946   //              <ABCD>
03947   if (VT == MVT::v4f32) {
03948     SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
03949     SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
03950     // Avoid unnecessary undefs by reusing the other operand.
03951     if (Op01.getOpcode() == ISD::UNDEF)
03952       Op01 = Op23;
03953     else if (Op23.getOpcode() == ISD::UNDEF)
03954       Op23 = Op01;
03955     // Merging identical replications is a no-op.
03956     if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
03957       return Op01;
03958     Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
03959     Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
03960     SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
03961                              DL, MVT::v2i64, Op01, Op23);
03962     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
03963   }
03964 
03965   // Collect the constant terms.
03966   SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
03967   SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
03968 
03969   unsigned NumConstants = 0;
03970   for (unsigned I = 0; I < NumElements; ++I) {
03971     SDValue Elem = Elems[I];
03972     if (Elem.getOpcode() == ISD::Constant ||
03973         Elem.getOpcode() == ISD::ConstantFP) {
03974       NumConstants += 1;
03975       Constants[I] = Elem;
03976       Done[I] = true;
03977     }
03978   }
03979   // If there was at least one constant, fill in the other elements of
03980   // Constants with undefs to get a full vector constant and use that
03981   // as the starting point.
03982   SDValue Result;
03983   if (NumConstants > 0) {
03984     for (unsigned I = 0; I < NumElements; ++I)
03985       if (!Constants[I].getNode())
03986         Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
03987     Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
03988   } else {
03989     // Otherwise try to use VLVGP to start the sequence in order to
03990     // avoid a false dependency on any previous contents of the vector
03991     // register.  This only makes sense if one of the associated elements
03992     // is defined.
03993     unsigned I1 = NumElements / 2 - 1;
03994     unsigned I2 = NumElements - 1;
03995     bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
03996     bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
03997     if (Def1 || Def2) {
03998       SDValue Elem1 = Elems[Def1 ? I1 : I2];
03999       SDValue Elem2 = Elems[Def2 ? I2 : I1];
04000       Result = DAG.getNode(ISD::BITCAST, DL, VT,
04001                            joinDwords(DAG, DL, Elem1, Elem2));
04002       Done[I1] = true;
04003       Done[I2] = true;
04004     } else
04005       Result = DAG.getUNDEF(VT);
04006   }
04007 
04008   // Use VLVGx to insert the other elements.
04009   for (unsigned I = 0; I < NumElements; ++I)
04010     if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
04011       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
04012                            DAG.getConstant(I, DL, MVT::i32));
04013   return Result;
04014 }
04015 
04016 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
04017                                                  SelectionDAG &DAG) const {
04018   const SystemZInstrInfo *TII =
04019     static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
04020   auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
04021   SDLoc DL(Op);
04022   EVT VT = Op.getValueType();
04023 
04024   if (BVN->isConstant()) {
04025     // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
04026     // preferred way of creating all-zero and all-one vectors, so give it
04027     // priority over other methods below.
04028     uint64_t Mask = 0;
04029     if (tryBuildVectorByteMask(BVN, Mask)) {
04030       SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
04031                                DAG.getConstant(Mask, DL, MVT::i32));
04032       return DAG.getNode(ISD::BITCAST, DL, VT, Op);
04033     }
04034 
04035     // Try using some form of replication.
04036     APInt SplatBits, SplatUndef;
04037     unsigned SplatBitSize;
04038     bool HasAnyUndefs;
04039     if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
04040                              8, true) &&
04041         SplatBitSize <= 64) {
04042       // First try assuming that any undefined bits above the highest set bit
04043       // and below the lowest set bit are 1s.  This increases the likelihood of
04044       // being able to use a sign-extended element value in VECTOR REPLICATE
04045       // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
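      // As an illustrative sketch (editor's example): with SplatBits =
      // 0x00f0 and SplatUndef = 0xff0f in a 16-bit element, Lower becomes
      // 0x000f and Upper becomes 0xff00, so Value is 0xffff, which
      // VECTOR REPLICATE IMMEDIATE can materialize as the signed value -1.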
04046       uint64_t SplatBitsZ = SplatBits.getZExtValue();
04047       uint64_t SplatUndefZ = SplatUndef.getZExtValue();
04048       uint64_t Lower = (SplatUndefZ
04049                         & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
04050       uint64_t Upper = (SplatUndefZ
04051                         & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
04052       uint64_t Value = SplatBitsZ | Upper | Lower;
04053       SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
04054                                            SplatBitSize);
04055       if (Op.getNode())
04056         return Op;
04057 
04058       // Now try assuming that any undefined bits between the first and
04059       // last defined set bits are set.  This increases the chances of
04060       // using a non-wraparound mask.
04061       uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
04062       Value = SplatBitsZ | Middle;
04063       Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
04064       if (Op.getNode())
04065         return Op;
04066     }
04067 
04068     // Fall back to loading it from memory.
04069     return SDValue();
04070   }
04071 
04072   // See if we should use shuffles to construct the vector from other vectors.
04073   SDValue Res = tryBuildVectorShuffle(DAG, BVN);
04074   if (Res.getNode())
04075     return Res;
04076 
04077   // Detect SCALAR_TO_VECTOR conversions.
04078   if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
04079     return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
04080 
04081   // Otherwise use buildVector to build the vector up from GPRs.
04082   unsigned NumElements = Op.getNumOperands();
04083   SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
04084   for (unsigned I = 0; I < NumElements; ++I)
04085     Ops[I] = Op.getOperand(I);
04086   return buildVector(DAG, DL, VT, Ops);
04087 }
04088 
04089 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
04090                                                    SelectionDAG &DAG) const {
04091   auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
04092   SDLoc DL(Op);
04093   EVT VT = Op.getValueType();
04094   unsigned NumElements = VT.getVectorNumElements();
04095 
04096   if (VSN->isSplat()) {
04097     SDValue Op0 = Op.getOperand(0);
04098     unsigned Index = VSN->getSplatIndex();
04099     assert(Index < VT.getVectorNumElements() &&
04100            "Splat index should be defined and in first operand");
04101     // See whether the value we're splatting is directly available as a scalar.
04102     if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
04103         Op0.getOpcode() == ISD::BUILD_VECTOR)
04104       return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
04105     // Otherwise keep it as a vector-to-vector operation.
04106     return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
04107                        DAG.getConstant(Index, DL, MVT::i32));
04108   }
04109 
04110   GeneralShuffle GS(VT);
04111   for (unsigned I = 0; I < NumElements; ++I) {
04112     int Elt = VSN->getMaskElt(I);
04113     if (Elt < 0)
04114       GS.addUndef();
04115     else
04116       GS.add(Op.getOperand(unsigned(Elt) / NumElements),
04117              unsigned(Elt) % NumElements);
04118   }
04119   return GS.getNode(DAG, SDLoc(VSN));
04120 }
04121 
04122 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
04123                                                      SelectionDAG &DAG) const {
04124   SDLoc DL(Op);
04125   // Just insert the scalar into element 0 of an undefined vector.
04126   return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
04127                      Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
04128                      Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
04129 }
04130 
04131 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
04132                                                       SelectionDAG &DAG) const {
04133   // Handle insertions of floating-point values.
04134   SDLoc DL(Op);
04135   SDValue Op0 = Op.getOperand(0);
04136   SDValue Op1 = Op.getOperand(1);
04137   SDValue Op2 = Op.getOperand(2);
04138   EVT VT = Op.getValueType();
04139 
04140   // Inserting a value at a constant index of a v2f64 can be done using
04141   // VPDI.  However, if the inserted value is a bitcast or a constant,
04142   // it is better to use GPRs, as below.
04143   if (VT == MVT::v2f64 &&
04144       Op1.getOpcode() != ISD::BITCAST &&
04145       Op1.getOpcode() != ISD::ConstantFP &&
04146       Op2.getOpcode() == ISD::Constant) {
04147     uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
04148     unsigned Mask = VT.getVectorNumElements() - 1;
04149     if (Index <= Mask)
04150       return Op;
04151   }
04152 
04153   // Otherwise bitcast to the equivalent integer form and insert via a GPR.
04154   MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
04155   MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
04156   SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
04157                             DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
04158                             DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
04159   return DAG.getNode(ISD::BITCAST, DL, VT, Res);
04160 }
04161 
04162 SDValue
04163 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
04164                                                SelectionDAG &DAG) const {
04165   // Handle extractions of floating-point values.
04166   SDLoc DL(Op);
04167   SDValue Op0 = Op.getOperand(0);
04168   SDValue Op1 = Op.getOperand(1);
04169   EVT VT = Op.getValueType();
04170   EVT VecVT = Op0.getValueType();
04171 
04172   // Extractions of constant indices can be done directly.
04173   if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
04174     uint64_t Index = CIndexN->getZExtValue();
04175     unsigned Mask = VecVT.getVectorNumElements() - 1;
04176     if (Index <= Mask)
04177       return Op;
04178   }
04179 
04180   // Otherwise bitcast to the equivalent integer form and extract via a GPR.
04181   MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
04182   MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
04183   SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
04184                             DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
04185   return DAG.getNode(ISD::BITCAST, DL, VT, Res);
04186 }
04187 
04188 SDValue
04189 SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
04190                                               unsigned UnpackHigh) const {
04191   SDValue PackedOp = Op.getOperand(0);
04192   EVT OutVT = Op.getValueType();
04193   EVT InVT = PackedOp.getValueType();
04194   unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
04195   unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
04196   do {
04197     FromBits *= 2;
04198     EVT NewVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
04199                                  SystemZ::VectorBits / FromBits);
04200     PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), NewVT, PackedOp);
04201   } while (FromBits != ToBits);
04202   return PackedOp;
04203 }
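// Illustrative trace (editor's example): extending v16i8 elements to
// v4i32 takes two trips around the loop above, with FromBits going
// 8 -> 16 -> 32: one unpack to v8i16 followed by one to v4i32.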
04204 
04205 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
04206                                           unsigned ByScalar) const {
04207   // Look for cases where a vector shift can use the *_BY_SCALAR form.
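  // For instance (illustrative): (shl <4 x i32> X, splat 3) becomes
  // VSHL_BY_SCALAR X, 3, and a splat of a variable amount becomes
  // VSHL_BY_SCALAR of the amount truncated to i32.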
04208   SDValue Op0 = Op.getOperand(0);
04209   SDValue Op1 = Op.getOperand(1);
04210   SDLoc DL(Op);
04211   EVT VT = Op.getValueType();
04212   unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
04213 
04214   // See whether the shift vector is a splat represented as BUILD_VECTOR.
04215   if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
04216     APInt SplatBits, SplatUndef;
04217     unsigned SplatBitSize;
04218     bool HasAnyUndefs;
04219     // Check for constant splats.  Use ElemBitSize as the minimum element
04220     // width and reject splats that need wider elements.
04221     if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
04222                              ElemBitSize, true) &&
04223         SplatBitSize == ElemBitSize) {
04224       SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
04225                                       DL, MVT::i32);
04226       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
04227     }
04228     // Check for variable splats.
04229     BitVector UndefElements;
04230     SDValue Splat = BVN->getSplatValue(&UndefElements);
04231     if (Splat) {
04232       // Since i32 is the smallest legal type, we either need a no-op
04233       // or a truncation.
04234       SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
04235       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
04236     }
04237   }
04238 
04239   // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
04240   // and the shift amount is directly available in a GPR.
04241   if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
04242     if (VSN->isSplat()) {
04243       SDValue VSNOp0 = VSN->getOperand(0);
04244       unsigned Index = VSN->getSplatIndex();
04245       assert(Index < VT.getVectorNumElements() &&
04246              "Splat index should be defined and in first operand");
04247       if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
04248           VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
04249         // Since i32 is the smallest legal type, we either need a no-op
04250         // or a truncation.
04251         SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
04252                                     VSNOp0.getOperand(Index));
04253         return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
04254       }
04255     }
04256   }
04257 
04258   // Otherwise just treat the current form as legal.
04259   return Op;
04260 }
04261 
04262 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
04263                                               SelectionDAG &DAG) const {
04264   switch (Op.getOpcode()) {
04265   case ISD::BR_CC:
04266     return lowerBR_CC(Op, DAG);
04267   case ISD::SELECT_CC:
04268     return lowerSELECT_CC(Op, DAG);
04269   case ISD::SETCC:
04270     return lowerSETCC(Op, DAG);
04271   case ISD::GlobalAddress:
04272     return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
04273   case ISD::GlobalTLSAddress:
04274     return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
04275   case ISD::BlockAddress:
04276     return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
04277   case ISD::JumpTable:
04278     return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
04279   case ISD::ConstantPool:
04280     return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
04281   case ISD::BITCAST:
04282     return lowerBITCAST(Op, DAG);
04283   case ISD::VASTART:
04284     return lowerVASTART(Op, DAG);
04285   case ISD::VACOPY:
04286     return lowerVACOPY(Op, DAG);
04287   case ISD::DYNAMIC_STACKALLOC:
04288     return lowerDYNAMIC_STACKALLOC(Op, DAG);
04289   case ISD::SMUL_LOHI:
04290     return lowerSMUL_LOHI(Op, DAG);
04291   case ISD::UMUL_LOHI:
04292     return lowerUMUL_LOHI(Op, DAG);
04293   case ISD::SDIVREM:
04294     return lowerSDIVREM(Op, DAG);
04295   case ISD::UDIVREM:
04296     return lowerUDIVREM(Op, DAG);
04297   case ISD::OR:
04298     return lowerOR(Op, DAG);
04299   case ISD::CTPOP:
04300     return lowerCTPOP(Op, DAG);
04301   case ISD::CTLZ_ZERO_UNDEF:
04302     return DAG.getNode(ISD::CTLZ, SDLoc(Op),
04303                        Op.getValueType(), Op.getOperand(0));
04304   case ISD::CTTZ_ZERO_UNDEF:
04305     return DAG.getNode(ISD::CTTZ, SDLoc(Op),
04306                        Op.getValueType(), Op.getOperand(0));
04307   case ISD::ATOMIC_SWAP:
04308     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
04309   case ISD::ATOMIC_STORE:
04310     return lowerATOMIC_STORE(Op, DAG);
04311   case ISD::ATOMIC_LOAD:
04312     return lowerATOMIC_LOAD(Op, DAG);
04313   case ISD::ATOMIC_LOAD_ADD:
04314     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
04315   case ISD::ATOMIC_LOAD_SUB:
04316     return lowerATOMIC_LOAD_SUB(Op, DAG);
04317   case ISD::ATOMIC_LOAD_AND:
04318     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
04319   case ISD::ATOMIC_LOAD_OR:
04320     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
04321   case ISD::ATOMIC_LOAD_XOR:
04322     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
04323   case ISD::ATOMIC_LOAD_NAND:
04324     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
04325   case ISD::ATOMIC_LOAD_MIN:
04326     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
04327   case ISD::ATOMIC_LOAD_MAX:
04328     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
04329   case ISD::ATOMIC_LOAD_UMIN:
04330     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
04331   case ISD::ATOMIC_LOAD_UMAX:
04332     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
04333   case ISD::ATOMIC_CMP_SWAP:
04334     return lowerATOMIC_CMP_SWAP(Op, DAG);
04335   case ISD::STACKSAVE:
04336     return lowerSTACKSAVE(Op, DAG);
04337   case ISD::STACKRESTORE:
04338     return lowerSTACKRESTORE(Op, DAG);
04339   case ISD::PREFETCH:
04340     return lowerPREFETCH(Op, DAG);
04341   case ISD::INTRINSIC_W_CHAIN:
04342     return lowerINTRINSIC_W_CHAIN(Op, DAG);
04343   case ISD::INTRINSIC_WO_CHAIN:
04344     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
04345   case ISD::BUILD_VECTOR:
04346     return lowerBUILD_VECTOR(Op, DAG);
04347   case ISD::VECTOR_SHUFFLE:
04348     return lowerVECTOR_SHUFFLE(Op, DAG);
04349   case ISD::SCALAR_TO_VECTOR:
04350     return lowerSCALAR_TO_VECTOR(Op, DAG);
04351   case ISD::INSERT_VECTOR_ELT:
04352     return lowerINSERT_VECTOR_ELT(Op, DAG);
04353   case ISD::EXTRACT_VECTOR_ELT:
04354     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
04355   case ISD::SIGN_EXTEND_VECTOR_INREG:
04356     return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
04357   case ISD::ZERO_EXTEND_VECTOR_INREG:
04358     return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
04359   case ISD::SHL:
04360     return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
04361   case ISD::SRL:
04362     return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
04363   case ISD::SRA:
04364     return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
04365   default:
04366     llvm_unreachable("Unexpected node to lower");
04367   }
04368 }
04369 
04370 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
04371 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
04372   switch (Opcode) {
04373     OPCODE(RET_FLAG);
04374     OPCODE(CALL);
04375     OPCODE(SIBCALL);
04376     OPCODE(TLS_GDCALL);
04377     OPCODE(TLS_LDCALL);
04378     OPCODE(PCREL_WRAPPER);
04379     OPCODE(PCREL_OFFSET);
04380     OPCODE(IABS);
04381     OPCODE(ICMP);
04382     OPCODE(FCMP);
04383     OPCODE(TM);
04384     OPCODE(BR_CCMASK);
04385     OPCODE(SELECT_CCMASK);
04386     OPCODE(ADJDYNALLOC);
04387     OPCODE(EXTRACT_ACCESS);
04388     OPCODE(POPCNT);
04389     OPCODE(UMUL_LOHI64);
04390     OPCODE(SDIVREM32);
04391     OPCODE(SDIVREM64);
04392     OPCODE(UDIVREM32);
04393     OPCODE(UDIVREM64);
04394     OPCODE(MVC);
04395     OPCODE(MVC_LOOP);
04396     OPCODE(NC);
04397     OPCODE(NC_LOOP);
04398     OPCODE(OC);
04399     OPCODE(OC_LOOP);
04400     OPCODE(XC);
04401     OPCODE(XC_LOOP);
04402     OPCODE(CLC);
04403     OPCODE(CLC_LOOP);
04404     OPCODE(STPCPY);
04405     OPCODE(STRCMP);
04406     OPCODE(SEARCH_STRING);
04407     OPCODE(IPM);
04408     OPCODE(SERIALIZE);
04409     OPCODE(TBEGIN);
04410     OPCODE(TBEGIN_NOFLOAT);
04411     OPCODE(TEND);
04412     OPCODE(BYTE_MASK);
04413     OPCODE(ROTATE_MASK);
04414     OPCODE(REPLICATE);
04415     OPCODE(JOIN_DWORDS);
04416     OPCODE(SPLAT);
04417     OPCODE(MERGE_HIGH);
04418     OPCODE(MERGE_LOW);
04419     OPCODE(SHL_DOUBLE);
04420     OPCODE(PERMUTE_DWORDS);
04421     OPCODE(PERMUTE);
04422     OPCODE(PACK);
04423     OPCODE(PACKS_CC);
04424     OPCODE(PACKLS_CC);
04425     OPCODE(UNPACK_HIGH);
04426     OPCODE(UNPACKL_HIGH);
04427     OPCODE(UNPACK_LOW);
04428     OPCODE(UNPACKL_LOW);
04429     OPCODE(VSHL_BY_SCALAR);
04430     OPCODE(VSRL_BY_SCALAR);
04431     OPCODE(VSRA_BY_SCALAR);
04432     OPCODE(VSUM);
04433     OPCODE(VICMPE);
04434     OPCODE(VICMPH);
04435     OPCODE(VICMPHL);
04436     OPCODE(VICMPES);
04437     OPCODE(VICMPHS);
04438     OPCODE(VICMPHLS);
04439     OPCODE(VFCMPE);
04440     OPCODE(VFCMPH);
04441     OPCODE(VFCMPHE);
04442     OPCODE(VFCMPES);
04443     OPCODE(VFCMPHS);
04444     OPCODE(VFCMPHES);
04445     OPCODE(VFTCI);
04446     OPCODE(VEXTEND);
04447     OPCODE(VROUND);
04448     OPCODE(VTM);
04449     OPCODE(VFAE_CC);
04450     OPCODE(VFAEZ_CC);
04451     OPCODE(VFEE_CC);
04452     OPCODE(VFEEZ_CC);
04453     OPCODE(VFENE_CC);
04454     OPCODE(VFENEZ_CC);
04455     OPCODE(VISTR_CC);
04456     OPCODE(VSTRC_CC);
04457     OPCODE(VSTRCZ_CC);
04458     OPCODE(ATOMIC_SWAPW);
04459     OPCODE(ATOMIC_LOADW_ADD);
04460     OPCODE(ATOMIC_LOADW_SUB);
04461     OPCODE(ATOMIC_LOADW_AND);
04462     OPCODE(ATOMIC_LOADW_OR);
04463     OPCODE(ATOMIC_LOADW_XOR);
04464     OPCODE(ATOMIC_LOADW_NAND);
04465     OPCODE(ATOMIC_LOADW_MIN);
04466     OPCODE(ATOMIC_LOADW_MAX);
04467     OPCODE(ATOMIC_LOADW_UMIN);
04468     OPCODE(ATOMIC_LOADW_UMAX);
04469     OPCODE(ATOMIC_CMP_SWAPW);
04470     OPCODE(PREFETCH);
04471   }
04472   return nullptr;
04473 #undef OPCODE
04474 }
04475 
04476 // Return true if VT is a vector whose elements are a whole number of bytes
04477 // in width.
04478 static bool canTreatAsByteVector(EVT VT) {
04479   return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0;
04480 }
04481 
04482 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
04483 // producing a result of type ResVT.  Op is a possibly bitcast version
04484 // of the input vector and Index is the index (based on type VecVT) that
04485 // should be extracted.  Return the new extraction if a simplification
04486 // was possible or if Force is true.
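// For example (an illustrative sketch): extracting i32 element 1 from
// (bitcast v4i32 (BUILD_VECTOR v2i64 A, B)) looks through the bitcast,
// notes that bytes [4, 8) are the low half of 64-bit operand A, and
// simplifies the extraction to (truncate i32 A).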
04487 SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
04488                                               SDValue Op, unsigned Index,
04489                                               DAGCombinerInfo &DCI,
04490                                               bool Force) const {
04491   SelectionDAG &DAG = DCI.DAG;
04492 
04493   // The number of bytes being extracted.
04494   unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
04495 
04496   for (;;) {
04497     unsigned Opcode = Op.getOpcode();
04498     if (Opcode == ISD::BITCAST)
04499       // Look through bitcasts.
04500       Op = Op.getOperand(0);
04501     else if (Opcode == ISD::VECTOR_SHUFFLE &&
04502              canTreatAsByteVector(Op.getValueType())) {
04503       // Get a VPERM-like permute mask and see whether the bytes covered
04504       // by the extracted element are a contiguous sequence from one
04505       // source operand.
04506       SmallVector<int, SystemZ::VectorBytes> Bytes;
04507       getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
04508       int First;
04509       if (!getShuffleInput(Bytes, Index * BytesPerElement,
04510                            BytesPerElement, First))
04511         break;
04512       if (First < 0)
04513         return DAG.getUNDEF(ResVT);
04514       // Make sure the contiguous sequence starts at a multiple of the
04515       // original element size.
04516       unsigned Byte = unsigned(First) % Bytes.size();
04517       if (Byte % BytesPerElement != 0)
04518         break;
04519       // We can get the extracted value directly from an input.
04520       Index = Byte / BytesPerElement;
04521       Op = Op.getOperand(unsigned(First) / Bytes.size());
04522       Force = true;
04523     } else if (Opcode == ISD::BUILD_VECTOR &&
04524                canTreatAsByteVector(Op.getValueType())) {
04525       // We can only optimize this case if the BUILD_VECTOR elements are
04526       // at least as wide as the extracted value.
04527       EVT OpVT = Op.getValueType();
04528       unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
04529       if (OpBytesPerElement < BytesPerElement)
04530         break;
04531       // Make sure that the least-significant bit of the extracted value
04532       // is the least-significant bit of an input.
04533       unsigned End = (Index + 1) * BytesPerElement;
04534       if (End % OpBytesPerElement != 0)
04535         break;
04536       // We're extracting the low part of one operand of the BUILD_VECTOR.
04537       Op = Op.getOperand(End / OpBytesPerElement - 1);
04538       if (!Op.getValueType().isInteger()) {
04539         EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits());
04540         Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
04541         DCI.AddToWorklist(Op.getNode());
04542       }
04543       EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
04544       Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
04545       if (VT != ResVT) {
04546         DCI.AddToWorklist(Op.getNode());
04547         Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
04548       }
04549       return Op;
04550     } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
04551                 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
04552                 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
04553                canTreatAsByteVector(Op.getValueType()) &&
04554                canTreatAsByteVector(Op.getOperand(0).getValueType())) {
04555       // Make sure that only the unextended bits are significant.
04556       EVT ExtVT = Op.getValueType();
04557       EVT OpVT = Op.getOperand(0).getValueType();
04558       unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
04559       unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
04560       unsigned Byte = Index * BytesPerElement;
04561       unsigned SubByte = Byte % ExtBytesPerElement;
04562       unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
04563       if (SubByte < MinSubByte ||
04564           SubByte + BytesPerElement > ExtBytesPerElement)
04565         break;
04566       // Get the byte offset of the unextended element...
04567       Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
04568       // ...then add the byte offset relative to that element.
04569       Byte += SubByte - MinSubByte;
04570       if (Byte % BytesPerElement != 0)
04571         break;
04572       Op = Op.getOperand(0);
04573       Index = Byte / BytesPerElement;
04574       Force = true;
04575     } else
04576       break;
04577   }
04578   if (Force) {
04579     if (Op.getValueType() != VecVT) {
04580       Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
04581       DCI.AddToWorklist(Op.getNode());
04582     }
04583     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
04584                        DAG.getConstant(Index, DL, MVT::i32));
04585   }
04586   return SDValue();
04587 }
04588 
04589 // Optimize vector operations in scalar value Op on the basis that Op
04590 // is truncated to TruncVT.
04591 SDValue
04592 SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
04593                                               DAGCombinerInfo &DCI) const {
04594   // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
04595   // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
04596   // of type TruncVT.
04597   if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
04598       TruncVT.getSizeInBits() % 8 == 0) {
04599     SDValue Vec = Op.getOperand(0);
04600     EVT VecVT = Vec.getValueType();
04601     if (canTreatAsByteVector(VecVT)) {
04602       if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
04603         unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
04604         unsigned TruncBytes = TruncVT.getStoreSize();
04605         if (BytesPerElement % TruncBytes == 0) {
04606           // Calculate the value of Y' in the above description.  We are
04607           // splitting the original elements into Scale equal-sized pieces
04608           // and for truncation purposes want the last (least-significant)
04609           // of these pieces for IndexN.  This is easiest to do by calculating
04610           // the start index of the following element and then subtracting 1.
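          // For example (illustrative): truncating i32 element 1 of a
          // v4i32 to i8 gives Scale = 4 and NewIndex = (1 + 1) * 4 - 1 = 7,
          // the last byte of the original element when the vector is
          // reinterpreted as v16i8.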
04611           unsigned Scale = BytesPerElement / TruncBytes;
04612           unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
04613 
04614           // Defer the creation of the bitcast from X to combineExtract,
04615           // which might be able to optimize the extraction.
04616           VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
04617                                    VecVT.getStoreSize() / TruncBytes);
04618           EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
04619           return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
04620         }
04621       }
04622     }
04623   }
04624   return SDValue();
04625 }
04626 
04627 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
04628                                                  DAGCombinerInfo &DCI) const {
04629   SelectionDAG &DAG = DCI.DAG;
04630   unsigned Opcode = N->getOpcode();
04631   if (Opcode == ISD::SIGN_EXTEND) {
04632     // Convert (sext (ashr (shl X, C1), C2)) to
04633     // (ashr (shl (anyext X), C1'), C2'), since wider shifts are as
04634     // cheap as narrower ones.
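    // An illustrative instance (editor's example): for
    // (sext i32->i64 (ashr (shl X, 24), 24)), Extra is 32, giving
    // (ashr (shl (anyext X), 56), 56), which still sign-extends the
    // low 8 bits of X but does so in 64 bits.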
04635     SDValue N0 = N->getOperand(0);
04636     EVT VT = N->getValueType(0);
04637     if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
04638       auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
04639       SDValue Inner = N0.getOperand(0);
04640       if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
04641         if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
04642           unsigned Extra = (VT.getSizeInBits() -
04643                             N0.getValueType().getSizeInBits());
04644           unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
04645           unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
04646           EVT ShiftVT = N0.getOperand(1).getValueType();
04647           SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
04648                                     Inner.getOperand(0));
04649           SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
04650                                     DAG.getConstant(NewShlAmt, SDLoc(Inner),
04651                                                     ShiftVT));
04652           return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
04653                              DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
04654         }
04655       }
04656     }
04657   }
04658   if (Opcode == SystemZISD::MERGE_HIGH ||
04659       Opcode == SystemZISD::MERGE_LOW) {
04660     SDValue Op0 = N->getOperand(0);
04661     SDValue Op1 = N->getOperand(1);
04662     if (Op0.getOpcode() == ISD::BITCAST)
04663       Op0 = Op0.getOperand(0);
04664     if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
04665         cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
04666       // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
04667       // for v4f32.
04668       if (Op1 == N->getOperand(0))
04669         return Op1;
04670       // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
04671       EVT VT = Op1.getValueType();
04672       unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
04673       if (ElemBytes <= 4) {
04674         Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
04675                   SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
04676         EVT InVT = VT.changeVectorElementTypeToInteger();
04677         EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
04678                                      SystemZ::VectorBytes / ElemBytes / 2);
04679         if (VT != InVT) {
04680           Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
04681           DCI.AddToWorklist(Op1.getNode());
04682         }
04683         SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
04684         DCI.AddToWorklist(Op.getNode());
04685         return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
04686       }
04687     }
04688   }
04689   // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
04690   // for the extraction to be done on a vMiN value, so that we can use VSTE.
04691   // If X has wider elements then convert it to:
04692   // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
04693   if (Opcode == ISD::STORE) {
04694     auto *SN = cast<StoreSDNode>(N);
04695     EVT MemVT = SN->getMemoryVT();
04696     if (MemVT.isInteger()) {
04697       SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
04698                                              SN->getValue(), DCI);
04699       if (Value.getNode()) {
04700         DCI.AddToWorklist(Value.getNode());
04701 
04702         // Rewrite the store with the new form of stored value.
04703         return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
04704                                  SN->getBasePtr(), SN->getMemoryVT(),
04705                                  SN->getMemOperand());
04706       }
04707     }
04708   }
04709   // Try to simplify a vector extraction.
04710   if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
04711     if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
04712       SDValue Op0 = N->getOperand(0);
04713       EVT VecVT = Op0.getValueType();
04714       return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
04715                             IndexN->getZExtValue(), DCI, false);
04716     }
04717   }
04718   // (join_dwords X, X) == (replicate X)
04719   if (Opcode == SystemZISD::JOIN_DWORDS &&
04720       N->getOperand(0) == N->getOperand(1))
04721     return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
04722                        N->getOperand(0));
04723   // (fround (extract_vector_elt X 0))
04724   // (fround (extract_vector_elt X 1)) ->
04725   // (extract_vector_elt (VROUND X) 0)
04726   // (extract_vector_elt (VROUND X) 1)
04727   //
04728   // This is a special case since the target doesn't really support v2f32s.
04729   if (Opcode == ISD::FP_ROUND) {
04730     SDValue Op0 = N->getOperand(0);
04731     if (N->getValueType(0) == MVT::f32 &&
04732         Op0.hasOneUse() &&
04733         Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
04734         Op0.getOperand(0).getValueType() == MVT::v2f64 &&
04735         Op0.getOperand(1).getOpcode() == ISD::Constant &&
04736         cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
04737       SDValue Vec = Op0.getOperand(0);
04738       for (auto *U : Vec->uses()) {
04739         if (U != Op0.getNode() &&
04740             U->hasOneUse() &&
04741             U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
04742             U->getOperand(0) == Vec &&
04743             U->getOperand(1).getOpcode() == ISD::Constant &&
04744             cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
04745           SDValue OtherRound = SDValue(*U->use_begin(), 0);
04746           if (OtherRound.getOpcode() == ISD::FP_ROUND &&
04747               OtherRound.getOperand(0) == SDValue(U, 0) &&
04748               OtherRound.getValueType() == MVT::f32) {
04749             SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
04750                                          MVT::v4f32, Vec);
04751             DCI.AddToWorklist(VRound.getNode());
04752             SDValue Extract1 =
04753               DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
04754                           VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
04755             DCI.AddToWorklist(Extract1.getNode());
04756             DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
04757             SDValue Extract0 =
04758               DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
04759                           VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
04760             return Extract0;
04761           }
04762         }
04763       }
04764     }
04765   }
04766   return SDValue();
04767 }
04768 
04769 //===----------------------------------------------------------------------===//
04770 // Custom insertion
04771 //===----------------------------------------------------------------------===//
04772 
04773 // Create a new basic block after MBB.
04774 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
04775   MachineFunction &MF = *MBB->getParent();
04776   MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
04777   MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
04778   return NewMBB;
04779 }
04780 
04781 // Split MBB after MI and return the new block (the one that contains
04782 // instructions after MI).
04783 static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
04784                                           MachineBasicBlock *MBB) {
04785   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
04786   NewMBB->splice(NewMBB->begin(), MBB,
04787                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
04788   NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
04789   return NewMBB;
04790 }
04791 
04792 // Split MBB before MI and return the new block (the one that contains MI).
04793 static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
04794                                            MachineBasicBlock *MBB) {
04795   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
04796   NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
04797   NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
04798   return NewMBB;
04799 }
04800 
04801 // Force base value Base into a register before MI.  Return the register.
04802 static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
04803                          const SystemZInstrInfo *TII) {
04804   if (Base.isReg())
04805     return Base.getReg();
04806 
04807   MachineBasicBlock *MBB = MI->getParent();
04808   MachineFunction &MF = *MBB->getParent();
04809   MachineRegisterInfo &MRI = MF.getRegInfo();
04810 
04811   unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
04812   BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
04813     .addOperand(Base).addImm(0).addReg(0);
04814   return Reg;
04815 }
04816 
04817 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
04818 MachineBasicBlock *
04819 SystemZTargetLowering::emitSelect(MachineInstr *MI,
04820                                   MachineBasicBlock *MBB) const {
04821   const SystemZInstrInfo *TII =
04822       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
04823 
04824   unsigned DestReg  = MI->getOperand(0).getReg();
04825   unsigned TrueReg  = MI->getOperand(1).getReg();
04826   unsigned FalseReg = MI->getOperand(2).getReg();
04827   unsigned CCValid  = MI->getOperand(3).getImm();
04828   unsigned CCMask   = MI->getOperand(4).getImm();
04829   DebugLoc DL       = MI->getDebugLoc();
04830 
04831   MachineBasicBlock *StartMBB = MBB;
04832   MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
04833   MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
04834 
04835   //  StartMBB:
04836   //   BRC CCMask, JoinMBB
04837   //   # fallthrough to FalseMBB
04838   MBB = StartMBB;
04839   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
04840     .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
04841   MBB->addSuccessor(JoinMBB);
04842   MBB->addSuccessor(FalseMBB);
04843 
04844   //  FalseMBB:
04845   //   # fallthrough to JoinMBB
04846   MBB = FalseMBB;
04847   MBB->addSuccessor(JoinMBB);
04848 
04849   //  JoinMBB:
04850   //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
04851   //  ...
04852   MBB = JoinMBB;
04853   BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
04854     .addReg(TrueReg).addMBB(StartMBB)
04855     .addReg(FalseReg).addMBB(FalseMBB);
04856 
04857   MI->eraseFromParent();
04858   return JoinMBB;
04859 }
04860 
04861 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
04862 // StoreOpcode is the store to use and Invert says whether the store should
04863 // happen when the condition is false rather than true.  If a STORE ON
04864 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
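// For example (illustrative): without load/store-on-condition support a
// CondStore expands into a BRC that branches around a plain store,
// whereas with it the pseudo becomes a single STORE ON CONDITION using
// the (possibly inverted) CC mask.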
04865 MachineBasicBlock *
04866 SystemZTargetLowering::emitCondStore(MachineInstr *MI,
04867                                      MachineBasicBlock *MBB,
04868                                      unsigned StoreOpcode, unsigned STOCOpcode,
04869                                      bool Invert) const {
04870   const SystemZInstrInfo *TII =
04871       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
04872 
04873   unsigned SrcReg     = MI->getOperand(0).getReg();
04874   MachineOperand Base = MI->getOperand(1);
04875   int64_t Disp        = MI->getOperand(2).getImm();
04876   unsigned IndexReg   = MI->getOperand(3).getReg();
04877   unsigned CCValid    = MI->getOperand(4).getImm();
04878   unsigned CCMask     = MI->getOperand(5).getImm();
04879   DebugLoc DL         = MI->getDebugLoc();
04880 
04881   StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
04882 
04883   // Use STOCOpcode if possible.  We could use different store patterns in
04884   // order to avoid matching the index register, but the performance trade-offs
04885   // might be more complicated in that case.
04886   if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
04887     if (Invert)
04888       CCMask ^= CCValid;
04889     BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
04890       .addReg(SrcReg).addOperand(Base).addImm(Disp)
04891       .addImm(CCValid).addImm(CCMask);
04892     MI->eraseFromParent();
04893     return MBB;
04894   }
04895 
04896   // Get the condition needed to branch around the store.
04897   if (!Invert)
04898     CCMask ^= CCValid;
04899 
04900   MachineBasicBlock *StartMBB = MBB;
04901   MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
04902   MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
04903 
04904   //  StartMBB:
04905   //   BRC CCMask, JoinMBB
04906   //   # fallthrough to FalseMBB
04907   MBB = StartMBB;
04908   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
04909     .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
04910   MBB->addSuccessor(JoinMBB);
04911   MBB->addSuccessor(FalseMBB);
04912 
04913   //  FalseMBB:
04914   //   store %SrcReg, %Disp(%Index,%Base)
04915   //   # fallthrough to JoinMBB
04916   MBB = FalseMBB;
04917   BuildMI(MBB, DL, TII->get(StoreOpcode))
04918     .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
04919   MBB->addSuccessor(JoinMBB);
04920 
04921   MI->eraseFromParent();
04922   return JoinMBB;
04923 }
04924 
04925 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
04926 // or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
04927 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
04928 // BitSize is the width of the field in bits, or 0 if this is a partword
04929 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
04930 // is one of the operands.  Invert says whether the field should be
04931 // inverted after performing BinOpcode (e.g. for NAND).
04932 MachineBasicBlock *
04933 SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
04934                                             MachineBasicBlock *MBB,
04935                                             unsigned BinOpcode,
04936                                             unsigned BitSize,
04937                                             bool Invert) const {
04938   MachineFunction &MF = *MBB->getParent();
04939   const SystemZInstrInfo *TII =
04940       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
04941   MachineRegisterInfo &MRI = MF.getRegInfo();
04942   bool IsSubWord = (BitSize < 32);
04943 
04944   // Extract the operands.  Base can be a register or a frame index.
04945   // Src2 can be a register or immediate.
04946   unsigned Dest        = MI->getOperand(0).getReg();
04947   MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
04948   int64_t Disp         = MI->getOperand(2).getImm();
04949   MachineOperand Src2  = earlyUseOperand(MI->getOperand(3));
04950   unsigned BitShift    = (IsSubWord ? MI->getOperand(4).getReg() : 0);
04951   unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
04952   DebugLoc DL          = MI->getDebugLoc();
04953   if (IsSubWord)
04954     BitSize = MI->getOperand(6).getImm();
04955 
04956   // Subword operations use 32-bit registers.
04957   const TargetRegisterClass *RC = (BitSize <= 32 ?
04958                                    &SystemZ::GR32BitRegClass :
04959                                    &SystemZ::GR64BitRegClass);
04960   unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
04961   unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
04962 
04963   // Get the right opcodes for the displacement.
04964   LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
04965   CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
04966   assert(LOpcode && CSOpcode && "Displacement out of range");
04967 
04968   // Create virtual registers for temporary results.
04969   unsigned OrigVal       = MRI.createVirtualRegister(RC);
04970   unsigned OldVal        = MRI.createVirtualRegister(RC);
04971   unsigned NewVal        = (BinOpcode || IsSubWord ?
04972                             MRI.createVirtualRegister(RC) : Src2.getReg());
04973   unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
04974   unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
04975 
04976   // Insert a basic block for the main loop.
04977   MachineBasicBlock *StartMBB = MBB;
04978   MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
04979   MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
04980 
04981   //  StartMBB:
04982   //   ...
04983   //   %OrigVal = L Disp(%Base)
04984   //   # fall through to LoopMBB
04985   MBB = StartMBB;
04986   BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
04987     .addOperand(Base).addImm(Disp).addReg(0);
04988   MBB->addSuccessor(LoopMBB);
04989 
04990   //  LoopMBB:
04991   //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
04992   //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
04993   //   %RotatedNewVal = OP %RotatedOldVal, %Src2
04994   //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
04995   //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
04996   //   JNE LoopMBB
04997   //   # fall through to DoneMBB
04998   MBB = LoopMBB;
04999   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
05000     .addReg(OrigVal).addMBB(StartMBB)
05001     .addReg(Dest).addMBB(LoopMBB);
05002   if (IsSubWord)
05003     BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
05004       .addReg(OldVal).addReg(BitShift).addImm(0);
05005   if (Invert) {
05006     // Perform the operation normally and then invert every bit of the field.
05007     unsigned Tmp = MRI.createVirtualRegister(RC);
05008     BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
05009       .addReg(RotatedOldVal).addOperand(Src2);
05010     if (BitSize <= 32)
05011       // XILF with the upper BitSize bits set.
05012       BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
05013         .addReg(Tmp).addImm(-1U << (32 - BitSize));
05014     else {
05015       // Use LCGR and add -1 to the result, which is more compact than
05016       // an XILF, XILH pair.
05017       unsigned Tmp2 = MRI.createVirtualRegister(RC);
05018       BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
05019       BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
05020         .addReg(Tmp2).addImm(-1);
05021     }
05022   } else if (BinOpcode)
05023     // A simple binary operation.
05024     BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
05025       .addReg(RotatedOldVal).addOperand(Src2);
05026   else if (IsSubWord)
05027     // Use RISBG to rotate Src2 into position and use it to replace the
05028     // field in RotatedOldVal.
05029     BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
05030       .addReg(RotatedOldVal).addReg(Src2.getReg())
05031       .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
05032   if (IsSubWord)
05033     BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
05034       .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
05035   BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
05036     .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
05037   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05038     .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
05039   MBB->addSuccessor(LoopMBB);
05040   MBB->addSuccessor(DoneMBB);
05041 
05042   MI->eraseFromParent();
05043   return DoneMBB;
05044 }
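
// For reference, the expansion above is the usual load / modify /
// compare-and-swap retry loop.  A minimal C++ sketch of the full-word
// (BitSize == 32) case, using the GCC/Clang atomic builtins purely to
// illustrate the semantics; "op" is a placeholder for whatever BinOpcode
// computes, with an extra complement when Invert is set:
//
//   uint32_t atomicLoadBinary(uint32_t *Ptr, uint32_t Src2) {
//     uint32_t Old = *Ptr;                        // L
//     for (;;) {
//       uint32_t New = op(Old, Src2);             // BinOpcode (+ invert)
//       // CS: on failure Old is refreshed from memory, mirroring the
//       // PHI that feeds %Dest back around the loop.
//       if (__atomic_compare_exchange_n(Ptr, &Old, New, /*weak=*/false,
//                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
//         return Old;                             // value before the update
//     }
//   }
//
// The subword variants do the same on the containing aligned word, using
// the RLL rotations to move the field into and out of position.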
05045 
05046 // Implement EmitInstrWithCustomInserter for pseudo
05047 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
05048 // instruction that should be used to compare the current field with the
05049 // minimum or maximum value.  KeepOldMask is the BRC condition-code mask
05050 // for when the current field should be kept.  BitSize is the width of
05051 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
05052 MachineBasicBlock *
05053 SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
05054                                             MachineBasicBlock *MBB,
05055                                             unsigned CompareOpcode,
05056                                             unsigned KeepOldMask,
05057                                             unsigned BitSize) const {
05058   MachineFunction &MF = *MBB->getParent();
05059   const SystemZInstrInfo *TII =
05060       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
05061   MachineRegisterInfo &MRI = MF.getRegInfo();
05062   bool IsSubWord = (BitSize < 32);
05063 
05064   // Extract the operands.  Base can be a register or a frame index.
05065   unsigned Dest        = MI->getOperand(0).getReg();
05066   MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
05067   int64_t  Disp        = MI->getOperand(2).getImm();
05068   unsigned Src2        = MI->getOperand(3).getReg();
05069   unsigned BitShift    = (IsSubWord ? MI->getOperand(4).getReg() : 0);
05070   unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
05071   DebugLoc DL          = MI->getDebugLoc();
05072   if (IsSubWord)
05073     BitSize = MI->getOperand(6).getImm();
05074 
05075   // Subword operations use 32-bit registers.
05076   const TargetRegisterClass *RC = (BitSize <= 32 ?
05077                                    &SystemZ::GR32BitRegClass :
05078                                    &SystemZ::GR64BitRegClass);
05079   unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
05080   unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
05081 
05082   // Get the right opcodes for the displacement.
05083   LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
05084   CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
05085   assert(LOpcode && CSOpcode && "Displacement out of range");
05086 
05087   // Create virtual registers for temporary results.
05088   unsigned OrigVal       = MRI.createVirtualRegister(RC);
05089   unsigned OldVal        = MRI.createVirtualRegister(RC);
05090   unsigned NewVal        = MRI.createVirtualRegister(RC);
05091   unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
05092   unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
05093   unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
05094 
05095   // Insert 3 basic blocks for the loop.
05096   MachineBasicBlock *StartMBB  = MBB;
05097   MachineBasicBlock *DoneMBB   = splitBlockBefore(MI, MBB);
05098   MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB);
05099   MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
05100   MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
05101 
05102   //  StartMBB:
05103   //   ...
05104   //   %OrigVal     = L Disp(%Base)
05105   //   # fall through to LoopMBB
05106   MBB = StartMBB;
05107   BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
05108     .addOperand(Base).addImm(Disp).addReg(0);
05109   MBB->addSuccessor(LoopMBB);
05110 
05111   //  LoopMBB:
05112   //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
05113   //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
05114   //   CompareOpcode %RotatedOldVal, %Src2
05115   //   BRC KeepOldMask, UpdateMBB
05116   MBB = LoopMBB;
05117   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
05118     .addReg(OrigVal).addMBB(StartMBB)
05119     .addReg(Dest).addMBB(UpdateMBB);
05120   if (IsSubWord)
05121     BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
05122       .addReg(OldVal).addReg(BitShift).addImm(0);
05123   BuildMI(MBB, DL, TII->get(CompareOpcode))
05124     .addReg(RotatedOldVal).addReg(Src2);
05125   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05126     .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
05127   MBB->addSuccessor(UpdateMBB);
05128   MBB->addSuccessor(UseAltMBB);
05129 
05130   //  UseAltMBB:
05131   //   %RotatedAltVal = RISBG32 %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
05132   //   # fall through to UpdateMBB
05133   MBB = UseAltMBB;
05134   if (IsSubWord)
05135     BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
05136       .addReg(RotatedOldVal).addReg(Src2)
05137       .addImm(32).addImm(31 + BitSize).addImm(0);
05138   MBB->addSuccessor(UpdateMBB);
05139 
05140   //  UpdateMBB:
05141   //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
05142   //                        [ %RotatedAltVal, UseAltMBB ]
05143   //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
05144   //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
05145   //   JNE LoopMBB
05146   //   # fall through to DoneMBB
05147   MBB = UpdateMBB;
05148   BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
05149     .addReg(RotatedOldVal).addMBB(LoopMBB)
05150     .addReg(RotatedAltVal).addMBB(UseAltMBB);
05151   if (IsSubWord)
05152     BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
05153       .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
05154   BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
05155     .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
05156   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05157     .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
05158   MBB->addSuccessor(LoopMBB);
05159   MBB->addSuccessor(DoneMBB);
05160 
05161   MI->eraseFromParent();
05162   return DoneMBB;
05163 }
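
// A similar C++ sketch for the min/max loop above (illustrative only;
// keep() stands for the CompareOpcode/KeepOldMask test that decides
// whether the current field already satisfies the min/max):
//
//   uint32_t atomicLoadMinMax(uint32_t *Ptr, uint32_t Src2) {
//     uint32_t Old = *Ptr;                           // L
//     for (;;) {
//       uint32_t New = keep(Old, Src2) ? Old : Src2; // C(L)R + BRC
//       if (__atomic_compare_exchange_n(Ptr, &Old, New, /*weak=*/false,
//                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
//         return Old;
//     }
//   }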
05164 
05165 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
05166 // instruction MI.
05167 MachineBasicBlock *
05168 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
05169                                           MachineBasicBlock *MBB) const {
05170   MachineFunction &MF = *MBB->getParent();
05171   const SystemZInstrInfo *TII =
05172       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
05173   MachineRegisterInfo &MRI = MF.getRegInfo();
05174 
05175   // Extract the operands.  Base can be a register or a frame index.
05176   unsigned Dest        = MI->getOperand(0).getReg();
05177   MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
05178   int64_t  Disp        = MI->getOperand(2).getImm();
05179   unsigned OrigCmpVal  = MI->getOperand(3).getReg();
05180   unsigned OrigSwapVal = MI->getOperand(4).getReg();
05181   unsigned BitShift    = MI->getOperand(5).getReg();
05182   unsigned NegBitShift = MI->getOperand(6).getReg();
05183   int64_t  BitSize     = MI->getOperand(7).getImm();
05184   DebugLoc DL          = MI->getDebugLoc();
05185 
05186   const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
05187 
05188   // Get the right opcodes for the displacement.
05189   unsigned LOpcode  = TII->getOpcodeForOffset(SystemZ::L,  Disp);
05190   unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
05191   assert(LOpcode && CSOpcode && "Displacement out of range");
05192 
05193   // Create virtual registers for temporary results.
05194   unsigned OrigOldVal   = MRI.createVirtualRegister(RC);
05195   unsigned OldVal       = MRI.createVirtualRegister(RC);
05196   unsigned CmpVal       = MRI.createVirtualRegister(RC);
05197   unsigned SwapVal      = MRI.createVirtualRegister(RC);
05198   unsigned StoreVal     = MRI.createVirtualRegister(RC);
05199   unsigned RetryOldVal  = MRI.createVirtualRegister(RC);
05200   unsigned RetryCmpVal  = MRI.createVirtualRegister(RC);
05201   unsigned RetrySwapVal = MRI.createVirtualRegister(RC);
05202 
05203   // Insert 2 basic blocks for the loop.
05204   MachineBasicBlock *StartMBB = MBB;
05205   MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
05206   MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
05207   MachineBasicBlock *SetMBB   = emitBlockAfter(LoopMBB);
05208 
05209   //  StartMBB:
05210   //   ...
05211   //   %OrigOldVal     = L Disp(%Base)
05212   //   # fall through to LoopMBB
05213   MBB = StartMBB;
05214   BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
05215     .addOperand(Base).addImm(Disp).addReg(0);
05216   MBB->addSuccessor(LoopMBB);
05217 
05218   //  LoopMBB:
05219   //   %OldVal        = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
05220   //   %CmpVal        = phi [ %OrigCmpVal, StartMBB ], [ %RetryCmpVal, SetMBB ]
05221   //   %SwapVal       = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
05222   //   %Dest          = RLL %OldVal, BitSize(%BitShift)
05223   //                      ^^ The low BitSize bits contain the field
05224   //                         of interest.
05225   //   %RetryCmpVal   = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
05226   //                      ^^ Replace the upper 32-BitSize bits of the
05227   //                         comparison value with those that we loaded,
05228   //                         so that we can use a full word comparison.
05229   //   CR %Dest, %RetryCmpVal
05230   //   JNE DoneMBB
05231   //   # Fall through to SetMBB
05232   MBB = LoopMBB;
05233   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
05234     .addReg(OrigOldVal).addMBB(StartMBB)
05235     .addReg(RetryOldVal).addMBB(SetMBB);
05236   BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
05237     .addReg(OrigCmpVal).addMBB(StartMBB)
05238     .addReg(RetryCmpVal).addMBB(SetMBB);
05239   BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
05240     .addReg(OrigSwapVal).addMBB(StartMBB)
05241     .addReg(RetrySwapVal).addMBB(SetMBB);
05242   BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
05243     .addReg(OldVal).addReg(BitShift).addImm(BitSize);
05244   BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
05245     .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
05246   BuildMI(MBB, DL, TII->get(SystemZ::CR))
05247     .addReg(Dest).addReg(RetryCmpVal);
05248   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05249     .addImm(SystemZ::CCMASK_ICMP)
05250     .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
05251   MBB->addSuccessor(DoneMBB);
05252   MBB->addSuccessor(SetMBB);
05253 
05254   //  SetMBB:
05255   //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
05256   //                      ^^ Replace the upper 32-BitSize bits of the new
05257   //                         value with those that we loaded.
05258   //   %StoreVal    = RLL %RetrySwapVal, -BitSize(%NegBitShift)
05259   //                      ^^ Rotate the new field to its proper position.
05260   //   %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
05261   //   JNE LoopMBB
05262   //   # fall through to DoneMBB
05263   MBB = SetMBB;
05264   BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
05265     .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
05266   BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
05267     .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
05268   BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
05269     .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
05270   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05271     .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
05272   MBB->addSuccessor(LoopMBB);
05273   MBB->addSuccessor(DoneMBB);
05274 
05275   MI->eraseFromParent();
05276   return DoneMBB;
05277 }
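
// In outline, the loop above widens a partword compare-and-swap into a
// full-word CS.  A rough sketch (illustrative; Mask covers the low
// BitSize bits of the word once the RLL has rotated the field there):
//
//   Loaded   = rotl(*Ptr, Shift);                    // RLL
//   CmpFull  = (Loaded & ~Mask) | (CmpVal & Mask);   // RISBG32
//   if (Loaded != CmpFull)                           // CR + JNE
//     return Loaded;                                 // comparison failed
//   SwapFull = (Loaded & ~Mask) | (SwapVal & Mask);  // RISBG32
//   // CS with rotr(SwapFull, Shift); on failure, retry with the value
//   // that CS loaded (the Retry* PHIs above).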
05278 
05279 // Emit an extension from a GR32 or GR64 to a GR128.  ClearEven is true
05280 // if the high register of the GR128 value must be cleared or false if
05281 // it's "don't care".  SubReg is subreg_l32 when extending a GR32
05282 // and subreg_l64 when extending a GR64.
05283 MachineBasicBlock *
05284 SystemZTargetLowering::emitExt128(MachineInstr *MI,
05285                                   MachineBasicBlock *MBB,
05286                                   bool ClearEven, unsigned SubReg) const {
05287   MachineFunction &MF = *MBB->getParent();
05288   const SystemZInstrInfo *TII =
05289       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
05290   MachineRegisterInfo &MRI = MF.getRegInfo();
05291   DebugLoc DL = MI->getDebugLoc();
05292 
05293   unsigned Dest  = MI->getOperand(0).getReg();
05294   unsigned Src   = MI->getOperand(1).getReg();
05295   unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
05296 
05297   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
05298   if (ClearEven) {
05299     unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
05300     unsigned Zero64   = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
05301 
05302     BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
05303       .addImm(0);
05304     BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
05305       .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
05306     In128 = NewIn128;
05307   }
05308   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
05309     .addReg(In128).addReg(Src).addImm(SubReg);
05310 
05311   MI->eraseFromParent();
05312   return MBB;
05313 }
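
// For example, ZEXT128_32 (ClearEven = true, SubReg = subreg_l32)
// expands to:
//
//   %In128    = IMPLICIT_DEF
//   %Zero64   = LLILL 0
//   %NewIn128 = INSERT_SUBREG %In128, %Zero64, subreg_h64  ; clear high half
//   %Dest     = INSERT_SUBREG %NewIn128, %Src, subreg_l32  ; insert low 32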
05314 
05315 MachineBasicBlock *
05316 SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
05317                                          MachineBasicBlock *MBB,
05318                                          unsigned Opcode) const {
05319   MachineFunction &MF = *MBB->getParent();
05320   const SystemZInstrInfo *TII =
05321       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
05322   MachineRegisterInfo &MRI = MF.getRegInfo();
05323   DebugLoc DL = MI->getDebugLoc();
05324 
05325   MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
05326   uint64_t       DestDisp = MI->getOperand(1).getImm();
05327   MachineOperand SrcBase  = earlyUseOperand(MI->getOperand(2));
05328   uint64_t       SrcDisp  = MI->getOperand(3).getImm();
05329   uint64_t       Length   = MI->getOperand(4).getImm();
05330 
05331   // When generating more than one CLC, all but the last will need to
05332   // branch to the end when a difference is found.
05333   MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
05334                                splitBlockAfter(MI, MBB) : nullptr);
05335 
05336   // Check for the loop form, in which operand 5 is the trip count.
05337   if (MI->getNumExplicitOperands() > 5) {
05338     bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
05339 
05340     unsigned StartCountReg = MI->getOperand(5).getReg();
05341     unsigned StartSrcReg   = forceReg(MI, SrcBase, TII);
05342     unsigned StartDestReg  = (HaveSingleBase ? StartSrcReg :
05343                               forceReg(MI, DestBase, TII));
05344 
05345     const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
05346     unsigned ThisSrcReg  = MRI.createVirtualRegister(RC);
05347     unsigned ThisDestReg = (HaveSingleBase ? ThisSrcReg :
05348                             MRI.createVirtualRegister(RC));
05349     unsigned NextSrcReg  = MRI.createVirtualRegister(RC);
05350     unsigned NextDestReg = (HaveSingleBase ? NextSrcReg :
05351                             MRI.createVirtualRegister(RC));
05352 
05353     RC = &SystemZ::GR64BitRegClass;
05354     unsigned ThisCountReg = MRI.createVirtualRegister(RC);
05355     unsigned NextCountReg = MRI.createVirtualRegister(RC);
05356 
05357     MachineBasicBlock *StartMBB = MBB;
05358     MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
05359     MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
05360     MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
05361 
05362     //  StartMBB:
05363     //   # fall through to LoopMBB
05364     MBB->addSuccessor(LoopMBB);
05365 
05366     //  LoopMBB:
05367     //   %ThisDestReg = phi [ %StartDestReg, StartMBB ],
05368     //                      [ %NextDestReg, NextMBB ]
05369     //   %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
05370     //                     [ %NextSrcReg, NextMBB ]
05371     //   %ThisCountReg = phi [ %StartCountReg, StartMBB ],
05372     //                       [ %NextCountReg, NextMBB ]
05373     //   ( PFD 2, 768+DestDisp(%ThisDestReg) )
05374     //   Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
05375     //   ( JLH EndMBB )
05376     //
05377     // The prefetch is used only for MVC.  The JLH is used only for CLC.
05378     MBB = LoopMBB;
05379 
05380     BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
05381       .addReg(StartDestReg).addMBB(StartMBB)
05382       .addReg(NextDestReg).addMBB(NextMBB);
05383     if (!HaveSingleBase)
05384       BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
05385         .addReg(StartSrcReg).addMBB(StartMBB)
05386         .addReg(NextSrcReg).addMBB(NextMBB);
05387     BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
05388       .addReg(StartCountReg).addMBB(StartMBB)
05389       .addReg(NextCountReg).addMBB(NextMBB);
05390     if (Opcode == SystemZ::MVC)
05391       BuildMI(MBB, DL, TII->get(SystemZ::PFD))
05392         .addImm(SystemZ::PFD_WRITE)
05393         .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
05394     BuildMI(MBB, DL, TII->get(Opcode))
05395       .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
05396       .addReg(ThisSrcReg).addImm(SrcDisp);
05397     if (EndMBB) {
05398       BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05399         .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
05400         .addMBB(EndMBB);
05401       MBB->addSuccessor(EndMBB);
05402       MBB->addSuccessor(NextMBB);
05403     }
05404 
05405     // NextMBB:
05406     //   %NextDestReg = LA 256(%ThisDestReg)
05407     //   %NextSrcReg = LA 256(%ThisSrcReg)
05408     //   %NextCountReg = AGHI %ThisCountReg, -1
05409     //   CGHI %NextCountReg, 0
05410     //   JLH LoopMBB
05411     //   # fall through to DoneMBB
05412     //
05413     // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
05414     MBB = NextMBB;
05415 
05416     BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
05417       .addReg(ThisDestReg).addImm(256).addReg(0);
05418     if (!HaveSingleBase)
05419       BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
05420         .addReg(ThisSrcReg).addImm(256).addReg(0);
05421     BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
05422       .addReg(ThisCountReg).addImm(-1);
05423     BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
05424       .addReg(NextCountReg).addImm(0);
05425     BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05426       .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
05427       .addMBB(LoopMBB);
05428     MBB->addSuccessor(LoopMBB);
05429     MBB->addSuccessor(DoneMBB);
05430 
05431     DestBase = MachineOperand::CreateReg(NextDestReg, false);
05432     SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
05433     Length &= 255;
05434     MBB = DoneMBB;
05435   }
05436   // Handle any remaining bytes with straight-line code.
05437   while (Length > 0) {
05438     uint64_t ThisLength = std::min(Length, uint64_t(256));
05439     // The previous iteration might have created out-of-range displacements.
05440     // If so, fold the displacement into the base register with LAY.
05441     if (!isUInt<12>(DestDisp)) {
05442       unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
05443       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
05444         .addOperand(DestBase).addImm(DestDisp).addReg(0);
05445       DestBase = MachineOperand::CreateReg(Reg, false);
05446       DestDisp = 0;
05447     }
05448     if (!isUInt<12>(SrcDisp)) {
05449       unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
05450       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
05451         .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
05452       SrcBase = MachineOperand::CreateReg(Reg, false);
05453       SrcDisp = 0;
05454     }
05455     BuildMI(*MBB, MI, DL, TII->get(Opcode))
05456       .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
05457       .addOperand(SrcBase).addImm(SrcDisp);
05458     DestDisp += ThisLength;
05459     SrcDisp += ThisLength;
05460     Length -= ThisLength;
05461     // If there's another CLC to go, branch to the end if a difference
05462     // was found.
05463     if (EndMBB && Length > 0) {
05464       MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
05465       BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05466         .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
05467         .addMBB(EndMBB);
05468       MBB->addSuccessor(EndMBB);
05469       MBB->addSuccessor(NextMBB);
05470       MBB = NextMBB;
05471     }
05472   }
05473   if (EndMBB) {
05474     MBB->addSuccessor(EndMBB);
05475     MBB = EndMBB;
05476     MBB->addLiveIn(SystemZ::CC);
05477   }
05478 
05479   MI->eraseFromParent();
05480   return MBB;
05481 }
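
// Worked example (illustrative): an MVCLoop with Length = 600 and a trip
// count of 2 yields a two-iteration loop of 256-byte MVCs covering the
// first 512 bytes, then one straight-line MVC for the remaining
// 600 & 255 = 88 bytes.  For CLC, the extra JLH/EndMBB plumbing makes
// each chunk branch to the end as soon as a difference is found.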
05482 
05483 // Decompose string pseudo-instruction MI into a loop that continually performs
05484 // Opcode until CC != 3.
05485 MachineBasicBlock *
05486 SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
05487                                          MachineBasicBlock *MBB,
05488                                          unsigned Opcode) const {
05489   MachineFunction &MF = *MBB->getParent();
05490   const SystemZInstrInfo *TII =
05491       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
05492   MachineRegisterInfo &MRI = MF.getRegInfo();
05493   DebugLoc DL = MI->getDebugLoc();
05494 
05495   unsigned End1Reg   = MI->getOperand(0).getReg();
05496   unsigned Start1Reg = MI->getOperand(1).getReg();
05497   unsigned Start2Reg = MI->getOperand(2).getReg();
05498   unsigned CharReg   = MI->getOperand(3).getReg();
05499 
05500   const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
05501   unsigned This1Reg = MRI.createVirtualRegister(RC);
05502   unsigned This2Reg = MRI.createVirtualRegister(RC);
05503   unsigned End2Reg  = MRI.createVirtualRegister(RC);
05504 
05505   MachineBasicBlock *StartMBB = MBB;
05506   MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
05507   MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
05508 
05509   //  StartMBB:
05510   //   # fall through to LoopMBB
05511   MBB->addSuccessor(LoopMBB);
05512 
05513   //  LoopMBB:
05514   //   %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
05515   //   %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
05516   //   R0L = %CharReg
05517   //   %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
05518   //   JO LoopMBB
05519   //   # fall through to DoneMBB
05520   //
05521   // The load of R0L can be hoisted by post-RA LICM.
05522   MBB = LoopMBB;
05523 
05524   BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
05525     .addReg(Start1Reg).addMBB(StartMBB)
05526     .addReg(End1Reg).addMBB(LoopMBB);
05527   BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
05528     .addReg(Start2Reg).addMBB(StartMBB)
05529     .addReg(End2Reg).addMBB(LoopMBB);
05530   BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
05531   BuildMI(MBB, DL, TII->get(Opcode))
05532     .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
05533     .addReg(This1Reg).addReg(This2Reg);
05534   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05535     .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
05536   MBB->addSuccessor(LoopMBB);
05537   MBB->addSuccessor(DoneMBB);
05538 
05539   DoneMBB->addLiveIn(SystemZ::CC);
05540 
05541   MI->eraseFromParent();
05542   return DoneMBB;
05543 }
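
// Sketch of the CLST case (illustrative): CC 3 means the instruction
// stopped at a CPU-determined limit without finding a difference or the
// terminator, so it is simply re-executed from where it left off:
//
//   do {
//     CC = CLST(This1, This2);   // terminator character taken from R0L
//   } while (CC == 3);           // JO LoopMBB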
05544 
05545 // Update TBEGIN instruction with final opcode and register clobbers.
05546 MachineBasicBlock *
05547 SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
05548                                             MachineBasicBlock *MBB,
05549                                             unsigned Opcode,
05550                                             bool NoFloat) const {
05551   MachineFunction &MF = *MBB->getParent();
05552   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
05553   const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
05554 
05555   // Update opcode.
05556   MI->setDesc(TII->get(Opcode));
05557 
05558   // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
05559   // Make sure to add the corresponding GRSM bits if they are missing.
05560   uint64_t Control = MI->getOperand(2).getImm();
05561   static const unsigned GPRControlBit[16] = {
05562     0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
05563     0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
05564   };
05565   Control |= GPRControlBit[15];
05566   if (TFI->hasFP(MF))
05567     Control |= GPRControlBit[11];
05568   MI->getOperand(2).setImm(Control);
05569 
05570   // Add GPR clobbers.
05571   for (int I = 0; I < 16; I++) {
05572     if ((Control & GPRControlBit[I]) == 0) {
05573       unsigned Reg = SystemZMC::GR64Regs[I];
05574       MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
05575     }
05576   }
05577 
05578   // Add FPR/VR clobbers.
05579   if (!NoFloat && (Control & 4) != 0) {
05580     if (Subtarget.hasVector()) {
05581       for (int I = 0; I < 32; I++) {
05582         unsigned Reg = SystemZMC::VR128Regs[I];
05583         MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
05584       }
05585     } else {
05586       for (int I = 0; I < 16; I++) {
05587         unsigned Reg = SystemZMC::FP64Regs[I];
05588         MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
05589       }
05590     }
05591   }
05592 
05593   return MBB;
05594 }
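
// Example of the GRSM handling above (illustrative): each bit of the
// 16-bit GRSM covers one even/odd GPR pair, from 0x8000 for %r0/%r1 down
// to 0x0100 for %r14/%r15.  GPRControlBit[15] == 0x0100 therefore forces
// the stack-pointer pair into the save mask, GPRControlBit[11] == 0x0400
// the %r10/%r11 frame-pointer pair, and every pair left out of the mask
// is added to the TBEGIN as a register clobber.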
05595 
05596 MachineBasicBlock *SystemZTargetLowering::
05597 EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
05598   switch (MI->getOpcode()) {
05599   case SystemZ::Select32Mux:
05600   case SystemZ::Select32:
05601   case SystemZ::SelectF32:
05602   case SystemZ::Select64:
05603   case SystemZ::SelectF64:
05604   case SystemZ::SelectF128:
05605     return emitSelect(MI, MBB);
05606 
05607   case SystemZ::CondStore8Mux:
05608     return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
05609   case SystemZ::CondStore8MuxInv:
05610     return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
05611   case SystemZ::CondStore16Mux:
05612     return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
05613   case SystemZ::CondStore16MuxInv:
05614     return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
05615   case SystemZ::CondStore8:
05616     return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
05617   case SystemZ::CondStore8Inv:
05618     return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
05619   case SystemZ::CondStore16:
05620     return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
05621   case SystemZ::CondStore16Inv:
05622     return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
05623   case SystemZ::CondStore32:
05624     return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
05625   case SystemZ::CondStore32Inv:
05626     return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
05627   case SystemZ::CondStore64:
05628     return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
05629   case SystemZ::CondStore64Inv:
05630     return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
05631   case SystemZ::CondStoreF32:
05632     return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
05633   case SystemZ::CondStoreF32Inv:
05634     return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
05635   case SystemZ::CondStoreF64:
05636     return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
05637   case SystemZ::CondStoreF64Inv:
05638     return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
05639 
05640   case SystemZ::AEXT128_64:
05641     return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
05642   case SystemZ::ZEXT128_32:
05643     return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
05644   case SystemZ::ZEXT128_64:
05645     return emitExt128(MI, MBB, true, SystemZ::subreg_l64);
05646 
05647   case SystemZ::ATOMIC_SWAPW:
05648     return emitAtomicLoadBinary(MI, MBB, 0, 0);
05649   case SystemZ::ATOMIC_SWAP_32:
05650     return emitAtomicLoadBinary(MI, MBB, 0, 32);
05651   case SystemZ::ATOMIC_SWAP_64:
05652     return emitAtomicLoadBinary(MI, MBB, 0, 64);
05653 
05654   case SystemZ::ATOMIC_LOADW_AR:
05655     return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
05656   case SystemZ::ATOMIC_LOADW_AFI:
05657     return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
05658   case SystemZ::ATOMIC_LOAD_AR:
05659     return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
05660   case SystemZ::ATOMIC_LOAD_AHI:
05661     return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
05662   case SystemZ::ATOMIC_LOAD_AFI:
05663     return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
05664   case SystemZ::ATOMIC_LOAD_AGR:
05665     return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
05666   case SystemZ::ATOMIC_LOAD_AGHI:
05667     return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
05668   case SystemZ::ATOMIC_LOAD_AGFI:
05669     return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
05670 
05671   case SystemZ::ATOMIC_LOADW_SR:
05672     return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
05673   case SystemZ::ATOMIC_LOAD_SR:
05674     return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
05675   case SystemZ::ATOMIC_LOAD_SGR:
05676     return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
05677 
05678   case SystemZ::ATOMIC_LOADW_NR:
05679     return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
05680   case SystemZ::ATOMIC_LOADW_NILH:
05681     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
05682   case SystemZ::ATOMIC_LOAD_NR:
05683     return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
05684   case SystemZ::ATOMIC_LOAD_NILL:
05685     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
05686   case SystemZ::ATOMIC_LOAD_NILH:
05687     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
05688   case SystemZ::ATOMIC_LOAD_NILF:
05689     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
05690   case SystemZ::ATOMIC_LOAD_NGR:
05691     return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
05692   case SystemZ::ATOMIC_LOAD_NILL64:
05693     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
05694   case SystemZ::ATOMIC_LOAD_NILH64:
05695     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
05696   case SystemZ::ATOMIC_LOAD_NIHL64:
05697     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
05698   case SystemZ::ATOMIC_LOAD_NIHH64:
05699     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
05700   case SystemZ::ATOMIC_LOAD_NILF64:
05701     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
05702   case SystemZ::ATOMIC_LOAD_NIHF64:
05703     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
05704 
05705   case SystemZ::ATOMIC_LOADW_OR:
05706     return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
05707   case SystemZ::ATOMIC_LOADW_OILH:
05708     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
05709   case SystemZ::ATOMIC_LOAD_OR:
05710     return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
05711   case SystemZ::ATOMIC_LOAD_OILL:
05712     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
05713   case SystemZ::ATOMIC_LOAD_OILH:
05714     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
05715   case SystemZ::ATOMIC_LOAD_OILF:
05716     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
05717   case SystemZ::ATOMIC_LOAD_OGR:
05718     return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
05719   case SystemZ::ATOMIC_LOAD_OILL64:
05720     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
05721   case SystemZ::ATOMIC_LOAD_OILH64:
05722     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
05723   case SystemZ::ATOMIC_LOAD_OIHL64:
05724     return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
05725   case SystemZ::ATOMIC_LOAD_OIHH64:
05726     return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
05727   case SystemZ::ATOMIC_LOAD_OILF64:
05728     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
05729   case SystemZ::ATOMIC_LOAD_OIHF64:
05730     return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
05731 
05732   case SystemZ::ATOMIC_LOADW_XR:
05733     return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
05734   case SystemZ::ATOMIC_LOADW_XILF:
05735     return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
05736   case SystemZ::ATOMIC_LOAD_XR:
05737     return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
05738   case SystemZ::ATOMIC_LOAD_XILF:
05739     return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
05740   case SystemZ::ATOMIC_LOAD_XGR:
05741     return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
05742   case SystemZ::ATOMIC_LOAD_XILF64:
05743     return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
05744   case SystemZ::ATOMIC_LOAD_XIHF64:
05745     return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
05746 
05747   case SystemZ::ATOMIC_LOADW_NRi:
05748     return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
05749   case SystemZ::ATOMIC_LOADW_NILHi:
05750     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
05751   case SystemZ::ATOMIC_LOAD_NRi:
05752     return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
05753   case SystemZ::ATOMIC_LOAD_NILLi:
05754     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
05755   case SystemZ::ATOMIC_LOAD_NILHi:
05756     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
05757   case SystemZ::ATOMIC_LOAD_NILFi:
05758     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
05759   case SystemZ::ATOMIC_LOAD_NGRi:
05760     return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
05761   case SystemZ::ATOMIC_LOAD_NILL64i:
05762     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
05763   case SystemZ::ATOMIC_LOAD_NILH64i:
05764     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
05765   case SystemZ::ATOMIC_LOAD_NIHL64i:
05766     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
05767   case SystemZ::ATOMIC_LOAD_NIHH64i:
05768     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
05769   case SystemZ::ATOMIC_LOAD_NILF64i:
05770     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
05771   case SystemZ::ATOMIC_LOAD_NIHF64i:
05772     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
05773 
05774   case SystemZ::ATOMIC_LOADW_MIN:
05775     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
05776                                 SystemZ::CCMASK_CMP_LE, 0);
05777   case SystemZ::ATOMIC_LOAD_MIN_32:
05778     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
05779                                 SystemZ::CCMASK_CMP_LE, 32);
05780   case SystemZ::ATOMIC_LOAD_MIN_64:
05781     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
05782                                 SystemZ::CCMASK_CMP_LE, 64);
05783 
05784   case SystemZ::ATOMIC_LOADW_MAX:
05785     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
05786                                 SystemZ::CCMASK_CMP_GE, 0);
05787   case SystemZ::ATOMIC_LOAD_MAX_32:
05788     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
05789                                 SystemZ::CCMASK_CMP_GE, 32);
05790   case SystemZ::ATOMIC_LOAD_MAX_64:
05791     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
05792                                 SystemZ::CCMASK_CMP_GE, 64);
05793 
05794   case SystemZ::ATOMIC_LOADW_UMIN:
05795     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
05796                                 SystemZ::CCMASK_CMP_LE, 0);
05797   case SystemZ::ATOMIC_LOAD_UMIN_32:
05798     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
05799                                 SystemZ::CCMASK_CMP_LE, 32);
05800   case SystemZ::ATOMIC_LOAD_UMIN_64:
05801     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
05802                                 SystemZ::CCMASK_CMP_LE, 64);
05803 
05804   case SystemZ::ATOMIC_LOADW_UMAX:
05805     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
05806                                 SystemZ::CCMASK_CMP_GE, 0);
05807   case SystemZ::ATOMIC_LOAD_UMAX_32:
05808     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
05809                                 SystemZ::CCMASK_CMP_GE, 32);
05810   case SystemZ::ATOMIC_LOAD_UMAX_64:
05811     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
05812                                 SystemZ::CCMASK_CMP_GE, 64);
05813 
05814   case SystemZ::ATOMIC_CMP_SWAPW:
05815     return emitAtomicCmpSwapW(MI, MBB);
05816   case SystemZ::MVCSequence:
05817   case SystemZ::MVCLoop:
05818     return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
05819   case SystemZ::NCSequence:
05820   case SystemZ::NCLoop:
05821     return emitMemMemWrapper(MI, MBB, SystemZ::NC);
05822   case SystemZ::OCSequence:
05823   case SystemZ::OCLoop:
05824     return emitMemMemWrapper(MI, MBB, SystemZ::OC);
05825   case SystemZ::XCSequence:
05826   case SystemZ::XCLoop:
05827     return emitMemMemWrapper(MI, MBB, SystemZ::XC);
05828   case SystemZ::CLCSequence:
05829   case SystemZ::CLCLoop:
05830     return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
05831   case SystemZ::CLSTLoop:
05832     return emitStringWrapper(MI, MBB, SystemZ::CLST);
05833   case SystemZ::MVSTLoop:
05834     return emitStringWrapper(MI, MBB, SystemZ::MVST);
05835   case SystemZ::SRSTLoop:
05836     return emitStringWrapper(MI, MBB, SystemZ::SRST);
05837   case SystemZ::TBEGIN:
05838     return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
05839   case SystemZ::TBEGIN_nofloat:
05840     return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
05841   case SystemZ::TBEGINC:
05842     return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
05843   default:
05844     llvm_unreachable("Unexpected instr type to insert");
05845   }
05846 }