SystemZISelLowering.cpp
//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Intrinsics.h"
#include <cctype>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {
// Represents a sequence for extracting a 0/1 value from an IPM result:
// (((X ^ XORValue) + AddValue) >> Bit)
struct IPMConversion {
  IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
    : XORValue(xorValue), AddValue(addValue), Bit(bit) {}

  int64_t XORValue;
  int64_t AddValue;
  unsigned Bit;
};
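
// A worked example (a sketch, relying on the IPM layout in which the
// condition code lands in bits 28 and 29 of the result, as SystemZ::IPM_CC
// describes): testing "CC >= 2" needs no XOR or add at all, since bit 29 is
// the high bit of the CC field, so IPMConversion(0, 0, 29) already produces
// the desired 0/1 value.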

// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In)
    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
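
// As an illustration (assuming the usual SystemZ CC encodings): an integer
// equality test would typically use CCValid = SystemZ::CCMASK_ICMP, since a
// compare can set CC to 0, 1 or 2, and CCMask = SystemZ::CCMASK_CMP_EQ,
// since equality corresponds to CC 0 alone.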
} // end anonymous namespace

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.  Clearing the kill flag is necessary because the operand
// may be used again after the point at which it was originally killed.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, but
  // LLVM's current latency-oriented scheduler can't handle physreg
  // definitions such as SystemZ's CC, so use the register-pressure
  // scheduler, which can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions consist of 2-byte aligned sequences of 2-byte units,
  // so 2-byte function alignment is all that is needed.
  setMinFunctionAlignment(2);

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC,     VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD,  VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ,            VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::ROTR,            VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
  setOperationAction(ISD::JumpTable,        PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
  }

  // We have fused multiply-add for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32,  Legal);
  setOperationAction(ISD::FMA, MVT::f64,  Legal);
  setOperationAction(ISD::FMA, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of an f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}
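
// A comparison of two i64 scalars therefore yields an i32 result, while a
// comparison of two v2f64 vectors yields a v2i64 element-wise mask.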

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return false;
  default:
    break;
  }

  return false;
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  return Imm.isZero() || Imm.isNegZero();
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                           unsigned,
                                                           unsigned,
                                                           bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                  const AddrMode &AM, Type *Ty,
                                                  unsigned AS) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  // Indexing is OK but no scale factor can be applied.
  return AM.Scale == 0 || AM.Scale == 1;
}
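
// In other words, the supported form is the usual D(X,B) address: a base
// register, an optional unscaled index register and a signed 20-bit
// displacement, e.g. something like -4(%r3,%r2) at the assembly level.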

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}
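
// For example, truncating an i64 to an i32 is free because it amounts to
// using the low 32-bit subregister of the 64-bit GPR; no instruction is
// needed.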

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}
// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < 16 && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
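
// As an illustration: for the constraint "{r5}" with a 64-bit value type,
// the caller passes GR64BitRegClass and SystemZMC::GR64Regs, and this
// function pairs SystemZ::R5D with that register class.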

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  return CI->isTailCall();
}

// We do not yet support 128-bit single-element vector types.  If the user
// attempts to use such a type as a function argument or return type, prefer
// to error out instead of emitting code that violates the ABI.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (ArgVT.isVector() && !VT.isVector())
    report_fatal_error("Unsupported vector argument or return type");
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
  for (unsigned i = 0; i < Ins.size(); ++i)
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  for (unsigned i = 0; i < Outs.size(); ++i)
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::Indirect)
    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
                        MachinePointerInfo(), false, false, false, 0);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
                        Value, DAG.getUNDEF(MVT::i64));
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}
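
// For example, an i32 value bound for an i64 location with SExt LocInfo
// becomes a single ISD::SIGN_EXTEND node here, matching the way sub-i64
// integer arguments are promoted to 64 bits by the calling convention.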

SDValue SystemZTargetLowering::
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                     const SmallVectorImpl<ISD::InputArg> &Ins,
                     SDLoc DL, SelectionDAG &DAG,
                     SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
                                      VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI), false,
                             false, false, 0);
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(MF, FI),
                                 false, false, 0);
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
    }
  }

  return Chain;
}

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the call-saved argument register R6.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    unsigned Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
  }
  return true;
}
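
// Note that since the GPR argument registers are R2 through R6, a call that
// needs five integer register arguments already uses R6 and so fails this
// test, even though all of its arguments are passed in registers.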

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, DL, PtrVT, true),
                                 DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(DAG.getStore(
          Chain, DL, ArgValue, SpillSlot,
          MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall)
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}

bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
               MachineFunction &MF, bool isVarArg,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
               LLVMContext &Context) const {
  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
                                   CallingConv::ID CallConv, bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   SDLoc DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    unsigned Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}

SDValue SystemZTargetLowering::
prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
  return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}

// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}

// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
01373     return true;
01374 
01375   case Intrinsic::s390_vstrczbs:
01376   case Intrinsic::s390_vstrczhs:
01377   case Intrinsic::s390_vstrczfs:
01378     Opcode = SystemZISD::VSTRCZ_CC;
01379     CCValid = SystemZ::CCMASK_ANY;
01380     return true;
01381 
01382   case Intrinsic::s390_vfcedbs:
01383     Opcode = SystemZISD::VFCMPES;
01384     CCValid = SystemZ::CCMASK_VCMP;
01385     return true;
01386 
01387   case Intrinsic::s390_vfchdbs:
01388     Opcode = SystemZISD::VFCMPHS;
01389     CCValid = SystemZ::CCMASK_VCMP;
01390     return true;
01391 
01392   case Intrinsic::s390_vfchedbs:
01393     Opcode = SystemZISD::VFCMPHES;
01394     CCValid = SystemZ::CCMASK_VCMP;
01395     return true;
01396 
01397   case Intrinsic::s390_vftcidb:
01398     Opcode = SystemZISD::VFTCI;
01399     CCValid = SystemZ::CCMASK_VCMP;
01400     return true;
01401 
01402   default:
01403     return false;
01404   }
01405 }
01406 
01407 // Emit an intrinsic with chain, with a glued value instead of its CC result.
01408 static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
01409                                              unsigned Opcode) {
01410   // Copy all operands except the intrinsic ID.
01411   unsigned NumOps = Op.getNumOperands();
01412   SmallVector<SDValue, 6> Ops;
01413   Ops.reserve(NumOps - 1);
01414   Ops.push_back(Op.getOperand(0));
01415   for (unsigned I = 2; I < NumOps; ++I)
01416     Ops.push_back(Op.getOperand(I));
01417 
01418   assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
01419   SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01420   SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
01421   SDValue OldChain = SDValue(Op.getNode(), 1);
01422   SDValue NewChain = SDValue(Intr.getNode(), 0);
01423   DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
01424   return Intr;
01425 }
01426 
01427 // Emit an intrinsic with a glued value instead of its CC result.
01428 static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
01429                                      unsigned Opcode) {
01430   // Copy all operands except the intrinsic ID.
01431   unsigned NumOps = Op.getNumOperands();
01432   SmallVector<SDValue, 6> Ops;
01433   Ops.reserve(NumOps - 1);
01434   for (unsigned I = 1; I < NumOps; ++I)
01435     Ops.push_back(Op.getOperand(I));
01436 
01437   if (Op->getNumValues() == 1)
01438     return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
01439   assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
01440   SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
01441   return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
01442 }
01443 
01444 // CC is a comparison that will be implemented using an integer or
01445 // floating-point comparison.  Return the condition code mask for
01446 // a branch on true.  In the integer case, CCMASK_CMP_UO is set for
01447 // unsigned comparisons and clear for signed ones.  In the floating-point
01448 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
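// For example, the CONV expansion below maps both ISD::SETGT and
// ISD::SETOGT to SystemZ::CCMASK_CMP_GT, while ISD::SETUGT maps to
// SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_GT.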
01449 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
01450 #define CONV(X) \
01451   case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
01452   case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
01453   case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
01454 
01455   switch (CC) {
01456   default:
01457     llvm_unreachable("Invalid integer condition!");
01458 
01459   CONV(EQ);
01460   CONV(NE);
01461   CONV(GT);
01462   CONV(GE);
01463   CONV(LT);
01464   CONV(LE);
01465 
01466   case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
01467   case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
01468   }
01469 #undef CONV
01470 }
01471 
01472 // Return a sequence for getting a 1 from an IPM result when CC has a
01473 // value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
01474 // The handling of CC values outside CCValid doesn't matter.
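// For example, with the CC field in bits 29:28 of the IPM result
// (SystemZ::IPM_CC == 28), a test for an odd CC value (CC 1 or CC 3)
// amounts to (IPM >> 28) & 1, and a test for CC == 0 can be done as the
// logical shift (IPM - (1 << 28)) >> 31, since the subtraction borrows
// into the sign bit exactly when the CC field is zero.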
01475 static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
01476   // Deal with cases where the result can be taken directly from a bit
01477   // of the IPM result.
01478   if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
01479     return IPMConversion(0, 0, SystemZ::IPM_CC);
01480   if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
01481     return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
01482 
01483   // Deal with cases where we can add a value to force the sign bit
01484   // to contain the right value.  Putting the bit in 31 means we can
01485   // use SRL rather than RISBG(L), and also makes it easier to get a
01486   // 0/-1 value, so it has priority over the other tests below.
01487   //
01488   // These sequences rely on the fact that the upper two bits of the
01489   // IPM result are zero.
01490   uint64_t TopBit = uint64_t(1) << 31;
01491   if (CCMask == (CCValid & SystemZ::CCMASK_0))
01492     return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
01493   if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
01494     return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
01495   if (CCMask == (CCValid & (SystemZ::CCMASK_0
01496                             | SystemZ::CCMASK_1
01497                             | SystemZ::CCMASK_2)))
01498     return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
01499   if (CCMask == (CCValid & SystemZ::CCMASK_3))
01500     return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
01501   if (CCMask == (CCValid & (SystemZ::CCMASK_1
01502                             | SystemZ::CCMASK_2
01503                             | SystemZ::CCMASK_3)))
01504     return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
01505 
01506   // Next try inverting the value and testing a bit.  0/1 could be
01507   // handled this way too, but we dealt with that case above.
01508   if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
01509     return IPMConversion(-1, 0, SystemZ::IPM_CC);
01510 
01511   // Handle cases where adding a value forces a non-sign bit to contain
01512   // the right value.
01513   if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
01514     return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
01515   if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
01516     return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
01517 
01518   // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All of these
01519   // can be done by inverting the low CC bit and applying one of the
01520   // sign-based extractions above.
01521   if (CCMask == (CCValid & SystemZ::CCMASK_1))
01522     return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
01523   if (CCMask == (CCValid & SystemZ::CCMASK_2))
01524     return IPMConversion(1 << SystemZ::IPM_CC,
01525                          TopBit - (3 << SystemZ::IPM_CC), 31);
01526   if (CCMask == (CCValid & (SystemZ::CCMASK_0
01527                             | SystemZ::CCMASK_1
01528                             | SystemZ::CCMASK_3)))
01529     return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
01530   if (CCMask == (CCValid & (SystemZ::CCMASK_0
01531                             | SystemZ::CCMASK_2
01532                             | SystemZ::CCMASK_3)))
01533     return IPMConversion(1 << SystemZ::IPM_CC,
01534                          TopBit - (1 << SystemZ::IPM_CC), 31);
01535 
01536   llvm_unreachable("Unexpected CC combination");
01537 }
01538 
01539 // If C can be converted to a comparison against zero, adjust the operands
01540 // as necessary.
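// For example, the signed comparison "X > -1" has Value == -1 and
// CCMASK_CMP_GT, so it becomes "X >= 0" (GT ^ EQ == GE); likewise
// "X < 1" becomes "X <= 0".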
01541 static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01542   if (C.ICmpType == SystemZICMP::UnsignedOnly)
01543     return;
01544 
01545   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
01546   if (!ConstOp1)
01547     return;
01548 
01549   int64_t Value = ConstOp1->getSExtValue();
01550   if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
01551       (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
01552       (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
01553       (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
01554     C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
01555     C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
01556   }
01557 }
01558 
01559 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
01560 // adjust the operands as necessary.
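// For example, an equality test between a sign-extending i8 load and the
// constant -1 is rewritten as an unsigned comparison of the zero-extended
// load against 255, which CLI can handle directly.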
01561 static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01562   // For us to make any changes, it must be a comparison between a single-use
01563   // load and a constant.
01564   if (!C.Op0.hasOneUse() ||
01565       C.Op0.getOpcode() != ISD::LOAD ||
01566       C.Op1.getOpcode() != ISD::Constant)
01567     return;
01568 
01569   // We must have an 8- or 16-bit load.
01570   auto *Load = cast<LoadSDNode>(C.Op0);
01571   unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
01572   if (NumBits != 8 && NumBits != 16)
01573     return;
01574 
01575   // The load must be an extending one and the constant must be within the
01576   // range of the unextended value.
01577   auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
01578   uint64_t Value = ConstOp1->getZExtValue();
01579   uint64_t Mask = (1 << NumBits) - 1;
01580   if (Load->getExtensionType() == ISD::SEXTLOAD) {
01581     // Make sure that ConstOp1 is in range of C.Op0.
01582     int64_t SignedValue = ConstOp1->getSExtValue();
01583     if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
01584       return;
01585     if (C.ICmpType != SystemZICMP::SignedOnly) {
01586       // Unsigned comparison between two sign-extended values is equivalent
01587       // to unsigned comparison between two zero-extended values.
01588       Value &= Mask;
01589     } else if (NumBits == 8) {
01590       // Try to treat the comparison as unsigned, so that we can use CLI.
01591       // Adjust CCMask and Value as necessary.
01592       if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
01593         // Test whether the high bit of the byte is set.
01594         Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
01595       else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
01596         // Test whether the high bit of the byte is clear.
01597         Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
01598       else
01599         // No instruction exists for this combination.
01600         return;
01601       C.ICmpType = SystemZICMP::UnsignedOnly;
01602     }
01603   } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
01604     if (Value > Mask)
01605       return;
01606     // If the constant is in range, we can use any comparison.
01607     C.ICmpType = SystemZICMP::Any;
01608   } else
01609     return;
01610 
01611   // Make sure that the first operand is an i32 of the right extension type.
01612   ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
01613                               ISD::SEXTLOAD :
01614                               ISD::ZEXTLOAD);
01615   if (C.Op0.getValueType() != MVT::i32 ||
01616       Load->getExtensionType() != ExtType)
01617     C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
01618                            Load->getChain(), Load->getBasePtr(),
01619                            Load->getPointerInfo(), Load->getMemoryVT(),
01620                            Load->isVolatile(), Load->isNonTemporal(),
01621                            Load->isInvariant(), Load->getAlignment());
01622 
01623   // Make sure that the second operand is an i32 with the right value.
01624   if (C.Op1.getValueType() != MVT::i32 ||
01625       Value != ConstOp1->getZExtValue())
01626     C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
01627 }
01628 
01629 // Return true if Op is either an unextended load, or a load suitable
01630 // for integer register-memory comparisons of type ICmpType.
01631 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
01632   auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
01633   if (Load) {
01634     // There are no instructions to compare a register with a memory byte.
01635     if (Load->getMemoryVT() == MVT::i8)
01636       return false;
01637     // Otherwise decide on extension type.
01638     switch (Load->getExtensionType()) {
01639     case ISD::NON_EXTLOAD:
01640       return true;
01641     case ISD::SEXTLOAD:
01642       return ICmpType != SystemZICMP::UnsignedOnly;
01643     case ISD::ZEXTLOAD:
01644       return ICmpType != SystemZICMP::SignedOnly;
01645     default:
01646       break;
01647     }
01648   }
01649   return false;
01650 }
01651 
01652 // Return true if it is better to swap the operands of C.
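// For example, when C.Op0 is a single-use load and C.Op1 is a register,
// swapping lets the load become the storage operand of a register-storage
// compare such as C or CL.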
01653 static bool shouldSwapCmpOperands(const Comparison &C) {
01654   // Leave f128 comparisons alone, since they have no memory forms.
01655   if (C.Op0.getValueType() == MVT::f128)
01656     return false;
01657 
01658   // Always keep a floating-point constant second, since comparisons with
01659   // zero can use LOAD TEST and comparisons with other constants make a
01660   // natural memory operand.
01661   if (isa<ConstantFPSDNode>(C.Op1))
01662     return false;
01663 
01664   // Never swap comparisons with zero since there are many ways to optimize
01665   // those later.
01666   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
01667   if (ConstOp1 && ConstOp1->getZExtValue() == 0)
01668     return false;
01669 
01670   // Also keep natural memory operands second if the loaded value is
01671   // only used here.  Several comparisons have memory forms.
01672   if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
01673     return false;
01674 
01675   // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
01676   // In that case we generally prefer the memory to be second.
01677   if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
01678     // The only exceptions are when the second operand is a constant and
01679     // we can use things like CHHSI.
01680     if (!ConstOp1)
01681       return true;
01682     // The unsigned memory-immediate instructions can handle 16-bit
01683     // unsigned integers.
01684     if (C.ICmpType != SystemZICMP::SignedOnly &&
01685         isUInt<16>(ConstOp1->getZExtValue()))
01686       return false;
01687     // The signed memory-immediate instructions can handle 16-bit
01688     // signed integers.
01689     if (C.ICmpType != SystemZICMP::UnsignedOnly &&
01690         isInt<16>(ConstOp1->getSExtValue()))
01691       return false;
01692     return true;
01693   }
01694 
01695   // Try to promote the use of CGFR and CLGFR.
01696   unsigned Opcode0 = C.Op0.getOpcode();
01697   if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
01698     return true;
01699   if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
01700     return true;
01701   if (C.ICmpType != SystemZICMP::SignedOnly &&
01702       Opcode0 == ISD::AND &&
01703       C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
01704       cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
01705     return true;
01706 
01707   return false;
01708 }
01709 
01710 // Return a version of comparison CC mask CCMask in which the LT and GT
01711 // actions are swapped.
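// For example, reverseCCMask(SystemZ::CCMASK_CMP_LE) is
// SystemZ::CCMASK_CMP_GE, which is the mask a comparison with swapped
// operands needs.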
01712 static unsigned reverseCCMask(unsigned CCMask) {
01713   return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
01714           (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
01715           (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
01716           (CCMask & SystemZ::CCMASK_CMP_UO));
01717 }
01718 
01719 // Check whether C tests for equality between X and Y and whether X - Y
01720 // or Y - X is also computed.  In that case it's better to compare the
01721 // result of the subtraction against zero.
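// For example, "if (X != Y)" in code that also computes X - Y becomes a
// test of (X - Y) != 0, letting the subtraction itself set CC.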
01722 static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01723   if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
01724       C.CCMask == SystemZ::CCMASK_CMP_NE) {
01725     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
01726       SDNode *N = *I;
01727       if (N->getOpcode() == ISD::SUB &&
01728           ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
01729            (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
01730         C.Op0 = SDValue(N, 0);
01731         C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
01732         return;
01733       }
01734     }
01735   }
01736 }
01737 
01738 // Check whether C compares a floating-point value with zero and whether
01739 // that floating-point value is also negated.  In this case we can use
01740 // the negation to set CC, avoiding the need for separate LOAD AND TEST
01741 // and LOAD (NEGATIVE/COMPLEMENT) instructions.
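// For example, if both "X < 0.0" and "-X" appear, we can test the negation
// instead and reverse LT and GT in the mask, checking "X < 0.0" as
// "-X > 0.0".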
01742 static void adjustForFNeg(Comparison &C) {
01743   auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
01744   if (C1 && C1->isZero()) {
01745     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
01746       SDNode *N = *I;
01747       if (N->getOpcode() == ISD::FNEG) {
01748         C.Op0 = SDValue(N, 0);
01749         C.CCMask = reverseCCMask(C.CCMask);
01750         return;
01751       }
01752     }
01753   }
01754 }
01755 
01756 // Check whether C compares (shl X, 32) with 0 and whether X is
01757 // also sign-extended.  In that case it is better to test the result
01758 // of the sign extension using LTGFR.
01759 //
01760 // This case is important because InstCombine transforms a comparison
01761 // with (sext (trunc X)) into a comparison with (shl X, 32).
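// For example, "(i64 (shl X, 32)) < 0" in a function that also computes
// (sign_extend_inreg X, i32) becomes a sign test of that extension, which
// LTGFR can perform while also producing the extended value.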
01762 static void adjustForLTGFR(Comparison &C) {
01763   // Check for a comparison between (shl X, 32) and 0.
01764   if (C.Op0.getOpcode() == ISD::SHL &&
01765       C.Op0.getValueType() == MVT::i64 &&
01766       C.Op1.getOpcode() == ISD::Constant &&
01767       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
01768     auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
01769     if (C1 && C1->getZExtValue() == 32) {
01770       SDValue ShlOp0 = C.Op0.getOperand(0);
01771       // See whether X has any SIGN_EXTEND_INREG uses.
01772       for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
01773         SDNode *N = *I;
01774         if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
01775             cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
01776           C.Op0 = SDValue(N, 0);
01777           return;
01778         }
01779       }
01780     }
01781   }
01782 }
01783 
01784 // If C compares the truncation of an extending load, try to compare
01785 // the untruncated value instead.  This exposes more opportunities to
01786 // reuse CC.
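// For example, "(i32 (trunc (i64 (zextload i8)))) == 0" can test the
// untruncated load against zero instead: the loaded byte fits within the
// truncated type, so the truncation drops no significant bits.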
01787 static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01788   if (C.Op0.getOpcode() == ISD::TRUNCATE &&
01789       C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
01790       C.Op1.getOpcode() == ISD::Constant &&
01791       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
01792     auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
01793     if (L->getMemoryVT().getStoreSizeInBits()
01794         <= C.Op0.getValueType().getSizeInBits()) {
01795       unsigned Type = L->getExtensionType();
01796       if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
01797           (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
01798         C.Op0 = C.Op0.getOperand(0);
01799         C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
01800       }
01801     }
01802   }
01803 }
01804 
01805 // Return true if shift operation N has an in-range constant shift value.
01806 // Store it in ShiftVal if so.
01807 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
01808   auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
01809   if (!Shift)
01810     return false;
01811 
01812   uint64_t Amount = Shift->getZExtValue();
01813   if (Amount >= N.getValueType().getSizeInBits())
01814     return false;
01815 
01816   ShiftVal = Amount;
01817   return true;
01818 }
01819 
01820 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
01821 // instruction and whether the CC value is descriptive enough to handle
01822 // a comparison of type ICmpType between the AND result and CmpVal.
01823 // CCMask says which comparison result is being tested and BitSize is
01824 // the number of bits in the operands.  If TEST UNDER MASK can be used,
01825 // return the corresponding CC mask, otherwise return 0.
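// For example, testing "(X & 0x8000) == 0" maps to CCMASK_TM_ALL_0 and
// "(X & 0x8000) != 0" to CCMASK_TM_SOME_1; an unsigned
// "(X & 0x8000) < 0x8000" also maps to CCMASK_TM_ALL_0, since the masked
// value is below the mask's low bit exactly when that bit is clear.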
01826 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
01827                                      uint64_t Mask, uint64_t CmpVal,
01828                                      unsigned ICmpType) {
01829   assert(Mask != 0 && "ANDs with zero should have been removed by now");
01830 
01831   // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
01832   if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
01833       !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
01834     return 0;
01835 
01836   // Work out the masks for the lowest and highest bits.
01837   unsigned HighShift = 63 - countLeadingZeros(Mask);
01838   uint64_t High = uint64_t(1) << HighShift;
01839   uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
01840 
01841   // Signed ordered comparisons are effectively unsigned if the sign
01842   // bit is dropped.
01843   bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
01844 
01845   // Check for equality comparisons with 0, or the equivalent.
01846   if (CmpVal == 0) {
01847     if (CCMask == SystemZ::CCMASK_CMP_EQ)
01848       return SystemZ::CCMASK_TM_ALL_0;
01849     if (CCMask == SystemZ::CCMASK_CMP_NE)
01850       return SystemZ::CCMASK_TM_SOME_1;
01851   }
01852   if (EffectivelyUnsigned && CmpVal <= Low) {
01853     if (CCMask == SystemZ::CCMASK_CMP_LT)
01854       return SystemZ::CCMASK_TM_ALL_0;
01855     if (CCMask == SystemZ::CCMASK_CMP_GE)
01856       return SystemZ::CCMASK_TM_SOME_1;
01857   }
01858   if (EffectivelyUnsigned && CmpVal < Low) {
01859     if (CCMask == SystemZ::CCMASK_CMP_LE)
01860       return SystemZ::CCMASK_TM_ALL_0;
01861     if (CCMask == SystemZ::CCMASK_CMP_GT)
01862       return SystemZ::CCMASK_TM_SOME_1;
01863   }
01864 
01865   // Check for equality comparisons with the mask, or the equivalent.
01866   if (CmpVal == Mask) {
01867     if (CCMask == SystemZ::CCMASK_CMP_EQ)
01868       return SystemZ::CCMASK_TM_ALL_1;
01869     if (CCMask == SystemZ::CCMASK_CMP_NE)
01870       return SystemZ::CCMASK_TM_SOME_0;
01871   }
01872   if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
01873     if (CCMask == SystemZ::CCMASK_CMP_GT)
01874       return SystemZ::CCMASK_TM_ALL_1;
01875     if (CCMask == SystemZ::CCMASK_CMP_LE)
01876       return SystemZ::CCMASK_TM_SOME_0;
01877   }
01878   if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
01879     if (CCMask == SystemZ::CCMASK_CMP_GE)
01880       return SystemZ::CCMASK_TM_ALL_1;
01881     if (CCMask == SystemZ::CCMASK_CMP_LT)
01882       return SystemZ::CCMASK_TM_SOME_0;
01883   }
01884 
01885   // Check for ordered comparisons with the top bit.
01886   if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
01887     if (CCMask == SystemZ::CCMASK_CMP_LE)
01888       return SystemZ::CCMASK_TM_MSB_0;
01889     if (CCMask == SystemZ::CCMASK_CMP_GT)
01890       return SystemZ::CCMASK_TM_MSB_1;
01891   }
01892   if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
01893     if (CCMask == SystemZ::CCMASK_CMP_LT)
01894       return SystemZ::CCMASK_TM_MSB_0;
01895     if (CCMask == SystemZ::CCMASK_CMP_GE)
01896       return SystemZ::CCMASK_TM_MSB_1;
01897   }
01898 
01899   // If there are just two bits, we can do equality checks for Low and High
01900   // as well.
01901   if (Mask == Low + High) {
01902     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
01903       return SystemZ::CCMASK_TM_MIXED_MSB_0;
01904     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
01905       return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
01906     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
01907       return SystemZ::CCMASK_TM_MIXED_MSB_1;
01908     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
01909       return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
01910   }
01911 
01912   // Looks like we've exhausted our options.
01913   return 0;
01914 }
01915 
01916 // See whether C can be implemented as a TEST UNDER MASK instruction.
01917 // Update the arguments with the TM version if so.
01918 static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01919   // Check that we have a comparison with a constant.
01920   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
01921   if (!ConstOp1)
01922     return;
01923   uint64_t CmpVal = ConstOp1->getZExtValue();
01924 
01925   // Check whether the nonconstant input is an AND with a constant mask.
01926   Comparison NewC(C);
01927   uint64_t MaskVal;
01928   ConstantSDNode *Mask = nullptr;
01929   if (C.Op0.getOpcode() == ISD::AND) {
01930     NewC.Op0 = C.Op0.getOperand(0);
01931     NewC.Op1 = C.Op0.getOperand(1);
01932     Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
01933     if (!Mask)
01934       return;
01935     MaskVal = Mask->getZExtValue();
01936   } else {
01937     // There is no instruction to compare with a 64-bit immediate
01938     // so use TMHH instead if possible.  We need an unsigned ordered
01939     // comparison with an i64 immediate.
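    // For example, the unsigned i64 test "X < (1 << 48)" has CmpVal equal
    // to 1 << 48, giving MaskVal == 0xffff000000000000 below; the test
    // then becomes a TMHH that checks that the top 16 bits of X are all
    // zero.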
01940     if (NewC.Op0.getValueType() != MVT::i64 ||
01941         NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
01942         NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
01943         NewC.ICmpType == SystemZICMP::SignedOnly)
01944       return;
01945     // Convert LE and GT comparisons into LT and GE.
01946     if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
01947         NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
01948       if (CmpVal == uint64_t(-1))
01949         return;
01950       CmpVal += 1;
01951       NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
01952     }
01953     // If the low N bits of Op1 are zero then the low N bits of Op0 can
01954     // be masked off without changing the result.
01955     MaskVal = -(CmpVal & -CmpVal);
01956     NewC.ICmpType = SystemZICMP::UnsignedOnly;
01957   }
01958   if (!MaskVal)
01959     return;
01960 
01961   // Check whether the combination of mask, comparison value and comparison
01962   // type are suitable.
01963   unsigned BitSize = NewC.Op0.getValueType().getSizeInBits();
01964   unsigned NewCCMask, ShiftVal;
01965   if (NewC.ICmpType != SystemZICMP::SignedOnly &&
01966       NewC.Op0.getOpcode() == ISD::SHL &&
01967       isSimpleShift(NewC.Op0, ShiftVal) &&
01968       (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
01969                                         MaskVal >> ShiftVal,
01970                                         CmpVal >> ShiftVal,
01971                                         SystemZICMP::Any))) {
01972     NewC.Op0 = NewC.Op0.getOperand(0);
01973     MaskVal >>= ShiftVal;
01974   } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
01975              NewC.Op0.getOpcode() == ISD::SRL &&
01976              isSimpleShift(NewC.Op0, ShiftVal) &&
01977              (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
01978                                                MaskVal << ShiftVal,
01979                                                CmpVal << ShiftVal,
01980                                                SystemZICMP::UnsignedOnly))) {
01981     NewC.Op0 = NewC.Op0.getOperand(0);
01982     MaskVal <<= ShiftVal;
01983   } else {
01984     NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
01985                                      NewC.ICmpType);
01986     if (!NewCCMask)
01987       return;
01988   }
01989 
01990   // Go ahead and make the change.
01991   C.Opcode = SystemZISD::TM;
01992   C.Op0 = NewC.Op0;
01993   if (Mask && Mask->getZExtValue() == MaskVal)
01994     C.Op1 = SDValue(Mask, 0);
01995   else
01996     C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
01997   C.CCValid = SystemZ::CCMASK_TM;
01998   C.CCMask = NewCCMask;
01999 }
02000 
02001 // Return a Comparison that tests the condition-code result of intrinsic
02002 // node Call against constant integer CC using comparison code Cond.
02003 // Opcode is the opcode of the SystemZISD operation for the intrinsic
02004 // and CCValid is the set of possible condition-code results.
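// For example, "tbegin(...) == 0" tests for CC 0, so SETEQ with CC == 0
// gives CCMask == 1 << 3 == SystemZ::CCMASK_0 before the mask is ANDed
// with CCValid below.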
02005 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
02006                                   SDValue Call, unsigned CCValid, uint64_t CC,
02007                                   ISD::CondCode Cond) {
02008   Comparison C(Call, SDValue());
02009   C.Opcode = Opcode;
02010   C.CCValid = CCValid;
02011   if (Cond == ISD::SETEQ)
02012     // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
02013     C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
02014   else if (Cond == ISD::SETNE)
02015     // ...and the inverse of that.
02016     C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
02017   else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
02018     // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
02019     // always true for CC>3.
02020     C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
02021   else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
02022     // ...and the inverse of that.
02023     C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
02024   else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
02025     // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
02026     // always true for CC>3.
02027     C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
02028   else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
02029     // ...and the inverse of that.
02030     C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
02031   else
02032     llvm_unreachable("Unexpected integer comparison type");
02033   C.CCMask &= CCValid;
02034   return C;
02035 }
02036 
02037 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
02038 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
02039                          ISD::CondCode Cond, SDLoc DL) {
02040   if (CmpOp1.getOpcode() == ISD::Constant) {
02041     uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
02042     unsigned Opcode, CCValid;
02043     if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
02044         CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
02045         isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
02046       return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
02047     if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
02048         CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
02049         isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
02050       return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
02051   }
02052   Comparison C(CmpOp0, CmpOp1);
02053   C.CCMask = CCMaskForCondCode(Cond);
02054   if (C.Op0.getValueType().isFloatingPoint()) {
02055     C.CCValid = SystemZ::CCMASK_FCMP;
02056     C.Opcode = SystemZISD::FCMP;
02057     adjustForFNeg(C);
02058   } else {
02059     C.CCValid = SystemZ::CCMASK_ICMP;
02060     C.Opcode = SystemZISD::ICMP;
02061     // Choose the type of comparison.  Equality and inequality tests can
02062     // use either signed or unsigned comparisons.  The choice also doesn't
02063     // matter if both sign bits are known to be clear.  In those cases we
02064     // want to give the main isel code the freedom to choose whichever
02065     // form fits best.
02066     if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
02067         C.CCMask == SystemZ::CCMASK_CMP_NE ||
02068         (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
02069       C.ICmpType = SystemZICMP::Any;
02070     else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
02071       C.ICmpType = SystemZICMP::UnsignedOnly;
02072     else
02073       C.ICmpType = SystemZICMP::SignedOnly;
02074     C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
02075     adjustZeroCmp(DAG, DL, C);
02076     adjustSubwordCmp(DAG, DL, C);
02077     adjustForSubtraction(DAG, DL, C);
02078     adjustForLTGFR(C);
02079     adjustICmpTruncate(DAG, DL, C);
02080   }
02081 
02082   if (shouldSwapCmpOperands(C)) {
02083     std::swap(C.Op0, C.Op1);
02084     C.CCMask = reverseCCMask(C.CCMask);
02085   }
02086 
02087   adjustForTestUnderMask(DAG, DL, C);
02088   return C;
02089 }
02090 
02091 // Emit the comparison instruction described by C.
02092 static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
02093   if (!C.Op1.getNode()) {
02094     SDValue Op;
02095     switch (C.Op0.getOpcode()) {
02096     case ISD::INTRINSIC_W_CHAIN:
02097       Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
02098       break;
02099     case ISD::INTRINSIC_WO_CHAIN:
02100       Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
02101       break;
02102     default:
02103       llvm_unreachable("Invalid comparison operands");
02104     }
02105     return SDValue(Op.getNode(), Op->getNumValues() - 1);
02106   }
02107   if (C.Opcode == SystemZISD::ICMP)
02108     return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
02109                        DAG.getConstant(C.ICmpType, DL, MVT::i32));
02110   if (C.Opcode == SystemZISD::TM) {
02111     bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
02112                          bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
02113     return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
02114                        DAG.getConstant(RegisterOnly, DL, MVT::i32));
02115   }
02116   return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
02117 }
02118 
02119 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
02120 // 64 bits.  Extend is the extension type to use.  Store the high part
02121 // in Hi and the low part in Lo.
02122 static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
02123                             unsigned Extend, SDValue Op0, SDValue Op1,
02124                             SDValue &Hi, SDValue &Lo) {
02125   Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
02126   Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
02127   SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
02128   Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
02129                    DAG.getConstant(32, DL, MVT::i64));
02130   Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
02131   Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
02132 }
02133 
02134 // Lower a binary operation that produces two VT results, one in each
02135 // half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
02136 // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
02137 // on the extended Op0 and (unextended) Op1.  Store the even register result
02138 // in Even and the odd register result in Odd.
02139 static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
02140                              unsigned Extend, unsigned Opcode,
02141                              SDValue Op0, SDValue Op1,
02142                              SDValue &Even, SDValue &Odd) {
02143   SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
02144   SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
02145                                SDValue(In128, 0), Op1);
02146   bool Is32Bit = is32Bit(VT);
02147   Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
02148   Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
02149 }
02150 
02151 // Return an i32 value that is 1 if the CC value produced by Glue is
02152 // in the mask CCMask and 0 otherwise.  CC is known to have a value
02153 // in CCValid, so other values can be ignored.
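// For a CC == 0 test, the conversion computed by getIPMConversion above
// typically ends up as a sequence like:
//
//   ipm %r<n>
//   afi %r<n>, -268435456
//   srl %r<n>, 31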
02154 static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
02155                          unsigned CCValid, unsigned CCMask) {
02156   IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
02157   SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
02158 
02159   if (Conversion.XORValue)
02160     Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
02161                          DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
02162 
02163   if (Conversion.AddValue)
02164     Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
02165                          DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
02166 
02167   // The SHR/AND sequence should get optimized to an RISBG.
02168   Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
02169                        DAG.getConstant(Conversion.Bit, DL, MVT::i32));
02170   if (Conversion.Bit != 31)
02171     Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
02172                          DAG.getConstant(1, DL, MVT::i32));
02173   return Result;
02174 }
02175 
02176 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
02177 // be done directly.  IsFP is true if CC is for a floating-point rather than
02178 // integer comparison.
02179 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
02180   switch (CC) {
02181   case ISD::SETOEQ:
02182   case ISD::SETEQ:
02183     return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
02184 
02185   case ISD::SETOGE:
02186   case ISD::SETGE:
02187     return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
02188 
02189   case ISD::SETOGT:
02190   case ISD::SETGT:
02191     return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
02192 
02193   case ISD::SETUGT:
02194     return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
02195 
02196   default:
02197     return 0;
02198   }
02199 }
02200 
02201 // Return the SystemZISD vector comparison operation for CC or its inverse,
02202 // or 0 if neither can be done directly.  Indicate in Invert whether the
02203 // result is for the inverse of CC.  IsFP is true if CC is for a
02204 // floating-point rather than integer comparison.
02205 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
02206                                             bool &Invert) {
02207   if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
02208     Invert = false;
02209     return Opcode;
02210   }
02211 
02212   CC = ISD::getSetCCInverse(CC, !IsFP);
02213   if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
02214     Invert = true;
02215     return Opcode;
02216   }
02217 
02218   return 0;
02219 }
02220 
02221 // Return a v2f64 that contains the extended form of elements Start and Start+1
02222 // of v4f32 value Op.
02223 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
02224                                   SDValue Op) {
02225   int Mask[] = { Start, -1, Start + 1, -1 };
02226   Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
02227   return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
02228 }
02229 
02230 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
02231 // producing a result of type VT.
02232 static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
02233                             EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
02234   // There is no hardware support for v4f32, so extend the vector into
02235   // two v2f64s and compare those.
02236   if (CmpOp0.getValueType() == MVT::v4f32) {
02237     SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
02238     SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
02239     SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
02240     SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
02241     SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
02242     SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
02243     return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
02244   }
02245   return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
02246 }
02247 
02248 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
02249 // an integer mask of type VT.
02250 static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
02251                                 ISD::CondCode CC, SDValue CmpOp0,
02252                                 SDValue CmpOp1) {
02253   bool IsFP = CmpOp0.getValueType().isFloatingPoint();
02254   bool Invert = false;
02255   SDValue Cmp;
02256   switch (CC) {
02257     // Handle tests for order using (or (ogt y x) (oge x y)).
02258   case ISD::SETUO:
02259     Invert = true;
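    // Fall through.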
02260   case ISD::SETO: {
02261     assert(IsFP && "Unexpected integer comparison");
02262     SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
02263     SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
02264     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
02265     break;
02266   }
02267 
02268     // Handle <> tests using (or (ogt y x) (ogt x y)).
02269   case ISD::SETUEQ:
02270     Invert = true;
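    // Fall through.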
02271   case ISD::SETONE: {
02272     assert(IsFP && "Unexpected integer comparison");
02273     SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
02274     SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
02275     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
02276     break;
02277   }
02278 
02279     // Otherwise a single comparison is enough.  It doesn't really
02280     // matter whether we try the inversion or the swap first, since
02281     // there are no cases where both work.
02282   default:
02283     if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
02284       Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
02285     else {
02286       CC = ISD::getSetCCSwappedOperands(CC);
02287       if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
02288         Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
02289       else
02290         llvm_unreachable("Unhandled comparison");
02291     }
02292     break;
02293   }
02294   if (Invert) {
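    // BYTE_MASK with the value 65535 sets all 16 bits, so every byte of
    // the mask vector is 0xff; XORing with it flips every lane of the
    // comparison result.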
02295     SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
02296                                DAG.getConstant(65535, DL, MVT::i32));
02297     Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
02298     Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
02299   }
02300   return Cmp;
02301 }
02302 
02303 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
02304                                           SelectionDAG &DAG) const {
02305   SDValue CmpOp0   = Op.getOperand(0);
02306   SDValue CmpOp1   = Op.getOperand(1);
02307   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
02308   SDLoc DL(Op);
02309   EVT VT = Op.getValueType();
02310   if (VT.isVector())
02311     return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
02312 
02313   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
02314   SDValue Glue = emitCmp(DAG, DL, C);
02315   return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
02316 }
02317 
02318 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
02319   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
02320   SDValue CmpOp0   = Op.getOperand(2);
02321   SDValue CmpOp1   = Op.getOperand(3);
02322   SDValue Dest     = Op.getOperand(4);
02323   SDLoc DL(Op);
02324 
02325   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
02326   SDValue Glue = emitCmp(DAG, DL, C);
02327   return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
02328                      Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
02329                      DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
02330 }
02331 
02332 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
02333 // allowing Pos and Neg to be wider than CmpOp.
02334 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
02335   return (Neg.getOpcode() == ISD::SUB &&
02336           Neg.getOperand(0).getOpcode() == ISD::Constant &&
02337           cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
02338           Neg.getOperand(1) == Pos &&
02339           (Pos == CmpOp ||
02340            (Pos.getOpcode() == ISD::SIGN_EXTEND &&
02341             Pos.getOperand(0) == CmpOp)));
02342 }
02343 
02344 // Return the absolute or negative absolute of Op; IsNegative decides which.
02345 static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
02346                            bool IsNegative) {
02347   Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
02348   if (IsNegative)
02349     Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
02350                      DAG.getConstant(0, DL, Op.getValueType()), Op);
02351   return Op;
02352 }
02353 
02354 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
02355                                               SelectionDAG &DAG) const {
02356   SDValue CmpOp0   = Op.getOperand(0);
02357   SDValue CmpOp1   = Op.getOperand(1);
02358   SDValue TrueOp   = Op.getOperand(2);
02359   SDValue FalseOp  = Op.getOperand(3);
02360   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
02361   SDLoc DL(Op);
02362 
02363   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
02364 
02365   // Check for absolute and negative-absolute selections, including those
02366   // where the comparison value is sign-extended (for LPGFR and LNGFR).
02367   // This check supplements the one in DAGCombiner.
02368   if (C.Opcode == SystemZISD::ICMP &&
02369       C.CCMask != SystemZ::CCMASK_CMP_EQ &&
02370       C.CCMask != SystemZ::CCMASK_CMP_NE &&
02371       C.Op1.getOpcode() == ISD::Constant &&
02372       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
02373     if (isAbsolute(C.Op0, TrueOp, FalseOp))
02374       return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
02375     if (isAbsolute(C.Op0, FalseOp, TrueOp))
02376       return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
02377   }
02378 
02379   SDValue Glue = emitCmp(DAG, DL, C);
02380 
02381   // Special case for handling -1/0 results.  The shifts we use here
02382   // should get optimized with the IPM conversion sequence.
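  // For example, a 64-bit (Cond ? -1 : 0) becomes the 0/1 SETCC sequence
  // followed by "shl 63" and "sra 63", i.e. a sign extension from the
  // low bit.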
02383   auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
02384   auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
02385   if (TrueC && FalseC) {
02386     int64_t TrueVal = TrueC->getSExtValue();
02387     int64_t FalseVal = FalseC->getSExtValue();
02388     if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
02389       // Invert the condition if we want -1 on false.
02390       if (TrueVal == 0)
02391         C.CCMask ^= C.CCValid;
02392       SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
02393       EVT VT = Op.getValueType();
02394       // Extend the result to VT.  Upper bits are ignored.
02395       if (!is32Bit(VT))
02396         Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
02397       // Sign-extend from the low bit.
02398       SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
02399       SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
02400       return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
02401     }
02402   }
02403 
02404   SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
02405                    DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
02406 
02407   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
02408   return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
02409 }
02410 
02411 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
02412                                                   SelectionDAG &DAG) const {
02413   SDLoc DL(Node);
02414   const GlobalValue *GV = Node->getGlobal();
02415   int64_t Offset = Node->getOffset();
02416   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02417   Reloc::Model RM = DAG.getTarget().getRelocationModel();
02418   CodeModel::Model CM = DAG.getTarget().getCodeModel();
02419 
02420   SDValue Result;
02421   if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
02422     // Assign anchors at 1<<12 byte boundaries.
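    // For example, an address of GV + 0x2345 uses the anchor GV + 0x2000.
    // The remaining offset 0x345 is odd, so it cannot be folded below and
    // is instead added explicitly at the end of this function.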
02423     uint64_t Anchor = Offset & ~uint64_t(0xfff);
02424     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
02425     Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02426 
02427     // The offset can be folded into the address if it is aligned to a halfword.
02428     Offset -= Anchor;
02429     if (Offset != 0 && (Offset & 1) == 0) {
02430       SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
02431       Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
02432       Offset = 0;
02433     }
02434   } else {
02435     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
02436     Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02437     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
02438                          MachinePointerInfo::getGOT(DAG.getMachineFunction()),
02439                          false, false, false, 0);
02440   }
02441 
02442   // If there was a non-zero offset that we didn't fold, create an explicit
02443   // addition for it.
02444   if (Offset != 0)
02445     Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
02446                          DAG.getConstant(Offset, DL, PtrVT));
02447 
02448   return Result;
02449 }
02450 
02451 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
02452                                                  SelectionDAG &DAG,
02453                                                  unsigned Opcode,
02454                                                  SDValue GOTOffset) const {
02455   SDLoc DL(Node);
02456   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02457   SDValue Chain = DAG.getEntryNode();
02458   SDValue Glue;
02459 
02460   // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
02461   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
02462   Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
02463   Glue = Chain.getValue(1);
02464   Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
02465   Glue = Chain.getValue(1);
02466 
02467   // The first call operand is the chain and the second is the TLS symbol.
02468   SmallVector<SDValue, 8> Ops;
02469   Ops.push_back(Chain);
02470   Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
02471                                            Node->getValueType(0),
02472                                            0, 0));
02473 
02474   // Add argument registers to the end of the list so that they are
02475   // known live into the call.
02476   Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
02477   Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
02478 
02479   // Add a register mask operand representing the call-preserved registers.
02480   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
02481   const uint32_t *Mask =
02482       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
02483   assert(Mask && "Missing call preserved mask for calling convention");
02484   Ops.push_back(DAG.getRegisterMask(Mask));
02485 
02486   // Glue the call to the argument copies.
02487   Ops.push_back(Glue);
02488 
02489   // Emit the call.
02490   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
02491   Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
02492   Glue = Chain.getValue(1);
02493 
02494   // Copy the return value from %r2.
02495   return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
02496 }
02497 
02498 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
02499                                                      SelectionDAG &DAG) const {
02500   if (DAG.getTarget().Options.EmulatedTLS)
02501     return LowerToTLSEmulatedModel(Node, DAG);
02502   SDLoc DL(Node);
02503   const GlobalValue *GV = Node->getGlobal();
02504   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02505   TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
02506 
02507   // The high part of the thread pointer is in access register 0.
02508   SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
02509                              DAG.getConstant(0, DL, MVT::i32));
02510   TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
02511 
02512   // The low part of the thread pointer is in access register 1.
02513   SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
02514                              DAG.getConstant(1, DL, MVT::i32));
02515   TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
02516 
02517   // Merge them into a single 64-bit address.
02518   SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
02519                                     DAG.getConstant(32, DL, PtrVT));
02520   SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
02521 
02522   // Get the offset of GA from the thread pointer, based on the TLS model.
02523   SDValue Offset;
02524   switch (model) {
02525     case TLSModel::GeneralDynamic: {
02526       // Load the GOT offset of the tls_index (module ID / per-symbol offset).
02527       SystemZConstantPoolValue *CPV =
02528         SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
02529 
02530       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
02531       Offset = DAG.getLoad(
02532           PtrVT, DL, DAG.getEntryNode(), Offset,
02533           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02534           false, false, 0);
02535 
02536       // Call __tls_get_offset to retrieve the offset.
02537       Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
02538       break;
02539     }
02540 
02541     case TLSModel::LocalDynamic: {
02542       // Load the GOT offset of the module ID.
02543       SystemZConstantPoolValue *CPV =
02544         SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
02545 
02546       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
02547       Offset = DAG.getLoad(
02548           PtrVT, DL, DAG.getEntryNode(), Offset,
02549           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02550           false, false, 0);
02551 
02552       // Call __tls_get_offset to retrieve the module base offset.
02553       Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
02554 
02555       // Note: The SystemZLDCleanupPass will remove redundant computations
02556       // of the module base offset.  Count total number of local-dynamic
02557       // accesses to trigger execution of that pass.
02558       SystemZMachineFunctionInfo* MFI =
02559         DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
02560       MFI->incNumLocalDynamicTLSAccesses();
02561 
02562       // Add the per-symbol offset.
02563       CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
02564 
02565       SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
02566       DTPOffset = DAG.getLoad(
02567           PtrVT, DL, DAG.getEntryNode(), DTPOffset,
02568           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02569           false, false, 0);
02570 
02571       Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
02572       break;
02573     }
02574 
02575     case TLSModel::InitialExec: {
02576       // Load the offset from the GOT.
02577       Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
02578                                           SystemZII::MO_INDNTPOFF);
02579       Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
02580       Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
02581                            MachinePointerInfo::getGOT(DAG.getMachineFunction()),
02582                            false, false, false, 0);
02583       break;
02584     }
02585 
02586     case TLSModel::LocalExec: {
02587       // Force the offset into the constant pool and load it from there.
02588       SystemZConstantPoolValue *CPV =
02589         SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
02590 
02591       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
02592       Offset = DAG.getLoad(
02593           PtrVT, DL, DAG.getEntryNode(), Offset,
02594           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02595           false, false, 0);
02596       break;
02597     }
02598   }
02599 
02600   // Add the base and offset together.
02601   return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
02602 }
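// To summarize, the four TLS models differ only in how the TP-relative
// offset is obtained: GeneralDynamic and LocalDynamic call __tls_get_offset
// (with TLSGD and TLSLDM/DTPOFF constant-pool entries respectively),
// InitialExec loads the offset from the GOT through an INDNTPOFF reference,
// and LocalExec loads an NTPOFF constant from the constant pool.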
02603 
02604 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
02605                                                  SelectionDAG &DAG) const {
02606   SDLoc DL(Node);
02607   const BlockAddress *BA = Node->getBlockAddress();
02608   int64_t Offset = Node->getOffset();
02609   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02610 
02611   SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
02612   Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02613   return Result;
02614 }
02615 
02616 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
02617                                               SelectionDAG &DAG) const {
02618   SDLoc DL(JT);
02619   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02620   SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
02621 
02622   // Use LARL to load the address of the table.
02623   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02624 }
02625 
02626 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
02627                                                  SelectionDAG &DAG) const {
02628   SDLoc DL(CP);
02629   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02630 
02631   SDValue Result;
02632   if (CP->isMachineConstantPoolEntry())
02633     Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
02634                                        CP->getAlignment());
02635   else
02636     Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
02637                                        CP->getAlignment(), CP->getOffset());
02638 
02639   // Use LARL to load the address of the constant pool entry.
02640   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02641 }
02642 
02643 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
02644                                             SelectionDAG &DAG) const {
02645   SDLoc DL(Op);
02646   SDValue In = Op.getOperand(0);
02647   EVT InVT = In.getValueType();
02648   EVT ResVT = Op.getValueType();
02649 
02650   // Convert loads directly.  This is normally done by DAGCombiner,
02651   // but we need this case for bitcasts that are created during lowering
02652   // and which are then lowered themselves.
02653   if (auto *LoadN = dyn_cast<LoadSDNode>(In))
02654     return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
02655                        LoadN->getMemOperand());
02656 
02657   if (InVT == MVT::i32 && ResVT == MVT::f32) {
02658     SDValue In64;
02659     if (Subtarget.hasHighWord()) {
02660       SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
02661                                        MVT::i64);
02662       In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
02663                                        MVT::i64, SDValue(U64, 0), In);
02664     } else {
02665       In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
02666       In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
02667                          DAG.getConstant(32, DL, MVT::i64));
02668     }
02669     SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
02670     return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
02671                                       DL, MVT::f32, Out64);
02672   }
02673   if (InVT == MVT::f32 && ResVT == MVT::i32) {
02674     SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
02675     SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
02676                                              MVT::f64, SDValue(U64, 0), In);
02677     SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
02678     if (Subtarget.hasHighWord())
02679       return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
02680                                         MVT::i32, Out64);
02681     SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
02682                                 DAG.getConstant(32, DL, MVT::i64));
02683     return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
02684   }
02685   llvm_unreachable("Unexpected bitcast combination");
02686 }
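// Note that the i32<->f32 paths above rely on f32 values occupying the high
// 32 bits of a 64-bit floating-point register (subreg_r32): for example,
// bitcasting the i32 0x3f800000 (1.0f) forms the i64 0x3f80000000000000,
// bitcasts it to f64, and then extracts the f32 subregister.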
02687 
02688 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
02689                                             SelectionDAG &DAG) const {
02690   MachineFunction &MF = DAG.getMachineFunction();
02691   SystemZMachineFunctionInfo *FuncInfo =
02692     MF.getInfo<SystemZMachineFunctionInfo>();
02693   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02694 
02695   SDValue Chain   = Op.getOperand(0);
02696   SDValue Addr    = Op.getOperand(1);
02697   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02698   SDLoc DL(Op);
02699 
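  // The SystemZ va_list is a structure of four 8-byte fields:
  //
  //   struct __va_list_tag {
  //     long __gpr;                 // index of the next GPR argument
  //     long __fpr;                 // index of the next FPR argument
  //     void *__overflow_arg_area;  // next stack argument
  //     void *__reg_save_area;      // start of the register save area
  //   };
  //
  // The loop below stores the four initial values at offsets 0, 8, 16 and 24.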
02700   // The initial values of each field.
02701   const unsigned NumFields = 4;
02702   SDValue Fields[NumFields] = {
02703     DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
02704     DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
02705     DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
02706     DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
02707   };
02708 
02709   // Store each field into its respective slot.
02710   SDValue MemOps[NumFields];
02711   unsigned Offset = 0;
02712   for (unsigned I = 0; I < NumFields; ++I) {
02713     SDValue FieldAddr = Addr;
02714     if (Offset != 0)
02715       FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
02716                               DAG.getIntPtrConstant(Offset, DL));
02717     MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
02718                              MachinePointerInfo(SV, Offset),
02719                              false, false, 0);
02720     Offset += 8;
02721   }
02722   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
02723 }
02724 
02725 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
02726                                            SelectionDAG &DAG) const {
02727   SDValue Chain      = Op.getOperand(0);
02728   SDValue DstPtr     = Op.getOperand(1);
02729   SDValue SrcPtr     = Op.getOperand(2);
02730   const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
02731   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
02732   SDLoc DL(Op);
02733 
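  // A va_list is four 8-byte fields (see lowerVASTART), so copying 32 bytes
  // with 8-byte alignment copies the whole structure.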
02734   return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
02735                        /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
02736                        /*isTailCall*/false,
02737                        MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
02738 }
02739 
02740 SDValue SystemZTargetLowering::
02741 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
02742   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
02743   bool RealignOpt = !DAG.getMachineFunction().getFunction()->
02744     hasFnAttribute("no-realign-stack");
02745 
02746   SDValue Chain = Op.getOperand(0);
02747   SDValue Size  = Op.getOperand(1);
02748   SDValue Align = Op.getOperand(2);
02749   SDLoc DL(Op);
02750 
02751   // If the user has set the "no-realign-stack" function attribute,
02752   // ignore alloca alignments.
02753   uint64_t AlignVal = (RealignOpt ?
02754                        cast<ConstantSDNode>(Align)->getZExtValue() : 0);
02755 
02756   uint64_t StackAlign = TFI->getStackAlignment();
02757   uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
02758   uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
02759 
02760   unsigned SPReg = getStackPointerRegisterToSaveRestore();
02761   SDValue NeededSpace = Size;
02762 
02763   // Get a reference to the stack pointer.
02764   SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
02765 
02766   // Add extra space for alignment if needed.
02767   if (ExtraAlignSpace)
02768     NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
02769                               DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
02770 
02771   // Get the new stack pointer value.
02772   SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
02773 
02774   // Copy the new stack pointer back.
02775   Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
02776 
02777   // The allocated data lives above the 160 bytes allocated for the standard
02778   // frame, plus any outgoing stack arguments.  We don't know how much that
02779   // amounts to yet, so emit a special ADJDYNALLOC placeholder.
02780   SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
02781   SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
02782 
02783   // Dynamically realign if needed.
02784   if (RequiredAlign > StackAlign) {
02785     Result =
02786       DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
02787                   DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
02788     Result =
02789       DAG.getNode(ISD::AND, DL, MVT::i64, Result,
02790                   DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
02791   }
02792 
02793   SDValue Ops[2] = { Result, Chain };
02794   return DAG.getMergeValues(Ops, DL);
02795 }
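// For example, an alloca of 40 bytes aligned to 16 bytes, with an 8-byte
// stack alignment, gives ExtraAlignSpace = 8: the code above allocates
// 48 bytes, adds the ADJDYNALLOC offset, and then computes
// (Result + 8) & ~15 to produce a 16-byte-aligned address inside the block.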
02796 
02797 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
02798                                               SelectionDAG &DAG) const {
02799   EVT VT = Op.getValueType();
02800   SDLoc DL(Op);
02801   SDValue Ops[2];
02802   if (is32Bit(VT))
02803     // Just do a normal 64-bit multiplication and extract the results.
02804     // We define this so that it can be used for constant division.
02805     lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
02806                     Op.getOperand(1), Ops[1], Ops[0]);
02807   else {
02808     // Do a full 128-bit multiplication based on UMUL_LOHI64:
02809     //
02810     //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
02811     //
02812     // but using the fact that the upper halves are either all zeros
02813     // or all ones:
02814     //
02815     //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
02816     //
02817     // and grouping the right terms together since they are quicker than the
02818     // multiplication:
02819     //
02820     //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
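    // Since lh = (ll < 0 ? -1 : 0) after the arithmetic shift by 63,
    // (lh & rl) equals (ll < 0 ? rl : 0), and likewise for (ll & rh).
    // This is the usual derivation of a signed high part from an
    // unsigned one:
    //
    //   smulh(l, r) = umulh(l, r) - (l < 0 ? r : 0) - (r < 0 ? l : 0)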
02821     SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
02822     SDValue LL = Op.getOperand(0);
02823     SDValue RL = Op.getOperand(1);
02824     SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
02825     SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
02826     // UMUL_LOHI64 returns the low result in the odd register and the high
02827     // result in the even register.  SMUL_LOHI is defined to return the
02828     // low half first, so the results are in reverse order.
02829     lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
02830                      LL, RL, Ops[1], Ops[0]);
02831     SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
02832     SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
02833     SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
02834     Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
02835   }
02836   return DAG.getMergeValues(Ops, DL);
02837 }
02838 
02839 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
02840                                               SelectionDAG &DAG) const {
02841   EVT VT = Op.getValueType();
02842   SDLoc DL(Op);
02843   SDValue Ops[2];
02844   if (is32Bit(VT))
02845     // Just do a normal 64-bit multiplication and extract the results.
02846     // We define this so that it can be used for constant division.
02847     lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
02848                     Op.getOperand(1), Ops[1], Ops[0]);
02849   else
02850     // UMUL_LOHI64 returns the low result in the odd register and the high
02851     // result in the even register.  UMUL_LOHI is defined to return the
02852     // low half first, so the results are in reverse order.
02853     lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
02854                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
02855   return DAG.getMergeValues(Ops, DL);
02856 }
02857 
02858 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
02859                                             SelectionDAG &DAG) const {
02860   SDValue Op0 = Op.getOperand(0);
02861   SDValue Op1 = Op.getOperand(1);
02862   EVT VT = Op.getValueType();
02863   SDLoc DL(Op);
02864   unsigned Opcode;
02865 
02866   // We use DSGF for 32-bit division and when the divisor is known to fit in 32 bits.
02867   if (is32Bit(VT)) {
02868     Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
02869     Opcode = SystemZISD::SDIVREM32;
02870   } else if (DAG.ComputeNumSignBits(Op1) > 32) {
02871     Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
02872     Opcode = SystemZISD::SDIVREM32;
02873   } else
02874     Opcode = SystemZISD::SDIVREM64;
02875 
02876   // DSG(F) takes a 64-bit dividend, so the even register in the GR128
02877   // input is "don't care".  The instruction returns the remainder in
02878   // the even register and the quotient in the odd register.
02879   SDValue Ops[2];
02880   lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
02881                    Op0, Op1, Ops[1], Ops[0]);
02882   return DAG.getMergeValues(Ops, DL);
02883 }
02884 
02885 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
02886                                             SelectionDAG &DAG) const {
02887   EVT VT = Op.getValueType();
02888   SDLoc DL(Op);
02889 
02890   // DL(G) uses a double-width dividend, so we need to clear the even
02891   // register in the GR128 input.  The instruction returns the remainder
02892   // in the even register and the quotient in the odd register.
02893   SDValue Ops[2];
02894   if (is32Bit(VT))
02895     lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
02896                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
02897   else
02898     lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
02899                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
02900   return DAG.getMergeValues(Ops, DL);
02901 }
02902 
02903 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
02904   assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
02905 
02906   // Get the known-zero masks for each operand.
02907   SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
02908   APInt KnownZero[2], KnownOne[2];
02909   DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]);
02910   DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]);
02911 
02912   // See if the upper 32 bits of one operand and the lower 32 bits of the
02913   // other are known zero.  They are the low and high operands respectively.
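  // For example, with KnownZero[0] = 0xffffffff00000000 (operand 0 provides
  // only the low word) and KnownZero[1] = 0x00000000ffffffff (operand 1
  // provides only the high word), this sets High = 1 and Low = 0.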
02914   uint64_t Masks[] = { KnownZero[0].getZExtValue(),
02915                        KnownZero[1].getZExtValue() };
02916   unsigned High, Low;
02917   if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
02918     High = 1, Low = 0;
02919   else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
02920     High = 0, Low = 1;
02921   else
02922     return Op;
02923 
02924   SDValue LowOp = Ops[Low];
02925   SDValue HighOp = Ops[High];
02926 
02927   // If the high part is a constant, we're better off using IILH.
02928   if (HighOp.getOpcode() == ISD::Constant)
02929     return Op;
02930 
02931   // If the low part is a constant that is outside the range of LHI,
02932   // then we're better off using IILF.
02933   if (LowOp.getOpcode() == ISD::Constant) {
02934     int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
02935     if (!isInt<16>(Value))
02936       return Op;
02937   }
02938 
02939   // Check whether the high part is an AND that doesn't change the
02940   // high 32 bits and just masks out low bits.  We can skip it if so.
02941   if (HighOp.getOpcode() == ISD::AND &&
02942       HighOp.getOperand(1).getOpcode() == ISD::Constant) {
02943     SDValue HighOp0 = HighOp.getOperand(0);
02944     uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
02945     if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
02946       HighOp = HighOp0;
02947   }
02948 
02949   // Take advantage of the fact that all GR32 operations only change the
02950   // low 32 bits by truncating Low to an i32 and inserting it directly
02951   // using a subreg.  The interesting cases are those where the truncation
02952   // can be folded.
02953   SDLoc DL(Op);
02954   SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
02955   return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
02956                                    MVT::i64, HighOp, Low32);
02957 }
02958 
02959 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
02960                                           SelectionDAG &DAG) const {
02961   EVT VT = Op.getValueType();
02962   SDLoc DL(Op);
02963   Op = Op.getOperand(0);
02964 
02965   // Handle vector types via VPOPCT.
02966   if (VT.isVector()) {
02967     Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
02968     Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
02969     switch (VT.getVectorElementType().getSizeInBits()) {
02970     case 8:
02971       break;
02972     case 16: {
02973       Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
02974       SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
02975       SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
02976       Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
02977       Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
02978       break;
02979     }
02980     case 32: {
02981       SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
02982                                 DAG.getConstant(0, DL, MVT::i32));
02983       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
02984       break;
02985     }
02986     case 64: {
02987       SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
02988                                 DAG.getConstant(0, DL, MVT::i32));
02989       Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
02990       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
02991       break;
02992     }
02993     default:
02994       llvm_unreachable("Unexpected type");
02995     }
02996     return Op;
02997   }
02998 
02999   // Get the known-zero mask for the operand.
03000   APInt KnownZero, KnownOne;
03001   DAG.computeKnownBits(Op, KnownZero, KnownOne);
03002   unsigned NumSignificantBits = (~KnownZero).getActiveBits();
03003   if (NumSignificantBits == 0)
03004     return DAG.getConstant(0, DL, VT);
03005 
03006   // Skip known-zero high parts of the operand.
03007   int64_t OrigBitSize = VT.getSizeInBits();
03008   int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
03009   BitSize = std::min(BitSize, OrigBitSize);
03010 
03011   // The POPCNT instruction counts the number of bits in each byte.
03012   Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
03013   Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
03014   Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
03015 
03016   // Add up per-byte counts in a binary tree.  All bits of Op at
03017   // positions BitSize and above remain zero throughout.
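  // For example, with BitSize == 32 and per-byte counts [c3, c2, c1, c0]
  // (c3 in the most significant byte):
  //
  //   I == 16:  Op + (Op << 16)  ==  [c3+c1, c2+c0, c1, c0]
  //   I ==  8:  Op + (Op <<  8)  ==  [c3+c1+c2+c0, c2+c0+c1, c1+c0, c0]
  //
  // The total lands in the high byte; each byte count is at most 8, so the
  // partial sums never overflow a byte.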
03018   for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
03019     SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
03020     if (BitSize != OrigBitSize)
03021       Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
03022                         DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
03023     Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
03024   }
03025 
03026   // Extract overall result from high byte.
03027   if (BitSize > 8)
03028     Op = DAG.getNode(ISD::SRL, DL, VT, Op,
03029                      DAG.getConstant(BitSize - 8, DL, VT));
03030 
03031   return Op;
03032 }
03033 
03034 // Op is an atomic load.  Lower it into a normal volatile load.
03035 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
03036                                                 SelectionDAG &DAG) const {
03037   auto *Node = cast<AtomicSDNode>(Op.getNode());
03038   return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
03039                         Node->getChain(), Node->getBasePtr(),
03040                         Node->getMemoryVT(), Node->getMemOperand());
03041 }
03042 
03043 // Op is an atomic store.  Lower it into a normal volatile store followed
03044 // by a serialization.
03045 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
03046                                                  SelectionDAG &DAG) const {
03047   auto *Node = cast<AtomicSDNode>(Op.getNode());
03048   SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
03049                                     Node->getBasePtr(), Node->getMemoryVT(),
03050                                     Node->getMemOperand());
03051   return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
03052                                     Chain), 0);
03053 }
03054 
03055 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation.  Lower the first two
03056 // into the fullword ATOMIC_LOADW_* operation given by Opcode.
03057 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
03058                                                    SelectionDAG &DAG,
03059                                                    unsigned Opcode) const {
03060   auto *Node = cast<AtomicSDNode>(Op.getNode());
03061 
03062   // 32-bit operations need no code outside the main loop.
03063   EVT NarrowVT = Node->getMemoryVT();
03064   EVT WideVT = MVT::i32;
03065   if (NarrowVT == WideVT)
03066     return Op;
03067 
03068   int64_t BitSize = NarrowVT.getSizeInBits();
03069   SDValue ChainIn = Node->getChain();
03070   SDValue Addr = Node->getBasePtr();
03071   SDValue Src2 = Node->getVal();
03072   MachineMemOperand *MMO = Node->getMemOperand();
03073   SDLoc DL(Node);
03074   EVT PtrVT = Addr.getValueType();
03075 
03076   // Convert atomic subtracts of constants into additions.
03077   if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
03078     if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
03079       Opcode = SystemZISD::ATOMIC_LOADW_ADD;
03080       Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
03081     }
03082 
03083   // Get the address of the containing word.
03084   SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
03085                                     DAG.getConstant(-4, DL, PtrVT));
03086 
03087   // Get the number of bits that the word must be rotated left in order
03088   // to bring the field to the top bits of a GR32.
03089   SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
03090                                  DAG.getConstant(3, DL, PtrVT));
03091   BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
03092 
03093   // Get the complementing shift amount, for rotating a field in the top
03094   // bits back to its proper position.
03095   SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
03096                                     DAG.getConstant(0, DL, WideVT), BitShift);
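  // For example, for a 16-bit field at byte offset 2 within its word
  // (Addr % 4 == 2), BitShift is 16: rotating the containing word left by
  // 16 brings the field to bits 31:16, and NegBitShift (-16, equivalent to
  // a left rotate by 16 modulo 32) rotates it back down.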
03097 
03098   // Extend the source operand to 32 bits and prepare it for the inner loop.
03099   // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
03100   // operations require the source to be shifted in advance.  (This shift
03101   // can be folded if the source is constant.)  For AND and NAND, the lower
03102   // bits must be set, while for other opcodes they should be left clear.
03103   if (Opcode != SystemZISD::ATOMIC_SWAPW)
03104     Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
03105                        DAG.getConstant(32 - BitSize, DL, WideVT));
03106   if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
03107       Opcode == SystemZISD::ATOMIC_LOADW_NAND)
03108     Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
03109                        DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
03110 
03111   // Construct the ATOMIC_LOADW_* node.
03112   SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
03113   SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
03114                     DAG.getConstant(BitSize, DL, WideVT) };
03115   SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
03116                                              NarrowVT, MMO);
03117 
03118   // Rotate the result of the final CS so that the field is in the lower
03119   // bits of a GR32, then truncate it.
03120   SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
03121                                     DAG.getConstant(BitSize, DL, WideVT));
03122   SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
03123 
03124   SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
03125   return DAG.getMergeValues(RetOps, DL);
03126 }
03127 
03128 // Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
03129 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
03130 // operations into additions.
03131 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
03132                                                     SelectionDAG &DAG) const {
03133   auto *Node = cast<AtomicSDNode>(Op.getNode());
03134   EVT MemVT = Node->getMemoryVT();
03135   if (MemVT == MVT::i32 || MemVT == MVT::i64) {
03136     // A full-width operation.
03137     assert(Op.getValueType() == MemVT && "Mismatched VTs");
03138     SDValue Src2 = Node->getVal();
03139     SDValue NegSrc2;
03140     SDLoc DL(Src2);
03141 
03142     if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
03143       // Use an addition if the operand is constant and either LAA(G) is
03144       // available or the negative value is in the range of A(G)FHI.
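      // For example, an atomic subtraction of 1 becomes an atomic addition
      // of -1, which the add-immediate forms can encode directly.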
03145       int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
03146       if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
03147         NegSrc2 = DAG.getConstant(Value, DL, MemVT);
03148     } else if (Subtarget.hasInterlockedAccess1())
03149       // Use LAA(G) if available.
03150       NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
03151                             Src2);
03152 
03153     if (NegSrc2.getNode())
03154       return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
03155                            Node->getChain(), Node->getBasePtr(), NegSrc2,
03156                            Node->getMemOperand(), Node->getOrdering(),
03157                            Node->getSynchScope());
03158 
03159     // Use the node as-is.
03160     return Op;
03161   }
03162 
03163   return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
03164 }
03165 
03166 // Op is an 8-, 16- or 32-bit ATOMIC_CMP_SWAP operation.  Lower the first
03167 // two into a fullword ATOMIC_CMP_SWAPW operation.
03168 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
03169                                                     SelectionDAG &DAG) const {
03170   auto *Node = cast<AtomicSDNode>(Op.getNode());
03171 
03172   // We have native support for 32-bit compare and swap.
03173   EVT NarrowVT = Node->getMemoryVT();
03174   EVT WideVT = MVT::i32;
03175   if (NarrowVT == WideVT)
03176     return Op;
03177 
03178   int64_t BitSize = NarrowVT.getSizeInBits();
03179   SDValue ChainIn = Node->getOperand(0);
03180   SDValue Addr = Node->getOperand(1);
03181   SDValue CmpVal = Node->getOperand(2);
03182   SDValue SwapVal = Node->getOperand(3);
03183   MachineMemOperand *MMO = Node->getMemOperand();
03184   SDLoc DL(Node);
03185   EVT PtrVT = Addr.getValueType();
03186 
03187   // Get the address of the containing word.
03188   SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
03189                                     DAG.getConstant(-4, DL, PtrVT));
03190 
03191   // Get the number of bits that the word must be rotated left in order
03192   // to bring the field to the top bits of a GR32.
03193   SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
03194                                  DAG.getConstant(3, DL, PtrVT));
03195   BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
03196 
03197   // Get the complementing shift amount, for rotating a field in the top
03198   // bits back to its proper position.
03199   SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
03200                                     DAG.getConstant(0, DL, WideVT), BitShift);
03201 
03202   // Construct the ATOMIC_CMP_SWAPW node.
03203   SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
03204   SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
03205                     NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
03206   SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
03207                                              VTList, Ops, NarrowVT, MMO);
03208   return AtomicOp;
03209 }
03210 
03211 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
03212                                               SelectionDAG &DAG) const {
03213   MachineFunction &MF = DAG.getMachineFunction();
03214   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
03215   return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
03216                             SystemZ::R15D, Op.getValueType());
03217 }
03218 
03219 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
03220                                                  SelectionDAG &DAG) const {
03221   MachineFunction &MF = DAG.getMachineFunction();
03222   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
03223   return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
03224                           SystemZ::R15D, Op.getOperand(1));
03225 }
03226 
03227 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
03228                                              SelectionDAG &DAG) const {
03229   bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
03230   if (!IsData)
03231     // Just preserve the chain.
03232     return Op.getOperand(0);
03233 
03234   SDLoc DL(Op);
03235   bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
03236   unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
03237   auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
03238   SDValue Ops[] = {
03239     Op.getOperand(0),
03240     DAG.getConstant(Code, DL, MVT::i32),
03241     Op.getOperand(1)
03242   };
03243   return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
03244                                  Node->getVTList(), Ops,
03245                                  Node->getMemoryVT(), Node->getMemOperand());
03246 }
03247 
03248 // Return an i32 that contains the value of CC immediately after After,
03249 // whose final value must be MVT::Glue.
03250 static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
03251   SDLoc DL(After);
03252   SDValue Glue = SDValue(After, After->getNumValues() - 1);
03253   SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
03254   return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
03255                      DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
03256 }
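// IPM places the 2-bit condition code in bits 29:28 of its result, so
// shifting right by SystemZ::IPM_CC (28) leaves CC in bits 1:0 of the i32.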
03257 
03258 SDValue
03259 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
03260                                               SelectionDAG &DAG) const {
03261   unsigned Opcode, CCValid;
03262   if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
03263     assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
03264     SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
03265     SDValue CC = getCCResult(DAG, Glued.getNode());
03266     DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
03267     return SDValue();
03268   }
03269 
03270   return SDValue();
03271 }
03272 
03273 SDValue
03274 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
03275                                                SelectionDAG &DAG) const {
03276   unsigned Opcode, CCValid;
03277   if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
03278     SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
03279     SDValue CC = getCCResult(DAG, Glued.getNode());
03280     if (Op->getNumValues() == 1)
03281       return CC;
03282     assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
03283     return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued,
03284                        CC);
03285   }
03286 
03287   unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
03288   switch (Id) {
03289   case Intrinsic::s390_vpdi:
03290     return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
03291                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
03292 
03293   case Intrinsic::s390_vperm:
03294     return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
03295                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
03296 
03297   case Intrinsic::s390_vuphb:
03298   case Intrinsic::s390_vuphh:
03299   case Intrinsic::s390_vuphf:
03300     return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
03301                        Op.getOperand(1));
03302 
03303   case Intrinsic::s390_vuplhb:
03304   case Intrinsic::s390_vuplhh:
03305   case Intrinsic::s390_vuplhf:
03306     return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
03307                        Op.getOperand(1));
03308 
03309   case Intrinsic::s390_vuplb:
03310   case Intrinsic::s390_vuplhw:
03311   case Intrinsic::s390_vuplf:
03312     return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
03313                        Op.getOperand(1));
03314 
03315   case Intrinsic::s390_vupllb:
03316   case Intrinsic::s390_vupllh:
03317   case Intrinsic::s390_vupllf:
03318     return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
03319                        Op.getOperand(1));
03320 
03321   case Intrinsic::s390_vsumb:
03322   case Intrinsic::s390_vsumh:
03323   case Intrinsic::s390_vsumgh:
03324   case Intrinsic::s390_vsumgf:
03325   case Intrinsic::s390_vsumqf:
03326   case Intrinsic::s390_vsumqg:
03327     return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
03328                        Op.getOperand(1), Op.getOperand(2));
03329   }
03330 
03331   return SDValue();
03332 }
03333 
03334 namespace {
03335 // Says that SystemZISD operation Opcode can be used to perform the equivalent
03336 // of a VPERM with permute vector Bytes.  If Opcode takes three operands,
03337 // Operand is the constant third operand, otherwise it is the number of
03338 // bytes in each element of the result.
03339 struct Permute {
03340   unsigned Opcode;
03341   unsigned Operand;
03342   unsigned char Bytes[SystemZ::VectorBytes];
03343 };
03344 }
03345 
03346 static const Permute PermuteForms[] = {
03347   // VMRHG
03348   { SystemZISD::MERGE_HIGH, 8,
03349     { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
03350   // VMRHF
03351   { SystemZISD::MERGE_HIGH, 4,
03352     { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
03353   // VMRHH
03354   { SystemZISD::MERGE_HIGH, 2,
03355     { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
03356   // VMRHB
03357   { SystemZISD::MERGE_HIGH, 1,
03358     { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
03359   // VMRLG
03360   { SystemZISD::MERGE_LOW, 8,
03361     { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
03362   // VMRLF
03363   { SystemZISD::MERGE_LOW, 4,
03364     { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
03365   // VMRLH
03366   { SystemZISD::MERGE_LOW, 2,
03367     { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
03368   // VMRLB
03369   { SystemZISD::MERGE_LOW, 1,
03370     { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
03371   // VPKG
03372   { SystemZISD::PACK, 4,
03373     { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
03374   // VPKF
03375   { SystemZISD::PACK, 2,
03376     { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
03377   // VPKH
03378   { SystemZISD::PACK, 1,
03379     { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
03380   // VPDI V1, V2, 4  (low half of V1, high half of V2)
03381   { SystemZISD::PERMUTE_DWORDS, 4,
03382     { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
03383   // VPDI V1, V2, 1  (high half of V1, low half of V2)
03384   { SystemZISD::PERMUTE_DWORDS, 1,
03385     { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
03386 };
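// In these permute vectors, selectors 0-15 refer to bytes of the first
// operand and 16-31 to bytes of the second, as in VPERM itself.  For
// example, the VMRHG entry merges the high (leftmost) doublewords:
// bytes 0-7 of operand 0 followed by bytes 0-7 of operand 1.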
03387 
03388 // Called after matching a vector shuffle against a particular pattern.
03389 // Both the original shuffle and the pattern have two vector operands.
03390 // OpNos[0] is the operand of the original shuffle that should be used for
03391 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
03392 // OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
03393 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
03394 // for operands 0 and 1 of the pattern.
03395 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
03396   if (OpNos[0] < 0) {
03397     if (OpNos[1] < 0)
03398       return false;
03399     OpNo0 = OpNo1 = OpNos[1];
03400   } else if (OpNos[1] < 0) {
03401     OpNo0 = OpNo1 = OpNos[0];
03402   } else {
03403     OpNo0 = OpNos[0];
03404     OpNo1 = OpNos[1];
03405   }
03406   return true;
03407 }
03408 
03409 // Bytes is a VPERM-like permute vector, except that -1 is used for
03410 // undefined bytes.  Return true if the VPERM can be implemented using P.
03411 // When returning true set OpNo0 to the VPERM operand that should be
03412 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
03413 //
03414 // For example, if swapping the VPERM operands allows P to match, OpNo0
03415 // will be 1 and OpNo1 will be 0.  If instead Bytes only refers to one
03416 // operand, but rewriting it to use two duplicated operands allows it to
03417 // match P, then OpNo0 and OpNo1 will be the same.
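// For example, Bytes == { 16, ..., 23, 0, ..., 7 } matches the VMRHG entry
// with the operands swapped, giving OpNo0 == 1 and OpNo1 == 0.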
03418 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
03419                          unsigned &OpNo0, unsigned &OpNo1) {
03420   int OpNos[] = { -1, -1 };
03421   for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
03422     int Elt = Bytes[I];
03423     if (Elt >= 0) {
03424       // Make sure that the two permute vectors use the same suboperand
03425       // byte number.  Only the operand numbers (the high bits) are
03426       // allowed to differ.
03427       if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
03428         return false;
03429       int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
03430       int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
03431       // Make sure that the operand mappings are consistent with previous
03432       // elements.
03433       if (OpNos[ModelOpNo] == 1 - RealOpNo)
03434         return false;
03435       OpNos[ModelOpNo] = RealOpNo;
03436     }
03437   }
03438   return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
03439 }
03440 
03441 // As above, but search for a matching permute.
03442 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
03443                                    unsigned &OpNo0, unsigned &OpNo1) {
03444   for (auto &P : PermuteForms)
03445     if (matchPermute(Bytes, P, OpNo0, OpNo1))
03446       return &P;
03447   return nullptr;
03448 }
03449 
03450 // Bytes is a VPERM-like permute vector, except that -1 is used for
03451 // undefined bytes.  This permute is an operand of an outer permute.
03452 // See whether redistributing the -1 bytes gives a shuffle that can be
03453 // implemented using P.  If so, set Transform to a VPERM-like permute vector
03454 // that, when applied to the result of P, gives the original permute in Bytes.
03455 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
03456                                const Permute &P,
03457                                SmallVectorImpl<int> &Transform) {
03458   unsigned To = 0;
03459   for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
03460     int Elt = Bytes[From];
03461     if (Elt < 0)
03462       // Byte number From of the result is undefined.
03463       Transform[From] = -1;
03464     else {
03465       while (P.Bytes[To] != Elt) {
03466         To += 1;
03467         if (To == SystemZ::VectorBytes)
03468           return false;
03469       }
03470       Transform[From] = To;
03471     }
03472   }
03473   return true;
03474 }
03475 
03476 // As above, but search for a matching permute.
03477 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
03478                                          SmallVectorImpl<int> &Transform) {
03479   for (auto &P : PermuteForms)
03480     if (matchDoublePermute(Bytes, P, Transform))
03481       return &P;
03482   return nullptr;
03483 }
03484 
03485 // Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
03486 // as if it had type vNi8.
03487 static void getVPermMask(ShuffleVectorSDNode *VSN,
03488                          SmallVectorImpl<int> &Bytes) {
03489   EVT VT = VSN->getValueType(0);
03490   unsigned NumElements = VT.getVectorNumElements();
03491   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
03492   Bytes.resize(NumElements * BytesPerElement, -1);
03493   for (unsigned I = 0; I < NumElements; ++I) {
03494     int Index = VSN->getMaskElt(I);
03495     if (Index >= 0)
03496       for (unsigned J = 0; J < BytesPerElement; ++J)
03497         Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
03498   }
03499 }
03500 
03501 // Bytes is a VPERM-like permute vector, except that -1 is used for
03502 // undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
03503 // the result come from a contiguous sequence of bytes from one input.
03504 // Set Base to the selector for the first byte if so.
03505 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
03506                             unsigned BytesPerElement, int &Base) {
03507   Base = -1;
03508   for (unsigned I = 0; I < BytesPerElement; ++I) {
03509     if (Bytes[Start + I] >= 0) {
03510       unsigned Elem = Bytes[Start + I];
03511       if (Base < 0) {
03512         Base = Elem - I;
03513         // Make sure the bytes would come from one input operand.
03514         if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
03515           return false;
03516       } else if (unsigned(Base) != Elem - I)
03517         return false;
03518     }
03519   }
03520   return true;
03521 }
03522 
03523 // Bytes is a VPERM-like permute vector, except that -1 is used for
03524 // undefined bytes.  Return true if it can be performed using VSLDI.
03525 // When returning true, set StartIndex to the shift amount and OpNo0
03526 // and OpNo1 to the VPERM operands that should be used as the first
03527 // and second shift operand respectively.
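// For example, Bytes == { 1, 2, ..., 16 } selects bytes 1 to 16 of the
// 32-byte concatenation Op0:Op1, and so matches with StartIndex == 1,
// OpNo0 == 0 and OpNo1 == 1.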
03528 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
03529                                unsigned &StartIndex, unsigned &OpNo0,
03530                                unsigned &OpNo1) {
03531   int OpNos[] = { -1, -1 };
03532   int Shift = -1;
03533   for (unsigned I = 0; I < 16; ++I) {
03534     int Index = Bytes[I];
03535     if (Index >= 0) {
03536       int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
03537       int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
03538       int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
03539       if (Shift < 0)
03540         Shift = ExpectedShift;
03541       else if (Shift != ExpectedShift)
03542         return false;
03543       // Make sure that the operand mappings are consistent with previous
03544       // elements.
03545       if (OpNos[ModelOpNo] == 1 - RealOpNo)
03546         return false;
03547       OpNos[ModelOpNo] = RealOpNo;
03548     }
03549   }
03550   StartIndex = Shift;
03551   return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
03552 }
03553 
03554 // Create a node that performs P on operands Op0 and Op1, casting the
03555 // operands to the appropriate type.  The type of the result is determined by P.
03556 static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
03557                               const Permute &P, SDValue Op0, SDValue Op1) {
03558   // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
03559   // elements of a PACK are twice as wide as the outputs.
03560   unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
03561                       P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
03562                       P.Operand);
03563   // Cast both operands to the appropriate type.
03564   MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
03565                               SystemZ::VectorBytes / InBytes);
03566   Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
03567   Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
03568   SDValue Op;
03569   if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
03570     SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
03571     Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
03572   } else if (P.Opcode == SystemZISD::PACK) {
03573     MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
03574                                  SystemZ::VectorBytes / P.Operand);
03575     Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
03576   } else {
03577     Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
03578   }
03579   return Op;
03580 }
03581 
03582 // Bytes is a VPERM-like permute vector, except that -1 is used for
03583 // undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
03584 // VSLDI or VPERM.
03585 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
03586                                      const SmallVectorImpl<int> &Bytes) {
03587   for (unsigned I = 0; I < 2; ++I)
03588     Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
03589 
03590   // First see whether VSLDI can be used.
03591   unsigned StartIndex, OpNo0, OpNo1;
03592   if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
03593     return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
03594                        Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
03595 
03596   // Fall back on VPERM.  Construct an SDNode for the permute vector.
03597   SDValue IndexNodes[SystemZ::VectorBytes];
03598   for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
03599     if (Bytes[I] >= 0)
03600       IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
03601     else
03602       IndexNodes[I] = DAG.getUNDEF(MVT::i32);
03603   SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
03604   return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
03605 }
03606 
03607 namespace {
03608 // Describes a general N-operand vector shuffle.
03609 struct GeneralShuffle {
03610   GeneralShuffle(EVT vt) : VT(vt) {}
03611   void addUndef();
03612   void add(SDValue, unsigned);
03613   SDValue getNode(SelectionDAG &, SDLoc);
03614 
03615   // The operands of the shuffle.
03616   SmallVector<SDValue, SystemZ::VectorBytes> Ops;
03617 
03618   // Index I is -1 if byte I of the result is undefined.  Otherwise the
03619   // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
03620   // Bytes[I] / SystemZ::VectorBytes.
03621   SmallVector<int, SystemZ::VectorBytes> Bytes;
03622 
03623   // The type of the shuffle result.
03624   EVT VT;
03625 };
03626 }
03627 
03628 // Add an extra undefined element to the shuffle.
03629 void GeneralShuffle::addUndef() {
03630   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
03631   for (unsigned I = 0; I < BytesPerElement; ++I)
03632     Bytes.push_back(-1);
03633 }
03634 
03635 // Add an extra element to the shuffle, taking it from element Elem of Op.
03636 // A null Op indicates a vector input whose value will be calculated later;
03637 // there is at most one such input per shuffle and it always has the same
03638 // type as the result.
03639 void GeneralShuffle::add(SDValue Op, unsigned Elem) {
03640   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
03641 
03642   // The source vector can have wider elements than the result,
03643   // either through an explicit TRUNCATE or because of type legalization.
03644   // We want the least significant part.
03645   EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
03646   unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
03647   assert(FromBytesPerElement >= BytesPerElement &&
03648          "Invalid EXTRACT_VECTOR_ELT");
03649   unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
03650                    (FromBytesPerElement - BytesPerElement));
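  // For example, adding element 1 of a v4i32 operand to a v8i16 shuffle
  // (FromBytesPerElement == 4, BytesPerElement == 2) gives Byte == 4 + 2
  // == 6: the least significant (big-endian: last) half of the second i32
  // element starts at byte 6.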
03651 
03652   // Look through things like shuffles and bitcasts.
03653   while (Op.getNode()) {
03654     if (Op.getOpcode() == ISD::BITCAST)
03655       Op = Op.getOperand(0);
03656     else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
03657       // See whether the bytes we need come from a contiguous part of one
03658       // operand.
03659       SmallVector<int, SystemZ::VectorBytes> OpBytes;
03660       getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
03661       int NewByte;
03662       if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
03663         break;
03664       if (NewByte < 0) {
03665         addUndef();
03666         return;
03667       }
03668       Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
03669       Byte = unsigned(NewByte) % SystemZ::VectorBytes;
03670     } else if (Op.getOpcode() == ISD::UNDEF) {
03671       addUndef();
03672       return;
03673     } else
03674       break;
03675   }
03676 
03677   // Make sure that the source of the extraction is in Ops.
03678   unsigned OpNo = 0;
03679   for (; OpNo < Ops.size(); ++OpNo)
03680     if (Ops[OpNo] == Op)
03681       break;
03682   if (OpNo == Ops.size())
03683     Ops.push_back(Op);
03684 
03685   // Add the element to Bytes.
03686   unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
03687   for (unsigned I = 0; I < BytesPerElement; ++I)
03688     Bytes.push_back(Base + I);
03689 }
03690 
03691 // Return SDNodes for the completed shuffle.
03692 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
03693   assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
03694 
03695   if (Ops.size() == 0)
03696     return DAG.getUNDEF(VT);
03697 
03698   // Make sure that there are at least two shuffle operands.
03699   if (Ops.size() == 1)
03700     Ops.push_back(DAG.getUNDEF(MVT::v16i8));
03701 
03702   // Create a tree of shuffles, deferring root node until after the loop.
03703   // Try to redistribute the undefined elements of non-root nodes so that
03704   // the non-root shuffles match something like a pack or merge, then adjust
03705   // the parent node's permute vector to compensate for the new order.
03706   // Among other things, this copes with vectors like <2 x i16> that were
03707   // padded with undefined elements during type legalization.
03708   //
03709   // In the best case this redistribution will lead to the whole tree
03710   // using packs and merges.  It should rarely be a loss in other cases.
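  // For example, with four operands the first pass combines Ops[0] with
  // Ops[1] into Ops[0] and Ops[2] with Ops[3] into Ops[2]; the root node
  // then combines Ops[0] with Ops[2] (moved into Ops[1] below).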
03711   unsigned Stride = 1;
03712   for (; Stride * 2 < Ops.size(); Stride *= 2) {
03713     for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
03714       SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
03715 
03716       // Create a mask for just these two operands.
03717       SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
03718       for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
03719         unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
03720         unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
03721         if (OpNo == I)
03722           NewBytes[J] = Byte;
03723         else if (OpNo == I + Stride)
03724           NewBytes[J] = SystemZ::VectorBytes + Byte;
03725         else
03726           NewBytes[J] = -1;
03727       }
03728       // See if it would be better to reorganize NewBytes to avoid using VPERM.
03729       SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
03730       if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
03731         Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
03732         // Applying NewBytesMap to Ops[I] gets back to NewBytes.
03733         for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
03734           if (NewBytes[J] >= 0) {
03735             assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
03736                    "Invalid double permute");
03737             Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
03738           } else
03739             assert(NewBytesMap[J] < 0 && "Invalid double permute");
03740         }
03741       } else {
03742         // Just use NewBytes on the operands.
03743         Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
03744         for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
03745           if (NewBytes[J] >= 0)
03746             Bytes[J] = I * SystemZ::VectorBytes + J;
03747       }
03748     }
03749   }
03750 
03751   // Now we just have 2 inputs.  Put the second operand in Ops[1].
03752   if (Stride > 1) {
03753     Ops[1] = Ops[Stride];
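          // Rebase byte indices that referred to Ops[Stride] so that they
          // refer to Ops[1] instead.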
03754     for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
03755       if (Bytes[I] >= int(SystemZ::VectorBytes))
03756         Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
03757   }
03758 
03759   // Look for an instruction that can do the permute without resorting
03760   // to VPERM.
03761   unsigned OpNo0, OpNo1;
03762   SDValue Op;
03763   if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
03764     Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
03765   else
03766     Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
03767   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
03768 }
03769 
03770 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
03771 static bool isScalarToVector(SDValue Op) {
03772   for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
03773     if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
03774       return false;
03775   return true;
03776 }
03777 
03778 // Return a vector of type VT that contains Value in the first element.
03779 // The other elements don't matter.
03780 static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
03781                                    SDValue Value) {
03782   // If we have a constant, replicate it to all elements and let the
03783   // BUILD_VECTOR lowering take care of it.
03784   if (Value.getOpcode() == ISD::Constant ||
03785       Value.getOpcode() == ISD::ConstantFP) {
03786     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
03787     return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
03788   }
03789   if (Value.getOpcode() == ISD::UNDEF)
03790     return DAG.getUNDEF(VT);
03791   return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
03792 }
03793 
03794 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
03795 // element 1.  Used for cases in which replication is cheap.
03796 static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
03797                                  SDValue Op0, SDValue Op1) {
03798   if (Op0.getOpcode() == ISD::UNDEF) {
03799     if (Op1.getOpcode() == ISD::UNDEF)
03800       return DAG.getUNDEF(VT);
03801     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
03802   }
03803   if (Op1.getOpcode() == ISD::UNDEF)
03804     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
03805   return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
03806                      buildScalarToVector(DAG, DL, VT, Op0),
03807                      buildScalarToVector(DAG, DL, VT, Op1));
03808 }
03809 
03810 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
03811 // vector for them.
03812 static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
03813                           SDValue Op1) {
03814   if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
03815     return DAG.getUNDEF(MVT::v2i64);
03816   // If one of the two inputs is undefined then replicate the other one,
03817   // in order to avoid using another register unnecessarily.
03818   if (Op0.getOpcode() == ISD::UNDEF)
03819     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
03820   else if (Op1.getOpcode() == ISD::UNDEF)
03821     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
03822   else {
03823     Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
03824     Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
03825   }
03826   return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
03827 }
03828 
03829 // Try to represent constant BUILD_VECTOR node BVN using a
03830 // SystemZISD::BYTE_MASK-style mask.  Store the mask value in Mask
03831 // on success.
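      // For example, the v4i32 constant <0xff000000, 0, 0, 0xffffffff>
      // yields Mask == 0x800f: bit 15 for the high byte of element 0 and
      // bits 0-3 for the four bytes of element 3.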
03832 static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
03833   EVT ElemVT = BVN->getValueType(0).getVectorElementType();
03834   unsigned BytesPerElement = ElemVT.getStoreSize();
03835   for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
03836     SDValue Op = BVN->getOperand(I);
03837     if (Op.getOpcode() != ISD::UNDEF) {
03838       uint64_t Value;
03839       if (Op.getOpcode() == ISD::Constant)
03840         Value = cast<ConstantSDNode>(Op)->getZExtValue();
03841       else if (Op.getOpcode() == ISD::ConstantFP)
03842         Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
03843                  .getZExtValue());
03844       else
03845         return false;
03846       for (unsigned J = 0; J < BytesPerElement; ++J) {
03847         uint64_t Byte = (Value >> (J * 8)) & 0xff;
03848         if (Byte == 0xff)
03849           Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
03850         else if (Byte != 0)
03851           return false;
03852       }
03853     }
03854   }
03855   return true;
03856 }
03857 
03858 // Try to load a vector constant in which BitsPerElement-bit value Value
03859 // is replicated to fill the vector.  VT is the type of the resulting
03860 // constant, which may have elements of a different size from BitsPerElement.
03861 // Return the SDValue of the constant on success, otherwise return
03862 // an empty value.
03863 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
03864                                        const SystemZInstrInfo *TII,
03865                                        SDLoc DL, EVT VT, uint64_t Value,
03866                                        unsigned BitsPerElement) {
03867   // Signed 16-bit values can be replicated using VREPI.
03868   int64_t SignedValue = SignExtend64(Value, BitsPerElement);
03869   if (isInt<16>(SignedValue)) {
03870     MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
03871                                  SystemZ::VectorBits / BitsPerElement);
03872     SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
03873                              DAG.getConstant(SignedValue, DL, MVT::i32));
03874     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
03875   }
03876   // See whether rotating the constant left some N places gives a value that
03877   // is one less than a power of 2 (i.e. all zeros followed by all ones).
03878   // If so we can use VGM.
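        // For example, rotating the 32-bit value 0x00fff000 left by 20 gives
        // 0x00000fff, so it can be generated as a ROTATE_MASK with Start == 8
        // and End == 19 (MSB-0 bit numbering).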
03879   unsigned Start, End;
03880   if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
03881     // isRxSBGMask returns the bit numbers for a full 64-bit value,
03882     // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to
03883     // bit numbers for a BitsPerElement value, so that 0 denotes
03884     // 1 << (BitsPerElement-1).
03885     Start -= 64 - BitsPerElement;
03886     End -= 64 - BitsPerElement;
03887     MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
03888                                  SystemZ::VectorBits / BitsPerElement);
03889     SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
03890                              DAG.getConstant(Start, DL, MVT::i32),
03891                              DAG.getConstant(End, DL, MVT::i32));
03892     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
03893   }
03894   return SDValue();
03895 }
03896 
03897 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
03898 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
03899 // the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
03900 // would benefit from this representation and return it if so.
03901 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
03902                                      BuildVectorSDNode *BVN) {
03903   EVT VT = BVN->getValueType(0);
03904   unsigned NumElements = VT.getVectorNumElements();
03905 
03906   // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
03907   // on byte vectors.  If there are non-EXTRACT_VECTOR_ELT elements that still
03908   // need a BUILD_VECTOR, add an additional placeholder operand for that
03909   // BUILD_VECTOR and store its operands in ResidueOps.
03910   GeneralShuffle GS(VT);
03911   SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
03912   bool FoundOne = false;
03913   for (unsigned I = 0; I < NumElements; ++I) {
03914     SDValue Op = BVN->getOperand(I);
03915     if (Op.getOpcode() == ISD::TRUNCATE)
03916       Op = Op.getOperand(0);
03917     if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
03918         Op.getOperand(1).getOpcode() == ISD::Constant) {
03919       unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
03920       GS.add(Op.getOperand(0), Elem);
03921       FoundOne = true;
03922     } else if (Op.getOpcode() == ISD::UNDEF) {
03923       GS.addUndef();
03924     } else {
03925       GS.add(SDValue(), ResidueOps.size());
03926       ResidueOps.push_back(BVN->getOperand(I));
03927     }
03928   }
03929 
03930   // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
03931   if (!FoundOne)
03932     return SDValue();
03933 
03934   // Create the BUILD_VECTOR for the remaining elements, if any.
03935   if (!ResidueOps.empty()) {
03936     while (ResidueOps.size() < NumElements)
03937       ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
03938     for (auto &Op : GS.Ops) {
03939       if (!Op.getNode()) {
03940         Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
03941         break;
03942       }
03943     }
03944   }
03945   return GS.getNode(DAG, SDLoc(BVN));
03946 }
03947 
03948 // Combine scalar values Elems into a vector of type VT.
03949 static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
03950                            SmallVectorImpl<SDValue> &Elems) {
03951   // See whether there is a single replicated value.
03952   SDValue Single;
03953   unsigned int NumElements = Elems.size();
03954   unsigned int Count = 0;
03955   for (auto Elem : Elems) {
03956     if (Elem.getOpcode() != ISD::UNDEF) {
03957       if (!Single.getNode())
03958         Single = Elem;
03959       else if (Elem != Single) {
03960         Single = SDValue();
03961         break;
03962       }
03963       Count += 1;
03964     }
03965   }
03966   // There are three cases here:
03967   //
03968   // - if the only defined element is a loaded one, the best sequence
03969   //   is a replicating load.
03970   //
03971   // - otherwise, if the only defined element is an i64 value, we will
03972   //   end up with the same VLVGP sequence regardless of whether we short-cut
03973   //   for replication or fall through to the later code.
03974   //
03975   // - otherwise, if the only defined element is an i32 or smaller value,
03976   //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
03977   //   This is only a win if the single defined element is used more than once.
03978   //   In other cases we're better off using a single VLVGx.
03979   if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
03980     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
03981 
03982   // The best way of building a v2i64 from two i64s is to use VLVGP.
03983   if (VT == MVT::v2i64)
03984     return joinDwords(DAG, DL, Elems[0], Elems[1]);
03985 
03986   // Use a 64-bit merge high to combine two doubles.
03987   if (VT == MVT::v2f64)
03988     return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
03989 
03990   // Build v4f32 values directly from the FPRs:
03991   //
03992   //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
03993   //         V              V         VMRHF
03994   //      <ABxx>         <CDxx>
03995   //                V                 VMRHG
03996   //              <ABCD>
03997   if (VT == MVT::v4f32) {
03998     SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
03999     SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
04000     // Avoid unnecessary undefs by reusing the other operand.
04001     if (Op01.getOpcode() == ISD::UNDEF)
04002       Op01 = Op23;
04003     else if (Op23.getOpcode() == ISD::UNDEF)
04004       Op23 = Op01;
04005     // Merging identical replications is a no-op.
04006     if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
04007       return Op01;
04008     Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
04009     Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
04010     SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
04011                              DL, MVT::v2i64, Op01, Op23);
04012     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
04013   }
04014 
04015   // Collect the constant terms.
04016   SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
04017   SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
04018 
04019   unsigned NumConstants = 0;
04020   for (unsigned I = 0; I < NumElements; ++I) {
04021     SDValue Elem = Elems[I];
04022     if (Elem.getOpcode() == ISD::Constant ||
04023         Elem.getOpcode() == ISD::ConstantFP) {
04024       NumConstants += 1;
04025       Constants[I] = Elem;
04026       Done[I] = true;
04027     }
04028   }
04029   // If there was at least one constant, fill in the other elements of
04030   // Constants with undefs to get a full vector constant and use that
04031   // as the starting point.
04032   SDValue Result;
04033   if (NumConstants > 0) {
04034     for (unsigned I = 0; I < NumElements; ++I)
04035       if (!Constants[I].getNode())
04036         Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
04037     Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
04038   } else {
04039     // Otherwise try to use VLVGP to start the sequence in order to
04040     // avoid a false dependency on any previous contents of the vector
04041     // register.  This only makes sense if one of the associated elements
04042     // is defined.
04043     unsigned I1 = NumElements / 2 - 1;
04044     unsigned I2 = NumElements - 1;
04045     bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
04046     bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
04047     if (Def1 || Def2) {
04048       SDValue Elem1 = Elems[Def1 ? I1 : I2];
04049       SDValue Elem2 = Elems[Def2 ? I2 : I1];
04050       Result = DAG.getNode(ISD::BITCAST, DL, VT,
04051                            joinDwords(DAG, DL, Elem1, Elem2));
04052       Done[I1] = true;
04053       Done[I2] = true;
04054     } else
04055       Result = DAG.getUNDEF(VT);
04056   }
04057 
04058   // Use VLVGx to insert the other elements.
04059   for (unsigned I = 0; I < NumElements; ++I)
04060     if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
04061       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
04062                            DAG.getConstant(I, DL, MVT::i32));
04063   return Result;
04064 }
04065 
04066 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
04067                                                  SelectionDAG &DAG) const {
04068   const SystemZInstrInfo *TII =
04069     static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
04070   auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
04071   SDLoc DL(Op);
04072   EVT VT = Op.getValueType();
04073 
04074   if (BVN->isConstant()) {
04075     // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
04076     // preferred way of creating all-zero and all-one vectors so give it
04077     // priority over other methods below.
04078     uint64_t Mask = 0;
04079     if (tryBuildVectorByteMask(BVN, Mask)) {
04080       SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
04081                                DAG.getConstant(Mask, DL, MVT::i32));
04082       return DAG.getNode(ISD::BITCAST, DL, VT, Op);
04083     }
04084 
04085     // Try using some form of replication.
04086     APInt SplatBits, SplatUndef;
04087     unsigned SplatBitSize;
04088     bool HasAnyUndefs;
04089     if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
04090                              8, true) &&
04091         SplatBitSize <= 64) {
04092       // First try assuming that any undefined bits above the highest set bit
04093       // and below the lowest set bit are 1s.  This increases the likelihood of
04094       // being able to use a sign-extended element value in VECTOR REPLICATE
04095       // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
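            // The all-zero case was already handled by the BYTE_MASK code
            // above, so SplatBitsZ is known to be nonzero here and
            // findFirstSet and findLastSet are well defined.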
04096       uint64_t SplatBitsZ = SplatBits.getZExtValue();
04097       uint64_t SplatUndefZ = SplatUndef.getZExtValue();
04098       uint64_t Lower = (SplatUndefZ
04099                         & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
04100       uint64_t Upper = (SplatUndefZ
04101                         & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
04102       uint64_t Value = SplatBitsZ | Upper | Lower;
04103       SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
04104                                            SplatBitSize);
04105       if (Op.getNode())
04106         return Op;
04107 
04108       // Now try assuming that any undefined bits between the first and
04109       // last defined set bits are set.  This increases the chances of
04110       // using a non-wraparound mask.
04111       uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
04112       Value = SplatBitsZ | Middle;
04113       Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
04114       if (Op.getNode())
04115         return Op;
04116     }
04117 
04118     // Fall back to loading it from memory.
04119     return SDValue();
04120   }
04121 
04122   // See if we should use shuffles to construct the vector from other vectors.
04123   SDValue Res = tryBuildVectorShuffle(DAG, BVN);
04124   if (Res.getNode())
04125     return Res;
04126 
04127   // Detect SCALAR_TO_VECTOR conversions.
04128   if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
04129     return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
04130 
04131   // Otherwise use buildVector to build the vector up from GPRs.
04132   unsigned NumElements = Op.getNumOperands();
04133   SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
04134   for (unsigned I = 0; I < NumElements; ++I)
04135     Ops[I] = Op.getOperand(I);
04136   return buildVector(DAG, DL, VT, Ops);
04137 }
04138 
04139 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
04140                                                    SelectionDAG &DAG) const {
04141   auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
04142   SDLoc DL(Op);
04143   EVT VT = Op.getValueType();
04144   unsigned NumElements = VT.getVectorNumElements();
04145 
04146   if (VSN->isSplat()) {
04147     SDValue Op0 = Op.getOperand(0);
04148     unsigned Index = VSN->getSplatIndex();
04149     assert(Index < VT.getVectorNumElements() &&
04150            "Splat index should be defined and in first operand");
04151     // See whether the value we're splatting is directly available as a scalar.
04152     if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
04153         Op0.getOpcode() == ISD::BUILD_VECTOR)
04154       return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
04155     // Otherwise keep it as a vector-to-vector operation.
04156     return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
04157                        DAG.getConstant(Index, DL, MVT::i32));
04158   }
04159 
04160   GeneralShuffle GS(VT);
04161   for (unsigned I = 0; I < NumElements; ++I) {
04162     int Elt = VSN->getMaskElt(I);
04163     if (Elt < 0)
04164       GS.addUndef();
04165     else
04166       GS.add(Op.getOperand(unsigned(Elt) / NumElements),
04167              unsigned(Elt) % NumElements);
04168   }
04169   return GS.getNode(DAG, SDLoc(VSN));
04170 }
04171 
04172 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
04173                                                      SelectionDAG &DAG) const {
04174   SDLoc DL(Op);
04175   // Just insert the scalar into element 0 of an undefined vector.
04176   return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
04177                      Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
04178                      Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
04179 }
04180 
04181 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
04182                                                       SelectionDAG &DAG) const {
04183   // Handle insertions of floating-point values.
04184   SDLoc DL(Op);
04185   SDValue Op0 = Op.getOperand(0);
04186   SDValue Op1 = Op.getOperand(1);
04187   SDValue Op2 = Op.getOperand(2);
04188   EVT VT = Op.getValueType();
04189 
04190   // Insertions into constant indices of a v2f64 can be done using VPDI.
04191   // However, if the inserted value is a bitcast or a constant then it's
04192   // better to use GPRs, as below.
04193   if (VT == MVT::v2f64 &&
04194       Op1.getOpcode() != ISD::BITCAST &&
04195       Op1.getOpcode() != ISD::ConstantFP &&
04196       Op2.getOpcode() == ISD::Constant) {
04197     uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
04198     unsigned Mask = VT.getVectorNumElements() - 1;
04199     if (Index <= Mask)
04200       return Op;
04201   }
04202 
04203   // Otherwise bitcast to the equivalent integer form and insert via a GPR.
04204   MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
04205   MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
04206   SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
04207                             DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
04208                             DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
04209   return DAG.getNode(ISD::BITCAST, DL, VT, Res);
04210 }
04211 
04212 SDValue
04213 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
04214                                                SelectionDAG &DAG) const {
04215   // Handle extractions of floating-point values.
04216   SDLoc DL(Op);
04217   SDValue Op0 = Op.getOperand(0);
04218   SDValue Op1 = Op.getOperand(1);
04219   EVT VT = Op.getValueType();
04220   EVT VecVT = Op0.getValueType();
04221 
04222   // Extractions of constant indices can be done directly.
04223   if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
04224     uint64_t Index = CIndexN->getZExtValue();
04225     unsigned Mask = VecVT.getVectorNumElements() - 1;
04226     if (Index <= Mask)
04227       return Op;
04228   }
04229 
04230   // Otherwise bitcast to the equivalent integer form and extract via a GPR.
04231   MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
04232   MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
04233   SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
04234                             DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
04235   return DAG.getNode(ISD::BITCAST, DL, VT, Res);
04236 }
04237 
04238 SDValue
04239 SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
04240                                               unsigned UnpackHigh) const {
04241   SDValue PackedOp = Op.getOperand(0);
04242   EVT OutVT = Op.getValueType();
04243   EVT InVT = PackedOp.getValueType();
04244   unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
04245   unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
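        // Repeatedly unpack the high half of the vector, doubling the element
        // size each time, until the elements are ToBits wide.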
04246   do {
04247     FromBits *= 2;
04248     EVT NewVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
04249                                  SystemZ::VectorBits / FromBits);
04250     PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), NewVT, PackedOp);
04251   } while (FromBits != ToBits);
04252   return PackedOp;
04253 }
04254 
04255 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
04256                                           unsigned ByScalar) const {
04257   // Look for cases where a vector shift can use the *_BY_SCALAR form.
04258   SDValue Op0 = Op.getOperand(0);
04259   SDValue Op1 = Op.getOperand(1);
04260   SDLoc DL(Op);
04261   EVT VT = Op.getValueType();
04262   unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
04263 
04264   // See whether the shift vector is a splat represented as BUILD_VECTOR.
04265   if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
04266     APInt SplatBits, SplatUndef;
04267     unsigned SplatBitSize;
04268     bool HasAnyUndefs;
04269     // Check for constant splats.  Use ElemBitSize as the minimum element
04270     // width and reject splats that need wider elements.
04271     if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
04272                              ElemBitSize, true) &&
04273         SplatBitSize == ElemBitSize) {
04274       SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
04275                                       DL, MVT::i32);
04276       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
04277     }
04278     // Check for variable splats.
04279     BitVector UndefElements;
04280     SDValue Splat = BVN->getSplatValue(&UndefElements);
04281     if (Splat) {
04282       // Since i32 is the smallest legal type, we either need a no-op
04283       // or a truncation.
04284       SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
04285       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
04286     }
04287   }
04288 
04289   // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
04290   // and the shift amount is directly available in a GPR.
04291   if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
04292     if (VSN->isSplat()) {
04293       SDValue VSNOp0 = VSN->getOperand(0);
04294       unsigned Index = VSN->getSplatIndex();
04295       assert(Index < VT.getVectorNumElements() &&
04296              "Splat index should be defined and in first operand");
04297       if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
04298           VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
04299         // Since i32 is the smallest legal type, we either need a no-op
04300         // or a truncation.
04301         SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
04302                                     VSNOp0.getOperand(Index));
04303         return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
04304       }
04305     }
04306   }
04307 
04308   // Otherwise just treat the current form as legal.
04309   return Op;
04310 }
04311 
04312 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
04313                                               SelectionDAG &DAG) const {
04314   switch (Op.getOpcode()) {
04315   case ISD::BR_CC:
04316     return lowerBR_CC(Op, DAG);
04317   case ISD::SELECT_CC:
04318     return lowerSELECT_CC(Op, DAG);
04319   case ISD::SETCC:
04320     return lowerSETCC(Op, DAG);
04321   case ISD::GlobalAddress:
04322     return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
04323   case ISD::GlobalTLSAddress:
04324     return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
04325   case ISD::BlockAddress:
04326     return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
04327   case ISD::JumpTable:
04328     return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
04329   case ISD::ConstantPool:
04330     return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
04331   case ISD::BITCAST:
04332     return lowerBITCAST(Op, DAG);
04333   case ISD::VASTART:
04334     return lowerVASTART(Op, DAG);
04335   case ISD::VACOPY:
04336     return lowerVACOPY(Op, DAG);
04337   case ISD::DYNAMIC_STACKALLOC:
04338     return lowerDYNAMIC_STACKALLOC(Op, DAG);
04339   case ISD::SMUL_LOHI:
04340     return lowerSMUL_LOHI(Op, DAG);
04341   case ISD::UMUL_LOHI:
04342     return lowerUMUL_LOHI(Op, DAG);
04343   case ISD::SDIVREM:
04344     return lowerSDIVREM(Op, DAG);
04345   case ISD::UDIVREM:
04346     return lowerUDIVREM(Op, DAG);
04347   case ISD::OR:
04348     return lowerOR(Op, DAG);
04349   case ISD::CTPOP:
04350     return lowerCTPOP(Op, DAG);
04351   case ISD::CTLZ_ZERO_UNDEF:
04352     return DAG.getNode(ISD::CTLZ, SDLoc(Op),
04353                        Op.getValueType(), Op.getOperand(0));
04354   case ISD::CTTZ_ZERO_UNDEF:
04355     return DAG.getNode(ISD::CTTZ, SDLoc(Op),
04356                        Op.getValueType(), Op.getOperand(0));
04357   case ISD::ATOMIC_SWAP:
04358     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
04359   case ISD::ATOMIC_STORE:
04360     return lowerATOMIC_STORE(Op, DAG);
04361   case ISD::ATOMIC_LOAD:
04362     return lowerATOMIC_LOAD(Op, DAG);
04363   case ISD::ATOMIC_LOAD_ADD:
04364     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
04365   case ISD::ATOMIC_LOAD_SUB:
04366     return lowerATOMIC_LOAD_SUB(Op, DAG);
04367   case ISD::ATOMIC_LOAD_AND:
04368     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
04369   case ISD::ATOMIC_LOAD_OR:
04370     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
04371   case ISD::ATOMIC_LOAD_XOR:
04372     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
04373   case ISD::ATOMIC_LOAD_NAND:
04374     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
04375   case ISD::ATOMIC_LOAD_MIN:
04376     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
04377   case ISD::ATOMIC_LOAD_MAX:
04378     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
04379   case ISD::ATOMIC_LOAD_UMIN:
04380     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
04381   case ISD::ATOMIC_LOAD_UMAX:
04382     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
04383   case ISD::ATOMIC_CMP_SWAP:
04384     return lowerATOMIC_CMP_SWAP(Op, DAG);
04385   case ISD::STACKSAVE:
04386     return lowerSTACKSAVE(Op, DAG);
04387   case ISD::STACKRESTORE:
04388     return lowerSTACKRESTORE(Op, DAG);
04389   case ISD::PREFETCH:
04390     return lowerPREFETCH(Op, DAG);
04391   case ISD::INTRINSIC_W_CHAIN:
04392     return lowerINTRINSIC_W_CHAIN(Op, DAG);
04393   case ISD::INTRINSIC_WO_CHAIN:
04394     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
04395   case ISD::BUILD_VECTOR:
04396     return lowerBUILD_VECTOR(Op, DAG);
04397   case ISD::VECTOR_SHUFFLE:
04398     return lowerVECTOR_SHUFFLE(Op, DAG);
04399   case ISD::SCALAR_TO_VECTOR:
04400     return lowerSCALAR_TO_VECTOR(Op, DAG);
04401   case ISD::INSERT_VECTOR_ELT:
04402     return lowerINSERT_VECTOR_ELT(Op, DAG);
04403   case ISD::EXTRACT_VECTOR_ELT:
04404     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
04405   case ISD::SIGN_EXTEND_VECTOR_INREG:
04406     return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
04407   case ISD::ZERO_EXTEND_VECTOR_INREG:
04408     return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
04409   case ISD::SHL:
04410     return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
04411   case ISD::SRL:
04412     return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
04413   case ISD::SRA:
04414     return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
04415   default:
04416     llvm_unreachable("Unexpected node to lower");
04417   }
04418 }
04419 
04420 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
04421 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
04422   switch ((SystemZISD::NodeType)Opcode) {
04423     case SystemZISD::FIRST_NUMBER: break;
04424     OPCODE(RET_FLAG);
04425     OPCODE(CALL);
04426     OPCODE(SIBCALL);
04427     OPCODE(TLS_GDCALL);
04428     OPCODE(TLS_LDCALL);
04429     OPCODE(PCREL_WRAPPER);
04430     OPCODE(PCREL_OFFSET);
04431     OPCODE(IABS);
04432     OPCODE(ICMP);
04433     OPCODE(FCMP);
04434     OPCODE(TM);
04435     OPCODE(BR_CCMASK);
04436     OPCODE(SELECT_CCMASK);
04437     OPCODE(ADJDYNALLOC);
04438     OPCODE(EXTRACT_ACCESS);
04439     OPCODE(POPCNT);
04440     OPCODE(UMUL_LOHI64);
04441     OPCODE(SDIVREM32);
04442     OPCODE(SDIVREM64);
04443     OPCODE(UDIVREM32);
04444     OPCODE(UDIVREM64);
04445     OPCODE(MVC);
04446     OPCODE(MVC_LOOP);
04447     OPCODE(NC);
04448     OPCODE(NC_LOOP);
04449     OPCODE(OC);
04450     OPCODE(OC_LOOP);
04451     OPCODE(XC);
04452     OPCODE(XC_LOOP);
04453     OPCODE(CLC);
04454     OPCODE(CLC_LOOP);
04455     OPCODE(STPCPY);
04456     OPCODE(STRCMP);
04457     OPCODE(SEARCH_STRING);
04458     OPCODE(IPM);
04459     OPCODE(SERIALIZE);
04460     OPCODE(TBEGIN);
04461     OPCODE(TBEGIN_NOFLOAT);
04462     OPCODE(TEND);
04463     OPCODE(BYTE_MASK);
04464     OPCODE(ROTATE_MASK);
04465     OPCODE(REPLICATE);
04466     OPCODE(JOIN_DWORDS);
04467     OPCODE(SPLAT);
04468     OPCODE(MERGE_HIGH);
04469     OPCODE(MERGE_LOW);
04470     OPCODE(SHL_DOUBLE);
04471     OPCODE(PERMUTE_DWORDS);
04472     OPCODE(PERMUTE);
04473     OPCODE(PACK);
04474     OPCODE(PACKS_CC);
04475     OPCODE(PACKLS_CC);
04476     OPCODE(UNPACK_HIGH);
04477     OPCODE(UNPACKL_HIGH);
04478     OPCODE(UNPACK_LOW);
04479     OPCODE(UNPACKL_LOW);
04480     OPCODE(VSHL_BY_SCALAR);
04481     OPCODE(VSRL_BY_SCALAR);
04482     OPCODE(VSRA_BY_SCALAR);
04483     OPCODE(VSUM);
04484     OPCODE(VICMPE);
04485     OPCODE(VICMPH);
04486     OPCODE(VICMPHL);
04487     OPCODE(VICMPES);
04488     OPCODE(VICMPHS);
04489     OPCODE(VICMPHLS);
04490     OPCODE(VFCMPE);
04491     OPCODE(VFCMPH);
04492     OPCODE(VFCMPHE);
04493     OPCODE(VFCMPES);
04494     OPCODE(VFCMPHS);
04495     OPCODE(VFCMPHES);
04496     OPCODE(VFTCI);
04497     OPCODE(VEXTEND);
04498     OPCODE(VROUND);
04499     OPCODE(VTM);
04500     OPCODE(VFAE_CC);
04501     OPCODE(VFAEZ_CC);
04502     OPCODE(VFEE_CC);
04503     OPCODE(VFEEZ_CC);
04504     OPCODE(VFENE_CC);
04505     OPCODE(VFENEZ_CC);
04506     OPCODE(VISTR_CC);
04507     OPCODE(VSTRC_CC);
04508     OPCODE(VSTRCZ_CC);
04509     OPCODE(ATOMIC_SWAPW);
04510     OPCODE(ATOMIC_LOADW_ADD);
04511     OPCODE(ATOMIC_LOADW_SUB);
04512     OPCODE(ATOMIC_LOADW_AND);
04513     OPCODE(ATOMIC_LOADW_OR);
04514     OPCODE(ATOMIC_LOADW_XOR);
04515     OPCODE(ATOMIC_LOADW_NAND);
04516     OPCODE(ATOMIC_LOADW_MIN);
04517     OPCODE(ATOMIC_LOADW_MAX);
04518     OPCODE(ATOMIC_LOADW_UMIN);
04519     OPCODE(ATOMIC_LOADW_UMAX);
04520     OPCODE(ATOMIC_CMP_SWAPW);
04521     OPCODE(PREFETCH);
04522   }
04523   return nullptr;
04524 #undef OPCODE
04525 }
04526 
04527 // Return true if VT is a vector whose elements are a whole number of bytes
04528 // in width.
04529 static bool canTreatAsByteVector(EVT VT) {
04530   return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0;
04531 }
04532 
04533 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
04534 // producing a result of type ResVT.  Op is a possibly bitcast version
04535 // of the input vector and Index is the index (based on type VecVT) that
04536 // should be extracted.  Return the new extraction if a simplification
04537 // was possible or if Force is true.
04538 SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
04539                                               SDValue Op, unsigned Index,
04540                                               DAGCombinerInfo &DCI,
04541                                               bool Force) const {
04542   SelectionDAG &DAG = DCI.DAG;
04543 
04544   // The number of bytes being extracted.
04545   unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
04546 
04547   for (;;) {
04548     unsigned Opcode = Op.getOpcode();
04549     if (Opcode == ISD::BITCAST)
04550       // Look through bitcasts.
04551       Op = Op.getOperand(0);
04552     else if (Opcode == ISD::VECTOR_SHUFFLE &&
04553              canTreatAsByteVector(Op.getValueType())) {
04554       // Get a VPERM-like permute mask and see whether the bytes covered
04555       // by the extracted element are a contiguous sequence from one
04556       // source operand.
04557       SmallVector<int, SystemZ::VectorBytes> Bytes;
04558       getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
04559       int First;
04560       if (!getShuffleInput(Bytes, Index * BytesPerElement,
04561                            BytesPerElement, First))
04562         break;
04563       if (First < 0)
04564         return DAG.getUNDEF(ResVT);
04565       // Make sure the contiguous sequence starts at a multiple of the
04566       // original element size.
04567       unsigned Byte = unsigned(First) % Bytes.size();
04568       if (Byte % BytesPerElement != 0)
04569         break;
04570       // We can get the extracted value directly from an input.
04571       Index = Byte / BytesPerElement;
04572       Op = Op.getOperand(unsigned(First) / Bytes.size());
04573       Force = true;
04574     } else if (Opcode == ISD::BUILD_VECTOR &&
04575                canTreatAsByteVector(Op.getValueType())) {
04576       // We can only optimize this case if the BUILD_VECTOR elements are
04577       // at least as wide as the extracted value.
04578       EVT OpVT = Op.getValueType();
04579       unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
04580       if (OpBytesPerElement < BytesPerElement)
04581         break;
04582       // Make sure that the least-significant bit of the extracted value
04583       // is the least significant bit of an input.
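            // For example, an i16 extracted from a BUILD_VECTOR of i32s
            // only qualifies at odd indices, where End is a multiple of
            // OpBytesPerElement == 4.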
04584       unsigned End = (Index + 1) * BytesPerElement;
04585       if (End % OpBytesPerElement != 0)
04586         break;
04587       // We're extracting the low part of one operand of the BUILD_VECTOR.
04588       Op = Op.getOperand(End / OpBytesPerElement - 1);
04589       if (!Op.getValueType().isInteger()) {
04590         EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits());
04591         Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
04592         DCI.AddToWorklist(Op.getNode());
04593       }
04594       EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
04595       Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
04596       if (VT != ResVT) {
04597         DCI.AddToWorklist(Op.getNode());
04598         Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
04599       }
04600       return Op;
04601     } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
04602                 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
04603                 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
04604                canTreatAsByteVector(Op.getValueType()) &&
04605                canTreatAsByteVector(Op.getOperand(0).getValueType())) {
04606       // Make sure that only the unextended bits are significant.
04607       EVT ExtVT = Op.getValueType();
04608       EVT OpVT = Op.getOperand(0).getValueType();
04609       unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
04610       unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
04611       unsigned Byte = Index * BytesPerElement;
04612       unsigned SubByte = Byte % ExtBytesPerElement;
04613       unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
04614       if (SubByte < MinSubByte ||
04615           SubByte + BytesPerElement > ExtBytesPerElement)
04616         break;
04617       // Get the byte offset of the unextended element...
04618       Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
04619       // ...then add the byte offset relative to that element.
04620       Byte += SubByte - MinSubByte;
04621       if (Byte % BytesPerElement != 0)
04622         break;
04623       Op = Op.getOperand(0);
04624       Index = Byte / BytesPerElement;
04625       Force = true;
04626     } else
04627       break;
04628   }
04629   if (Force) {
04630     if (Op.getValueType() != VecVT) {
04631       Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
04632       DCI.AddToWorklist(Op.getNode());
04633     }
04634     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
04635                        DAG.getConstant(Index, DL, MVT::i32));
04636   }
04637   return SDValue();
04638 }
04639 
04640 // Optimize vector operations in scalar value Op on the basis that Op
04641 // is truncated to TruncVT.
04642 SDValue
04643 SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
04644                                               DAGCombinerInfo &DCI) const {
04645   // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
04646   // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
04647   // of type TruncVT.
04648   if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
04649       TruncVT.getSizeInBits() % 8 == 0) {
04650     SDValue Vec = Op.getOperand(0);
04651     EVT VecVT = Vec.getValueType();
04652     if (canTreatAsByteVector(VecVT)) {
04653       if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
04654         unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
04655         unsigned TruncBytes = TruncVT.getStoreSize();
04656         if (BytesPerElement % TruncBytes == 0) {
04657           // Calculate the value of Y' in the above description.  We are
04658           // splitting the original elements into Scale equal-sized pieces
04659           // and for truncation purposes want the last (least-significant)
04660           // of these pieces for IndexN.  This is easiest to do by calculating
04661           // the start index of the following element and then subtracting 1.
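                // For example, truncating i64 element 1 of a v2i64 to i16
                // gives Scale == 4 and NewIndex == 7, the last i16 piece
                // of element 1 in the bitcast v8i16.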
04662           unsigned Scale = BytesPerElement / TruncBytes;
04663           unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
04664 
04665           // Defer the creation of the bitcast from X to combineExtract,
04666           // which might be able to optimize the extraction.
04667           VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
04668                                    VecVT.getStoreSize() / TruncBytes);
04669           EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
04670           return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
04671         }
04672       }
04673     }
04674   }
04675   return SDValue();
04676 }
04677 
04678 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
04679                                                  DAGCombinerInfo &DCI) const {
04680   SelectionDAG &DAG = DCI.DAG;
04681   unsigned Opcode = N->getOpcode();
04682   if (Opcode == ISD::SIGN_EXTEND) {
04683     // Convert (sext (ashr (shl X, C1), C2)) to
04684     // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
04685     // cheap as narrower ones.
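          // For example, (sext (i32 (ashr (shl X, 24), 24))) to i64 has
          // Extra == 32, so it becomes (ashr (shl (anyext X), 56), 56).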
04686     SDValue N0 = N->getOperand(0);
04687     EVT VT = N->getValueType(0);
04688     if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
04689       auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
04690       SDValue Inner = N0.getOperand(0);
04691       if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
04692         if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
04693           unsigned Extra = (VT.getSizeInBits() -
04694                             N0.getValueType().getSizeInBits());
04695           unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
04696           unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
04697           EVT ShiftVT = N0.getOperand(1).getValueType();
04698           SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
04699                                     Inner.getOperand(0));
04700           SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
04701                                     DAG.getConstant(NewShlAmt, SDLoc(Inner),
04702                                                     ShiftVT));
04703           return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
04704                              DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
04705         }
04706       }
04707     }
04708   }
04709   if (Opcode == SystemZISD::MERGE_HIGH ||
04710       Opcode == SystemZISD::MERGE_LOW) {
04711     SDValue Op0 = N->getOperand(0);
04712     SDValue Op1 = N->getOperand(1);
04713     if (Op0.getOpcode() == ISD::BITCAST)
04714       Op0 = Op0.getOperand(0);
04715     if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
04716         cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
04717       // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
04718       // for v4f32.
04719       if (Op1 == N->getOperand(0))
04720         return Op1;
04721       // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
04722       EVT VT = Op1.getValueType();
04723       unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
04724       if (ElemBytes <= 4) {
04725         Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
04726                   SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
04727         EVT InVT = VT.changeVectorElementTypeToInteger();
04728         EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
04729                                      SystemZ::VectorBytes / ElemBytes / 2);
04730         if (VT != InVT) {
04731           Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
04732           DCI.AddToWorklist(Op1.getNode());
04733         }
04734         SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
04735         DCI.AddToWorklist(Op.getNode());
04736         return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
04737       }
04738     }
04739   }
04740   // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
04741   // for the extraction to be done on a vMiN value, so that we can use VSTE.
04742   // If X has wider elements then convert it to:
04743   // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
04744   if (Opcode == ISD::STORE) {
04745     auto *SN = cast<StoreSDNode>(N);
04746     EVT MemVT = SN->getMemoryVT();
04747     if (MemVT.isInteger()) {
04748       SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
04749                                              SN->getValue(), DCI);
04750       if (Value.getNode()) {
04751         DCI.AddToWorklist(Value.getNode());
04752 
04753         // Rewrite the store with the new form of stored value.
04754         return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
04755                                  SN->getBasePtr(), SN->getMemoryVT(),
04756                                  SN->getMemOperand());
04757       }
04758     }
04759   }
04760   // Try to simplify a vector extraction.
04761   if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
04762     if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
04763       SDValue Op0 = N->getOperand(0);
04764       EVT VecVT = Op0.getValueType();
04765       return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
04766                             IndexN->getZExtValue(), DCI, false);
04767     }
04768   }
04769   // (join_dwords X, X) == (replicate X)
04770   if (Opcode == SystemZISD::JOIN_DWORDS &&
04771       N->getOperand(0) == N->getOperand(1))
04772     return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
04773                        N->getOperand(0));
04774   // (fround (extract_vector_elt X 0))
04775   // (fround (extract_vector_elt X 1)) ->
04776   // (extract_vector_elt (VROUND X) 0)
04777   // (extract_vector_elt (VROUND X) 1)
04778   //
04779   // This is a special case since the target doesn't really support v2f32s.
04780   if (Opcode == ISD::FP_ROUND) {
04781     SDValue Op0 = N->getOperand(0);
04782     if (N->getValueType(0) == MVT::f32 &&
04783         Op0.hasOneUse() &&
04784         Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
04785         Op0.getOperand(0).getValueType() == MVT::v2f64 &&
04786         Op0.getOperand(1).getOpcode() == ISD::Constant &&
04787         cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
04788       SDValue Vec = Op0.getOperand(0);
04789       for (auto *U : Vec->uses()) {
04790         if (U != Op0.getNode() &&
04791             U->hasOneUse() &&
04792             U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
04793             U->getOperand(0) == Vec &&
04794             U->getOperand(1).getOpcode() == ISD::Constant &&
04795             cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
04796           SDValue OtherRound = SDValue(*U->use_begin(), 0);
04797           if (OtherRound.getOpcode() == ISD::FP_ROUND &&
04798               OtherRound.getOperand(0) == SDValue(U, 0) &&
04799               OtherRound.getValueType() == MVT::f32) {
04800             SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
04801                                          MVT::v4f32, Vec);
04802             DCI.AddToWorklist(VRound.getNode());
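                  // VROUND places the two rounded results in elements 0
                  // and 2 of the v4f32, so the second extraction uses
                  // index 2.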
04803             SDValue Extract1 =
04804               DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
04805                           VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
04806             DCI.AddToWorklist(Extract1.getNode());
04807             DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
04808             SDValue Extract0 =
04809               DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
04810                           VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
04811             return Extract0;
04812           }
04813         }
04814       }
04815     }
04816   }
04817   return SDValue();
04818 }
04819 
04820 //===----------------------------------------------------------------------===//
04821 // Custom insertion
04822 //===----------------------------------------------------------------------===//
04823 
04824 // Create a new basic block after MBB.
04825 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
04826   MachineFunction &MF = *MBB->getParent();
04827   MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
04828   MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
04829   return NewMBB;
04830 }
04831 
04832 // Split MBB after MI and return the new block (the one that contains
04833 // instructions after MI).
04834 static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
04835                                           MachineBasicBlock *MBB) {
04836   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
04837   NewMBB->splice(NewMBB->begin(), MBB,
04838                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
04839   NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
04840   return NewMBB;
04841 }
04842 
04843 // Split MBB before MI and return the new block (the one that contains MI).
04844 static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
04845                                            MachineBasicBlock *MBB) {
04846   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
04847   NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
04848   NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
04849   return NewMBB;
04850 }
04851 
04852 // Force base value Base into a register before MI.  Return the register.
04853 static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
04854                          const SystemZInstrInfo *TII) {
04855   if (Base.isReg())
04856     return Base.getReg();
04857 
04858   MachineBasicBlock *MBB = MI->getParent();
04859   MachineFunction &MF = *MBB->getParent();
04860   MachineRegisterInfo &MRI = MF.getRegInfo();
04861 
04862   unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
04863   BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
04864     .addOperand(Base).addImm(0).addReg(0);
04865   return Reg;
04866 }
04867 
04868 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
04869 MachineBasicBlock *
04870 SystemZTargetLowering::emitSelect(MachineInstr *MI,
04871                                   MachineBasicBlock *MBB) const {
04872   const SystemZInstrInfo *TII =
04873       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
04874 
04875   unsigned DestReg  = MI->getOperand(0).getReg();
04876   unsigned TrueReg  = MI->getOperand(1).getReg();
04877   unsigned FalseReg = MI->getOperand(2).getReg();
04878   unsigned CCValid  = MI->getOperand(3).getImm();
04879   unsigned CCMask   = MI->getOperand(4).getImm();
04880   DebugLoc DL       = MI->getDebugLoc();
04881 
04882   MachineBasicBlock *StartMBB = MBB;
04883   MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
04884   MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
04885 
04886   //  StartMBB:
04887   //   BRC CCMask, JoinMBB
04888   //   # fallthrough to FalseMBB
04889   MBB = StartMBB;
04890   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
04891     .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
04892   MBB->addSuccessor(JoinMBB);
04893   MBB->addSuccessor(FalseMBB);
04894 
04895   //  FalseMBB:
04896   //   # fallthrough to JoinMBB
04897   MBB = FalseMBB;
04898   MBB->addSuccessor(JoinMBB);
04899 
04900   //  JoinMBB:
04901   //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
04902   //  ...
04903   MBB = JoinMBB;
04904   BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
04905     .addReg(TrueReg).addMBB(StartMBB)
04906     .addReg(FalseReg).addMBB(FalseMBB);
04907 
04908   MI->eraseFromParent();
04909   return JoinMBB;
04910 }
04911 
04912 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
04913 // StoreOpcode is the store to use and Invert says whether the store should
04914 // happen when the condition is false rather than true.  If a STORE ON
04915 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
04916 MachineBasicBlock *
04917 SystemZTargetLowering::emitCondStore(MachineInstr *MI,
04918                                      MachineBasicBlock *MBB,
04919                                      unsigned StoreOpcode, unsigned STOCOpcode,
04920                                      bool Invert) const {
04921   const SystemZInstrInfo *TII =
04922       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
04923 
04924   unsigned SrcReg     = MI->getOperand(0).getReg();
04925   MachineOperand Base = MI->getOperand(1);
04926   int64_t Disp        = MI->getOperand(2).getImm();
04927   unsigned IndexReg   = MI->getOperand(3).getReg();
04928   unsigned CCValid    = MI->getOperand(4).getImm();
04929   unsigned CCMask     = MI->getOperand(5).getImm();
04930   DebugLoc DL         = MI->getDebugLoc();
04931 
04932   StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
04933 
04934   // Use STOCOpcode if possible.  We could use different store patterns in
04935   // order to avoid matching the index register, but the performance trade-offs
04936   // might be more complicated in that case.
04937   if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
04938     if (Invert)
04939       CCMask ^= CCValid;
04940     BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
04941       .addReg(SrcReg).addOperand(Base).addImm(Disp)
04942       .addImm(CCValid).addImm(CCMask);
04943     MI->eraseFromParent();
04944     return MBB;
04945   }
04946 
04947   // Get the condition needed to branch around the store.
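        // The BRC below branches to JoinMBB, skipping the store, so flip
        // CCMask unless Invert has already reversed the sense of the test.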
04948   if (!Invert)
04949     CCMask ^= CCValid;
04950 
04951   MachineBasicBlock *StartMBB = MBB;
04952   MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
04953   MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
04954 
04955   //  StartMBB:
04956   //   BRC CCMask, JoinMBB
04957   //   # fallthrough to FalseMBB
04958   MBB = StartMBB;
04959   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
04960     .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
04961   MBB->addSuccessor(JoinMBB);
04962   MBB->addSuccessor(FalseMBB);
04963 
04964   //  FalseMBB:
04965   //   store %SrcReg, %Disp(%Index,%Base)
04966   //   # fallthrough to JoinMBB
04967   MBB = FalseMBB;
04968   BuildMI(MBB, DL, TII->get(StoreOpcode))
04969     .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
04970   MBB->addSuccessor(JoinMBB);
04971 
04972   MI->eraseFromParent();
04973   return JoinMBB;
04974 }
04975 
04976 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
04977 // or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
04978 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
04979 // BitSize is the width of the field in bits, or 0 if this is a partword
04980 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
04981 // is one of the operands.  Invert says whether the field should be
04982 // inverted after performing BinOpcode (e.g. for NAND).
04983 MachineBasicBlock *
04984 SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
04985                                             MachineBasicBlock *MBB,
04986                                             unsigned BinOpcode,
04987                                             unsigned BitSize,
04988                                             bool Invert) const {
04989   MachineFunction &MF = *MBB->getParent();
04990   const SystemZInstrInfo *TII =
04991       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
04992   MachineRegisterInfo &MRI = MF.getRegInfo();
04993   bool IsSubWord = (BitSize < 32);
04994 
04995   // Extract the operands.  Base can be a register or a frame index.
04996   // Src2 can be a register or immediate.
04997   unsigned Dest        = MI->getOperand(0).getReg();
04998   MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
04999   int64_t Disp         = MI->getOperand(2).getImm();
05000   MachineOperand Src2  = earlyUseOperand(MI->getOperand(3));
05001   unsigned BitShift    = (IsSubWord ? MI->getOperand(4).getReg() : 0);
05002   unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
05003   DebugLoc DL          = MI->getDebugLoc();
05004   if (IsSubWord)
05005     BitSize = MI->getOperand(6).getImm();
05006 
05007   // Subword operations use 32-bit registers.
05008   const TargetRegisterClass *RC = (BitSize <= 32 ?
05009                                    &SystemZ::GR32BitRegClass :
05010                                    &SystemZ::GR64BitRegClass);
05011   unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
05012   unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
05013 
05014   // Get the right opcodes for the displacement.
05015   LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
05016   CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
05017   assert(LOpcode && CSOpcode && "Displacement out of range");
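  // getOpcodeForOffset returns 0 when even the long-displacement variant
  // cannot encode Disp, which is what the assertion guards against.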
05018 
05019   // Create virtual registers for temporary results.
05020   unsigned OrigVal       = MRI.createVirtualRegister(RC);
05021   unsigned OldVal        = MRI.createVirtualRegister(RC);
05022   unsigned NewVal        = (BinOpcode || IsSubWord ?
05023                             MRI.createVirtualRegister(RC) : Src2.getReg());
05024   unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
05025   unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
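  // Note that for a full-word ATOMIC_SWAP (no BinOpcode, not subword),
  // NewVal reuses Src2's register directly, since the stored value needs
  // no arithmetic or rotation.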
05026 
05027   // Insert a basic block for the main loop.
05028   MachineBasicBlock *StartMBB = MBB;
05029   MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
05030   MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
05031 
05032   //  StartMBB:
05033   //   ...
05034   //   %OrigVal = L Disp(%Base)
05035   //   # fall through to LoopMBB
05036   MBB = StartMBB;
05037   BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
05038     .addOperand(Base).addImm(Disp).addReg(0);
05039   MBB->addSuccessor(LoopMBB);
05040 
05041   //  LoopMBB:
05042   //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
05043   //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
05044   //   %RotatedNewVal = OP %RotatedOldVal, %Src2
05045   //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
05046   //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
05047   //   JNE LoopMBB
05048   //   # fall through to DoneMBB
05049   MBB = LoopMBB;
05050   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
05051     .addReg(OrigVal).addMBB(StartMBB)
05052     .addReg(Dest).addMBB(LoopMBB);
05053   if (IsSubWord)
05054     BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
05055       .addReg(OldVal).addReg(BitShift).addImm(0);
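  // The RLL by %BitShift rotates the containing word so that the addressed
  // subword field lands in the most significant bits, where the masked
  // operations below expect it.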
05056   if (Invert) {
05057     // Perform the operation normally and then invert every bit of the field.
05058     unsigned Tmp = MRI.createVirtualRegister(RC);
05059     BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
05060       .addReg(RotatedOldVal).addOperand(Src2);
05061     if (BitSize <= 32)
05062       // XILF with the upper BitSize bits set; these cover the field, which for subword operations the RLL above rotated to the top of the word.
05063       BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
05064         .addReg(Tmp).addImm(-1U << (32 - BitSize));
05065     else {
05066       // Use LCGR and add -1 to the result, which is more compact than
05067       // an XILF, XILH pair (in two's complement, -X - 1 == ~X).
05068       unsigned Tmp2 = MRI.createVirtualRegister(RC);
05069       BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
05070       BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
05071         .addReg(Tmp2).addImm(-1);
05072     }
05073   } else if (BinOpcode)
05074     // A simple binary operation.
05075     BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
05076       .addReg(RotatedOldVal).addOperand(Src2);
05077   else if (IsSubWord)
05078     // Use RISBG to rotate Src2 into position and use it to replace the
05079     // field in RotatedOldVal.
05080     BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
05081       .addReg(RotatedOldVal).addReg(Src2.getReg())
05082       .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
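  // (The RISBG immediates above rotate Src2 left by 32 - BitSize, bringing
  // its low BitSize bits into bit positions 32 .. 31 + BitSize, and insert
  // exactly those bits into RotatedOldVal, leaving the rest unchanged.)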
05083   if (IsSubWord)
05084     BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
05085       .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
05086   BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
05087     .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
05088   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05089     .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
05090   MBB->addSuccessor(LoopMBB);
05091   MBB->addSuccessor(DoneMBB);
05092 
05093   MI->eraseFromParent();
05094   return DoneMBB;
05095 }
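// As an illustration (a sketch, not actual emitted output; register names
// are placeholders), a full-word NAND -- BinOpcode = NR with Invert set and
// BitSize = 32, so no rotation is needed and the Rotated values alias
// OldVal/NewVal -- expands to:
//
//   StartMBB:
//    %OrigVal = L Disp(%Base)
//   LoopMBB:
//    %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
//    %Tmp    = NR %OldVal, %Src2
//    %NewVal = XILF %Tmp, 0xFFFFFFFF    # -1U << (32 - 32)
//    %Dest   = CS %OldVal, %NewVal, Disp(%Base)
//    JNE LoopMBB
//   DoneMBB: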
05096 
05097 // Implement EmitInstrWithCustomInserter for pseudo
05098 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
05099 // instruction that should be used to compare the current field with the
05100 // minimum or maximum value.  KeepOldMask is the BRC condition-code mask
05101 // for when the