// SystemZISelLowering.cpp — captured from the LLVM mainline Doxygen listing.
00001 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the SystemZTargetLowering class.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "SystemZISelLowering.h"
00015 #include "SystemZCallingConv.h"
00016 #include "SystemZConstantPoolValue.h"
00017 #include "SystemZMachineFunctionInfo.h"
00018 #include "SystemZTargetMachine.h"
00019 #include "llvm/CodeGen/CallingConvLower.h"
00020 #include "llvm/CodeGen/MachineInstrBuilder.h"
00021 #include "llvm/CodeGen/MachineRegisterInfo.h"
00022 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
00023 #include "llvm/IR/Intrinsics.h"
00024 #include <cctype>
00025 
00026 using namespace llvm;
00027 
00028 #define DEBUG_TYPE "systemz-lower"
00029 
00030 namespace {
// Represents a sequence for extracting a 0/1 value from an IPM result:
// (((X ^ XORValue) + AddValue) >> Bit)
struct IPMConversion {
  // Take the XOR value as int64_t to match the XORValue member directly;
  // previously it arrived as unsigned and was widened implicitly.  All
  // existing unsigned call sites continue to work unchanged.
  IPMConversion(int64_t xorValue, int64_t addValue, unsigned bit)
    : XORValue(xorValue), AddValue(addValue), Bit(bit) {}

  int64_t XORValue;  // XORed into the IPM result first.
  int64_t AddValue;  // Then added.
  unsigned Bit;      // Finally, the bit to shift down to bit 0.
};
00041 
00042 // Represents information about a comparison.
00043 struct Comparison {
00044   Comparison(SDValue Op0In, SDValue Op1In)
00045     : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
00046 
00047   // The operands to the comparison.
00048   SDValue Op0, Op1;
00049 
00050   // The opcode that should be used to compare Op0 and Op1.
00051   unsigned Opcode;
00052 
00053   // A SystemZICMP value.  Only used for integer comparisons.
00054   unsigned ICmpType;
00055 
00056   // The mask of CC values that Opcode can produce.
00057   unsigned CCValid;
00058 
00059   // The mask of CC values for which the original condition is true.
00060   unsigned CCMask;
00061 };
00062 } // end anonymous namespace
00063 
00064 // Classify VT as either 32 or 64 bit.
00065 static bool is32Bit(EVT VT) {
00066   switch (VT.getSimpleVT().SimpleTy) {
00067   case MVT::i32:
00068     return true;
00069   case MVT::i64:
00070     return false;
00071   default:
00072     llvm_unreachable("Unsupported type");
00073   }
00074 }
00075 
00076 // Return a version of MachineOperand that can be safely used before the
00077 // final use.
00078 static MachineOperand earlyUseOperand(MachineOperand Op) {
00079   if (Op.isReg())
00080     Op.setIsKill(false);
00081   return Op;
00082 }
00083 
// Construct the SystemZ lowering object: register the legal register
// classes and value types for the given subtarget, declare how each ISD
// operation on those types should be handled (Legal, Expand, Promote or
// Custom), and tune the target-independent memcpy/memset heuristics.
// Feature-dependent choices key off Subtarget (hasHighWord, hasVector,
// hasPopulationCount, hasFPExtension).
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
                                             const SystemZSubtarget &STI)
    : TargetLowering(tm), Subtarget(STI) {
  MVT PtrVT = getPointerTy();

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  // With the vector facility, f32/f64 live in the vector register file;
  // otherwise they use the plain FP register classes.
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  // All 128-bit vector types share the single VR128 register class.
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setExceptionPointerRegister(SystemZ::R6D);
  setExceptionSelectorRegister(SystemZ::R7D);
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);

  // Handle operations that are handled in a similar way for all types.
  // This range spans every integer AND floating-point simple value type.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC,     VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD,  VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ,            VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::ROTR,            VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
  setOperationAction(ISD::JumpTable,        PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      // No v2i64 multiply instruction exists, so leave it expanded.
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
  }

  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32,  Legal);
  setOperationAction(ISD::FMA, MVT::f64,  Legal);
  setOperationAction(ISD::FMA, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}
00457 
00458 EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
00459   if (!VT.isVector())
00460     return MVT::i32;
00461   return VT.changeVectorElementTypeToInteger();
00462 }
00463 
00464 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
00465   VT = VT.getScalarType();
00466 
00467   if (!VT.isSimple())
00468     return false;
00469 
00470   switch (VT.getSimpleVT().SimpleTy) {
00471   case MVT::f32:
00472   case MVT::f64:
00473     return true;
00474   case MVT::f128:
00475     return false;
00476   default:
00477     break;
00478   }
00479 
00480   return false;
00481 }
00482 
00483 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
00484   // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
00485   return Imm.isZero() || Imm.isNegZero();
00486 }
00487 
00488 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
00489   // We can use CGFI or CLGFI.
00490   return isInt<32>(Imm) || isUInt<32>(Imm);
00491 }
00492 
00493 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
00494   // We can use ALGFI or SLGFI.
00495   return isUInt<32>(Imm) || isUInt<32>(-Imm);
00496 }
00497 
00498 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
00499                                                            unsigned,
00500                                                            unsigned,
00501                                                            bool *Fast) const {
00502   // Unaligned accesses should never be slower than the expanded version.
00503   // We check specifically for aligned accesses in the few cases where
00504   // they are required.
00505   if (Fast)
00506     *Fast = true;
00507   return true;
00508 }
00509 
00510 bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
00511                                                   Type *Ty,
00512                                                   unsigned AS) const {
00513   // Punt on globals for now, although they can be used in limited
00514   // RELATIVE LONG cases.
00515   if (AM.BaseGV)
00516     return false;
00517 
00518   // Require a 20-bit signed offset.
00519   if (!isInt<20>(AM.BaseOffs))
00520     return false;
00521 
00522   // Indexing is OK but no scale factor can be applied.
00523   return AM.Scale == 0 || AM.Scale == 1;
00524 }
00525 
00526 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
00527   if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
00528     return false;
00529   unsigned FromBits = FromType->getPrimitiveSizeInBits();
00530   unsigned ToBits = ToType->getPrimitiveSizeInBits();
00531   return FromBits > ToBits;
00532 }
00533 
00534 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
00535   if (!FromVT.isInteger() || !ToVT.isInteger())
00536     return false;
00537   unsigned FromBits = FromVT.getSizeInBits();
00538   unsigned ToBits = ToVT.getSizeInBits();
00539   return FromBits > ToBits;
00540 }
00541 
00542 //===----------------------------------------------------------------------===//
00543 // Inline asm support
00544 //===----------------------------------------------------------------------===//
00545 
00546 TargetLowering::ConstraintType
00547 SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
00548   if (Constraint.size() == 1) {
00549     switch (Constraint[0]) {
00550     case 'a': // Address register
00551     case 'd': // Data register (equivalent to 'r')
00552     case 'f': // Floating-point register
00553     case 'h': // High-part register
00554     case 'r': // General-purpose register
00555       return C_RegisterClass;
00556 
00557     case 'Q': // Memory with base and unsigned 12-bit displacement
00558     case 'R': // Likewise, plus an index
00559     case 'S': // Memory with base and signed 20-bit displacement
00560     case 'T': // Likewise, plus an index
00561     case 'm': // Equivalent to 'T'.
00562       return C_Memory;
00563 
00564     case 'I': // Unsigned 8-bit constant
00565     case 'J': // Unsigned 12-bit constant
00566     case 'K': // Signed 16-bit constant
00567     case 'L': // Signed 20-bit displacement (on all targets we support)
00568     case 'M': // 0x7fffffff
00569       return C_Other;
00570 
00571     default:
00572       break;
00573     }
00574   }
00575   return TargetLowering::getConstraintType(Constraint);
00576 }
00577 
00578 TargetLowering::ConstraintWeight SystemZTargetLowering::
00579 getSingleConstraintMatchWeight(AsmOperandInfo &info,
00580                                const char *constraint) const {
00581   ConstraintWeight weight = CW_Invalid;
00582   Value *CallOperandVal = info.CallOperandVal;
00583   // If we don't have a value, we can't do a match,
00584   // but allow it at the lowest weight.
00585   if (!CallOperandVal)
00586     return CW_Default;
00587   Type *type = CallOperandVal->getType();
00588   // Look at the constraint type.
00589   switch (*constraint) {
00590   default:
00591     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
00592     break;
00593 
00594   case 'a': // Address register
00595   case 'd': // Data register (equivalent to 'r')
00596   case 'h': // High-part register
00597   case 'r': // General-purpose register
00598     if (CallOperandVal->getType()->isIntegerTy())
00599       weight = CW_Register;
00600     break;
00601 
00602   case 'f': // Floating-point register
00603     if (type->isFloatingPointTy())
00604       weight = CW_Register;
00605     break;
00606 
00607   case 'I': // Unsigned 8-bit constant
00608     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
00609       if (isUInt<8>(C->getZExtValue()))
00610         weight = CW_Constant;
00611     break;
00612 
00613   case 'J': // Unsigned 12-bit constant
00614     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
00615       if (isUInt<12>(C->getZExtValue()))
00616         weight = CW_Constant;
00617     break;
00618 
00619   case 'K': // Signed 16-bit constant
00620     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
00621       if (isInt<16>(C->getSExtValue()))
00622         weight = CW_Constant;
00623     break;
00624 
00625   case 'L': // Signed 20-bit displacement (on all targets we support)
00626     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
00627       if (isInt<20>(C->getSExtValue()))
00628         weight = CW_Constant;
00629     break;
00630 
00631   case 'M': // 0x7fffffff
00632     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
00633       if (C->getZExtValue() == 0x7fffffff)
00634         weight = CW_Constant;
00635     break;
00636   }
00637   return weight;
00638 }
00639 
00640 // Parse a "{tNNN}" register constraint for which the register type "t"
00641 // has already been verified.  MC is the class associated with "t" and
00642 // Map maps 0-based register numbers to LLVM register numbers.
00643 static std::pair<unsigned, const TargetRegisterClass *>
00644 parseRegisterNumber(const std::string &Constraint,
00645                     const TargetRegisterClass *RC, const unsigned *Map) {
00646   assert(*(Constraint.end()-1) == '}' && "Missing '}'");
00647   if (isdigit(Constraint[2])) {
00648     std::string Suffix(Constraint.data() + 2, Constraint.size() - 2);
00649     unsigned Index = atoi(Suffix.c_str());
00650     if (Index < 16 && Map[Index])
00651       return std::make_pair(Map[Index], RC);
00652   }
00653   return std::make_pair(0U, nullptr);
00654 }
00655 
// Map an inline-asm register constraint to a (register, register class)
// pair.  Single letters select a class by VT; "{...}" names are parsed
// with VT-dependent class/number tables.  Anything unhandled defers to
// the generic TargetLowering implementation.
std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, const std::string &Constraint,
    MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);
    }
  }
  // NOTE(review): Constraint[0]/[1] are read without a size check; for a
  // std::string this yields '\0' at the end rather than UB, but confirm
  // callers never pass an empty constraint.
  if (Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
00718 
00719 void SystemZTargetLowering::
00720 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
00721                              std::vector<SDValue> &Ops,
00722                              SelectionDAG &DAG) const {
00723   // Only support length 1 constraints for now.
00724   if (Constraint.length() == 1) {
00725     switch (Constraint[0]) {
00726     case 'I': // Unsigned 8-bit constant
00727       if (auto *C = dyn_cast<ConstantSDNode>(Op))
00728         if (isUInt<8>(C->getZExtValue()))
00729           Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
00730                                               Op.getValueType()));
00731       return;
00732 
00733     case 'J': // Unsigned 12-bit constant
00734       if (auto *C = dyn_cast<ConstantSDNode>(Op))
00735         if (isUInt<12>(C->getZExtValue()))
00736           Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
00737                                               Op.getValueType()));
00738       return;
00739 
00740     case 'K': // Signed 16-bit constant
00741       if (auto *C = dyn_cast<ConstantSDNode>(Op))
00742         if (isInt<16>(C->getSExtValue()))
00743           Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
00744                                               Op.getValueType()));
00745       return;
00746 
00747     case 'L': // Signed 20-bit displacement (on all targets we support)
00748       if (auto *C = dyn_cast<ConstantSDNode>(Op))
00749         if (isInt<20>(C->getSExtValue()))
00750           Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
00751                                               Op.getValueType()));
00752       return;
00753 
00754     case 'M': // 0x7fffffff
00755       if (auto *C = dyn_cast<ConstantSDNode>(Op))
00756         if (C->getZExtValue() == 0x7fffffff)
00757           Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
00758                                               Op.getValueType()));
00759       return;
00760     }
00761   }
00762   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
00763 }
00764 
00765 //===----------------------------------------------------------------------===//
00766 // Calling conventions
00767 //===----------------------------------------------------------------------===//
00768 
00769 #include "SystemZGenCallingConv.inc"
00770 
// A truncation from FromType to ToType may be folded into a tail call
// whenever the truncation is free on this target.
bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}
00775 
00776 bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
00777   if (!CI->isTailCall())
00778     return false;
00779   return true;
00780 }
00781 
00782 // We do not yet support 128-bit single-element vector types.  If the user
00783 // attempts to use such types as function argument or return type, prefer
00784 // to error out instead of emitting code violating the ABI.
00785 static void VerifyVectorType(MVT VT, EVT ArgVT) {
00786   if (ArgVT.isVector() && !VT.isVector())
00787     report_fatal_error("Unsupported vector argument or return type");
00788 }
00789 
00790 static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
00791   for (unsigned i = 0; i < Ins.size(); ++i)
00792     VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
00793 }
00794 
00795 static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
00796   for (unsigned i = 0; i < Outs.size(); ++i)
00797     VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
00798 }
00799 
00800 // Value is a value that has been passed to us in the location described by VA
00801 // (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
00802 // any loads onto Chain.
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  // Undo whatever transformation placed the value in its location:
  // truncate promoted integers, load indirect arguments, or rebuild
  // short vectors that were passed as i64.
  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::Indirect)
    // Value is a pointer to the real argument; load through it.
    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
                        MachinePointerInfo(), false, false, false, 0);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
                        Value, DAG.getUNDEF(MVT::i64));
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}
00832 
00833 // Value is a value of type VA.getValVT() that we need to copy into
00834 // the location described by VA.  Return a copy of Value converted to
00835 // VA.getValVT().  The caller is responsible for handling indirect values.
// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getValVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
  case CCValAssign::Full:
    // No conversion required.
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}
00859 
// Lower the incoming arguments of a function: assign each argument to a
// register or stack slot per the SystemZ calling convention, emit the
// corresponding CopyFromReg/load nodes, and (for varargs) record where the
// register save area and first stack vararg live.
SDValue SystemZTargetLowering::
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                     const SmallVectorImpl<ISD::InputArg> &Ins,
                     SDLoc DL, SelectionDAG &DAG,
                     SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  // Counts of fixed arguments passed in GPRs/FPRs, needed below to work
  // out where the varargs registers start.
  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      // Make the physical register live-in and read it through a vreg.
      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
                                      VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      EVT PtrVT = getPointerTy();
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, false, 0);
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(FI),
                                 false, false, 0);

      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
    }
  }

  return Chain;
}
00990 
00991 static bool canUseSiblingCall(const CCState &ArgCCInfo,
00992                               SmallVectorImpl<CCValAssign> &ArgLocs) {
00993   // Punt if there are any indirect or stack arguments, or if the call
00994   // needs the call-saved argument register R6.
00995   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
00996     CCValAssign &VA = ArgLocs[I];
00997     if (VA.getLocInfo() == CCValAssign::Indirect)
00998       return false;
00999     if (!VA.isRegLoc())
01000       return false;
01001     unsigned Reg = VA.getLocReg();
01002     if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
01003       return false;
01004   }
01005   return true;
01006 }
01007 
// Lower an outgoing call: assign argument locations, emit argument copies
// and stores, build the CALL/SIBCALL node with its register operands and
// call-preserved mask, and copy the results back out of their physregs.
SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy();

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, DL, PtrVT, true),
                                 DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, 0));
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall)
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}
01175 
// Lower a function return: promote each returned value to its assigned
// location type, copy it into the designated return register, and emit
// the RET_FLAG node carrying the return registers.
SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
                                   CallingConv::ID CallConv, bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   SDLoc DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    unsigned Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}
01225 
// Emit a SERIALIZE node on the chain before a volatile or atomic load.
SDValue SystemZTargetLowering::
prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
  return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}
01230 
01231 // Return true if Op is an intrinsic node with chain that returns the CC value
01232 // as its only (other) argument.  Provide the associated SystemZISD opcode and
01233 // the mask of valid CC values if so.
01234 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
01235                                       unsigned &CCValid) {
01236   unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
01237   switch (Id) {
01238   case Intrinsic::s390_tbegin:
01239     Opcode = SystemZISD::TBEGIN;
01240     CCValid = SystemZ::CCMASK_TBEGIN;
01241     return true;
01242 
01243   case Intrinsic::s390_tbegin_nofloat:
01244     Opcode = SystemZISD::TBEGIN_NOFLOAT;
01245     CCValid = SystemZ::CCMASK_TBEGIN;
01246     return true;
01247 
01248   case Intrinsic::s390_tend:
01249     Opcode = SystemZISD::TEND;
01250     CCValid = SystemZ::CCMASK_TEND;
01251     return true;
01252 
01253   default:
01254     return false;
01255   }
01256 }
01257 
01258 // Return true if Op is an intrinsic node without chain that returns the
01259 // CC value as its final argument.  Provide the associated SystemZISD
01260 // opcode and the mask of valid CC values if so.
// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  // For chainless intrinsics the ID is operand 0.
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  // Vector pack (signed/unsigned saturation) with CC.
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  // Vector integer comparisons (equal / high / high logical) with CC.
  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  // Vector string/element search operations with CC.
  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  // Vector floating-point comparisons / test-data-class with CC.
  case Intrinsic::s390_vfcedbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
    Opcode = SystemZISD::VFTCI;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  default:
    return false;
  }
}
01394 
01395 // Emit an intrinsic with chain with a glued value instead of its CC result.
01396 static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
01397                                              unsigned Opcode) {
01398   // Copy all operands except the intrinsic ID.
01399   unsigned NumOps = Op.getNumOperands();
01400   SmallVector<SDValue, 6> Ops;
01401   Ops.reserve(NumOps - 1);
01402   Ops.push_back(Op.getOperand(0));
01403   for (unsigned I = 2; I < NumOps; ++I)
01404     Ops.push_back(Op.getOperand(I));
01405 
01406   assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
01407   SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01408   SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
01409   SDValue OldChain = SDValue(Op.getNode(), 1);
01410   SDValue NewChain = SDValue(Intr.getNode(), 0);
01411   DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
01412   return Intr;
01413 }
01414 
01415 // Emit an intrinsic with a glued value instead of its CC result.
01416 static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
01417                                      unsigned Opcode) {
01418   // Copy all operands except the intrinsic ID.
01419   unsigned NumOps = Op.getNumOperands();
01420   SmallVector<SDValue, 6> Ops;
01421   Ops.reserve(NumOps - 1);
01422   for (unsigned I = 1; I < NumOps; ++I)
01423     Ops.push_back(Op.getOperand(I));
01424 
01425   if (Op->getNumValues() == 1)
01426     return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
01427   assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
01428   SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
01429   return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
01430 }
01431 
01432 // CC is a comparison that will be implemented using an integer or
01433 // floating-point comparison.  Return the condition code mask for
01434 // a branch on true.  In the integer case, CCMASK_CMP_UO is set for
01435 // unsigned comparisons and clear for signed ones.  In the floating-point
01436 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
// CONV(X) expands the plain, ordered, and unordered variants of one
// relation; the unsigned/unordered variant additionally sets CCMASK_CMP_UO.
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}
01459 
// Return a sequence for getting a 1 from an IPM result when CC has a
// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
// The handling of CC values outside CCValid doesn't matter.
//
// The cases below are ordered by preference: each earlier case yields a
// cheaper instruction sequence, so the order of the tests is significant.
static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
  // Deal with cases where the result can be taken directly from a bit
  // of the IPM result.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC);
  if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC + 1);

  // Deal with cases where we can add a value to force the sign bit
  // to contain the right value.  Putting the bit in 31 means we can
  // use SRL rather than RISBG(L), and also makes it easier to get a
  // 0/-1 value, so it has priority over the other tests below.
  //
  // These sequences rely on the fact that the upper two bits of the
  // IPM result are zero.
  uint64_t TopBit = uint64_t(1) << 31;
  if (CCMask == (CCValid & SystemZ::CCMASK_0))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
    return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2)))
    return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_3))
    return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);

  // Next try inverting the value and testing a bit.  0/1 could be
  // handled this way too, but we dealt with that case above.
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
    return IPMConversion(-1, 0, SystemZ::IPM_CC);

  // Handle cases where adding a value forces a non-sign bit to contain
  // the right value.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
    return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);

  // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All of these can
  // be done by inverting the low CC bit and applying one of the
  // sign-based extractions above.
  if (CCMask == (CCValid & SystemZ::CCMASK_1))
    return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_2))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (1 << SystemZ::IPM_CC), 31);

  llvm_unreachable("Unexpected CC combination");
}
01526 
01527 // If C can be converted to a comparison against zero, adjust the operands
01528 // as necessary.
01529 static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01530   if (C.ICmpType == SystemZICMP::UnsignedOnly)
01531     return;
01532 
01533   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
01534   if (!ConstOp1)
01535     return;
01536 
01537   int64_t Value = ConstOp1->getSExtValue();
01538   if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
01539       (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
01540       (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
01541       (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
01542     C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
01543     C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
01544   }
01545 }
01546 
// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // The comparison must have been classified as Any, since the constant
    // fits in the unextended width and the load zero-extends.
    assert(C.ICmpType == SystemZICMP::Any &&
           "Signedness shouldn't matter here.");
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  // If the existing load already has the required form it is reused;
  // otherwise a fresh extending load of the same memory is created.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType)
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
                           Load->getChain(), Load->getBasePtr(),
                           Load->getPointerInfo(), Load->getMemoryVT(),
                           Load->isVolatile(), Load->isNonTemporal(),
                           Load->isInvariant(), Load->getAlignment());

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}
01616 
01617 // Return true if Op is either an unextended load, or a load suitable
01618 // for integer register-memory comparisons of type ICmpType.
01619 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
01620   auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
01621   if (Load) {
01622     // There are no instructions to compare a register with a memory byte.
01623     if (Load->getMemoryVT() == MVT::i8)
01624       return false;
01625     // Otherwise decide on extension type.
01626     switch (Load->getExtensionType()) {
01627     case ISD::NON_EXTLOAD:
01628       return true;
01629     case ISD::SEXTLOAD:
01630       return ICmpType != SystemZICMP::UnsignedOnly;
01631     case ISD::ZEXTLOAD:
01632       return ICmpType != SystemZICMP::SignedOnly;
01633     default:
01634       break;
01635     }
01636   }
01637   return false;
01638 }
01639 
01640 // Return true if it is better to swap the operands of C.
01641 static bool shouldSwapCmpOperands(const Comparison &C) {
01642   // Leave f128 comparisons alone, since they have no memory forms.
01643   if (C.Op0.getValueType() == MVT::f128)
01644     return false;
01645 
01646   // Always keep a floating-point constant second, since comparisons with
01647   // zero can use LOAD TEST and comparisons with other constants make a
01648   // natural memory operand.
01649   if (isa<ConstantFPSDNode>(C.Op1))
01650     return false;
01651 
01652   // Never swap comparisons with zero since there are many ways to optimize
01653   // those later.
01654   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
01655   if (ConstOp1 && ConstOp1->getZExtValue() == 0)
01656     return false;
01657 
01658   // Also keep natural memory operands second if the loaded value is
01659   // only used here.  Several comparisons have memory forms.
01660   if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
01661     return false;
01662 
01663   // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
01664   // In that case we generally prefer the memory to be second.
01665   if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
01666     // The only exceptions are when the second operand is a constant and
01667     // we can use things like CHHSI.
01668     if (!ConstOp1)
01669       return true;
01670     // The unsigned memory-immediate instructions can handle 16-bit
01671     // unsigned integers.
01672     if (C.ICmpType != SystemZICMP::SignedOnly &&
01673         isUInt<16>(ConstOp1->getZExtValue()))
01674       return false;
01675     // The signed memory-immediate instructions can handle 16-bit
01676     // signed integers.
01677     if (C.ICmpType != SystemZICMP::UnsignedOnly &&
01678         isInt<16>(ConstOp1->getSExtValue()))
01679       return false;
01680     return true;
01681   }
01682 
01683   // Try to promote the use of CGFR and CLGFR.
01684   unsigned Opcode0 = C.Op0.getOpcode();
01685   if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
01686     return true;
01687   if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
01688     return true;
01689   if (C.ICmpType != SystemZICMP::SignedOnly &&
01690       Opcode0 == ISD::AND &&
01691       C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
01692       cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
01693     return true;
01694 
01695   return false;
01696 }
01697 
01698 // Return a version of comparison CC mask CCMask in which the LT and GT
01699 // actions are swapped.
01700 static unsigned reverseCCMask(unsigned CCMask) {
01701   return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
01702           (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
01703           (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
01704           (CCMask & SystemZ::CCMASK_CMP_UO));
01705 }
01706 
01707 // Check whether C tests for equality between X and Y and whether X - Y
01708 // or Y - X is also computed.  In that case it's better to compare the
01709 // result of the subtraction against zero.
01710 static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01711   if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
01712       C.CCMask == SystemZ::CCMASK_CMP_NE) {
01713     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
01714       SDNode *N = *I;
01715       if (N->getOpcode() == ISD::SUB &&
01716           ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
01717            (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
01718         C.Op0 = SDValue(N, 0);
01719         C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
01720         return;
01721       }
01722     }
01723   }
01724 }
01725 
01726 // Check whether C compares a floating-point value with zero and if that
01727 // floating-point value is also negated.  In this case we can use the
01728 // negation to set CC, so avoiding separate LOAD AND TEST and
01729 // LOAD (NEGATIVE/COMPLEMENT) instructions.
01730 static void adjustForFNeg(Comparison &C) {
01731   auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
01732   if (C1 && C1->isZero()) {
01733     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
01734       SDNode *N = *I;
01735       if (N->getOpcode() == ISD::FNEG) {
01736         C.Op0 = SDValue(N, 0);
01737         C.CCMask = reverseCCMask(C.CCMask);
01738         return;
01739       }
01740     }
01741   }
01742 }
01743 
01744 // Check whether C compares (shl X, 32) with 0 and whether X is
01745 // also sign-extended.  In that case it is better to test the result
01746 // of the sign extension using LTGFR.
01747 //
01748 // This case is important because InstCombine transforms a comparison
01749 // with (sext (trunc X)) into a comparison with (shl X, 32).
01750 static void adjustForLTGFR(Comparison &C) {
01751   // Check for a comparison between (shl X, 32) and 0.
01752   if (C.Op0.getOpcode() == ISD::SHL &&
01753       C.Op0.getValueType() == MVT::i64 &&
01754       C.Op1.getOpcode() == ISD::Constant &&
01755       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
01756     auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
01757     if (C1 && C1->getZExtValue() == 32) {
01758       SDValue ShlOp0 = C.Op0.getOperand(0);
01759       // See whether X has any SIGN_EXTEND_INREG uses.
01760       for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
01761         SDNode *N = *I;
01762         if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
01763             cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
01764           C.Op0 = SDValue(N, 0);
01765           return;
01766         }
01767       }
01768     }
01769   }
01770 }
01771 
01772 // If C compares the truncation of an extending load, try to compare
01773 // the untruncated value instead.  This exposes more opportunities to
01774 // reuse CC.
01775 static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
01776   if (C.Op0.getOpcode() == ISD::TRUNCATE &&
01777       C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
01778       C.Op1.getOpcode() == ISD::Constant &&
01779       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
01780     auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
01781     if (L->getMemoryVT().getStoreSizeInBits()
01782         <= C.Op0.getValueType().getSizeInBits()) {
01783       unsigned Type = L->getExtensionType();
01784       if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
01785           (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
01786         C.Op0 = C.Op0.getOperand(0);
01787         C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
01788       }
01789     }
01790   }
01791 }
01792 
01793 // Return true if shift operation N has an in-range constant shift value.
01794 // Store it in ShiftVal if so.
01795 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
01796   auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
01797   if (!Shift)
01798     return false;
01799 
01800   uint64_t Amount = Shift->getZExtValue();
01801   if (Amount >= N.getValueType().getSizeInBits())
01802     return false;
01803 
01804   ShiftVal = Amount;
01805   return true;
01806 }
01807 
// Check whether an AND with Mask is suitable for a TEST UNDER MASK
// instruction and whether the CC value is descriptive enough to handle
// a comparison of type Opcode between the AND result and CmpVal.
// CCMask says which comparison result is being tested and BitSize is
// the number of bits in the operands.  If TEST UNDER MASK can be used,
// return the corresponding CC mask, otherwise return 0.
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
                                     uint64_t Mask, uint64_t CmpVal,
                                     unsigned ICmpType) {
  assert(Mask != 0 && "ANDs with zero should have been removed by now");

  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL,
  // i.e. whether it fits in a single 16-bit field of the 64-bit value.
  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
      !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
    return 0;

  // Work out the masks for the lowest and highest bits.
  unsigned HighShift = 63 - countLeadingZeros(Mask);
  uint64_t High = uint64_t(1) << HighShift;
  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);

  // Signed ordered comparisons are effectively unsigned if the sign
  // bit is dropped.
  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);

  // Check for equality comparisons with 0, or the equivalent.
  if (CmpVal == 0) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  // (and X, Mask) < CmpVal with CmpVal <= Low can only fail if some
  // masked bit is set, and vice versa.
  if (EffectivelyUnsigned && CmpVal <= Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal < Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_SOME_1;
  }

  // Check for equality comparisons with the mask, or the equivalent.
  if (CmpVal == Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  // Comparisons in the window just below the mask distinguish
  // "all bits set" from "not all bits set".
  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_SOME_0;
  }

  // Check for ordered comparisons with the top bit.
  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_MSB_1;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_MSB_1;
  }

  // If there are just two bits, we can do equality checks for Low and High
  // as well.
  if (Mask == Low + High) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
  }

  // Looks like we've exhausted our options.
  return 0;
}
01903 
// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
  // Check that we have a comparison with a constant.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (!ConstOp1)
    return;
  uint64_t CmpVal = ConstOp1->getZExtValue();

  // Check whether the nonconstant input is an AND with a constant mask.
  // NewC is a tentative copy of C that we refine before committing.
  Comparison NewC(C);
  uint64_t MaskVal;
  ConstantSDNode *Mask = nullptr;
  if (C.Op0.getOpcode() == ISD::AND) {
    NewC.Op0 = C.Op0.getOperand(0);
    NewC.Op1 = C.Op0.getOperand(1);
    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
    if (!Mask)
      return;
    MaskVal = Mask->getZExtValue();
  } else {
    // There is no instruction to compare with a 64-bit immediate
    // so use TMHH instead if possible.  We need an unsigned ordered
    // comparison with an i64 immediate.
    if (NewC.Op0.getValueType() != MVT::i64 ||
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
        NewC.ICmpType == SystemZICMP::SignedOnly)
      return;
    // Convert LE and GT comparisons into LT and GE.
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
      // CmpVal == all-ones cannot be incremented; bail out.
      if (CmpVal == uint64_t(-1))
        return;
      CmpVal += 1;
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    }
    // If the low N bits of Op1 are zero then the low N bits of Op0 can
    // be masked off without changing the result.  (CmpVal & -CmpVal
    // isolates the lowest set bit; negating gives a mask of all bits
    // from that position upwards.)
    MaskVal = -(CmpVal & -CmpVal);
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
  }
  if (!MaskVal)
    return;

  // Check whether the combination of mask, comparison value and comparison
  // type are suitable.  Shifted forms are tried first so that the mask can
  // be applied to the unshifted operand.
  unsigned BitSize = NewC.Op0.getValueType().getSizeInBits();
  unsigned NewCCMask, ShiftVal;
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
      NewC.Op0.getOpcode() == ISD::SHL &&
      isSimpleShift(NewC.Op0, ShiftVal) &&
      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                        MaskVal >> ShiftVal,
                                        CmpVal >> ShiftVal,
                                        SystemZICMP::Any))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal >>= ShiftVal;
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
             NewC.Op0.getOpcode() == ISD::SRL &&
             isSimpleShift(NewC.Op0, ShiftVal) &&
             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                               MaskVal << ShiftVal,
                                               CmpVal << ShiftVal,
                                               SystemZICMP::UnsignedOnly))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal <<= ShiftVal;
  } else {
    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
                                     NewC.ICmpType);
    if (!NewCCMask)
      return;
  }

  // Go ahead and make the change.  Reuse the original mask node when its
  // value is unchanged, so the DAG stays as small as possible.
  C.Opcode = SystemZISD::TM;
  C.Op0 = NewC.Op0;
  if (Mask && Mask->getZExtValue() == MaskVal)
    C.Op1 = SDValue(Mask, 0);
  else
    C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
  C.CCValid = SystemZ::CCMASK_TM;
  C.CCMask = NewCCMask;
}
01988 
01989 // Return a Comparison that tests the condition-code result of intrinsic
01990 // node Call against constant integer CC using comparison code Cond.
01991 // Opcode is the opcode of the SystemZISD operation for the intrinsic
01992 // and CCValid is the set of possible condition-code results.
01993 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
01994                                   SDValue Call, unsigned CCValid, uint64_t CC,
01995                                   ISD::CondCode Cond) {
01996   Comparison C(Call, SDValue());
01997   C.Opcode = Opcode;
01998   C.CCValid = CCValid;
01999   if (Cond == ISD::SETEQ)
02000     // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
02001     C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
02002   else if (Cond == ISD::SETNE)
02003     // ...and the inverse of that.
02004     C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
02005   else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
02006     // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
02007     // always true for CC>3.
02008     C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
02009   else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
02010     // ...and the inverse of that.
02011     C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
02012   else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
02013     // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
02014     // always true for CC>3.
02015     C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
02016   else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
02017     // ...and the inverse of that.
02018     C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
02019   else
02020     llvm_unreachable("Unexpected integer comparison type");
02021   C.CCMask &= CCValid;
02022   return C;
02023 }
02024 
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                         ISD::CondCode Cond, SDLoc DL) {
  // First handle comparisons against the CC result of one of the
  // CC-producing intrinsics; these are represented with a null Op1.
  if (CmpOp1.getOpcode() == ISD::Constant) {
    uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
    unsigned Opcode, CCValid;
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
        isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
        isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
  }
  Comparison C(CmpOp0, CmpOp1);
  C.CCMask = CCMaskForCondCode(Cond);
  if (C.Op0.getValueType().isFloatingPoint()) {
    C.CCValid = SystemZ::CCMASK_FCMP;
    C.Opcode = SystemZISD::FCMP;
    adjustForFNeg(C);
  } else {
    C.CCValid = SystemZ::CCMASK_ICMP;
    C.Opcode = SystemZISD::ICMP;
    // Choose the type of comparison.  Equality and inequality tests can
    // use either signed or unsigned comparisons.  The choice also doesn't
    // matter if both sign bits are known to be clear.  In those cases we
    // want to give the main isel code the freedom to choose whichever
    // form fits best.
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
        C.CCMask == SystemZ::CCMASK_CMP_NE ||
        (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
      C.ICmpType = SystemZICMP::Any;
    else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
      C.ICmpType = SystemZICMP::UnsignedOnly;
    else
      C.ICmpType = SystemZICMP::SignedOnly;
    C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
    // Apply the integer-specific rewrites.  The order matters: each
    // adjustment may enable or depend on the shape left by earlier ones.
    adjustZeroCmp(DAG, DL, C);
    adjustSubwordCmp(DAG, DL, C);
    adjustForSubtraction(DAG, DL, C);
    adjustForLTGFR(C);
    adjustICmpTruncate(DAG, DL, C);
  }

  // Put the operand that benefits from a register/memory form second,
  // reversing the LT/GT sense of the mask to compensate.
  if (shouldSwapCmpOperands(C)) {
    std::swap(C.Op0, C.Op1);
    C.CCMask = reverseCCMask(C.CCMask);
  }

  adjustForTestUnderMask(DAG, DL, C);
  return C;
}
02078 
02079 // Emit the comparison instruction described by C.
02080 static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
02081   if (!C.Op1.getNode()) {
02082     SDValue Op;
02083     switch (C.Op0.getOpcode()) {
02084     case ISD::INTRINSIC_W_CHAIN:
02085       Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
02086       break;
02087     case ISD::INTRINSIC_WO_CHAIN:
02088       Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
02089       break;
02090     default:
02091       llvm_unreachable("Invalid comparison operands");
02092     }
02093     return SDValue(Op.getNode(), Op->getNumValues() - 1);
02094   }
02095   if (C.Opcode == SystemZISD::ICMP)
02096     return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
02097                        DAG.getConstant(C.ICmpType, DL, MVT::i32));
02098   if (C.Opcode == SystemZISD::TM) {
02099     bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
02100                          bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
02101     return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
02102                        DAG.getConstant(RegisterOnly, DL, MVT::i32));
02103   }
02104   return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
02105 }
02106 
02107 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
02108 // 64 bits.  Extend is the extension type to use.  Store the high part
02109 // in Hi and the low part in Lo.
02110 static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
02111                             unsigned Extend, SDValue Op0, SDValue Op1,
02112                             SDValue &Hi, SDValue &Lo) {
02113   Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
02114   Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
02115   SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
02116   Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
02117                    DAG.getConstant(32, DL, MVT::i64));
02118   Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
02119   Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
02120 }
02121 
02122 // Lower a binary operation that produces two VT results, one in each
02123 // half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
02124 // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
02125 // on the extended Op0 and (unextended) Op1.  Store the even register result
02126 // in Even and the odd register result in Odd.
02127 static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
02128                              unsigned Extend, unsigned Opcode,
02129                              SDValue Op0, SDValue Op1,
02130                              SDValue &Even, SDValue &Odd) {
02131   SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
02132   SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
02133                                SDValue(In128, 0), Op1);
02134   bool Is32Bit = is32Bit(VT);
02135   Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
02136   Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
02137 }
02138 
02139 // Return an i32 value that is 1 if the CC value produced by Glue is
02140 // in the mask CCMask and 0 otherwise.  CC is known to have a value
02141 // in CCValid, so other values can be ignored.
02142 static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
02143                          unsigned CCValid, unsigned CCMask) {
02144   IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
02145   SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
02146 
02147   if (Conversion.XORValue)
02148     Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
02149                          DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
02150 
02151   if (Conversion.AddValue)
02152     Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
02153                          DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
02154 
02155   // The SHR/AND sequence should get optimized to an RISBG.
02156   Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
02157                        DAG.getConstant(Conversion.Bit, DL, MVT::i32));
02158   if (Conversion.Bit != 31)
02159     Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
02160                          DAG.getConstant(1, DL, MVT::i32));
02161   return Result;
02162 }
02163 
02164 // Return the SystemISD vector comparison operation for CC, or 0 if it cannot
02165 // be done directly.  IsFP is true if CC is for a floating-point rather than
02166 // integer comparison.
02167 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
02168   switch (CC) {
02169   case ISD::SETOEQ:
02170   case ISD::SETEQ:
02171     return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
02172 
02173   case ISD::SETOGE:
02174   case ISD::SETGE:
02175     return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
02176 
02177   case ISD::SETOGT:
02178   case ISD::SETGT:
02179     return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
02180 
02181   case ISD::SETUGT:
02182     return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
02183 
02184   default:
02185     return 0;
02186   }
02187 }
02188 
02189 // Return the SystemZISD vector comparison operation for CC or its inverse,
02190 // or 0 if neither can be done directly.  Indicate in Invert whether the
02191 // result is for the inverse of CC.  IsFP is true if CC is for a
02192 // floating-point rather than integer comparison.
02193 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
02194                                             bool &Invert) {
02195   if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
02196     Invert = false;
02197     return Opcode;
02198   }
02199 
02200   CC = ISD::getSetCCInverse(CC, !IsFP);
02201   if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
02202     Invert = true;
02203     return Opcode;
02204   }
02205 
02206   return 0;
02207 }
02208 
02209 // Return a v2f64 that contains the extended form of elements Start and Start+1
02210 // of v4f32 value Op.
02211 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
02212                                   SDValue Op) {
02213   int Mask[] = { Start, -1, Start + 1, -1 };
02214   Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
02215   return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
02216 }
02217 
02218 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
02219 // producing a result of type VT.
02220 static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
02221                             EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
02222   // There is no hardware support for v4f32, so extend the vector into
02223   // two v2f64s and compare those.
02224   if (CmpOp0.getValueType() == MVT::v4f32) {
02225     SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
02226     SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
02227     SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
02228     SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
02229     SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
02230     SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
02231     return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
02232   }
02233   return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
02234 }
02235 
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT.
static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
                                ISD::CondCode CC, SDValue CmpOp0,
                                SDValue CmpOp1) {
  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
  // If set, the computed comparison must be XORed with all-ones at the end.
  bool Invert = false;
  SDValue Cmp;
  switch (CC) {
    // Handle tests for order using (or (ogt y x) (oge x y)).
  case ISD::SETUO:
    // "Unordered" is the inverse of "ordered": deliberately fall through
    // to the SETO code below and invert the result afterwards.
    Invert = true;
  case ISD::SETO: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
    break;
  }

    // Handle <> tests using (or (ogt y x) (ogt x y)).
  case ISD::SETUEQ:
    // "Unordered or equal" is the inverse of "ordered and not equal":
    // deliberately fall through to the SETONE code below and invert.
    Invert = true;
  case ISD::SETONE: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
    break;
  }

    // Otherwise a single comparison is enough.  It doesn't really
    // matter whether we try the inversion or the swap first, since
    // there are no cases where both work.
  default:
    if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
    else {
      // Try the same condition with the operands swapped.
      CC = ISD::getSetCCSwappedOperands(CC);
      if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
      else
        llvm_unreachable("Unhandled comparison");
    }
    break;
  }
  if (Invert) {
    // Invert the mask by XORing with an all-ones vector.
    SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
                               DAG.getConstant(65535, DL, MVT::i32));
    Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
    Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
  }
  return Cmp;
}
02290 
02291 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
02292                                           SelectionDAG &DAG) const {
02293   SDValue CmpOp0   = Op.getOperand(0);
02294   SDValue CmpOp1   = Op.getOperand(1);
02295   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
02296   SDLoc DL(Op);
02297   EVT VT = Op.getValueType();
02298   if (VT.isVector())
02299     return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
02300 
02301   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
02302   SDValue Glue = emitCmp(DAG, DL, C);
02303   return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
02304 }
02305 
02306 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
02307   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
02308   SDValue CmpOp0   = Op.getOperand(2);
02309   SDValue CmpOp1   = Op.getOperand(3);
02310   SDValue Dest     = Op.getOperand(4);
02311   SDLoc DL(Op);
02312 
02313   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
02314   SDValue Glue = emitCmp(DAG, DL, C);
02315   return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
02316                      Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
02317                      DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
02318 }
02319 
02320 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
02321 // allowing Pos and Neg to be wider than CmpOp.
02322 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
02323   return (Neg.getOpcode() == ISD::SUB &&
02324           Neg.getOperand(0).getOpcode() == ISD::Constant &&
02325           cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
02326           Neg.getOperand(1) == Pos &&
02327           (Pos == CmpOp ||
02328            (Pos.getOpcode() == ISD::SIGN_EXTEND &&
02329             Pos.getOperand(0) == CmpOp)));
02330 }
02331 
02332 // Return the absolute or negative absolute of Op; IsNegative decides which.
02333 static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
02334                            bool IsNegative) {
02335   Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
02336   if (IsNegative)
02337     Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
02338                      DAG.getConstant(0, DL, Op.getValueType()), Op);
02339   return Op;
02340 }
02341 
// Lower a SELECT_CC node, which chooses between TrueOp and FalseOp
// depending on a comparison between CmpOp0 and CmpOp1.
SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue CmpOp0   = Op.getOperand(0);
  SDValue CmpOp1   = Op.getOperand(1);
  SDValue TrueOp   = Op.getOperand(2);
  SDValue FalseOp  = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));

  // Check for absolute and negative-absolute selections, including those
  // where the comparison value is sign-extended (for LPGFR and LNGFR).
  // This check supplements the one in DAGCombiner.
  if (C.Opcode == SystemZISD::ICMP &&
      C.CCMask != SystemZ::CCMASK_CMP_EQ &&
      C.CCMask != SystemZ::CCMASK_CMP_NE &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    // The comparison is against zero; the LT/GT bit of the mask decides
    // whether the selected value is |x| or -|x|.
    if (isAbsolute(C.Op0, TrueOp, FalseOp))
      return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
    if (isAbsolute(C.Op0, FalseOp, TrueOp))
      return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
  }

  SDValue Glue = emitCmp(DAG, DL, C);

  // Special case for handling -1/0 results.  The shifts we use here
  // should get optimized with the IPM conversion sequence.
  auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
  auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
  if (TrueC && FalseC) {
    int64_t TrueVal = TrueC->getSExtValue();
    int64_t FalseVal = FalseC->getSExtValue();
    if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
      // Invert the condition if we want -1 on false.
      if (TrueVal == 0)
        C.CCMask ^= C.CCValid;
      // Get a 0/1 value from CC, then turn it into 0/-1 below.
      SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
      EVT VT = Op.getValueType();
      // Extend the result to VT.  Upper bits are ignored.
      if (!is32Bit(VT))
        Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
      // Sign-extend from the low bit.
      SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
      return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
    }
  }

  // General case: select via a SELECT_CCMASK pseudo-operation on the
  // CC value produced by the comparison.
  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
                   DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
}
02398 
// Lower a GlobalAddress node, using either a PC-relative anchor or a
// GOT load depending on how the symbol can be reached.
SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy();
  Reloc::Model RM = DAG.getTarget().getRelocationModel();
  CodeModel::Model CM = DAG.getTarget().getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
    // The symbol is reachable with a 32-bit PC-relative reference.
    // Assign anchors at 1<<12 byte boundaries.
    uint64_t Anchor = Offset & ~uint64_t(0xfff);
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);

    // The offset can be folded into the address if it is aligned to a halfword.
    Offset -= Anchor;
    if (Offset != 0 && (Offset & 1) == 0) {
      SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
      Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
      Offset = 0;
    }
  } else {
    // Otherwise load the symbol's address from the GOT.
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(), false, false, false, 0);
  }

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, DL, PtrVT));

  return Result;
}
02437 
// Emit a call to __tls_get_offset for TLS symbol Node.  Opcode is the
// call node to use (TLS_GDCALL or TLS_LDCALL, see the callers below) and
// GOTOffset is the GOT offset to pass in %r2.  Returns the value that the
// call leaves in %r2.
SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy();
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  // Glue the two copies together so they stay adjacent to the call.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(1);

  // The first call operand is the chain and the second is the TLS symbol.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
                                           Node->getValueType(0),
                                           0, 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}
02484 
// Lower a thread-local global address: compute the thread pointer from
// access registers 0 and 1, then add the model-specific TLS offset of GV.
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy();
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(0, DL, MVT::i32));
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(1, DL, MVT::i32));
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, DL, PtrVT));
  SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
    case TLSModel::GeneralDynamic: {
      // Load the GOT offset of the tls_index (module ID / per-symbol offset).
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                           Offset, MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);

      // Call __tls_get_offset to retrieve the offset.
      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
      break;
    }

    case TLSModel::LocalDynamic: {
      // Load the GOT offset of the module ID.
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                           Offset, MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);

      // Call __tls_get_offset to retrieve the module base offset.
      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);

      // Note: The SystemZLDCleanupPass will remove redundant computations
      // of the module base offset.  Count total number of local-dynamic
      // accesses to trigger execution of that pass.
      SystemZMachineFunctionInfo* MFI =
        DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
      MFI->incNumLocalDynamicTLSAccesses();

      // Add the per-symbol offset (relative to the module base).
      CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);

      SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
      DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                              DTPOffset, MachinePointerInfo::getConstantPool(),
                              false, false, false, 0);

      Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
      break;
    }

    case TLSModel::InitialExec: {
      // Load the offset from the GOT.
      Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                          SystemZII::MO_INDNTPOFF);
      Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
      Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                           Offset, MachinePointerInfo::getGOT(),
                           false, false, false, 0);
      break;
    }

    case TLSModel::LocalExec: {
      // Force the offset into the constant pool and load it from there.
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                           Offset, MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
      break;
    }
  }

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}
02584 
02585 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
02586                                                  SelectionDAG &DAG) const {
02587   SDLoc DL(Node);
02588   const BlockAddress *BA = Node->getBlockAddress();
02589   int64_t Offset = Node->getOffset();
02590   EVT PtrVT = getPointerTy();
02591 
02592   SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
02593   Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02594   return Result;
02595 }
02596 
02597 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
02598                                               SelectionDAG &DAG) const {
02599   SDLoc DL(JT);
02600   EVT PtrVT = getPointerTy();
02601   SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
02602 
02603   // Use LARL to load the address of the table.
02604   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02605 }
02606 
02607 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
02608                                                  SelectionDAG &DAG) const {
02609   SDLoc DL(CP);
02610   EVT PtrVT = getPointerTy();
02611 
02612   SDValue Result;
02613   if (CP->isMachineConstantPoolEntry())
02614     Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
02615                CP->getAlignment());
02616   else
02617     Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
02618                CP->getAlignment(), CP->getOffset());
02619 
02620   // Use LARL to load the address of the constant pool entry.
02621   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
02622 }
02623 
// Lower a BITCAST between i32 and f32 by going through a 64-bit register,
// since f32 values live in the high half of a 64-bit floating-point register.
SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  // Convert loads directly.  This is normally done by DAGCombiner,
  // but we need this case for bitcasts that are created during lowering
  // and which are then lowered themselves.
  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
    return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
                       LoadN->getMemOperand());

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    // Place the i32 value in the upper 32 bits of an i64, bitcast that
    // to f64 and extract the f32 subregister.
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      // With high-word support, insert directly into the high 32-bit
      // subregister of an undefined i64.
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      // Otherwise shift the value into the upper half explicitly.
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, DL, MVT::i64));
    }
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    // The reverse direction: widen the f32 to f64 via a subregister
    // insert, bitcast to i64 and take the upper 32 bits.
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination");
}
02668 
02669 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
02670                                             SelectionDAG &DAG) const {
02671   MachineFunction &MF = DAG.getMachineFunction();
02672   SystemZMachineFunctionInfo *FuncInfo =
02673     MF.getInfo<SystemZMachineFunctionInfo>();
02674   EVT PtrVT = getPointerTy();
02675 
02676   SDValue Chain   = Op.getOperand(0);
02677   SDValue Addr    = Op.getOperand(1);
02678   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02679   SDLoc DL(Op);
02680 
02681   // The initial values of each field.
02682   const unsigned NumFields = 4;
02683   SDValue Fields[NumFields] = {
02684     DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
02685     DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
02686     DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
02687     DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
02688   };
02689 
02690   // Store each field into its respective slot.
02691   SDValue MemOps[NumFields];
02692   unsigned Offset = 0;
02693   for (unsigned I = 0; I < NumFields; ++I) {
02694     SDValue FieldAddr = Addr;
02695     if (Offset != 0)
02696       FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
02697                               DAG.getIntPtrConstant(Offset, DL));
02698     MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
02699                              MachinePointerInfo(SV, Offset),
02700                              false, false, 0);
02701     Offset += 8;
02702   }
02703   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
02704 }
02705 
02706 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
02707                                            SelectionDAG &DAG) const {
02708   SDValue Chain      = Op.getOperand(0);
02709   SDValue DstPtr     = Op.getOperand(1);
02710   SDValue SrcPtr     = Op.getOperand(2);
02711   const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
02712   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
02713   SDLoc DL(Op);
02714 
02715   return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
02716                        /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
02717                        /*isTailCall*/false,
02718                        MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
02719 }
02720 
02721 SDValue SystemZTargetLowering::
02722 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
02723   SDValue Chain = Op.getOperand(0);
02724   SDValue Size  = Op.getOperand(1);
02725   SDLoc DL(Op);
02726 
02727   unsigned SPReg = getStackPointerRegisterToSaveRestore();
02728 
02729   // Get a reference to the stack pointer.
02730   SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
02731 
02732   // Get the new stack pointer value.
02733   SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);
02734 
02735   // Copy the new stack pointer back.
02736   Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
02737 
02738   // The allocated data lives above the 160 bytes allocated for the standard
02739   // frame, plus any outgoing stack arguments.  We don't know how much that
02740   // amounts to yet, so emit a special ADJDYNALLOC placeholder.
02741   SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
02742   SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
02743 
02744   SDValue Ops[2] = { Result, Chain };
02745   return DAG.getMergeValues(Ops, DL);
02746 }
02747 
// Lower an SMUL_LOHI node, producing the low half of the signed product
// in result 0 and the high half in result 1.
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else {
    // Do a full 128-bit multiplication based on UMUL_LOHI64:
    //
    //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
    //
    // but using the fact that the upper halves are either all zeros
    // or all ones:
    //
    //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
    //
    // and grouping the right terms together since they are quicker than the
    // multiplication:
    //
    //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
    SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
    SDValue LL = Op.getOperand(0);
    SDValue RL = Op.getOperand(1);
    // LH and RH are 0 or -1 depending on the operands' signs.
    SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
    SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
    // UMUL_LOHI64 returns the low result in the odd register and the high
    // result in the even register.  SMUL_LOHI is defined to return the
    // low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
                     LL, RL, Ops[1], Ops[0]);
    SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
    SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
    SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
    // The correction terms are shifted left by 64 in the formula above,
    // so they only affect the high half (Ops[1]).
    Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
  }
  return DAG.getMergeValues(Ops, DL);
}
02789 
02790 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
02791                                               SelectionDAG &DAG) const {
02792   EVT VT = Op.getValueType();
02793   SDLoc DL(Op);
02794   SDValue Ops[2];
02795   if (is32Bit(VT))
02796     // Just do a normal 64-bit multiplication and extract the results.
02797     // We define this so that it can be used for constant division.
02798     lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
02799                     Op.getOperand(1), Ops[1], Ops[0]);
02800   else
02801     // UMUL_LOHI64 returns the low result in the odd register and the high
02802     // result in the even register.  UMUL_LOHI is defined to return the
02803     // low half first, so the results are in reverse order.
02804     lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
02805                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
02806   return DAG.getMergeValues(Ops, DL);
02807 }
02808 
02809 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
02810                                             SelectionDAG &DAG) const {
02811   SDValue Op0 = Op.getOperand(0);
02812   SDValue Op1 = Op.getOperand(1);
02813   EVT VT = Op.getValueType();
02814   SDLoc DL(Op);
02815   unsigned Opcode;
02816 
02817   // We use DSGF for 32-bit division.
02818   if (is32Bit(VT)) {
02819     Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
02820     Opcode = SystemZISD::SDIVREM32;
02821   } else if (DAG.ComputeNumSignBits(Op1) > 32) {
02822     Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
02823     Opcode = SystemZISD::SDIVREM32;
02824   } else    
02825     Opcode = SystemZISD::SDIVREM64;
02826 
02827   // DSG(F) takes a 64-bit dividend, so the even register in the GR128
02828   // input is "don't care".  The instruction returns the remainder in
02829   // the even register and the quotient in the odd register.
02830   SDValue Ops[2];
02831   lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
02832                    Op0, Op1, Ops[1], Ops[0]);
02833   return DAG.getMergeValues(Ops, DL);
02834 }
02835 
02836 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
02837                                             SelectionDAG &DAG) const {
02838   EVT VT = Op.getValueType();
02839   SDLoc DL(Op);
02840 
02841   // DL(G) uses a double-width dividend, so we need to clear the even
02842   // register in the GR128 input.  The instruction returns the remainder
02843   // in the even register and the quotient in the odd register.
02844   SDValue Ops[2];
02845   if (is32Bit(VT))
02846     lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
02847                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
02848   else
02849     lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
02850                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
02851   return DAG.getMergeValues(Ops, DL);
02852 }
02853 
// Lower an i64 ISD::OR whose operands provide disjoint 32-bit halves
// (one contributes only the high word, the other only the low word)
// into a subreg insertion of the low word into the high operand.
SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
  APInt KnownZero[2], KnownOne[2];
  DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]);
  DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]);

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero.  They are the low and high operands respectively.
  uint64_t Masks[] = { KnownZero[0].getZExtValue(),
                       KnownZero[1].getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    // The halves are not provably disjoint; keep the generic OR.
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits.  We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    // The AND is redundant for the high word if every bit it would clear
    // above bit 31 is already known zero in its input.
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg.  The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}
02909 
// Lower CTPOP using the POPCNT instruction, which produces the
// population count of each byte of its operand.
SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  // From here on Op is the value whose bits are being counted.
  Op = Op.getOperand(0);

  // Handle vector types via VPOPCT.
  if (VT.isVector()) {
    // VPOPCT gives per-byte counts; widen them to the element size below.
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    switch (VT.getVectorElementType().getSizeInBits()) {
    case 8:
      // Per-byte counts are already the final answer.
      break;
    case 16: {
      // Add the two byte counts of each halfword: shift the low count
      // up, add, then shift the sum back down into the low byte.
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
      break;
    }
    case 32: {
      // VSUM with a zero second operand sums the byte counts of each word.
      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
                                DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
      // Two VSUM steps: bytes -> words, then words -> doublewords.
      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
                                DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type");
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(Op, KnownZero, KnownOne);
  unsigned NumSignificantBits = (~KnownZero).getActiveBits();
  if (NumSignificantBits == 0)
    // Every bit is known zero, so the count is trivially 0.
    return DAG.getConstant(0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
    if (BitSize != OrigBitSize)
      // Keep the accumulation confined to the significant low BitSize bits.
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                     DAG.getConstant(BitSize - 8, DL, VT));

  return Op;
}
02984 
02985 // Op is an atomic load.  Lower it into a normal volatile load.
02986 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
02987                                                 SelectionDAG &DAG) const {
02988   auto *Node = cast<AtomicSDNode>(Op.getNode());
02989   return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
02990                         Node->getChain(), Node->getBasePtr(),
02991                         Node->getMemoryVT(), Node->getMemOperand());
02992 }
02993 
02994 // Op is an atomic store.  Lower it into a normal volatile store followed
02995 // by a serialization.
02996 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
02997                                                  SelectionDAG &DAG) const {
02998   auto *Node = cast<AtomicSDNode>(Op.getNode());
02999   SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
03000                                     Node->getBasePtr(), Node->getMemoryVT(),
03001                                     Node->getMemOperand());
03002   return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
03003                                     Chain), 0);
03004 }
03005 
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
// The narrow value is operated on inside its aligned containing word,
// using the rotate amounts computed below.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no code outside the main loop.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
    }

  // Get the address of the containing word.  (ANDing with -4 clears the
  // low two address bits, rounding down to a word boundary.)
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, DL, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, DL, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  // Return both the rotated value and the chain from the atomic op.
  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}
03078 
// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
// operations into additions.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = Node->getMemoryVT();
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
    // A full-width operation.
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
    SDValue Src2 = Node->getVal();
    // NegSrc2 stays null unless a profitable negation is found below.
    SDValue NegSrc2;
    SDLoc DL(Src2);

    if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
      // Use an addition if the operand is constant and either LAA(G) is
      // available or the negative value is in the range of A(G)FHI.
      int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
      if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
        NegSrc2 = DAG.getConstant(Value, DL, MemVT);
    } else if (Subtarget.hasInterlockedAccess1())
      // Use LAA(G) if available.
      NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
                            Src2);

    if (NegSrc2.getNode())
      // Rewrite the subtraction as an atomic add of the negated operand.
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
                           Node->getChain(), Node->getBasePtr(), NegSrc2,
                           Node->getMemOperand(), Node->getOrdering(),
                           Node->getSynchScope());

    // Use the node as-is.
    return Op;
  }

  // 8- and 16-bit subtractions are emulated on the containing word.
  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}
03116 
// Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation.  Lower the first two
// into a fullword ATOMIC_CMP_SWAPW operation.  The narrow field is
// compared and swapped within its aligned containing word, using the
// rotate amounts computed below (mirroring lowerATOMIC_LOAD_OP).
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // We have native support for 32-bit compare and swap.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Get the address of the containing word.  (ANDing with -4 rounds the
  // address down to a word boundary.)
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Construct the ATOMIC_CMP_SWAPW node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  return AtomicOp;
}
03161 
03162 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
03163                                               SelectionDAG &DAG) const {
03164   MachineFunction &MF = DAG.getMachineFunction();
03165   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
03166   return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
03167                             SystemZ::R15D, Op.getValueType());
03168 }
03169 
03170 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
03171                                                  SelectionDAG &DAG) const {
03172   MachineFunction &MF = DAG.getMachineFunction();
03173   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
03174   return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
03175                           SystemZ::R15D, Op.getOperand(1));
03176 }
03177 
03178 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
03179                                              SelectionDAG &DAG) const {
03180   bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
03181   if (!IsData)
03182     // Just preserve the chain.
03183     return Op.getOperand(0);
03184 
03185   SDLoc DL(Op);
03186   bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
03187   unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
03188   auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
03189   SDValue Ops[] = {
03190     Op.getOperand(0),
03191     DAG.getConstant(Code, DL, MVT::i32),
03192     Op.getOperand(1)
03193   };
03194   return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
03195                                  Node->getVTList(), Ops,
03196                                  Node->getMemoryVT(), Node->getMemOperand());
03197 }
03198 
03199 // Return an i32 that contains the value of CC immediately after After,
03200 // whose final operand must be MVT::Glue.
03201 static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
03202   SDLoc DL(After);
03203   SDValue Glue = SDValue(After, After->getNumValues() - 1);
03204   SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
03205   return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
03206                      DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
03207 }
03208 
03209 SDValue
03210 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
03211                                               SelectionDAG &DAG) const {
03212   unsigned Opcode, CCValid;
03213   if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
03214     assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
03215     SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
03216     SDValue CC = getCCResult(DAG, Glued.getNode());
03217     DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
03218     return SDValue();
03219   }
03220 
03221   return SDValue();
03222 }
03223 
// Lower chainless vector intrinsics either via their glued CC-producing
// form or directly to the corresponding SystemZISD node.
SDValue
SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
    // CC-setting intrinsics: emit the glued form, then read CC from it.
    SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
    SDValue CC = getCCResult(DAG, Glued.getNode());
    if (Op->getNumValues() == 1)
      return CC;
    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
        Glued, CC);
  }

  // Simple intrinsics that map one-to-one onto SystemZISD opcodes.
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_vpdi:
    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vperm:
    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vuphb:
  case Intrinsic::s390_vuphh:
  case Intrinsic::s390_vuphf:
    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplhb:
  case Intrinsic::s390_vuplhh:
  case Intrinsic::s390_vuplhf:
    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplb:
  case Intrinsic::s390_vuplhw:
  case Intrinsic::s390_vuplf:
    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vupllb:
  case Intrinsic::s390_vupllh:
  case Intrinsic::s390_vupllf:
    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vsumb:
  case Intrinsic::s390_vsumh:
  case Intrinsic::s390_vsumgh:
  case Intrinsic::s390_vsumgf:
  case Intrinsic::s390_vsumqf:
  case Intrinsic::s390_vsumqg:
    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }

  // Unrecognized intrinsic: leave it for default expansion/selection.
  return SDValue();
}
03284 
namespace {
// Says that SystemZISD operation Opcode can be used to perform the equivalent
// of a VPERM with permute vector Bytes.  If Opcode takes three operands,
// Operand is the constant third operand, otherwise it is the number of
// bytes in each element of the result.
struct Permute {
  unsigned Opcode;       // SystemZISD opcode implementing the pattern.
  unsigned Operand;      // Third operand or element byte width (see above).
  // Byte-level selector: values 0-15 pick from operand 0, 16-31 from
  // operand 1, as in VPERM.
  unsigned char Bytes[SystemZ::VectorBytes];
};
}
03296 
// The byte-permute patterns that can be implemented with a single
// instruction, tried in this order when matching a shuffle.
static const Permute PermuteForms[] = {
  // VMRHG
  { SystemZISD::MERGE_HIGH, 8,
    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VMRHF
  { SystemZISD::MERGE_HIGH, 4,
    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
  // VMRHH
  { SystemZISD::MERGE_HIGH, 2,
    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
  // VMRHB
  { SystemZISD::MERGE_HIGH, 1,
    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
  // VMRLG
  { SystemZISD::MERGE_LOW, 8,
    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
  // VMRLF
  { SystemZISD::MERGE_LOW, 4,
    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
  // VMRLH
  { SystemZISD::MERGE_LOW, 2,
    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
  // VMRLB
  { SystemZISD::MERGE_LOW, 1,
    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
  // VPKG
  { SystemZISD::PACK, 4,
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
  // VPKF
  { SystemZISD::PACK, 2,
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
  // VPKH
  { SystemZISD::PACK, 1,
    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
  // VPDI V1, V2, 4  (low half of V1, high half of V2)
  { SystemZISD::PERMUTE_DWORDS, 4,
    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VPDI V1, V2, 1  (high half of V1, low half of V2)
  { SystemZISD::PERMUTE_DWORDS, 1,
    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
};
03338 
// Called after matching a vector shuffle against a particular pattern.
// Both the original shuffle and the pattern have two vector operands.
// OpNos[0] is the operand of the original shuffle that should be used for
// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
// OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
// for operands 0 and 1 of the pattern.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
  // With neither pattern operand constrained there is nothing to resolve.
  if (OpNos[0] < 0 && OpNos[1] < 0)
    return false;
  // An unconstrained pattern operand duplicates the constrained one.
  OpNo0 = OpNos[0] < 0 ? OpNos[1] : OpNos[0];
  OpNo1 = OpNos[1] < 0 ? OpNos[0] : OpNos[1];
  return true;
}
03359 
03360 // Bytes is a VPERM-like permute vector, except that -1 is used for
03361 // undefined bytes.  Return true if the VPERM can be implemented using P.
03362 // When returning true set OpNo0 to the VPERM operand that should be
03363 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
03364 //
03365 // For example, if swapping the VPERM operands allows P to match, OpNo0
03366 // will be 1 and OpNo1 will be 0.  If instead Bytes only refers to one
03367 // operand, but rewriting it to use two duplicated operands allows it to
03368 // match P, then OpNo0 and OpNo1 will be the same.
03369 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
03370                          unsigned &OpNo0, unsigned &OpNo1) {
03371   int OpNos[] = { -1, -1 };
03372   for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
03373     int Elt = Bytes[I];
03374     if (Elt >= 0) {
03375       // Make sure that the two permute vectors use the same suboperand
03376       // byte number.  Only the operand numbers (the high bits) are
03377       // allowed to differ.
03378       if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
03379         return false;
03380       int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
03381       int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
03382       // Make sure that the operand mappings are consistent with previous
03383       // elements.
03384       if (OpNos[ModelOpNo] == 1 - RealOpNo)
03385         return false;
03386       OpNos[ModelOpNo] = RealOpNo;
03387     }
03388   }
03389   return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
03390 }
03391 
03392 // As above, but search for a matching permute.
03393 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
03394                                    unsigned &OpNo0, unsigned &OpNo1) {
03395   for (auto &P : PermuteForms)
03396     if (matchPermute(Bytes, P, OpNo0, OpNo1))
03397       return &P;
03398   return nullptr;
03399 }
03400 
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  This permute is an operand of an outer permute.
// See whether redistributing the -1 bytes gives a shuffle that can be
// implemented using P.  If so, set Transform to a VPERM-like permute vector
// that, when applied to the result of P, gives the original permute in Bytes.
static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
                               const Permute &P,
                               SmallVectorImpl<int> &Transform) {
  // To only ever advances, so the defined bytes of Bytes must appear in
  // P.Bytes in the same relative order for the match to succeed.
  unsigned To = 0;
  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
    int Elt = Bytes[From];
    if (Elt < 0)
      // Byte number From of the result is undefined.
      Transform[From] = -1;
    else {
      // Scan forward for the position of P that produces byte Elt.
      while (P.Bytes[To] != Elt) {
        To += 1;
        if (To == SystemZ::VectorBytes)
          // Elt does not occur at or after position To; no match.
          return false;
      }
      Transform[From] = To;
    }
  }
  return true;
}
03426 
03427 // As above, but search for a matching permute.
03428 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
03429                                          SmallVectorImpl<int> &Transform) {
03430   for (auto &P : PermuteForms)
03431     if (matchDoublePermute(Bytes, P, Transform))
03432       return &P;
03433   return nullptr;
03434 }
03435 
03436 // Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
03437 // as if it had type vNi8.
03438 static void getVPermMask(ShuffleVectorSDNode *VSN,
03439                          SmallVectorImpl<int> &Bytes) {
03440   EVT VT = VSN->getValueType(0);
03441   unsigned NumElements = VT.getVectorNumElements();
03442   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
03443   Bytes.resize(NumElements * BytesPerElement, -1);
03444   for (unsigned I = 0; I < NumElements; ++I) {
03445     int Index = VSN->getMaskElt(I);
03446     if (Index >= 0)
03447       for (unsigned J = 0; J < BytesPerElement; ++J)
03448         Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
03449   }
03450 }
03451 
03452 // Bytes is a VPERM-like permute vector, except that -1 is used for
03453 // undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
03454 // the result come from a contiguous sequence of bytes from one input.
03455 // Set Base to the selector for the first byte if so.
03456 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
03457                             unsigned BytesPerElement, int &Base) {
03458   Base = -1;
03459   for (unsigned I = 0; I < BytesPerElement; ++I) {
03460     if (Bytes[Start + I] >= 0) {
03461       unsigned Elem = Bytes[Start + I];
03462       if (Base < 0) {
03463         Base = Elem - I;
03464         // Make sure the bytes would come from one input operand.
03465         if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
03466           return false;
03467       } else if (unsigned(Base) != Elem - I)
03468         return false;
03469     }
03470   }
03471   return true;
03472 }
03473 
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  Return true if it can be performed using VSLDI.
// When returning true, set StartIndex to the shift amount and OpNo0
// and OpNo1 to the VPERM operands that should be used as the first
// and second shift operand respectively.
static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
                               unsigned &StartIndex, unsigned &OpNo0,
                               unsigned &OpNo1) {
  int OpNos[] = { -1, -1 };
  int Shift = -1;
  for (unsigned I = 0; I < 16; ++I) {
    int Index = Bytes[I];
    if (Index >= 0) {
      // The shift that would make source byte Index land at result byte I,
      // modulo the vector width.
      int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
      int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
      // All defined bytes must agree on a single shift amount.
      if (Shift < 0)
        Shift = ExpectedShift;
      else if (Shift != ExpectedShift)
        return false;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  StartIndex = Shift;
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}
03504 
// Create a node that performs P on operands Op0 and Op1, casting the
// operands to the appropriate type.  The type of the result is determined by P.
static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
                              const Permute &P, SDValue Op0, SDValue Op1) {
  // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
  // elements of a PACK are twice as wide as the outputs.
  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
                      P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
                      P.Operand);
  // Cast both operands to the appropriate type.
  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
                              SystemZ::VectorBytes / InBytes);
  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
  SDValue Op;
  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
    // P.Operand is the constant third operand of the VPDI.
    SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
    Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
  } else if (P.Opcode == SystemZISD::PACK) {
    // PACK narrows: the result element type is half the input width.
    MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
                                 SystemZ::VectorBytes / P.Operand);
    Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
  } else {
    // Merges produce the same type as their inputs.
    Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
  }
  return Op;
}
03532 
03533 // Bytes is a VPERM-like permute vector, except that -1 is used for
03534 // undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
03535 // VSLDI or VPERM.
03536 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
03537                                      const SmallVectorImpl<int> &Bytes) {
03538   for (unsigned I = 0; I < 2; ++I)
03539     Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
03540 
03541   // First see whether VSLDI can be used.
03542   unsigned StartIndex, OpNo0, OpNo1;
03543   if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
03544     return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
03545                        Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
03546 
03547   // Fall back on VPERM.  Construct an SDNode for the permute vector.
03548   SDValue IndexNodes[SystemZ::VectorBytes];
03549   for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
03550     if (Bytes[I] >= 0)
03551       IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
03552     else
03553       IndexNodes[I] = DAG.getUNDEF(MVT::i32);
03554   SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
03555   return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
03556 }
03557 
namespace {
// Describes a general N-operand vector shuffle.
struct GeneralShuffle {
  GeneralShuffle(EVT vt) : VT(vt) {}
  // Append one undefined result element to the shuffle description.
  void addUndef();
  // Append one result element, taken from element Elem of the given operand.
  void add(SDValue, unsigned);
  // Emit DAG nodes implementing the accumulated shuffle and return the root.
  SDValue getNode(SelectionDAG &, SDLoc);

  // The operands of the shuffle.
  SmallVector<SDValue, SystemZ::VectorBytes> Ops;

  // Index I is -1 if byte I of the result is undefined.  Otherwise the
  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
  // Bytes[I] / SystemZ::VectorBytes.
  SmallVector<int, SystemZ::VectorBytes> Bytes;

  // The type of the shuffle result.
  EVT VT;
};
}
03578 
03579 // Add an extra undefined element to the shuffle.
03580 void GeneralShuffle::addUndef() {
03581   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
03582   for (unsigned I = 0; I < BytesPerElement; ++I)
03583     Bytes.push_back(-1);
03584 }
03585 
// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
// type as the result.
void GeneralShuffle::add(SDValue Op, unsigned Elem) {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  // The source vector can have wider elements than the result,
  // either through an explicit TRUNCATE or because of type legalization.
  // We want the least significant part.
  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
  assert(FromBytesPerElement >= BytesPerElement &&
         "Invalid EXTRACT_VECTOR_ELT");
  // Offset of the last (least significant) BytesPerElement bytes of source
  // element Elem within the 16-byte vector register.
  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
                   (FromBytesPerElement - BytesPerElement));

  // Look through things like shuffles and bitcasts.
  while (Op.getNode()) {
    if (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(0);
    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
      // See whether the bytes we need come from a contiguous part of one
      // operand.
      SmallVector<int, SystemZ::VectorBytes> OpBytes;
      getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
      int NewByte;
      if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
        break;
      if (NewByte < 0) {
        // The bytes we want are undefined in the shuffle's result.
        addUndef();
        return;
      }
      // Continue the search in whichever shuffle operand supplies the bytes.
      Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
    } else if (Op.getOpcode() == ISD::UNDEF) {
      addUndef();
      return;
    } else
      break;
  }

  // Make sure that the source of the extraction is in Ops.
  unsigned OpNo = 0;
  for (; OpNo < Ops.size(); ++OpNo)
    if (Ops[OpNo] == Op)
      break;
  if (OpNo == Ops.size())
    Ops.push_back(Op);

  // Add the element to Bytes.
  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(Base + I);
}
03641 
// Return SDNodes for the completed shuffle.
SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");

  // No defined inputs means the whole result is undefined.
  if (Ops.size() == 0)
    return DAG.getUNDEF(VT);

  // Make sure that there are at least two shuffle operands.
  if (Ops.size() == 1)
    Ops.push_back(DAG.getUNDEF(MVT::v16i8));

  // Create a tree of shuffles, deferring root node until after the loop.
  // Try to redistribute the undefined elements of non-root nodes so that
  // the non-root shuffles match something like a pack or merge, then adjust
  // the parent node's permute vector to compensate for the new order.
  // Among other things, this copes with vectors like <2 x i16> that were
  // padded with undefined elements during type legalization.
  //
  // In the best case this redistribution will lead to the whole tree
  // using packs and merges.  It should rarely be a loss in other cases.
  unsigned Stride = 1;
  for (; Stride * 2 < Ops.size(); Stride *= 2) {
    // Combine operand I with operand I + Stride, leaving the result in Ops[I].
    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };

      // Create a mask for just these two operands.
      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
        // An undefined Bytes[J] (-1) wraps to a huge OpNo here, so it
        // cannot match I or I + Stride and falls into the final else arm.
        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
        if (OpNo == I)
          NewBytes[J] = Byte;
        else if (OpNo == I + Stride)
          NewBytes[J] = SystemZ::VectorBytes + Byte;
        else
          NewBytes[J] = -1;
      }
      // See if it would be better to reorganize NewMask to avoid using VPERM.
      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
        Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
          if (NewBytes[J] >= 0) {
            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
                   "Invalid double permute");
            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
          } else
            assert(NewBytesMap[J] < 0 && "Invalid double permute");
        }
      } else {
        // Just use NewBytes on the operands.
        Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
          if (NewBytes[J] >= 0)
            Bytes[J] = I * SystemZ::VectorBytes + J;
      }
    }
  }

  // Now we just have 2 inputs.  Put the second operand in Ops[1].
  if (Stride > 1) {
    Ops[1] = Ops[Stride];
    // Remap Bytes entries that referred to operand Stride so that they
    // refer to operand 1 instead.
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
      if (Bytes[I] >= int(SystemZ::VectorBytes))
        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
  }

  // Look for an instruction that can do the permute without resorting
  // to VPERM.
  unsigned OpNo0, OpNo1;
  SDValue Op;
  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
    Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
  else
    Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
03720 
03721 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
03722 static bool isScalarToVector(SDValue Op) {
03723   for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
03724     if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
03725       return false;
03726   return true;
03727 }
03728 
03729 // Return a vector of type VT that contains Value in the first element.
03730 // The other elements don't matter.
03731 static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
03732                                    SDValue Value) {
03733   // If we have a constant, replicate it to all elements and let the
03734   // BUILD_VECTOR lowering take care of it.
03735   if (Value.getOpcode() == ISD::Constant ||
03736       Value.getOpcode() == ISD::ConstantFP) {
03737     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
03738     return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
03739   }
03740   if (Value.getOpcode() == ISD::UNDEF)
03741     return DAG.getUNDEF(VT);
03742   return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
03743 }
03744 
03745 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
03746 // element 1.  Used for cases in which replication is cheap.
03747 static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
03748                                  SDValue Op0, SDValue Op1) {
03749   if (Op0.getOpcode() == ISD::UNDEF) {
03750     if (Op1.getOpcode() == ISD::UNDEF)
03751       return DAG.getUNDEF(VT);
03752     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
03753   }
03754   if (Op1.getOpcode() == ISD::UNDEF)
03755     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
03756   return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
03757                      buildScalarToVector(DAG, DL, VT, Op0),
03758                      buildScalarToVector(DAG, DL, VT, Op1));
03759 }
03760 
03761 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
03762 // vector for them.
03763 static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
03764                           SDValue Op1) {
03765   if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
03766     return DAG.getUNDEF(MVT::v2i64);
03767   // If one of the two inputs is undefined then replicate the other one,
03768   // in order to avoid using another register unnecessarily.
03769   if (Op0.getOpcode() == ISD::UNDEF)
03770     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
03771   else if (Op1.getOpcode() == ISD::UNDEF)
03772     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
03773   else {
03774     Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
03775     Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
03776   }
03777   return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
03778 }
03779 
03780 // Try to represent constant BUILD_VECTOR node BVN using a
03781 // SystemZISD::BYTE_MASK-style mask.  Store the mask value in Mask
03782 // on success.
03783 static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
03784   EVT ElemVT = BVN->getValueType(0).getVectorElementType();
03785   unsigned BytesPerElement = ElemVT.getStoreSize();
03786   for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
03787     SDValue Op = BVN->getOperand(I);
03788     if (Op.getOpcode() != ISD::UNDEF) {
03789       uint64_t Value;
03790       if (Op.getOpcode() == ISD::Constant)
03791         Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
03792       else if (Op.getOpcode() == ISD::ConstantFP)
03793         Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
03794                  .getZExtValue());
03795       else
03796         return false;
03797       for (unsigned J = 0; J < BytesPerElement; ++J) {
03798         uint64_t Byte = (Value >> (J * 8)) & 0xff;
03799         if (Byte == 0xff)
03800           Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
03801         else if (Byte != 0)
03802           return false;
03803       }
03804     }
03805   }
03806   return true;
03807 }
03808 
03809 // Try to load a vector constant in which BitsPerElement-bit value Value
03810 // is replicated to fill the vector.  VT is the type of the resulting
03811 // constant, which may have elements of a different size from BitsPerElement.
03812 // Return the SDValue of the constant on success, otherwise return
03813 // an empty value.
03814 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
03815                                        const SystemZInstrInfo *TII,
03816                                        SDLoc DL, EVT VT, uint64_t Value,
03817                                        unsigned BitsPerElement) {
03818   // Signed 16-bit values can be replicated using VREPI.
03819   int64_t SignedValue = SignExtend64(Value, BitsPerElement);
03820   if (isInt<16>(SignedValue)) {
03821     MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
03822                                  SystemZ::VectorBits / BitsPerElement);
03823     SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
03824                              DAG.getConstant(SignedValue, DL, MVT::i32));
03825     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
03826   }
03827   // See whether rotating the constant left some N places gives a value that
03828   // is one less than a power of 2 (i.e. all zeros followed by all ones).
03829   // If so we can use VGM.
03830   unsigned Start, End;
03831   if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
03832     // isRxSBGMask returns the bit numbers for a full 64-bit value,
03833     // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to
03834     // bit numbers for an BitsPerElement value, so that 0 denotes
03835     // 1 << (BitsPerElement-1).
03836     Start -= 64 - BitsPerElement;
03837     End -= 64 - BitsPerElement;
03838     MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
03839                                  SystemZ::VectorBits / BitsPerElement);
03840     SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
03841                              DAG.getConstant(Start, DL, MVT::i32),
03842                              DAG.getConstant(End, DL, MVT::i32));
03843     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
03844   }
03845   return SDValue();
03846 }
03847 
// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
// the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
// would benefit from this representation and return it if so.
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
                                     BuildVectorSDNode *BVN) {
  EVT VT = BVN->getValueType(0);
  unsigned NumElements = VT.getVectorNumElements();

  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
  // on byte vectors.  If there are non-EXTRACT_VECTOR_ELT elements that still
  // need a BUILD_VECTOR, add an additional placeholder operand for that
  // BUILD_VECTOR and store its operands in ResidueOps.
  GeneralShuffle GS(VT);
  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
  bool FoundOne = false;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Op = BVN->getOperand(I);
    // Look through truncations to catch extractions of wider elements.
    if (Op.getOpcode() == ISD::TRUNCATE)
      Op = Op.getOperand(0);
    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op.getOperand(1).getOpcode() == ISD::Constant) {
      unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      GS.add(Op.getOperand(0), Elem);
      FoundOne = true;
    } else if (Op.getOpcode() == ISD::UNDEF) {
      GS.addUndef();
    } else {
      // Defer this element to the residual BUILD_VECTOR; a null SDValue
      // acts as the placeholder operand within GS.
      GS.add(SDValue(), ResidueOps.size());
      ResidueOps.push_back(Op);
    }
  }

  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
  if (!FoundOne)
    return SDValue();

  // Create the BUILD_VECTOR for the remaining elements, if any.
  if (!ResidueOps.empty()) {
    // Pad the residue out to a full vector with undefined elements.
    while (ResidueOps.size() < NumElements)
      ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType()));
    // Replace the (single) null placeholder operand with the new node.
    for (auto &Op : GS.Ops) {
      if (!Op.getNode()) {
        Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
        break;
      }
    }
  }
  return GS.getNode(DAG, SDLoc(BVN));
}
03898 
// Combine GPR scalar values Elems into a vector of type VT.
static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
                           SmallVectorImpl<SDValue> &Elems) {
  // See whether there is a single replicated value.
  SDValue Single;
  unsigned int NumElements = Elems.size();
  unsigned int Count = 0;
  for (auto Elem : Elems) {
    if (Elem.getOpcode() != ISD::UNDEF) {
      if (!Single.getNode())
        Single = Elem;
      else if (Elem != Single) {
        // Two distinct defined values: not a splat.
        Single = SDValue();
        break;
      }
      // Count how many elements use the single value.
      Count += 1;
    }
  }
  // There are three cases here:
  //
  // - if the only defined element is a loaded one, the best sequence
  //   is a replicating load.
  //
  // - otherwise, if the only defined element is an i64 value, we will
  //   end up with the same VLVGP sequence regardless of whether we short-cut
  //   for replication or fall through to the later code.
  //
  // - otherwise, if the only defined element is an i32 or smaller value,
  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
  //   This is only a win if the single defined element is used more than once.
  //   In other cases we're better off using a single VLVGx.
  if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);

  // The best way of building a v2i64 from two i64s is to use VLVGP.
  if (VT == MVT::v2i64)
    return joinDwords(DAG, DL, Elems[0], Elems[1]);

  // Use a 64-bit merge high to combine two doubles.
  if (VT == MVT::v2f64)
    return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);

  // Build v4f32 values directly from the FPRs:
  //
  //   <Axxx> <Bxxx> <Cxxxx> <Dxxx>
  //         V              V         VMRHF
  //      <ABxx>         <CDxx>
  //                V                 VMRHG
  //              <ABCD>
  if (VT == MVT::v4f32) {
    SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
    SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
    // Avoid unnecessary undefs by reusing the other operand.
    if (Op01.getOpcode() == ISD::UNDEF)
      Op01 = Op23;
    else if (Op23.getOpcode() == ISD::UNDEF)
      Op23 = Op01;
    // Merging identical replications is a no-op.
    if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
      return Op01;
    Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
    Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
                             DL, MVT::v2i64, Op01, Op23);
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }

  // Collect the constant terms.
  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
  // Done[I] records that element I is already covered by the starting value
  // below and needs no separate insertion.
  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);

  unsigned NumConstants = 0;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Elem = Elems[I];
    if (Elem.getOpcode() == ISD::Constant ||
        Elem.getOpcode() == ISD::ConstantFP) {
      NumConstants += 1;
      Constants[I] = Elem;
      Done[I] = true;
    }
  }
  // If there was at least one constant, fill in the other elements of
  // Constants with undefs to get a full vector constant and use that
  // as the starting point.
  SDValue Result;
  if (NumConstants > 0) {
    for (unsigned I = 0; I < NumElements; ++I)
      if (!Constants[I].getNode())
        Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
    Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
  } else {
    // Otherwise try to use VLVGP to start the sequence in order to
    // avoid a false dependency on any previous contents of the vector
    // register.  This only makes sense if one of the associated elements
    // is defined.
    unsigned I1 = NumElements / 2 - 1;
    unsigned I2 = NumElements - 1;
    bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
    bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
    if (Def1 || Def2) {
      // If one of the two elements is undefined, substitute the other so
      // both VLVGP inputs are defined.
      SDValue Elem1 = Elems[Def1 ? I1 : I2];
      SDValue Elem2 = Elems[Def2 ? I2 : I1];
      Result = DAG.getNode(ISD::BITCAST, DL, VT,
                           joinDwords(DAG, DL, Elem1, Elem2));
      Done[I1] = true;
      Done[I2] = true;
    } else
      Result = DAG.getUNDEF(VT);
  }

  // Use VLVGx to insert the other elements.
  for (unsigned I = 0; I < NumElements; ++I)
    if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                           DAG.getConstant(I, DL, MVT::i32));
  return Result;
}
04016 
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  const SystemZInstrInfo *TII =
    static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  if (BVN->isConstant()) {
    // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
    // preferred way of creating all-zero and all-one vectors so give it
    // priority over other methods below.
    uint64_t Mask = 0;
    if (tryBuildVectorByteMask(BVN, Mask)) {
      SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
                               DAG.getConstant(Mask, DL, MVT::i32));
      return DAG.getNode(ISD::BITCAST, DL, VT, Op);
    }

    // Try using some form of replication.
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, true) &&
        SplatBitSize <= 64) {
      // First try assuming that any undefined bits above the highest set bit
      // and below the lowest set bit are 1s.  This increases the likelihood of
      // being able to use a sign-extended element value in VECTOR REPLICATE
      // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
      //
      // SplatBitsZ has at least one set bit at this point: a constant whose
      // defined bits are all zero would have been matched by the byte-mask
      // path above, so findFirstSet/findLastSet get a nonzero argument.
      uint64_t SplatBitsZ = SplatBits.getZExtValue();
      uint64_t SplatUndefZ = SplatUndef.getZExtValue();
      uint64_t Lower = (SplatUndefZ
                        & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
      uint64_t Upper = (SplatUndefZ
                        & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
      uint64_t Value = SplatBitsZ | Upper | Lower;
      SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
                                           SplatBitSize);
      if (Op.getNode())
        return Op;

      // Now try assuming that any undefined bits between the first and
      // last defined set bits are set.  This increases the chances of
      // using a non-wraparound mask.
      uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
      Value = SplatBitsZ | Middle;
      Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
      if (Op.getNode())
        return Op;
    }

    // Fall back to loading it from memory.
    return SDValue();
  }

  // See if we should use shuffles to construct the vector from other vectors.
  SDValue Res = tryBuildVectorShuffle(DAG, BVN);
  if (Res.getNode())
    return Res;

  // Detect SCALAR_TO_VECTOR conversions.
  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
    return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));

  // Otherwise use buildVector to build the vector up from GPRs.
  unsigned NumElements = Op.getNumOperands();
  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
  for (unsigned I = 0; I < NumElements; ++I)
    Ops[I] = Op.getOperand(I);
  return buildVector(DAG, DL, VT, Ops);
}
04089 
04090 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
04091                                                    SelectionDAG &DAG) const {
04092   auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
04093   SDLoc DL(Op);
04094   EVT VT = Op.getValueType();
04095   unsigned NumElements = VT.getVectorNumElements();
04096 
04097   if (VSN->isSplat()) {
04098     SDValue Op0 = Op.getOperand(0);
04099     unsigned Index = VSN->getSplatIndex();
04100     assert(Index < VT.getVectorNumElements() &&
04101            "Splat index should be defined and in first operand");
04102     // See whether the value we're splatting is directly available as a scalar.
04103     if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
04104         Op0.getOpcode() == ISD::BUILD_VECTOR)
04105       return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
04106     // Otherwise keep it as a vector-to-vector operation.
04107     return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
04108                        DAG.getConstant(Index, DL, MVT::i32));
04109   }
04110 
04111   GeneralShuffle GS(VT);
04112   for (unsigned I = 0; I < NumElements; ++I) {
04113     int Elt = VSN->getMaskElt(I);
04114     if (Elt < 0)
04115       GS.addUndef();
04116     else
04117       GS.add(Op.getOperand(unsigned(Elt) / NumElements),
04118              unsigned(Elt) % NumElements);
04119   }
04120   return GS.getNode(DAG, SDLoc(VSN));
04121 }
04122 
04123 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
04124                                                      SelectionDAG &DAG) const {
04125   SDLoc DL(Op);
04126   // Just insert the scalar into element 0 of an undefined vector.
04127   return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
04128                      Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
04129                      Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
04130 }
04131 
04132 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
04133                                                       SelectionDAG &DAG) const {
04134   // Handle insertions of floating-point values.
04135   SDLoc DL(Op);
04136   SDValue Op0 = Op.getOperand(0);
04137   SDValue Op1 = Op.getOperand(1);
04138   SDValue Op2 = Op.getOperand(2);
04139   EVT VT = Op.getValueType();
04140 
04141   // Insertions into constant indices of a v2f64 can be done using VPDI.
04142   // However, if the inserted value is a bitcast or a constant then it's
04143   // better to use GPRs, as below.
04144   if (VT == MVT::v2f64 &&
04145       Op1.getOpcode() != ISD::BITCAST &&
04146       Op1.getOpcode() != ISD::ConstantFP &&
04147       Op2.getOpcode() == ISD::Constant) {
04148     uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue();
04149     unsigned Mask = VT.getVectorNumElements() - 1;
04150     if (Index <= Mask)
04151       return Op;
04152   }
04153 
04154   // Otherwise bitcast to the equivalent integer form and insert via a GPR.
04155   MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
04156   MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
04157   SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
04158                             DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
04159                             DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
04160   return DAG.getNode(ISD::BITCAST, DL, VT, Res);
04161 }
04162 
04163 SDValue
04164 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
04165                                                SelectionDAG &DAG) const {
04166   // Handle extractions of floating-point values.
04167   SDLoc DL(Op);
04168   SDValue Op0 = Op.getOperand(0);
04169   SDValue Op1 = Op.getOperand(1);
04170   EVT VT = Op.getValueType();
04171   EVT VecVT = Op0.getValueType();
04172 
04173   // Extractions of constant indices can be done directly.
04174   if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
04175     uint64_t Index = CIndexN->getZExtValue();
04176     unsigned Mask = VecVT.getVectorNumElements() - 1;
04177     if (Index <= Mask)
04178       return Op;
04179   }
04180 
04181   // Otherwise bitcast to the equivalent integer form and extract via a GPR.
04182   MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
04183   MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
04184   SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
04185                             DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
04186   return DAG.getNode(ISD::BITCAST, DL, VT, Res);
04187 }
04188 
04189 SDValue
04190 SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
04191                 unsigned UnpackHigh) const {
04192   SDValue PackedOp = Op.getOperand(0);
04193   EVT OutVT = Op.getValueType();
04194   EVT InVT = PackedOp.getValueType();
04195   unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
04196   unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
04197   do {
04198     FromBits *= 2;
04199     EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
04200                                  SystemZ::VectorBits / FromBits);
04201     PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
04202   } while (FromBits != ToBits);
04203   return PackedOp;
04204 }
04205 
04206 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
04207                                           unsigned ByScalar) const {
04208   // Look for cases where a vector shift can use the *_BY_SCALAR form.
04209   SDValue Op0 = Op.getOperand(0);
04210   SDValue Op1 = Op.getOperand(1);
04211   SDLoc DL(Op);
04212   EVT VT = Op.getValueType();
04213   unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
04214 
04215   // See whether the shift vector is a splat represented as BUILD_VECTOR.
04216   if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
04217     APInt SplatBits, SplatUndef;
04218     unsigned SplatBitSize;
04219     bool HasAnyUndefs;
04220     // Check for constant splats.  Use ElemBitSize as the minimum element
04221     // width and reject splats that need wider elements.
04222     if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
04223                              ElemBitSize, true) &&
04224         SplatBitSize == ElemBitSize) {
04225       SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
04226                                       DL, MVT::i32);
04227       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
04228     }
04229     // Check for variable splats.
04230     BitVector UndefElements;
04231     SDValue Splat = BVN->getSplatValue(&UndefElements);
04232     if (Splat) {
04233       // Since i32 is the smallest legal type, we either need a no-op
04234       // or a truncation.
04235       SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
04236       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
04237     }
04238   }
04239 
04240   // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
04241   // and the shift amount is directly available in a GPR.
04242   if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
04243     if (VSN->isSplat()) {
04244       SDValue VSNOp0 = VSN->getOperand(0);
04245       unsigned Index = VSN->getSplatIndex();
04246       assert(Index < VT.getVectorNumElements() &&
04247              "Splat index should be defined and in first operand");
04248       if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
04249           VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
04250         // Since i32 is the smallest legal type, we either need a no-op
04251         // or a truncation.
04252         SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
04253                                     VSNOp0.getOperand(Index));
04254         return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
04255       }
04256     }
04257   }
04258 
04259   // Otherwise just treat the current form as legal.
04260   return Op;
04261 }
04262 
// Dispatch a node marked Custom during type legalization to its
// target-specific lowering routine.  Every opcode listed here must have
// been registered with setOperationAction(..., Custom) elsewhere in this
// file; anything else is a legalization bug, hence the unreachable.
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::SMUL_LOHI:
    return lowerSMUL_LOHI(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::CTPOP:
    return lowerCTPOP(Op, DAG);
  // The *_ZERO_UNDEF forms can simply defer to the plain CTLZ/CTTZ nodes,
  // which are legal here and also define the zero-input case.
  case ISD::CTLZ_ZERO_UNDEF:
    return DAG.getNode(ISD::CTLZ, SDLoc(Op),
                       Op.getValueType(), Op.getOperand(0));
  case ISD::CTTZ_ZERO_UNDEF:
    return DAG.getNode(ISD::CTTZ, SDLoc(Op),
                       Op.getValueType(), Op.getOperand(0));
  // Atomic operations: most map onto the shared ATOMIC_LOAD_OP helper with
  // the corresponding partword SystemZISD opcode.
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
    return lowerATOMIC_STORE(Op, DAG);
  case ISD::ATOMIC_LOAD:
    return lowerATOMIC_LOAD(Op, DAG);
  case ISD::ATOMIC_LOAD_ADD:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
  case ISD::ATOMIC_LOAD_SUB:
    return lowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
  case ISD::ATOMIC_LOAD_OR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
  case ISD::ATOMIC_LOAD_XOR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
  case ISD::ATOMIC_LOAD_NAND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
  case ISD::ATOMIC_LOAD_MIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
  case ISD::ATOMIC_LOAD_MAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
  case ISD::ATOMIC_LOAD_UMIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
  case ISD::ATOMIC_LOAD_UMAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
  case ISD::ATOMIC_CMP_SWAP:
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  // Vector operations.
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
  case ISD::SHL:
    return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
  case ISD::SRL:
    return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
  case ISD::SRA:
    return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}
04370 
// Return a human-readable name for the given SystemZISD opcode, for use in
// DAG dumps; returns null for opcodes outside the SystemZISD range.
const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Expand each NodeType into a case returning its stringified name.
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
  switch ((SystemZISD::NodeType)Opcode) {
    case SystemZISD::FIRST_NUMBER: break;
    OPCODE(RET_FLAG);
    OPCODE(CALL);
    OPCODE(SIBCALL);
    OPCODE(TLS_GDCALL);
    OPCODE(TLS_LDCALL);
    OPCODE(PCREL_WRAPPER);
    OPCODE(PCREL_OFFSET);
    OPCODE(IABS);
    OPCODE(ICMP);
    OPCODE(FCMP);
    OPCODE(TM);
    OPCODE(BR_CCMASK);
    OPCODE(SELECT_CCMASK);
    OPCODE(ADJDYNALLOC);
    OPCODE(EXTRACT_ACCESS);
    OPCODE(POPCNT);
    OPCODE(UMUL_LOHI64);
    OPCODE(SDIVREM32);
    OPCODE(SDIVREM64);
    OPCODE(UDIVREM32);
    OPCODE(UDIVREM64);
    OPCODE(MVC);
    OPCODE(MVC_LOOP);
    OPCODE(NC);
    OPCODE(NC_LOOP);
    OPCODE(OC);
    OPCODE(OC_LOOP);
    OPCODE(XC);
    OPCODE(XC_LOOP);
    OPCODE(CLC);
    OPCODE(CLC_LOOP);
    OPCODE(STPCPY);
    OPCODE(STRCMP);
    OPCODE(SEARCH_STRING);
    OPCODE(IPM);
    OPCODE(SERIALIZE);
    OPCODE(TBEGIN);
    OPCODE(TBEGIN_NOFLOAT);
    OPCODE(TEND);
    OPCODE(BYTE_MASK);
    OPCODE(ROTATE_MASK);
    OPCODE(REPLICATE);
    OPCODE(JOIN_DWORDS);
    OPCODE(SPLAT);
    OPCODE(MERGE_HIGH);
    OPCODE(MERGE_LOW);
    OPCODE(SHL_DOUBLE);
    OPCODE(PERMUTE_DWORDS);
    OPCODE(PERMUTE);
    OPCODE(PACK);
    OPCODE(PACKS_CC);
    OPCODE(PACKLS_CC);
    OPCODE(UNPACK_HIGH);
    OPCODE(UNPACKL_HIGH);
    OPCODE(UNPACK_LOW);
    OPCODE(UNPACKL_LOW);
    OPCODE(VSHL_BY_SCALAR);
    OPCODE(VSRL_BY_SCALAR);
    OPCODE(VSRA_BY_SCALAR);
    OPCODE(VSUM);
    OPCODE(VICMPE);
    OPCODE(VICMPH);
    OPCODE(VICMPHL);
    OPCODE(VICMPES);
    OPCODE(VICMPHS);
    OPCODE(VICMPHLS);
    OPCODE(VFCMPE);
    OPCODE(VFCMPH);
    OPCODE(VFCMPHE);
    OPCODE(VFCMPES);
    OPCODE(VFCMPHS);
    OPCODE(VFCMPHES);
    OPCODE(VFTCI);
    OPCODE(VEXTEND);
    OPCODE(VROUND);
    OPCODE(VTM);
    OPCODE(VFAE_CC);
    OPCODE(VFAEZ_CC);
    OPCODE(VFEE_CC);
    OPCODE(VFEEZ_CC);
    OPCODE(VFENE_CC);
    OPCODE(VFENEZ_CC);
    OPCODE(VISTR_CC);
    OPCODE(VSTRC_CC);
    OPCODE(VSTRCZ_CC);
    OPCODE(ATOMIC_SWAPW);
    OPCODE(ATOMIC_LOADW_ADD);
    OPCODE(ATOMIC_LOADW_SUB);
    OPCODE(ATOMIC_LOADW_AND);
    OPCODE(ATOMIC_LOADW_OR);
    OPCODE(ATOMIC_LOADW_XOR);
    OPCODE(ATOMIC_LOADW_NAND);
    OPCODE(ATOMIC_LOADW_MIN);
    OPCODE(ATOMIC_LOADW_MAX);
    OPCODE(ATOMIC_LOADW_UMIN);
    OPCODE(ATOMIC_LOADW_UMAX);
    OPCODE(ATOMIC_CMP_SWAPW);
    OPCODE(PREFETCH);
  }
  // FIRST_NUMBER (or an opcode not in the enum) falls through to here.
  return nullptr;
#undef OPCODE
}
04477 
04478 // Return true if VT is a vector whose elements are a whole number of bytes
04479 // in width.
04480 static bool canTreatAsByteVector(EVT VT) {
04481   return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0;
04482 }
04483 
04484 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
04485 // producing a result of type ResVT.  Op is a possibly bitcast version
04486 // of the input vector and Index is the index (based on type VecVT) that
04487 // should be extracted.  Return the new extraction if a simplification
04488 // was possible or if Force is true.
04489 SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
04490                                               SDValue Op, unsigned Index,
04491                                               DAGCombinerInfo &DCI,
04492                                               bool Force) const {
04493   SelectionDAG &DAG = DCI.DAG;
04494 
04495   // The number of bytes being extracted.
04496   unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
04497 
04498   for (;;) {
04499     unsigned Opcode = Op.getOpcode();
04500     if (Opcode == ISD::BITCAST)
04501       // Look through bitcasts.
04502       Op = Op.getOperand(0);
04503     else if (Opcode == ISD::VECTOR_SHUFFLE &&
04504              canTreatAsByteVector(Op.getValueType())) {
04505       // Get a VPERM-like permute mask and see whether the bytes covered
04506       // by the extracted element are a contiguous sequence from one
04507       // source operand.
04508       SmallVector<int, SystemZ::VectorBytes> Bytes;
04509       getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
04510       int First;
04511       if (!getShuffleInput(Bytes, Index * BytesPerElement,
04512                            BytesPerElement, First))
04513         break;
04514       if (First < 0)
04515         return DAG.getUNDEF(ResVT);
04516       // Make sure the contiguous sequence starts at a multiple of the
04517       // original element size.
04518       unsigned Byte = unsigned(First) % Bytes.size();
04519       if (Byte % BytesPerElement != 0)
04520         break;
04521       // We can get the extracted value directly from an input.
04522       Index = Byte / BytesPerElement;
04523       Op = Op.getOperand(unsigned(First) / Bytes.size());
04524       Force = true;
04525     } else if (Opcode == ISD::BUILD_VECTOR &&
04526                canTreatAsByteVector(Op.getValueType())) {
04527       // We can only optimize this case if the BUILD_VECTOR elements are
04528       // at least as wide as the extracted value.
04529       EVT OpVT = Op.getValueType();
04530       unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
04531       if (OpBytesPerElement < BytesPerElement)
04532         break;
04533       // Make sure that the least-significant bit of the extracted value
04534       // is the least significant bit of an input.
04535       unsigned End = (Index + 1) * BytesPerElement;
04536       if (End % OpBytesPerElement != 0)
04537         break;
04538       // We're extracting the low part of one operand of the BUILD_VECTOR.
04539       Op = Op.getOperand(End / OpBytesPerElement - 1);
04540       if (!Op.getValueType().isInteger()) {
04541         EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits());
04542         Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
04543         DCI.AddToWorklist(Op.getNode());
04544       }
04545       EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
04546       Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
04547       if (VT != ResVT) {
04548         DCI.AddToWorklist(Op.getNode());
04549         Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
04550       }
04551       return Op;
04552     } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
04553     Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
04554     Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
04555          canTreatAsByteVector(Op.getValueType()) &&
04556                canTreatAsByteVector(Op.getOperand(0).getValueType())) {
04557       // Make sure that only the unextended bits are significant.
04558       EVT ExtVT = Op.getValueType();
04559       EVT OpVT = Op.getOperand(0).getValueType();
04560       unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
04561       unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
04562       unsigned Byte = Index * BytesPerElement;
04563       unsigned SubByte = Byte % ExtBytesPerElement;
04564       unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
04565       if (SubByte < MinSubByte ||
04566     SubByte + BytesPerElement > ExtBytesPerElement)
04567   break;
04568       // Get the byte offset of the unextended element
04569       Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
04570       // ...then add the byte offset relative to that element.
04571       Byte += SubByte - MinSubByte;
04572       if (Byte % BytesPerElement != 0)
04573   break;
04574       Op = Op.getOperand(0);
04575       Index = Byte / BytesPerElement;
04576       Force = true;
04577     } else
04578       break;
04579   }
04580   if (Force) {
04581     if (Op.getValueType() != VecVT) {
04582       Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
04583       DCI.AddToWorklist(Op.getNode());
04584     }
04585     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
04586                        DAG.getConstant(Index, DL, MVT::i32));
04587   }
04588   return SDValue();
04589 }
04590 
04591 // Optimize vector operations in scalar value Op on the basis that Op
04592 // is truncated to TruncVT.
04593 SDValue
04594 SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
04595                                               DAGCombinerInfo &DCI) const {
04596   // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
04597   // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
04598   // of type TruncVT.
04599   if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
04600       TruncVT.getSizeInBits() % 8 == 0) {
04601     SDValue Vec = Op.getOperand(0);
04602     EVT VecVT = Vec.getValueType();
04603     if (canTreatAsByteVector(VecVT)) {
04604       if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
04605         unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
04606         unsigned TruncBytes = TruncVT.getStoreSize();
04607         if (BytesPerElement % TruncBytes == 0) {
04608           // Calculate the value of Y' in the above description.  We are
04609           // splitting the original elements into Scale equal-sized pieces
04610           // and for truncation purposes want the last (least-significant)
04611           // of these pieces for IndexN.  This is easiest to do by calculating
04612           // the start index of the following element and then subtracting 1.
04613           unsigned Scale = BytesPerElement / TruncBytes;
04614           unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
04615 
04616           // Defer the creation of the bitcast from X to combineExtract,
04617           // which might be able to optimize the extraction.
04618           VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
04619                                    VecVT.getStoreSize() / TruncBytes);
04620           EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
04621           return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
04622         }
04623       }
04624     }
04625   }
04626   return SDValue();
04627 }
04628 
// Target-specific DAG combines, run by the generic DAG combiner.  Each
// top-level "if (Opcode == ...)" below is an independent combine; the
// first one that fires returns its replacement value, otherwise SDValue()
// means "no change".
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::SIGN_EXTEND) {
    // Convert (sext (ashr (shl X, C1), C2)) to
    // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
    // cheap as narrower ones.
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
    if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
      auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      SDValue Inner = N0.getOperand(0);
      if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
        if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
          // Both shift amounts grow by the number of bits added by the
          // extension, so the same bits end up in the sign position.
          unsigned Extra = (VT.getSizeInBits() -
                            N0.getValueType().getSizeInBits());
          unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
          unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
          EVT ShiftVT = N0.getOperand(1).getValueType();
          SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
                                    Inner.getOperand(0));
          SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
                                    DAG.getConstant(NewShlAmt, SDLoc(Inner),
                                                    ShiftVT));
          return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
                             DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
        }
      }
    }
  }
  if (Opcode == SystemZISD::MERGE_HIGH ||
      Opcode == SystemZISD::MERGE_LOW) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    // Look through a bitcast on the first operand only.
    if (Op0.getOpcode() == ISD::BITCAST)
      Op0 = Op0.getOperand(0);
    if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
        cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
      // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
      // for v4f32.
      // Note this compares Op1 against the *original* first operand
      // (before the bitcast was stripped above).
      if (Op1 == N->getOperand(0))
        return Op1;
      // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
      EVT VT = Op1.getValueType();
      unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
      if (ElemBytes <= 4) {
        Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
                  SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
        // InVT is the integer form of VT; OutVT doubles the element width
        // and halves the element count, matching the unpack result.
        EVT InVT = VT.changeVectorElementTypeToInteger();
        EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
                                     SystemZ::VectorBytes / ElemBytes / 2);
        if (VT != InVT) {
          Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
          DCI.AddToWorklist(Op1.getNode());
        }
        SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
        DCI.AddToWorklist(Op.getNode());
        return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
      }
    }
  }
  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
  // for the extraction to be done on a vMiN value, so that we can use VSTE.
  // If X has wider elements then convert it to:
  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
  if (Opcode == ISD::STORE) {
    auto *SN = cast<StoreSDNode>(N);
    EVT MemVT = SN->getMemoryVT();
    if (MemVT.isInteger()) {
      SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
                                             SN->getValue(), DCI);
      if (Value.getNode()) {
        DCI.AddToWorklist(Value.getNode());

        // Rewrite the store with the new form of stored value.
        return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
                                 SN->getBasePtr(), SN->getMemoryVT(),
                                 SN->getMemOperand());
      }
    }
  }
  // Try to simplify a vector extraction.
  if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
    if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      SDValue Op0 = N->getOperand(0);
      EVT VecVT = Op0.getValueType();
      return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
                            IndexN->getZExtValue(), DCI, false);
    }
  }
  // (join_dwords X, X) == (replicate X)
  if (Opcode == SystemZISD::JOIN_DWORDS &&
      N->getOperand(0) == N->getOperand(1))
    return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
                       N->getOperand(0));
  // (fround (extract_vector_elt X 0))
  // (fround (extract_vector_elt X 1)) ->
  // (extract_vector_elt (VROUND X) 0)
  // (extract_vector_elt (VROUND X) 1)
  //
  // This is a special case since the target doesn't really support v2f32s.
  if (Opcode == ISD::FP_ROUND) {
    SDValue Op0 = N->getOperand(0);
    if (N->getValueType(0) == MVT::f32 &&
        Op0.hasOneUse() &&
        Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getValueType() == MVT::v2f64 &&
        Op0.getOperand(1).getOpcode() == ISD::Constant &&
        cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
      SDValue Vec = Op0.getOperand(0);
      // Scan the other uses of the source vector for a matching
      // (fround (extract_vector_elt X 1)).
      for (auto *U : Vec->uses()) {
        if (U != Op0.getNode() &&
            U->hasOneUse() &&
            U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
            U->getOperand(0) == Vec &&
            U->getOperand(1).getOpcode() == ISD::Constant &&
            cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
          SDValue OtherRound = SDValue(*U->use_begin(), 0);
          if (OtherRound.getOpcode() == ISD::FP_ROUND &&
              OtherRound.getOperand(0) == SDValue(U, 0) &&
              OtherRound.getValueType() == MVT::f32) {
            SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
                                         MVT::v4f32, Vec);
            DCI.AddToWorklist(VRound.getNode());
            // Replace the sibling round in place; this node's own
            // replacement is returned to the combiner below.
            SDValue Extract1 =
              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                          VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
            DCI.AddToWorklist(Extract1.getNode());
            DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
            SDValue Extract0 =
              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                          VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
            return Extract0;
          }
        }
      }
    }
  }
  return SDValue();
}
04770 
04771 //===----------------------------------------------------------------------===//
04772 // Custom insertion
04773 //===----------------------------------------------------------------------===//
04774 
04775 // Create a new basic block after MBB.
04776 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
04777   MachineFunction &MF = *MBB->getParent();
04778   MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
04779   MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
04780   return NewMBB;
04781 }
04782 
04783 // Split MBB after MI and return the new block (the one that contains
04784 // instructions after MI).
04785 static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
04786                                           MachineBasicBlock *MBB) {
04787   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
04788   NewMBB->splice(NewMBB->begin(), MBB,
04789                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
04790   NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
04791   return NewMBB;
04792 }
04793 
04794 // Split MBB before MI and return the new block (the one that contains MI).
04795 static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
04796                                            MachineBasicBlock *MBB) {
04797   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
04798   NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
04799   NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
04800   return NewMBB;
04801 }
04802 
04803 // Force base value Base into a register before MI.  Return the register.
04804 static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
04805                          const SystemZInstrInfo *TII) {
04806   if (Base.isReg())
04807     return Base.getReg();
04808 
04809   MachineBasicBlock *MBB = MI->getParent();
04810   MachineFunction &MF = *MBB->getParent();
04811   MachineRegisterInfo &MRI = MF.getRegInfo();
04812 
04813   unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
04814   BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
04815     .addOperand(Base).addImm(0).addReg(0);
04816   return Reg;
04817 }
04818 
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
// Expands the select into a conditional branch diamond (really a triangle:
// StartMBB -> {FalseMBB ->} JoinMBB) with a PHI merging the two values.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
                                  MachineBasicBlock *MBB) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  // Select operands: destination, the value chosen when CC matches CCMask,
  // the value chosen otherwise, and the CCValid/CCMask pair for the test.
  unsigned DestReg  = MI->getOperand(0).getReg();
  unsigned TrueReg  = MI->getOperand(1).getReg();
  unsigned FalseReg = MI->getOperand(2).getReg();
  unsigned CCValid  = MI->getOperand(3).getImm();
  unsigned CCMask   = MI->getOperand(4).getImm();
  DebugLoc DL       = MI->getDebugLoc();

  // JoinMBB gets everything from MI onwards; FalseMBB is a fresh empty
  // block between StartMBB and JoinMBB.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  MBB->addSuccessor(JoinMBB);

  //  JoinMBB:
  //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
  //  ...
  MBB = JoinMBB;
  BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
    .addReg(TrueReg).addMBB(StartMBB)
    .addReg(FalseReg).addMBB(FalseMBB);

  // The pseudo has been fully expanded; remove it.
  MI->eraseFromParent();
  return JoinMBB;
}
04862 
// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
// StoreOpcode is the store to use and Invert says whether the store should
// happen when the condition is false rather than true.  If a STORE ON
// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
MachineBasicBlock *
SystemZTargetLowering::emitCondStore(MachineInstr *MI,
                                     MachineBasicBlock *MBB,
                                     unsigned StoreOpcode, unsigned STOCOpcode,
                                     bool Invert) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  // CondStore operands: value to store, base/disp/index address, and the
  // CCValid/CCMask pair describing when the store should happen.
  unsigned SrcReg     = MI->getOperand(0).getReg();
  MachineOperand Base = MI->getOperand(1);
  int64_t Disp        = MI->getOperand(2).getImm();
  unsigned IndexReg   = MI->getOperand(3).getReg();
  unsigned CCValid    = MI->getOperand(4).getImm();
  unsigned CCMask     = MI->getOperand(5).getImm();
  DebugLoc DL         = MI->getDebugLoc();

  // Pick the store form whose displacement range covers Disp.
  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);

  // Use STOCOpcode if possible.  We could use different store patterns in
  // order to avoid matching the index register, but the performance trade-offs
  // might be more complicated in that case.
  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
    // XOR-ing with CCValid flips exactly the defined condition bits,
    // inverting the sense of the test.
    if (Invert)
      CCMask ^= CCValid;
    BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
      .addReg(SrcReg).addOperand(Base).addImm(Disp)
      .addImm(CCValid).addImm(CCMask);
    MI->eraseFromParent();
    return MBB;
  }

  // Get the condition needed to branch around the store.
  if (!Invert)
    CCMask ^= CCValid;

  // JoinMBB gets everything from MI onwards; FalseMBB holds the store.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
    .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
  MBB->addSuccessor(JoinMBB);

  // The pseudo has been fully expanded; remove it.
  MI->eraseFromParent();
  return JoinMBB;
}
04926 
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
// BitSize is the width of the field in bits, or 0 if this is a partword
// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
// is one of the operands.  Invert says whether the field should be
// inverted after performing BinOpcode (e.g. for NAND).
//
// The expansion is a compare-and-swap (CS/CSG) loop: load the original
// value, apply the operation, and retry until the CS succeeds.  Partword
// (8/16-bit) operations work on the containing aligned word, rotating the
// field to the high end of the word, operating there, and rotating back.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
                                            MachineBasicBlock *MBB,
                                            unsigned BinOpcode,
                                            unsigned BitSize,
                                            bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // BitSize == 0 (partword) also lands here; the real size is read below.
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  unsigned Dest        = MI->getOperand(0).getReg();
  MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
  int64_t Disp         = MI->getOperand(2).getImm();
  MachineOperand Src2  = earlyUseOperand(MI->getOperand(3));
  // For partword operations the pseudo also carries the rotation amounts
  // used to move the field to/from the top of the containing word, plus
  // the actual field width as operand 6.
  unsigned BitShift    = (IsSubWord ? MI->getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
  DebugLoc DL          = MI->getDebugLoc();
  if (IsSubWord)
    BitSize = MI->getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.  For a full-width
  // ATOMIC_SWAP (no BinOpcode, not subword) the new value is just Src2,
  // and for full-width operations no rotation registers are needed.
  unsigned OrigVal       = MRI.createVirtualRegister(RC);
  unsigned OldVal        = MRI.createVirtualRegister(RC);
  unsigned NewVal        = (BinOpcode || IsSubWord ?
                            MRI.createVirtualRegister(RC) : Src2.getReg());
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  // On a retry, CS has already loaded the current memory value into Dest,
  // so the PHI reuses it instead of reloading.
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  if (IsSubWord)
    // Rotate the field to the top of the word so the operation affects
    // only the field's bits.
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    unsigned Tmp = MRI.createVirtualRegister(RC);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
      .addReg(RotatedOldVal).addOperand(Src2);
    if (BitSize <= 32)
      // XILF with the upper BitSize bits set.
      BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
        .addReg(Tmp).addImm(-1U << (32 - BitSize));
    else {
      // Use LCGR and add -1 to the result, which is more compact than
      // an XILF, XILH pair.
      unsigned Tmp2 = MRI.createVirtualRegister(RC);
      BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
      BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
        .addReg(Tmp2).addImm(-1);
    }
  } else if (BinOpcode)
    // A simple binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
      .addReg(RotatedOldVal).addOperand(Src2);
  else if (IsSubWord)
    // Partword ATOMIC_SWAPW (no BinOpcode): use RISBG to rotate Src2 into
    // position and use it to replace the field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  if (IsSubWord)
    // Rotate the updated word back to its original bit position.
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  // Try to commit the new value; CC is set to "not equal" if memory
  // changed underneath us, in which case Dest holds the fresh value.
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}
05047 
05048 // Implement EmitInstrWithCustomInserter for pseudo
05049 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
05050 // instruction that should be used to compare the current field with the
05051 // minimum or maximum value.  KeepOldMask is the BRC condition-code mask
05052 // for when the current field should be kept.  BitSize is the width of
05053 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
05054 MachineBasicBlock *
05055 SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
05056                                             MachineBasicBlock *MBB,
05057                                             unsigned CompareOpcode,
05058                                             unsigned KeepOldMask,
05059                                             unsigned BitSize) const {
05060   MachineFunction &MF = *MBB->getParent();
05061   const SystemZInstrInfo *TII =
05062       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
05063   MachineRegisterInfo &MRI = MF.getRegInfo();
05064   bool IsSubWord = (BitSize < 32);
05065 
05066   // Extract the operands.  Base can be a register or a frame index.
05067   unsigned Dest        = MI->getOperand(0).getReg();
05068   MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
05069   int64_t  Disp        = MI->getOperand(2).getImm();
05070   unsigned Src2        = MI->getOperand(3).getReg();
05071   unsigned BitShift    = (IsSubWord ? MI->getOperand(4).getReg() : 0);
05072   unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
05073   DebugLoc DL          = MI->getDebugLoc();
05074   if (IsSubWord)
05075     BitSize = MI->getOperand(6).getImm();
05076 
05077   // Subword operations use 32-bit registers.
05078   const TargetRegisterClass *RC = (BitSize <= 32 ?
05079                                    &SystemZ::GR32BitRegClass :
05080                                    &SystemZ::GR64BitRegClass);
05081   unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
05082   unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
05083 
05084   // Get the right opcodes for the displacement.
05085   LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
05086   CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
05087   assert(LOpcode && CSOpcode && "Displacement out of range");
05088 
05089   // Create virtual registers for temporary results.
05090   unsigned OrigVal       = MRI.createVirtualRegister(RC);
05091   unsigned OldVal        = MRI.createVirtualRegister(RC);
05092   unsigned NewVal        = MRI.createVirtualRegister(RC);
05093   unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
05094   unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
05095   unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
05096 
05097   // Insert 3 basic blocks for the loop.
05098   MachineBasicBlock *StartMBB  = MBB;
05099   MachineBasicBlock *DoneMBB   = splitBlockBefore(MI, MBB);
05100   MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB);
05101   MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
05102   MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
05103 
05104   //  StartMBB:
05105   //   ...
05106   //   %OrigVal     = L Disp(%Base)
05107   //   # fall through to LoopMMB
05108   MBB = StartMBB;
05109   BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
05110     .addOperand(Base).addImm(Disp).addReg(0);
05111   MBB->addSuccessor(LoopMBB);
05112 
05113   //  LoopMBB:
05114   //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
05115   //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
05116   //   CompareOpcode %RotatedOldVal, %Src2
05117   //   BRC KeepOldMask, UpdateMBB
05118   MBB = LoopMBB;
05119   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
05120     .addReg(OrigVal).addMBB(StartMBB)
05121     .addReg(Dest).addMBB(UpdateMBB);
05122   if (IsSubWord)
05123     BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
05124       .addReg(OldVal).addReg(BitShift).addImm(0);
05125   BuildMI(MBB, DL, TII->get(CompareOpcode))
05126     .addReg(RotatedOldVal).addReg(Src2);
05127   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
05128     .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
05129   MBB->addSuccessor(UpdateMBB);
05130   MBB->addSuccessor(UseAltMBB);
05131 
05132   //  UseAltMBB:
05133   //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
05134   //   # fall through to UpdateMMB
05135   MBB = UseAltMBB;
05136   if (IsSubWord)
05137     BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
05138       .addReg(RotatedOldVal).addReg(Src2)
05139       .addImm(32).addImm(31 + BitSize).addImm(0);
05140   MBB->